5158: Indexer edition 2024 fix facet fst r=Kerollmops a=ManyTheFish

# Pull Request
Fix a regression in the new indexer; when several filterable attributes containing strings were set, all the field IDs were shifted, and the last one was overwriting the previous FST.

## What does this PR do?
- Add a test reproducing the bug
- fix the bug

Co-authored-by: ManyTheFish <many@meilisearch.com>
This commit is contained in:
meili-bors[bot] 2024-12-12 14:14:44 +00:00 committed by GitHub
commit 7a95fed23f
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 120 additions and 5 deletions

View File

@ -57,6 +57,116 @@ async fn simple_facet_search() {
assert_eq!(response["facetHits"].as_array().unwrap().len(), 1);
}
#[actix_rt::test]
async fn simple_facet_search_on_movies() {
let server = Server::new().await;
let index = server.index("test");
let documents = json!([
{
"id": 1,
"title": "Carol",
"genres": [
"Romance",
"Drama"
],
"color": [
"red"
],
"platforms": [
"MacOS",
"Linux",
"Windows"
]
},
{
"id": 2,
"title": "Wonder Woman",
"genres": [
"Action",
"Adventure"
],
"color": [
"green"
],
"platforms": [
"MacOS"
]
},
{
"id": 3,
"title": "Life of Pi",
"genres": [
"Adventure",
"Drama"
],
"color": [
"blue"
],
"platforms": [
"Windows"
]
},
{
"id": 4,
"title": "Mad Max: Fury Road",
"genres": [
"Adventure",
"Science Fiction"
],
"color": [
"red"
],
"platforms": [
"MacOS",
"Linux"
]
},
{
"id": 5,
"title": "Moana",
"genres": [
"Fantasy",
"Action"
],
"color": [
"red"
],
"platforms": [
"Windows"
]
},
{
"id": 6,
"title": "Philadelphia",
"genres": [
"Drama"
],
"color": [
"blue"
],
"platforms": [
"MacOS",
"Linux",
"Windows"
]
}
]);
let (response, code) =
index.update_settings_filterable_attributes(json!(["genres", "color"])).await;
assert_eq!(202, code, "{:?}", response);
index.wait_task(response.uid()).await;
let (response, _code) = index.add_documents(documents, None).await;
index.wait_task(response.uid()).await;
let (response, code) =
index.facet_search(json!({"facetQuery": "", "facetName": "genres", "q": "" })).await;
assert_eq!(code, 200, "{}", response);
snapshot!(response["facetHits"], @r###"[{"value":"Action","count":2},{"value":"Adventure","count":3},{"value":"Drama","count":3},{"value":"Fantasy","count":1},{"value":"Romance","count":1},{"value":"Science Fiction","count":1}]"###);
}
#[actix_rt::test]
async fn advanced_facet_search() {
let server = Server::new().await;

View File

@ -103,6 +103,8 @@ impl<'indexer> FacetSearchBuilder<'indexer> {
#[tracing::instrument(level = "trace", skip_all, target = "indexing::facet_fst")]
pub fn merge_and_write(self, index: &Index, wtxn: &mut RwTxn, rtxn: &RoTxn) -> Result<()> {
tracing::trace!("merge facet strings for facet search: {:?}", self.registered_facets);
let reader = self.normalized_facet_string_docids_sorter.into_reader_cursors()?;
let mut builder = grenad::MergerBuilder::new(MergeDeladdBtreesetString);
builder.extend(reader);
@ -118,12 +120,15 @@ impl<'indexer> FacetSearchBuilder<'indexer> {
BEU16StrCodec::bytes_decode(key).map_err(heed::Error::Encoding)?;
if current_field_id != Some(field_id) {
if let Some(fst_merger_builder) = fst_merger_builder {
if let (Some(current_field_id), Some(fst_merger_builder)) =
(current_field_id, fst_merger_builder)
{
let mmap = fst_merger_builder.build(&mut callback)?;
index
.facet_id_string_fst
.remap_data_type::<Bytes>()
.put(wtxn, &field_id, &mmap)?;
index.facet_id_string_fst.remap_data_type::<Bytes>().put(
wtxn,
&current_field_id,
&mmap,
)?;
}
fst = index.facet_id_string_fst.get(rtxn, &field_id)?;