From 4f7a7ea0bba2a5aa17946b0d9255b8540ede668f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Thu, 9 Jan 2020 16:16:42 +0100 Subject: [PATCH] Faster intersection group by --- meilisearch-core/src/bucket_sort.rs | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/meilisearch-core/src/bucket_sort.rs b/meilisearch-core/src/bucket_sort.rs index 113359501..ba024da57 100644 --- a/meilisearch-core/src/bucket_sort.rs +++ b/meilisearch-core/src/bucket_sort.rs @@ -15,7 +15,7 @@ use levenshtein_automata::DFA; use log::debug; use meilisearch_tokenizer::{is_cjk, split_query_string}; use meilisearch_types::DocIndex; -use sdset::{Set, SetBuf, SetOperation}; +use sdset::{Set, SetBuf}; use slice_group_by::{GroupBy, GroupByMut}; use crate::automaton::NGRAMS; @@ -64,18 +64,15 @@ where let operation = create_query_tree(reader, &context, query).unwrap(); println!("{:?}", operation); - let QueryResult { docids, queries } = traverse_query_tree(reader, &context, &operation).unwrap(); println!("found {} documents", docids.len()); println!("number of postings {:?}", queries.len()); let before = Instant::now(); for ((query, input), matches) in queries { - let op = sdset::duo::IntersectionByKey::new(&matches, &docids, |d| d.document_id, Clone::clone); - let buf: SetBuf = op.into_set_buf(); - if !buf.is_empty() { - let input = std::str::from_utf8(&input); - println!("({:?}, {:?}) gives {} matches", query, input, buf.len()); + // TODO optimize the filter by skipping docids that have already been seen + for matches in matches.linear_group_by_key(|m| m.document_id).filter(|ms| docids.contains(&ms[0].document_id)) { + // ... } }