diff --git a/src/lib.rs b/src/lib.rs index 113203adb..a4eecbcaf 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -228,20 +228,32 @@ impl Index { for positions in positions { let before = Instant::now(); - let mut intersect_docids: Option = None; - for (word, pos) in positions.iter().enumerate() { - let before = Instant::now(); - let union_docids = union_cache.entry((word, *pos)).or_insert_with(|| unions_word_pos(word, *pos)); + let mut to_intersect: Vec<_> = positions.iter() + .enumerate() + .map(|(word, pos)| { + let docids = union_cache.entry((word, *pos)).or_insert_with(|| unions_word_pos(word, *pos)); + // FIXME don't clone here + (docids.len(), docids.clone()) + }) + .collect(); - let before_intersect = Instant::now(); - match &mut intersect_docids { - Some(left) => left.intersect_with(&union_docids), - None => intersect_docids = Some(union_docids.clone()), - } + to_intersect.sort_unstable_by_key(|(l, _)| *l); + let elapsed_retrieving = before.elapsed(); - eprintln!("retrieving words took {:.02?} and took {:.02?} to intersect", - before.elapsed(), before_intersect.elapsed()); - } + let before_intersect = Instant::now(); + let intersect_docids: Option = to_intersect.into_iter() + .fold(None, |acc, (_, union_docids)| { + match acc { + Some(mut left) => { + left.intersect_with(&union_docids); + Some(left) + }, + None => Some(union_docids.clone()), + } + }); + + eprintln!("retrieving words took {:.02?} and took {:.02?} to intersect", + elapsed_retrieving, before_intersect.elapsed()); eprintln!("for proximity {:?} {:?} we took {:.02?} to find {} documents", proximity, positions, before.elapsed(),