From 6ca3579cc0252a99c1081049b09dbaa4bbf4aedf Mon Sep 17 00:00:00 2001 From: Kerollmops Date: Wed, 10 Jun 2020 21:35:01 +0200 Subject: [PATCH] Add more time debug measurements --- src/best_proximity.rs | 6 ++++++ src/lib.rs | 26 ++++++++++++++++++++++---- 2 files changed, 28 insertions(+), 4 deletions(-) diff --git a/src/best_proximity.rs b/src/best_proximity.rs index e6fae9f81..572b5a06e 100644 --- a/src/best_proximity.rs +++ b/src/best_proximity.rs @@ -1,4 +1,6 @@ use std::cmp; +use std::time::Instant; + use pathfinding::directed::dijkstra::dijkstra; const ONE_ATTRIBUTE: u32 = 1000; @@ -95,6 +97,8 @@ impl Iterator for BestProximity { fn next(&mut self) -> Option { let mut output: Option<(u32, Vec>)> = None; + let before = Instant::now(); + loop { let result = dijkstra( &Path::new(&self.positions)?, @@ -128,6 +132,8 @@ impl Iterator for BestProximity { } } + eprintln!("BestProximity::next() took {:.02?}", before.elapsed()); + if let Some((proximity, _)) = output.as_ref() { self.best_proximity = proximity + 1; } diff --git a/src/lib.rs b/src/lib.rs index e160d13b7..79e78b25a 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -4,6 +4,7 @@ mod query_tokens; use std::borrow::Cow; use std::collections::HashMap; use std::hash::BuildHasherDefault; +use std::time::Instant; use cow_utils::CowUtils; use fst::{IntoStreamer, Streamer}; @@ -90,6 +91,7 @@ impl Index { let mut words_positions = Vec::new(); let mut positions = Vec::new(); + let before = Instant::now(); for (word, is_prefix, dfa) in dfas { let mut count = 0; @@ -117,17 +119,24 @@ impl Index { positions.push(union_positions.iter().collect()); } + eprintln!("Retrieving words positions took {:.02?}", before.elapsed()); + let mut documents = Vec::new(); - for (_proximity, positions) in BestProximity::new(positions) { + 'outer: for (proximity, positions) in BestProximity::new(positions) { + let same_prox_before = Instant::now(); let mut same_proximity_union = RoaringBitmap::default(); for positions in positions { + let before = Instant::now(); + let mut intersect_docids: Option = None; - for ((word, is_prefix, dfa), pos) in words_positions.iter().zip(positions) { + for ((word, is_prefix, dfa), pos) in words_positions.iter().zip(positions.clone()) { let mut count = 0; let mut union_docids = RoaringBitmap::default(); + let before = Instant::now(); + // TODO re-enable the prefixes system if false && word.len() <= 4 && *is_prefix { let mut key = word.as_bytes()[..word.len().min(5)].to_vec(); @@ -151,24 +160,33 @@ impl Index { } } - let _ = count; + let before_intersect = Instant::now(); match &mut intersect_docids { Some(left) => left.intersect_with(&union_docids), None => intersect_docids = Some(union_docids), } + + eprintln!("retrieving {} word took {:.02?} and took {:.02?} to intersect", + count, before.elapsed(), before_intersect.elapsed()); } + eprintln!("for proximity {:?} {:?} we took {:.02?} to find {} documents", + proximity, positions, before.elapsed(), + intersect_docids.as_ref().map_or(0, |rb| rb.len())); + if let Some(intersect_docids) = intersect_docids { same_proximity_union.union_with(&intersect_docids); } } + eprintln!("proximity {} took a total of {:.02?}", proximity, same_prox_before.elapsed()); + documents.push(same_proximity_union); // We found enough documents we can stop here if documents.iter().map(RoaringBitmap::len).sum::() >= 20 { - break + break 'outer; } }