Introduce the distance data

This commit is contained in:
Clément Renault 2020-01-14 11:38:04 +01:00
parent 8acbdcbbad
commit 21c1473e0c
No known key found for this signature in database
GPG Key ID: 92ADA4E935E71FA4
2 changed files with 12 additions and 7 deletions

View File

@ -96,7 +96,7 @@ where
let mut bare_matches = Vec::new(); let mut bare_matches = Vec::new();
mk_arena!(arena); mk_arena!(arena);
for ((query, input), matches) in queries { for ((query, input, distance), matches) in queries {
let postings_list_view = PostingsListView::original(Rc::from(input), Rc::new(matches)); let postings_list_view = PostingsListView::original(Rc::from(input), Rc::new(matches));
// TODO optimize the filter by skipping docids that have already been seen // TODO optimize the filter by skipping docids that have already been seen
@ -109,7 +109,7 @@ where
let bare_match = BareMatch { let bare_match = BareMatch {
document_id, document_id,
query_index: u16::try_from(query.id).unwrap(), query_index: u16::try_from(query.id).unwrap(),
distance: 0, distance: distance,
is_exact: true, // TODO where can I find this info? is_exact: true, // TODO where can I find this info?
postings_list: posting_list_index, postings_list: posting_list_index,
}; };

View File

@ -266,7 +266,8 @@ pub fn create_query_tree(
Ok((operation, mapping)) Ok((operation, mapping))
} }
pub type Postings<'o, 'txn> = HashMap<(&'o Query, Vec<u8>), Cow<'txn, Set<DocIndex>>>; pub type Distance = u8;
pub type Postings<'o, 'txn> = HashMap<(&'o Query, Vec<u8>, Distance), Cow<'txn, Set<DocIndex>>>;
pub type Cache<'o, 'txn> = HashMap<&'o Operation, Cow<'txn, Set<DocumentId>>>; pub type Cache<'o, 'txn> = HashMap<&'o Operation, Cow<'txn, Set<DocumentId>>>;
pub struct QueryResult<'o, 'txn> { pub struct QueryResult<'o, 'txn> {
@ -372,7 +373,8 @@ pub fn traverse_query_tree<'o, 'txn>(
if *prefix && word.len() == 1 { if *prefix && word.len() == 1 {
let prefix = [word.as_bytes()[0], 0, 0, 0]; let prefix = [word.as_bytes()[0], 0, 0, 0];
let result = ctx.prefix_postings_lists.prefix_postings_list(reader, prefix)?.unwrap_or_default(); let result = ctx.prefix_postings_lists.prefix_postings_list(reader, prefix)?.unwrap_or_default();
postings.insert((query, word.clone().into_bytes()), result.matches); let distance = 0;
postings.insert((query, word.clone().into_bytes(), distance), result.matches);
result.docids result.docids
} else { } else {
let dfa = if *prefix { build_prefix_dfa(word) } else { build_dfa(word) }; let dfa = if *prefix { build_prefix_dfa(word) } else { build_dfa(word) };
@ -387,9 +389,10 @@ pub fn traverse_query_tree<'o, 'txn>(
let before = Instant::now(); let before = Instant::now();
let mut docids = Vec::new(); let mut docids = Vec::new();
while let Some(input) = stream.next() { while let Some(input) = stream.next() {
let distance = dfa.eval(input).to_u8();
if let Some(result) = ctx.postings_lists.postings_list(reader, input)? { if let Some(result) = ctx.postings_lists.postings_list(reader, input)? {
docids.extend_from_slice(&result.docids); docids.extend_from_slice(&result.docids);
postings.insert((query, input.to_owned()), result.matches); postings.insert((query, input.to_owned(), distance), result.matches);
} }
} }
println!("{:3$}docids extend ({:?}) took {:.02?}", "", docids.len(), before.elapsed(), depth * 2); println!("{:3$}docids extend ({:?}) took {:.02?}", "", docids.len(), before.elapsed(), depth * 2);
@ -414,9 +417,10 @@ pub fn traverse_query_tree<'o, 'txn>(
let mut docids = Vec::new(); let mut docids = Vec::new();
while let Some(input) = stream.next() { while let Some(input) = stream.next() {
let distance = dfa.eval(input).to_u8();
if let Some(result) = ctx.postings_lists.postings_list(reader, input)? { if let Some(result) = ctx.postings_lists.postings_list(reader, input)? {
docids.extend_from_slice(&result.docids); docids.extend_from_slice(&result.docids);
postings.insert((query, input.to_owned()), result.matches); postings.insert((query, input.to_owned(), distance), result.matches);
} }
} }
@ -446,7 +450,8 @@ pub fn traverse_query_tree<'o, 'txn>(
println!("{:2$}docids construction took {:.02?}", "", before.elapsed(), depth * 2); println!("{:2$}docids construction took {:.02?}", "", before.elapsed(), depth * 2);
let matches = Cow::Owned(SetBuf::new(matches).unwrap()); let matches = Cow::Owned(SetBuf::new(matches).unwrap());
postings.insert((query, vec![]), matches); let distance = 0;
postings.insert((query, vec![], distance), matches);
Cow::Owned(docids) Cow::Owned(docids)
} else { } else {