Use different algorithms for different document ratios

This commit is contained in:
Clément Renault 2020-01-14 17:10:35 +01:00
parent 6edb460bea
commit 54dacb362d
No known key found for this signature in database
GPG Key ID: 92ADA4E935E71FA4

View File

@ -94,37 +94,65 @@ where
let before = Instant::now(); let before = Instant::now();
let docidslen = docids.len() as f32;
let mut bare_matches = Vec::new(); let mut bare_matches = Vec::new();
mk_arena!(arena); mk_arena!(arena);
for ((query, input, distance), matches) in queries { for ((query, input, distance), matches) in queries {
let postings_list_view = PostingsListView::original(Rc::from(input), Rc::new(matches)); let postings_list_view = PostingsListView::original(Rc::from(input), Rc::new(matches));
let mut offset = 0; let pllen = postings_list_view.len() as f32;
for id in docids.as_slice() {
let di = DocIndex { document_id: *id, ..DocIndex::default() };
let pos = exponential_search(&postings_list_view[offset..], &di).unwrap_or_else(|x| x);
let group = postings_list_view[offset + pos..] if docidslen / pllen >= 0.8 {
.linear_group_by_key(|m| m.document_id) let mut offset = 0;
.next() for matches in postings_list_view.linear_group_by_key(|m| m.document_id) {
.filter(|matches| matches[0].document_id == *id); let document_id = matches[0].document_id;
if docids.contains(&document_id) {
let range = postings_list_view.range(offset, matches.len());
let posting_list_index = arena.add(range);
offset += pos; let bare_match = BareMatch {
document_id,
query_index: query.id,
distance,
is_exact: true, // TODO where can I find this info?
postings_list: posting_list_index,
};
if let Some(matches) = group { bare_matches.push(bare_match);
let range = postings_list_view.range(pos, matches.len()); }
let posting_list_index = arena.add(range);
let bare_match = BareMatch { offset += matches.len();
document_id: *id, }
query_index: query.id,
distance: distance,
is_exact: true, // TODO where can I find this info?
postings_list: posting_list_index,
};
bare_matches.push(bare_match); } else {
let mut offset = 0;
for id in docids.as_slice() {
let di = DocIndex { document_id: *id, ..DocIndex::default() };
let pos = exponential_search(&postings_list_view[offset..], &di).unwrap_or_else(|x| x);
offset += pos;
let group = postings_list_view[offset..]
.linear_group_by_key(|m| m.document_id)
.next()
.filter(|matches| matches[0].document_id == *id);
if let Some(matches) = group {
let range = postings_list_view.range(offset, matches.len());
let posting_list_index = arena.add(range);
let bare_match = BareMatch {
document_id: *id,
query_index: query.id,
distance,
is_exact: true, // TODO where can I find this info?
postings_list: posting_list_index,
};
bare_matches.push(bare_match);
}
} }
} }
} }