2019-12-11 17:02:10 +01:00
|
|
|
use compact_arena::SmallArena;
|
2019-07-07 19:55:15 +02:00
|
|
|
use sdset::SetBuf;
|
2019-12-13 13:22:54 +01:00
|
|
|
use crate::DocIndex;
|
2019-12-11 17:02:10 +01:00
|
|
|
use crate::bucket_sort::{SimpleMatch, BareMatch, QueryWordAutomaton, PostingsListView};
|
2019-12-13 13:22:54 +01:00
|
|
|
use crate::reordered_attrs::ReorderedAttrs;
|
2019-07-07 19:55:15 +02:00
|
|
|
|
2019-12-11 17:02:10 +01:00
|
|
|
pub struct RawDocument<'a, 'tag> {
|
|
|
|
pub id: crate::DocumentId,
|
2019-12-13 12:38:54 +01:00
|
|
|
pub bare_matches: &'a mut [BareMatch<'tag>],
|
2019-12-11 17:02:10 +01:00
|
|
|
pub processed_matches: Vec<SimpleMatch>,
|
|
|
|
/// The list of minimum `distance` found
|
|
|
|
pub processed_distances: Vec<Option<u8>>,
|
2019-12-13 11:14:12 +01:00
|
|
|
/// Does this document contains a field
|
|
|
|
/// with one word that is exactly matching
|
|
|
|
pub contains_one_word_field: bool,
|
2019-07-07 19:55:15 +02:00
|
|
|
}
|
|
|
|
|
2019-12-11 17:02:10 +01:00
|
|
|
impl<'a, 'tag> RawDocument<'a, 'tag> {
|
|
|
|
pub fn new<'txn>(
|
2019-12-13 12:38:54 +01:00
|
|
|
bare_matches: &'a mut [BareMatch<'tag>],
|
2019-12-11 17:02:10 +01:00
|
|
|
postings_lists: &mut SmallArena<'tag, PostingsListView<'txn>>,
|
2019-12-13 13:22:54 +01:00
|
|
|
searchable_attrs: Option<&ReorderedAttrs>,
|
2020-01-13 14:36:06 +01:00
|
|
|
) -> RawDocument<'a, 'tag>
|
2019-12-11 17:02:10 +01:00
|
|
|
{
|
2019-12-13 13:22:54 +01:00
|
|
|
if let Some(reordered_attrs) = searchable_attrs {
|
|
|
|
for bm in bare_matches.iter() {
|
|
|
|
let postings_list = &postings_lists[bm.postings_list];
|
|
|
|
|
|
|
|
let mut rewritten = Vec::new();
|
|
|
|
for di in postings_list.iter() {
|
|
|
|
if let Some(attribute) = reordered_attrs.get(di.attribute) {
|
|
|
|
rewritten.push(DocIndex { attribute, ..*di });
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
let new_postings = SetBuf::from_dirty(rewritten);
|
|
|
|
postings_lists[bm.postings_list].rewrite_with(new_postings);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2019-12-13 12:38:54 +01:00
|
|
|
bare_matches.sort_unstable_by_key(|m| m.query_index);
|
2019-12-11 17:02:10 +01:00
|
|
|
|
2020-01-13 14:36:06 +01:00
|
|
|
RawDocument {
|
2019-12-13 12:38:54 +01:00
|
|
|
id: bare_matches[0].document_id,
|
|
|
|
bare_matches,
|
2019-12-11 17:02:10 +01:00
|
|
|
processed_matches: Vec::new(),
|
|
|
|
processed_distances: Vec::new(),
|
2019-12-13 11:14:12 +01:00
|
|
|
contains_one_word_field: false,
|
2020-01-13 14:36:06 +01:00
|
|
|
}
|
2019-07-07 19:55:15 +02:00
|
|
|
}
|
|
|
|
}
|