Rename raw_matches to bare_matches

This commit is contained in:
Clément Renault 2019-12-13 12:38:54 +01:00
parent 48e8778881
commit a4dd033ccf
No known key found for this signature in database
GPG Key ID: 0151CDAB43460DAE
8 changed files with 27 additions and 27 deletions

View File

@ -82,9 +82,9 @@ where
let before_raw_documents_building = Instant::now(); let before_raw_documents_building = Instant::now();
let mut prefiltered_documents = 0; let mut prefiltered_documents = 0;
let mut raw_documents = Vec::new(); let mut raw_documents = Vec::new();
for raw_matches in bare_matches.linear_group_by_key_mut(|sm| sm.document_id) { for bare_matches in bare_matches.linear_group_by_key_mut(|sm| sm.document_id) {
prefiltered_documents += 1; prefiltered_documents += 1;
if let Some(raw_document) = RawDocument::new(raw_matches, &automatons, &mut arena) { if let Some(raw_document) = RawDocument::new(bare_matches, &automatons, &mut arena) {
raw_documents.push(raw_document); raw_documents.push(raw_document);
} }
} }
@ -180,9 +180,9 @@ where
let before_raw_documents_building = Instant::now(); let before_raw_documents_building = Instant::now();
let mut prefiltered_documents = 0; let mut prefiltered_documents = 0;
let mut raw_documents = Vec::new(); let mut raw_documents = Vec::new();
for raw_matches in bare_matches.linear_group_by_key_mut(|sm| sm.document_id) { for bare_matches in bare_matches.linear_group_by_key_mut(|sm| sm.document_id) {
prefiltered_documents += 1; prefiltered_documents += 1;
if let Some(raw_document) = RawDocument::new(raw_matches, &automatons, &mut arena) { if let Some(raw_document) = RawDocument::new(bare_matches, &automatons, &mut arena) {
raw_documents.push(raw_document); raw_documents.push(raw_document);
} }
} }

View File

@ -2,7 +2,7 @@ use std::cmp::Ordering;
use slice_group_by::GroupBy; use slice_group_by::GroupBy;
use crate::{RawDocument, MResult}; use crate::{RawDocument, MResult};
use crate::bucket_sort::SimpleMatch; use crate::bucket_sort::SimpleMatch;
use super::{Criterion, Context, ContextMut, prepare_raw_matches}; use super::{Criterion, Context, ContextMut, prepare_bare_matches};
pub struct Attribute; pub struct Attribute;
@ -15,7 +15,7 @@ impl Criterion for Attribute {
documents: &mut [RawDocument<'r, 'tag>], documents: &mut [RawDocument<'r, 'tag>],
) -> MResult<()> ) -> MResult<()>
{ {
prepare_raw_matches(documents, ctx.postings_lists, ctx.query_enhancer); prepare_bare_matches(documents, ctx.postings_lists, ctx.query_enhancer);
Ok(()) Ok(())
} }

View File

@ -21,11 +21,11 @@ impl Criterion for Exact {
let reader = ctx.reader; let reader = ctx.reader;
'documents: for doc in documents { 'documents: for doc in documents {
doc.raw_matches.sort_unstable_by_key(|bm| (bm.query_index, Reverse(bm.is_exact))); doc.bare_matches.sort_unstable_by_key(|bm| (bm.query_index, Reverse(bm.is_exact)));
// mark the document if we find a "one word field" that matches // mark the document if we find a "one word field" that matches
let mut fields_counts = HashMap::new(); let mut fields_counts = HashMap::new();
for group in doc.raw_matches.linear_group_by_key(|bm| bm.query_index) { for group in doc.bare_matches.linear_group_by_key(|bm| bm.query_index) {
for group in group.linear_group_by_key(|bm| bm.is_exact) { for group in group.linear_group_by_key(|bm| bm.is_exact) {
if !group[0].is_exact { break } if !group[0].is_exact { break }
@ -70,8 +70,8 @@ impl Criterion for Exact {
lhs.contains_one_word_field.cmp(&rhs.contains_one_word_field).reverse() lhs.contains_one_word_field.cmp(&rhs.contains_one_word_field).reverse()
// if not, which document contains the most exact words // if not, which document contains the most exact words
.then_with(|| { .then_with(|| {
let lhs = sum_exact_query_words(&lhs.raw_matches); let lhs = sum_exact_query_words(&lhs.bare_matches);
let rhs = sum_exact_query_words(&rhs.raw_matches); let rhs = sum_exact_query_words(&rhs.bare_matches);
lhs.cmp(&rhs).reverse() lhs.cmp(&rhs).reverse()
}) })
} }

View File

@ -145,7 +145,7 @@ fn prepare_query_distances<'a, 'tag, 'txn>(
if !document.processed_distances.is_empty() { continue } if !document.processed_distances.is_empty() { continue }
let mut processed = Vec::new(); let mut processed = Vec::new();
for m in document.raw_matches.iter() { for m in document.bare_matches.iter() {
if postings_lists[m.postings_list].is_empty() { continue } if postings_lists[m.postings_list].is_empty() { continue }
let range = query_enhancer.replacement(m.query_index as u32); let range = query_enhancer.replacement(m.query_index as u32);
@ -166,7 +166,7 @@ fn prepare_query_distances<'a, 'tag, 'txn>(
} }
} }
fn prepare_raw_matches<'a, 'tag, 'txn>( fn prepare_bare_matches<'a, 'tag, 'txn>(
documents: &mut [RawDocument<'a, 'tag>], documents: &mut [RawDocument<'a, 'tag>],
postings_lists: &mut SmallArena<'tag, PostingsListView<'txn>>, postings_lists: &mut SmallArena<'tag, PostingsListView<'txn>>,
query_enhancer: &QueryEnhancer, query_enhancer: &QueryEnhancer,
@ -175,7 +175,7 @@ fn prepare_raw_matches<'a, 'tag, 'txn>(
if !document.processed_matches.is_empty() { continue } if !document.processed_matches.is_empty() { continue }
let mut processed = Vec::new(); let mut processed = Vec::new();
for m in document.raw_matches.iter() { for m in document.bare_matches.iter() {
let postings_list = &postings_lists[m.postings_list]; let postings_list = &postings_lists[m.postings_list];
processed.reserve(postings_list.len()); processed.reserve(postings_list.len());
for di in postings_list.as_ref() { for di in postings_list.as_ref() {

View File

@ -2,7 +2,7 @@ use std::cmp::{self, Ordering};
use slice_group_by::GroupBy; use slice_group_by::GroupBy;
use crate::bucket_sort::{SimpleMatch}; use crate::bucket_sort::{SimpleMatch};
use crate::{RawDocument, MResult}; use crate::{RawDocument, MResult};
use super::{Criterion, Context, ContextMut, prepare_raw_matches}; use super::{Criterion, Context, ContextMut, prepare_bare_matches};
const MAX_DISTANCE: u16 = 8; const MAX_DISTANCE: u16 = 8;
@ -17,7 +17,7 @@ impl Criterion for Proximity {
documents: &mut [RawDocument<'r, 'tag>], documents: &mut [RawDocument<'r, 'tag>],
) -> MResult<()> ) -> MResult<()>
{ {
prepare_raw_matches(documents, ctx.postings_lists, ctx.query_enhancer); prepare_bare_matches(documents, ctx.postings_lists, ctx.query_enhancer);
Ok(()) Ok(())
} }

View File

@ -2,7 +2,7 @@ use std::cmp::Ordering;
use slice_group_by::GroupBy; use slice_group_by::GroupBy;
use crate::bucket_sort::SimpleMatch; use crate::bucket_sort::SimpleMatch;
use crate::{RawDocument, MResult}; use crate::{RawDocument, MResult};
use super::{Criterion, Context, ContextMut, prepare_raw_matches}; use super::{Criterion, Context, ContextMut, prepare_bare_matches};
pub struct WordsPosition; pub struct WordsPosition;
@ -15,7 +15,7 @@ impl Criterion for WordsPosition {
documents: &mut [RawDocument<'r, 'tag>], documents: &mut [RawDocument<'r, 'tag>],
) -> MResult<()> ) -> MResult<()>
{ {
prepare_raw_matches(documents, ctx.postings_lists, ctx.query_enhancer); prepare_bare_matches(documents, ctx.postings_lists, ctx.query_enhancer);
Ok(()) Ok(())
} }

View File

@ -48,7 +48,7 @@ impl Document {
arena: &SmallArena<'tag, PostingsListView<'txn>>, arena: &SmallArena<'tag, PostingsListView<'txn>>,
) -> Document ) -> Document
{ {
let highlights = raw_document.raw_matches.iter().flat_map(|sm| { let highlights = raw_document.bare_matches.iter().flat_map(|sm| {
let postings_list = &arena[sm.postings_list]; let postings_list = &arena[sm.postings_list];
let input = postings_list.input(); let input = postings_list.input();
let query = &automatons[sm.query_index as usize].query; let query = &automatons[sm.query_index as usize].query;

View File

@ -5,7 +5,7 @@ use crate::bucket_sort::{SimpleMatch, BareMatch, QueryWordAutomaton, PostingsLis
pub struct RawDocument<'a, 'tag> { pub struct RawDocument<'a, 'tag> {
pub id: crate::DocumentId, pub id: crate::DocumentId,
pub raw_matches: &'a mut [BareMatch<'tag>], pub bare_matches: &'a mut [BareMatch<'tag>],
pub processed_matches: Vec<SimpleMatch>, pub processed_matches: Vec<SimpleMatch>,
/// The list of minimum `distance` found /// The list of minimum `distance` found
pub processed_distances: Vec<Option<u8>>, pub processed_distances: Vec<Option<u8>>,
@ -16,21 +16,21 @@ pub struct RawDocument<'a, 'tag> {
impl<'a, 'tag> RawDocument<'a, 'tag> { impl<'a, 'tag> RawDocument<'a, 'tag> {
pub fn new<'txn>( pub fn new<'txn>(
raw_matches: &'a mut [BareMatch<'tag>], bare_matches: &'a mut [BareMatch<'tag>],
automatons: &[QueryWordAutomaton], automatons: &[QueryWordAutomaton],
postings_lists: &mut SmallArena<'tag, PostingsListView<'txn>>, postings_lists: &mut SmallArena<'tag, PostingsListView<'txn>>,
) -> Option<RawDocument<'a, 'tag>> ) -> Option<RawDocument<'a, 'tag>>
{ {
raw_matches.sort_unstable_by_key(|m| m.query_index); bare_matches.sort_unstable_by_key(|m| m.query_index);
let mut previous_word = None; let mut previous_word = None;
for i in 0..raw_matches.len() { for i in 0..bare_matches.len() {
let a = &raw_matches[i]; let a = &bare_matches[i];
let auta = &automatons[a.query_index as usize]; let auta = &automatons[a.query_index as usize];
match auta.phrase_query { match auta.phrase_query {
Some((0, _)) => { Some((0, _)) => {
let b = match raw_matches.get(i + 1) { let b = match bare_matches.get(i + 1) {
Some(b) => b, Some(b) => b,
None => { None => {
postings_lists[a.postings_list].rewrite_with(SetBuf::default()); postings_lists[a.postings_list].rewrite_with(SetBuf::default());
@ -77,13 +77,13 @@ impl<'a, 'tag> RawDocument<'a, 'tag> {
} }
} }
if raw_matches.iter().all(|rm| postings_lists[rm.postings_list].is_empty()) { if bare_matches.iter().all(|rm| postings_lists[rm.postings_list].is_empty()) {
return None return None
} }
Some(RawDocument { Some(RawDocument {
id: raw_matches[0].document_id, id: bare_matches[0].document_id,
raw_matches, bare_matches,
processed_matches: Vec::new(), processed_matches: Vec::new(),
processed_distances: Vec::new(), processed_distances: Vec::new(),
contains_one_word_field: false, contains_one_word_field: false,