Rename raw_matches into bare_matches

This commit is contained in:
Clément Renault 2019-12-13 12:38:54 +01:00
parent 48e8778881
commit a4dd033ccf
No known key found for this signature in database
GPG Key ID: 0151CDAB43460DAE
8 changed files with 27 additions and 27 deletions

View File

@ -82,9 +82,9 @@ where
let before_raw_documents_building = Instant::now();
let mut prefiltered_documents = 0;
let mut raw_documents = Vec::new();
for raw_matches in bare_matches.linear_group_by_key_mut(|sm| sm.document_id) {
for bare_matches in bare_matches.linear_group_by_key_mut(|sm| sm.document_id) {
prefiltered_documents += 1;
if let Some(raw_document) = RawDocument::new(raw_matches, &automatons, &mut arena) {
if let Some(raw_document) = RawDocument::new(bare_matches, &automatons, &mut arena) {
raw_documents.push(raw_document);
}
}
@ -180,9 +180,9 @@ where
let before_raw_documents_building = Instant::now();
let mut prefiltered_documents = 0;
let mut raw_documents = Vec::new();
for raw_matches in bare_matches.linear_group_by_key_mut(|sm| sm.document_id) {
for bare_matches in bare_matches.linear_group_by_key_mut(|sm| sm.document_id) {
prefiltered_documents += 1;
if let Some(raw_document) = RawDocument::new(raw_matches, &automatons, &mut arena) {
if let Some(raw_document) = RawDocument::new(bare_matches, &automatons, &mut arena) {
raw_documents.push(raw_document);
}
}

View File

@ -2,7 +2,7 @@ use std::cmp::Ordering;
use slice_group_by::GroupBy;
use crate::{RawDocument, MResult};
use crate::bucket_sort::SimpleMatch;
use super::{Criterion, Context, ContextMut, prepare_raw_matches};
use super::{Criterion, Context, ContextMut, prepare_bare_matches};
pub struct Attribute;
@ -15,7 +15,7 @@ impl Criterion for Attribute {
documents: &mut [RawDocument<'r, 'tag>],
) -> MResult<()>
{
prepare_raw_matches(documents, ctx.postings_lists, ctx.query_enhancer);
prepare_bare_matches(documents, ctx.postings_lists, ctx.query_enhancer);
Ok(())
}

View File

@ -21,11 +21,11 @@ impl Criterion for Exact {
let reader = ctx.reader;
'documents: for doc in documents {
doc.raw_matches.sort_unstable_by_key(|bm| (bm.query_index, Reverse(bm.is_exact)));
doc.bare_matches.sort_unstable_by_key(|bm| (bm.query_index, Reverse(bm.is_exact)));
// mark the document if we find a "one word field" that matches
let mut fields_counts = HashMap::new();
for group in doc.raw_matches.linear_group_by_key(|bm| bm.query_index) {
for group in doc.bare_matches.linear_group_by_key(|bm| bm.query_index) {
for group in group.linear_group_by_key(|bm| bm.is_exact) {
if !group[0].is_exact { break }
@ -70,8 +70,8 @@ impl Criterion for Exact {
lhs.contains_one_word_field.cmp(&rhs.contains_one_word_field).reverse()
// if not, which document contains the most exact words
.then_with(|| {
let lhs = sum_exact_query_words(&lhs.raw_matches);
let rhs = sum_exact_query_words(&rhs.raw_matches);
let lhs = sum_exact_query_words(&lhs.bare_matches);
let rhs = sum_exact_query_words(&rhs.bare_matches);
lhs.cmp(&rhs).reverse()
})
}

View File

@ -145,7 +145,7 @@ fn prepare_query_distances<'a, 'tag, 'txn>(
if !document.processed_distances.is_empty() { continue }
let mut processed = Vec::new();
for m in document.raw_matches.iter() {
for m in document.bare_matches.iter() {
if postings_lists[m.postings_list].is_empty() { continue }
let range = query_enhancer.replacement(m.query_index as u32);
@ -166,7 +166,7 @@ fn prepare_query_distances<'a, 'tag, 'txn>(
}
}
fn prepare_raw_matches<'a, 'tag, 'txn>(
fn prepare_bare_matches<'a, 'tag, 'txn>(
documents: &mut [RawDocument<'a, 'tag>],
postings_lists: &mut SmallArena<'tag, PostingsListView<'txn>>,
query_enhancer: &QueryEnhancer,
@ -175,7 +175,7 @@ fn prepare_raw_matches<'a, 'tag, 'txn>(
if !document.processed_matches.is_empty() { continue }
let mut processed = Vec::new();
for m in document.raw_matches.iter() {
for m in document.bare_matches.iter() {
let postings_list = &postings_lists[m.postings_list];
processed.reserve(postings_list.len());
for di in postings_list.as_ref() {

View File

@ -2,7 +2,7 @@ use std::cmp::{self, Ordering};
use slice_group_by::GroupBy;
use crate::bucket_sort::{SimpleMatch};
use crate::{RawDocument, MResult};
use super::{Criterion, Context, ContextMut, prepare_raw_matches};
use super::{Criterion, Context, ContextMut, prepare_bare_matches};
const MAX_DISTANCE: u16 = 8;
@ -17,7 +17,7 @@ impl Criterion for Proximity {
documents: &mut [RawDocument<'r, 'tag>],
) -> MResult<()>
{
prepare_raw_matches(documents, ctx.postings_lists, ctx.query_enhancer);
prepare_bare_matches(documents, ctx.postings_lists, ctx.query_enhancer);
Ok(())
}

View File

@ -2,7 +2,7 @@ use std::cmp::Ordering;
use slice_group_by::GroupBy;
use crate::bucket_sort::SimpleMatch;
use crate::{RawDocument, MResult};
use super::{Criterion, Context, ContextMut, prepare_raw_matches};
use super::{Criterion, Context, ContextMut, prepare_bare_matches};
pub struct WordsPosition;
@ -15,7 +15,7 @@ impl Criterion for WordsPosition {
documents: &mut [RawDocument<'r, 'tag>],
) -> MResult<()>
{
prepare_raw_matches(documents, ctx.postings_lists, ctx.query_enhancer);
prepare_bare_matches(documents, ctx.postings_lists, ctx.query_enhancer);
Ok(())
}

View File

@ -48,7 +48,7 @@ impl Document {
arena: &SmallArena<'tag, PostingsListView<'txn>>,
) -> Document
{
let highlights = raw_document.raw_matches.iter().flat_map(|sm| {
let highlights = raw_document.bare_matches.iter().flat_map(|sm| {
let postings_list = &arena[sm.postings_list];
let input = postings_list.input();
let query = &automatons[sm.query_index as usize].query;

View File

@ -5,7 +5,7 @@ use crate::bucket_sort::{SimpleMatch, BareMatch, QueryWordAutomaton, PostingsLis
pub struct RawDocument<'a, 'tag> {
pub id: crate::DocumentId,
pub raw_matches: &'a mut [BareMatch<'tag>],
pub bare_matches: &'a mut [BareMatch<'tag>],
pub processed_matches: Vec<SimpleMatch>,
/// The list of minimum `distance` found
pub processed_distances: Vec<Option<u8>>,
@ -16,21 +16,21 @@ pub struct RawDocument<'a, 'tag> {
impl<'a, 'tag> RawDocument<'a, 'tag> {
pub fn new<'txn>(
raw_matches: &'a mut [BareMatch<'tag>],
bare_matches: &'a mut [BareMatch<'tag>],
automatons: &[QueryWordAutomaton],
postings_lists: &mut SmallArena<'tag, PostingsListView<'txn>>,
) -> Option<RawDocument<'a, 'tag>>
{
raw_matches.sort_unstable_by_key(|m| m.query_index);
bare_matches.sort_unstable_by_key(|m| m.query_index);
let mut previous_word = None;
for i in 0..raw_matches.len() {
let a = &raw_matches[i];
for i in 0..bare_matches.len() {
let a = &bare_matches[i];
let auta = &automatons[a.query_index as usize];
match auta.phrase_query {
Some((0, _)) => {
let b = match raw_matches.get(i + 1) {
let b = match bare_matches.get(i + 1) {
Some(b) => b,
None => {
postings_lists[a.postings_list].rewrite_with(SetBuf::default());
@ -77,13 +77,13 @@ impl<'a, 'tag> RawDocument<'a, 'tag> {
}
}
if raw_matches.iter().all(|rm| postings_lists[rm.postings_list].is_empty()) {
if bare_matches.iter().all(|rm| postings_lists[rm.postings_list].is_empty()) {
return None
}
Some(RawDocument {
id: raw_matches[0].document_id,
raw_matches,
id: bare_matches[0].document_id,
bare_matches,
processed_matches: Vec::new(),
processed_distances: Vec::new(),
contains_one_word_field: false,