mirror of
https://github.com/meilisearch/MeiliSearch
synced 2025-01-23 11:47:28 +01:00
Rename raw_matches into bare_matches
This commit is contained in:
parent
48e8778881
commit
a4dd033ccf
@ -82,9 +82,9 @@ where
|
||||
let before_raw_documents_building = Instant::now();
|
||||
let mut prefiltered_documents = 0;
|
||||
let mut raw_documents = Vec::new();
|
||||
for raw_matches in bare_matches.linear_group_by_key_mut(|sm| sm.document_id) {
|
||||
for bare_matches in bare_matches.linear_group_by_key_mut(|sm| sm.document_id) {
|
||||
prefiltered_documents += 1;
|
||||
if let Some(raw_document) = RawDocument::new(raw_matches, &automatons, &mut arena) {
|
||||
if let Some(raw_document) = RawDocument::new(bare_matches, &automatons, &mut arena) {
|
||||
raw_documents.push(raw_document);
|
||||
}
|
||||
}
|
||||
@ -180,9 +180,9 @@ where
|
||||
let before_raw_documents_building = Instant::now();
|
||||
let mut prefiltered_documents = 0;
|
||||
let mut raw_documents = Vec::new();
|
||||
for raw_matches in bare_matches.linear_group_by_key_mut(|sm| sm.document_id) {
|
||||
for bare_matches in bare_matches.linear_group_by_key_mut(|sm| sm.document_id) {
|
||||
prefiltered_documents += 1;
|
||||
if let Some(raw_document) = RawDocument::new(raw_matches, &automatons, &mut arena) {
|
||||
if let Some(raw_document) = RawDocument::new(bare_matches, &automatons, &mut arena) {
|
||||
raw_documents.push(raw_document);
|
||||
}
|
||||
}
|
||||
|
@ -2,7 +2,7 @@ use std::cmp::Ordering;
|
||||
use slice_group_by::GroupBy;
|
||||
use crate::{RawDocument, MResult};
|
||||
use crate::bucket_sort::SimpleMatch;
|
||||
use super::{Criterion, Context, ContextMut, prepare_raw_matches};
|
||||
use super::{Criterion, Context, ContextMut, prepare_bare_matches};
|
||||
|
||||
pub struct Attribute;
|
||||
|
||||
@ -15,7 +15,7 @@ impl Criterion for Attribute {
|
||||
documents: &mut [RawDocument<'r, 'tag>],
|
||||
) -> MResult<()>
|
||||
{
|
||||
prepare_raw_matches(documents, ctx.postings_lists, ctx.query_enhancer);
|
||||
prepare_bare_matches(documents, ctx.postings_lists, ctx.query_enhancer);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
|
@ -21,11 +21,11 @@ impl Criterion for Exact {
|
||||
let reader = ctx.reader;
|
||||
|
||||
'documents: for doc in documents {
|
||||
doc.raw_matches.sort_unstable_by_key(|bm| (bm.query_index, Reverse(bm.is_exact)));
|
||||
doc.bare_matches.sort_unstable_by_key(|bm| (bm.query_index, Reverse(bm.is_exact)));
|
||||
|
||||
// mark the document if we find a "one word field" that matches
|
||||
let mut fields_counts = HashMap::new();
|
||||
for group in doc.raw_matches.linear_group_by_key(|bm| bm.query_index) {
|
||||
for group in doc.bare_matches.linear_group_by_key(|bm| bm.query_index) {
|
||||
for group in group.linear_group_by_key(|bm| bm.is_exact) {
|
||||
if !group[0].is_exact { break }
|
||||
|
||||
@ -70,8 +70,8 @@ impl Criterion for Exact {
|
||||
lhs.contains_one_word_field.cmp(&rhs.contains_one_word_field).reverse()
|
||||
// if not, with document contains the more exact words
|
||||
.then_with(|| {
|
||||
let lhs = sum_exact_query_words(&lhs.raw_matches);
|
||||
let rhs = sum_exact_query_words(&rhs.raw_matches);
|
||||
let lhs = sum_exact_query_words(&lhs.bare_matches);
|
||||
let rhs = sum_exact_query_words(&rhs.bare_matches);
|
||||
lhs.cmp(&rhs).reverse()
|
||||
})
|
||||
}
|
||||
|
@ -145,7 +145,7 @@ fn prepare_query_distances<'a, 'tag, 'txn>(
|
||||
if !document.processed_distances.is_empty() { continue }
|
||||
|
||||
let mut processed = Vec::new();
|
||||
for m in document.raw_matches.iter() {
|
||||
for m in document.bare_matches.iter() {
|
||||
if postings_lists[m.postings_list].is_empty() { continue }
|
||||
|
||||
let range = query_enhancer.replacement(m.query_index as u32);
|
||||
@ -166,7 +166,7 @@ fn prepare_query_distances<'a, 'tag, 'txn>(
|
||||
}
|
||||
}
|
||||
|
||||
fn prepare_raw_matches<'a, 'tag, 'txn>(
|
||||
fn prepare_bare_matches<'a, 'tag, 'txn>(
|
||||
documents: &mut [RawDocument<'a, 'tag>],
|
||||
postings_lists: &mut SmallArena<'tag, PostingsListView<'txn>>,
|
||||
query_enhancer: &QueryEnhancer,
|
||||
@ -175,7 +175,7 @@ fn prepare_raw_matches<'a, 'tag, 'txn>(
|
||||
if !document.processed_matches.is_empty() { continue }
|
||||
|
||||
let mut processed = Vec::new();
|
||||
for m in document.raw_matches.iter() {
|
||||
for m in document.bare_matches.iter() {
|
||||
let postings_list = &postings_lists[m.postings_list];
|
||||
processed.reserve(postings_list.len());
|
||||
for di in postings_list.as_ref() {
|
||||
|
@ -2,7 +2,7 @@ use std::cmp::{self, Ordering};
|
||||
use slice_group_by::GroupBy;
|
||||
use crate::bucket_sort::{SimpleMatch};
|
||||
use crate::{RawDocument, MResult};
|
||||
use super::{Criterion, Context, ContextMut, prepare_raw_matches};
|
||||
use super::{Criterion, Context, ContextMut, prepare_bare_matches};
|
||||
|
||||
const MAX_DISTANCE: u16 = 8;
|
||||
|
||||
@ -17,7 +17,7 @@ impl Criterion for Proximity {
|
||||
documents: &mut [RawDocument<'r, 'tag>],
|
||||
) -> MResult<()>
|
||||
{
|
||||
prepare_raw_matches(documents, ctx.postings_lists, ctx.query_enhancer);
|
||||
prepare_bare_matches(documents, ctx.postings_lists, ctx.query_enhancer);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
|
@ -2,7 +2,7 @@ use std::cmp::Ordering;
|
||||
use slice_group_by::GroupBy;
|
||||
use crate::bucket_sort::SimpleMatch;
|
||||
use crate::{RawDocument, MResult};
|
||||
use super::{Criterion, Context, ContextMut, prepare_raw_matches};
|
||||
use super::{Criterion, Context, ContextMut, prepare_bare_matches};
|
||||
|
||||
pub struct WordsPosition;
|
||||
|
||||
@ -15,7 +15,7 @@ impl Criterion for WordsPosition {
|
||||
documents: &mut [RawDocument<'r, 'tag>],
|
||||
) -> MResult<()>
|
||||
{
|
||||
prepare_raw_matches(documents, ctx.postings_lists, ctx.query_enhancer);
|
||||
prepare_bare_matches(documents, ctx.postings_lists, ctx.query_enhancer);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
|
@ -48,7 +48,7 @@ impl Document {
|
||||
arena: &SmallArena<'tag, PostingsListView<'txn>>,
|
||||
) -> Document
|
||||
{
|
||||
let highlights = raw_document.raw_matches.iter().flat_map(|sm| {
|
||||
let highlights = raw_document.bare_matches.iter().flat_map(|sm| {
|
||||
let postings_list = &arena[sm.postings_list];
|
||||
let input = postings_list.input();
|
||||
let query = &automatons[sm.query_index as usize].query;
|
||||
|
@ -5,7 +5,7 @@ use crate::bucket_sort::{SimpleMatch, BareMatch, QueryWordAutomaton, PostingsLis
|
||||
|
||||
pub struct RawDocument<'a, 'tag> {
|
||||
pub id: crate::DocumentId,
|
||||
pub raw_matches: &'a mut [BareMatch<'tag>],
|
||||
pub bare_matches: &'a mut [BareMatch<'tag>],
|
||||
pub processed_matches: Vec<SimpleMatch>,
|
||||
/// The list of minimum `distance` found
|
||||
pub processed_distances: Vec<Option<u8>>,
|
||||
@ -16,21 +16,21 @@ pub struct RawDocument<'a, 'tag> {
|
||||
|
||||
impl<'a, 'tag> RawDocument<'a, 'tag> {
|
||||
pub fn new<'txn>(
|
||||
raw_matches: &'a mut [BareMatch<'tag>],
|
||||
bare_matches: &'a mut [BareMatch<'tag>],
|
||||
automatons: &[QueryWordAutomaton],
|
||||
postings_lists: &mut SmallArena<'tag, PostingsListView<'txn>>,
|
||||
) -> Option<RawDocument<'a, 'tag>>
|
||||
{
|
||||
raw_matches.sort_unstable_by_key(|m| m.query_index);
|
||||
bare_matches.sort_unstable_by_key(|m| m.query_index);
|
||||
|
||||
let mut previous_word = None;
|
||||
for i in 0..raw_matches.len() {
|
||||
let a = &raw_matches[i];
|
||||
for i in 0..bare_matches.len() {
|
||||
let a = &bare_matches[i];
|
||||
let auta = &automatons[a.query_index as usize];
|
||||
|
||||
match auta.phrase_query {
|
||||
Some((0, _)) => {
|
||||
let b = match raw_matches.get(i + 1) {
|
||||
let b = match bare_matches.get(i + 1) {
|
||||
Some(b) => b,
|
||||
None => {
|
||||
postings_lists[a.postings_list].rewrite_with(SetBuf::default());
|
||||
@ -77,13 +77,13 @@ impl<'a, 'tag> RawDocument<'a, 'tag> {
|
||||
}
|
||||
}
|
||||
|
||||
if raw_matches.iter().all(|rm| postings_lists[rm.postings_list].is_empty()) {
|
||||
if bare_matches.iter().all(|rm| postings_lists[rm.postings_list].is_empty()) {
|
||||
return None
|
||||
}
|
||||
|
||||
Some(RawDocument {
|
||||
id: raw_matches[0].document_id,
|
||||
raw_matches,
|
||||
id: bare_matches[0].document_id,
|
||||
bare_matches,
|
||||
processed_matches: Vec::new(),
|
||||
processed_distances: Vec::new(),
|
||||
contains_one_word_field: false,
|
||||
|
Loading…
x
Reference in New Issue
Block a user