mirror of
https://github.com/meilisearch/MeiliSearch
synced 2024-11-26 23:04:26 +01:00
Rename raw_matches into bare_matches
This commit is contained in:
parent
48e8778881
commit
a4dd033ccf
@ -82,9 +82,9 @@ where
|
|||||||
let before_raw_documents_building = Instant::now();
|
let before_raw_documents_building = Instant::now();
|
||||||
let mut prefiltered_documents = 0;
|
let mut prefiltered_documents = 0;
|
||||||
let mut raw_documents = Vec::new();
|
let mut raw_documents = Vec::new();
|
||||||
for raw_matches in bare_matches.linear_group_by_key_mut(|sm| sm.document_id) {
|
for bare_matches in bare_matches.linear_group_by_key_mut(|sm| sm.document_id) {
|
||||||
prefiltered_documents += 1;
|
prefiltered_documents += 1;
|
||||||
if let Some(raw_document) = RawDocument::new(raw_matches, &automatons, &mut arena) {
|
if let Some(raw_document) = RawDocument::new(bare_matches, &automatons, &mut arena) {
|
||||||
raw_documents.push(raw_document);
|
raw_documents.push(raw_document);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -180,9 +180,9 @@ where
|
|||||||
let before_raw_documents_building = Instant::now();
|
let before_raw_documents_building = Instant::now();
|
||||||
let mut prefiltered_documents = 0;
|
let mut prefiltered_documents = 0;
|
||||||
let mut raw_documents = Vec::new();
|
let mut raw_documents = Vec::new();
|
||||||
for raw_matches in bare_matches.linear_group_by_key_mut(|sm| sm.document_id) {
|
for bare_matches in bare_matches.linear_group_by_key_mut(|sm| sm.document_id) {
|
||||||
prefiltered_documents += 1;
|
prefiltered_documents += 1;
|
||||||
if let Some(raw_document) = RawDocument::new(raw_matches, &automatons, &mut arena) {
|
if let Some(raw_document) = RawDocument::new(bare_matches, &automatons, &mut arena) {
|
||||||
raw_documents.push(raw_document);
|
raw_documents.push(raw_document);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -2,7 +2,7 @@ use std::cmp::Ordering;
|
|||||||
use slice_group_by::GroupBy;
|
use slice_group_by::GroupBy;
|
||||||
use crate::{RawDocument, MResult};
|
use crate::{RawDocument, MResult};
|
||||||
use crate::bucket_sort::SimpleMatch;
|
use crate::bucket_sort::SimpleMatch;
|
||||||
use super::{Criterion, Context, ContextMut, prepare_raw_matches};
|
use super::{Criterion, Context, ContextMut, prepare_bare_matches};
|
||||||
|
|
||||||
pub struct Attribute;
|
pub struct Attribute;
|
||||||
|
|
||||||
@ -15,7 +15,7 @@ impl Criterion for Attribute {
|
|||||||
documents: &mut [RawDocument<'r, 'tag>],
|
documents: &mut [RawDocument<'r, 'tag>],
|
||||||
) -> MResult<()>
|
) -> MResult<()>
|
||||||
{
|
{
|
||||||
prepare_raw_matches(documents, ctx.postings_lists, ctx.query_enhancer);
|
prepare_bare_matches(documents, ctx.postings_lists, ctx.query_enhancer);
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -21,11 +21,11 @@ impl Criterion for Exact {
|
|||||||
let reader = ctx.reader;
|
let reader = ctx.reader;
|
||||||
|
|
||||||
'documents: for doc in documents {
|
'documents: for doc in documents {
|
||||||
doc.raw_matches.sort_unstable_by_key(|bm| (bm.query_index, Reverse(bm.is_exact)));
|
doc.bare_matches.sort_unstable_by_key(|bm| (bm.query_index, Reverse(bm.is_exact)));
|
||||||
|
|
||||||
// mark the document if we find a "one word field" that matches
|
// mark the document if we find a "one word field" that matches
|
||||||
let mut fields_counts = HashMap::new();
|
let mut fields_counts = HashMap::new();
|
||||||
for group in doc.raw_matches.linear_group_by_key(|bm| bm.query_index) {
|
for group in doc.bare_matches.linear_group_by_key(|bm| bm.query_index) {
|
||||||
for group in group.linear_group_by_key(|bm| bm.is_exact) {
|
for group in group.linear_group_by_key(|bm| bm.is_exact) {
|
||||||
if !group[0].is_exact { break }
|
if !group[0].is_exact { break }
|
||||||
|
|
||||||
@ -70,8 +70,8 @@ impl Criterion for Exact {
|
|||||||
lhs.contains_one_word_field.cmp(&rhs.contains_one_word_field).reverse()
|
lhs.contains_one_word_field.cmp(&rhs.contains_one_word_field).reverse()
|
||||||
// if not, with document contains the more exact words
|
// if not, with document contains the more exact words
|
||||||
.then_with(|| {
|
.then_with(|| {
|
||||||
let lhs = sum_exact_query_words(&lhs.raw_matches);
|
let lhs = sum_exact_query_words(&lhs.bare_matches);
|
||||||
let rhs = sum_exact_query_words(&rhs.raw_matches);
|
let rhs = sum_exact_query_words(&rhs.bare_matches);
|
||||||
lhs.cmp(&rhs).reverse()
|
lhs.cmp(&rhs).reverse()
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
@ -145,7 +145,7 @@ fn prepare_query_distances<'a, 'tag, 'txn>(
|
|||||||
if !document.processed_distances.is_empty() { continue }
|
if !document.processed_distances.is_empty() { continue }
|
||||||
|
|
||||||
let mut processed = Vec::new();
|
let mut processed = Vec::new();
|
||||||
for m in document.raw_matches.iter() {
|
for m in document.bare_matches.iter() {
|
||||||
if postings_lists[m.postings_list].is_empty() { continue }
|
if postings_lists[m.postings_list].is_empty() { continue }
|
||||||
|
|
||||||
let range = query_enhancer.replacement(m.query_index as u32);
|
let range = query_enhancer.replacement(m.query_index as u32);
|
||||||
@ -166,7 +166,7 @@ fn prepare_query_distances<'a, 'tag, 'txn>(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn prepare_raw_matches<'a, 'tag, 'txn>(
|
fn prepare_bare_matches<'a, 'tag, 'txn>(
|
||||||
documents: &mut [RawDocument<'a, 'tag>],
|
documents: &mut [RawDocument<'a, 'tag>],
|
||||||
postings_lists: &mut SmallArena<'tag, PostingsListView<'txn>>,
|
postings_lists: &mut SmallArena<'tag, PostingsListView<'txn>>,
|
||||||
query_enhancer: &QueryEnhancer,
|
query_enhancer: &QueryEnhancer,
|
||||||
@ -175,7 +175,7 @@ fn prepare_raw_matches<'a, 'tag, 'txn>(
|
|||||||
if !document.processed_matches.is_empty() { continue }
|
if !document.processed_matches.is_empty() { continue }
|
||||||
|
|
||||||
let mut processed = Vec::new();
|
let mut processed = Vec::new();
|
||||||
for m in document.raw_matches.iter() {
|
for m in document.bare_matches.iter() {
|
||||||
let postings_list = &postings_lists[m.postings_list];
|
let postings_list = &postings_lists[m.postings_list];
|
||||||
processed.reserve(postings_list.len());
|
processed.reserve(postings_list.len());
|
||||||
for di in postings_list.as_ref() {
|
for di in postings_list.as_ref() {
|
||||||
|
@ -2,7 +2,7 @@ use std::cmp::{self, Ordering};
|
|||||||
use slice_group_by::GroupBy;
|
use slice_group_by::GroupBy;
|
||||||
use crate::bucket_sort::{SimpleMatch};
|
use crate::bucket_sort::{SimpleMatch};
|
||||||
use crate::{RawDocument, MResult};
|
use crate::{RawDocument, MResult};
|
||||||
use super::{Criterion, Context, ContextMut, prepare_raw_matches};
|
use super::{Criterion, Context, ContextMut, prepare_bare_matches};
|
||||||
|
|
||||||
const MAX_DISTANCE: u16 = 8;
|
const MAX_DISTANCE: u16 = 8;
|
||||||
|
|
||||||
@ -17,7 +17,7 @@ impl Criterion for Proximity {
|
|||||||
documents: &mut [RawDocument<'r, 'tag>],
|
documents: &mut [RawDocument<'r, 'tag>],
|
||||||
) -> MResult<()>
|
) -> MResult<()>
|
||||||
{
|
{
|
||||||
prepare_raw_matches(documents, ctx.postings_lists, ctx.query_enhancer);
|
prepare_bare_matches(documents, ctx.postings_lists, ctx.query_enhancer);
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -2,7 +2,7 @@ use std::cmp::Ordering;
|
|||||||
use slice_group_by::GroupBy;
|
use slice_group_by::GroupBy;
|
||||||
use crate::bucket_sort::SimpleMatch;
|
use crate::bucket_sort::SimpleMatch;
|
||||||
use crate::{RawDocument, MResult};
|
use crate::{RawDocument, MResult};
|
||||||
use super::{Criterion, Context, ContextMut, prepare_raw_matches};
|
use super::{Criterion, Context, ContextMut, prepare_bare_matches};
|
||||||
|
|
||||||
pub struct WordsPosition;
|
pub struct WordsPosition;
|
||||||
|
|
||||||
@ -15,7 +15,7 @@ impl Criterion for WordsPosition {
|
|||||||
documents: &mut [RawDocument<'r, 'tag>],
|
documents: &mut [RawDocument<'r, 'tag>],
|
||||||
) -> MResult<()>
|
) -> MResult<()>
|
||||||
{
|
{
|
||||||
prepare_raw_matches(documents, ctx.postings_lists, ctx.query_enhancer);
|
prepare_bare_matches(documents, ctx.postings_lists, ctx.query_enhancer);
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -48,7 +48,7 @@ impl Document {
|
|||||||
arena: &SmallArena<'tag, PostingsListView<'txn>>,
|
arena: &SmallArena<'tag, PostingsListView<'txn>>,
|
||||||
) -> Document
|
) -> Document
|
||||||
{
|
{
|
||||||
let highlights = raw_document.raw_matches.iter().flat_map(|sm| {
|
let highlights = raw_document.bare_matches.iter().flat_map(|sm| {
|
||||||
let postings_list = &arena[sm.postings_list];
|
let postings_list = &arena[sm.postings_list];
|
||||||
let input = postings_list.input();
|
let input = postings_list.input();
|
||||||
let query = &automatons[sm.query_index as usize].query;
|
let query = &automatons[sm.query_index as usize].query;
|
||||||
|
@ -5,7 +5,7 @@ use crate::bucket_sort::{SimpleMatch, BareMatch, QueryWordAutomaton, PostingsLis
|
|||||||
|
|
||||||
pub struct RawDocument<'a, 'tag> {
|
pub struct RawDocument<'a, 'tag> {
|
||||||
pub id: crate::DocumentId,
|
pub id: crate::DocumentId,
|
||||||
pub raw_matches: &'a mut [BareMatch<'tag>],
|
pub bare_matches: &'a mut [BareMatch<'tag>],
|
||||||
pub processed_matches: Vec<SimpleMatch>,
|
pub processed_matches: Vec<SimpleMatch>,
|
||||||
/// The list of minimum `distance` found
|
/// The list of minimum `distance` found
|
||||||
pub processed_distances: Vec<Option<u8>>,
|
pub processed_distances: Vec<Option<u8>>,
|
||||||
@ -16,21 +16,21 @@ pub struct RawDocument<'a, 'tag> {
|
|||||||
|
|
||||||
impl<'a, 'tag> RawDocument<'a, 'tag> {
|
impl<'a, 'tag> RawDocument<'a, 'tag> {
|
||||||
pub fn new<'txn>(
|
pub fn new<'txn>(
|
||||||
raw_matches: &'a mut [BareMatch<'tag>],
|
bare_matches: &'a mut [BareMatch<'tag>],
|
||||||
automatons: &[QueryWordAutomaton],
|
automatons: &[QueryWordAutomaton],
|
||||||
postings_lists: &mut SmallArena<'tag, PostingsListView<'txn>>,
|
postings_lists: &mut SmallArena<'tag, PostingsListView<'txn>>,
|
||||||
) -> Option<RawDocument<'a, 'tag>>
|
) -> Option<RawDocument<'a, 'tag>>
|
||||||
{
|
{
|
||||||
raw_matches.sort_unstable_by_key(|m| m.query_index);
|
bare_matches.sort_unstable_by_key(|m| m.query_index);
|
||||||
|
|
||||||
let mut previous_word = None;
|
let mut previous_word = None;
|
||||||
for i in 0..raw_matches.len() {
|
for i in 0..bare_matches.len() {
|
||||||
let a = &raw_matches[i];
|
let a = &bare_matches[i];
|
||||||
let auta = &automatons[a.query_index as usize];
|
let auta = &automatons[a.query_index as usize];
|
||||||
|
|
||||||
match auta.phrase_query {
|
match auta.phrase_query {
|
||||||
Some((0, _)) => {
|
Some((0, _)) => {
|
||||||
let b = match raw_matches.get(i + 1) {
|
let b = match bare_matches.get(i + 1) {
|
||||||
Some(b) => b,
|
Some(b) => b,
|
||||||
None => {
|
None => {
|
||||||
postings_lists[a.postings_list].rewrite_with(SetBuf::default());
|
postings_lists[a.postings_list].rewrite_with(SetBuf::default());
|
||||||
@ -77,13 +77,13 @@ impl<'a, 'tag> RawDocument<'a, 'tag> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if raw_matches.iter().all(|rm| postings_lists[rm.postings_list].is_empty()) {
|
if bare_matches.iter().all(|rm| postings_lists[rm.postings_list].is_empty()) {
|
||||||
return None
|
return None
|
||||||
}
|
}
|
||||||
|
|
||||||
Some(RawDocument {
|
Some(RawDocument {
|
||||||
id: raw_matches[0].document_id,
|
id: bare_matches[0].document_id,
|
||||||
raw_matches,
|
bare_matches,
|
||||||
processed_matches: Vec::new(),
|
processed_matches: Vec::new(),
|
||||||
processed_distances: Vec::new(),
|
processed_distances: Vec::new(),
|
||||||
contains_one_word_field: false,
|
contains_one_word_field: false,
|
||||||
|
Loading…
Reference in New Issue
Block a user