mirror of
https://github.com/meilisearch/MeiliSearch
synced 2024-11-30 00:34:26 +01:00
Fix several warnings in extractors and remove unreachable macros
This commit is contained in:
parent
8fd0afaaaa
commit
f69688e8f7
@ -87,11 +87,11 @@ pub trait FacetedExtractor {
|
|||||||
where
|
where
|
||||||
MF: MergeFunction,
|
MF: MergeFunction,
|
||||||
MF::Error: Debug,
|
MF::Error: Debug,
|
||||||
|
grenad::Error<MF::Error>: Into<crate::Error>,
|
||||||
{
|
{
|
||||||
buffer.clear();
|
buffer.clear();
|
||||||
match Self::build_key(fid, value, buffer) {
|
match Self::build_key(fid, value, buffer) {
|
||||||
// TODO manage errors
|
Some(key) => cache_fn(cached_sorter, &key, docid).map_err(Into::into),
|
||||||
Some(key) => Ok(cache_fn(cached_sorter, &key, docid).unwrap()),
|
|
||||||
None => Ok(()),
|
None => Ok(()),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -1,4 +1,3 @@
|
|||||||
use std::borrow::Cow;
|
|
||||||
use std::collections::HashMap;
|
use std::collections::HashMap;
|
||||||
|
|
||||||
use heed::RoTxn;
|
use heed::RoTxn;
|
||||||
@ -25,12 +24,6 @@ impl SearchableExtractor for FidWordCountDocidsExtractor {
|
|||||||
Ok(vec![])
|
Ok(vec![])
|
||||||
}
|
}
|
||||||
|
|
||||||
/// This case is unreachable because extract_document_change has been reimplemented to not call this function.
|
|
||||||
fn build_key(_field_id: FieldId, _position: u16, _word: &str) -> Cow<[u8]> {
|
|
||||||
/// TODO remove this
|
|
||||||
unreachable!()
|
|
||||||
}
|
|
||||||
|
|
||||||
// This method is reimplemented to count the number of words in the document in each field
|
// This method is reimplemented to count the number of words in the document in each field
|
||||||
// and to store the docids of the documents that have a number of words in a given field less than or equal to MAX_COUNTED_WORDS.
|
// and to store the docids of the documents that have a number of words in a given field less than or equal to MAX_COUNTED_WORDS.
|
||||||
fn extract_document_change(
|
fn extract_document_change(
|
||||||
@ -59,8 +52,7 @@ impl SearchableExtractor for FidWordCountDocidsExtractor {
|
|||||||
for (fid, count) in fid_word_count.iter() {
|
for (fid, count) in fid_word_count.iter() {
|
||||||
if *count <= MAX_COUNTED_WORDS {
|
if *count <= MAX_COUNTED_WORDS {
|
||||||
let key = build_key(*fid, *count as u8, &mut key_buffer);
|
let key = build_key(*fid, *count as u8, &mut key_buffer);
|
||||||
/// TODO manage the error
|
cached_sorter.insert_del_u32(key, inner.docid())?;
|
||||||
cached_sorter.insert_del_u32(key, inner.docid()).unwrap();
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -93,13 +85,11 @@ impl SearchableExtractor for FidWordCountDocidsExtractor {
|
|||||||
if *current_count != *new_count {
|
if *current_count != *new_count {
|
||||||
if *current_count <= MAX_COUNTED_WORDS {
|
if *current_count <= MAX_COUNTED_WORDS {
|
||||||
let key = build_key(*fid, *current_count as u8, &mut key_buffer);
|
let key = build_key(*fid, *current_count as u8, &mut key_buffer);
|
||||||
/// TODO manage the error
|
cached_sorter.insert_del_u32(key, inner.docid())?;
|
||||||
cached_sorter.insert_del_u32(key, inner.docid()).unwrap();
|
|
||||||
}
|
}
|
||||||
if *new_count <= MAX_COUNTED_WORDS {
|
if *new_count <= MAX_COUNTED_WORDS {
|
||||||
let key = build_key(*fid, *new_count as u8, &mut key_buffer);
|
let key = build_key(*fid, *new_count as u8, &mut key_buffer);
|
||||||
/// TODO manage the error
|
cached_sorter.insert_add_u32(key, inner.docid())?;
|
||||||
cached_sorter.insert_add_u32(key, inner.docid()).unwrap();
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -116,8 +106,7 @@ impl SearchableExtractor for FidWordCountDocidsExtractor {
|
|||||||
for (fid, count) in fid_word_count.iter() {
|
for (fid, count) in fid_word_count.iter() {
|
||||||
if *count <= MAX_COUNTED_WORDS {
|
if *count <= MAX_COUNTED_WORDS {
|
||||||
let key = build_key(*fid, *count as u8, &mut key_buffer);
|
let key = build_key(*fid, *count as u8, &mut key_buffer);
|
||||||
/// TODO manage the error
|
cached_sorter.insert_add_u32(key, inner.docid())?;
|
||||||
cached_sorter.insert_add_u32(key, inner.docid()).unwrap();
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -2,11 +2,93 @@ use std::borrow::Cow;
|
|||||||
|
|
||||||
use heed::RoTxn;
|
use heed::RoTxn;
|
||||||
|
|
||||||
use super::SearchableExtractor;
|
use super::{tokenize_document::DocumentTokenizer, SearchableExtractor};
|
||||||
use crate::{bucketed_position, FieldId, Index, Result};
|
use crate::{
|
||||||
|
bucketed_position,
|
||||||
|
update::{
|
||||||
|
new::{extract::cache::CboCachedSorter, DocumentChange},
|
||||||
|
MergeDeladdCboRoaringBitmaps,
|
||||||
|
},
|
||||||
|
FieldId, GlobalFieldsIdsMap, Index, Result,
|
||||||
|
};
|
||||||
|
|
||||||
|
trait ProtoWordDocidsExtractor {
|
||||||
|
fn build_key(field_id: FieldId, position: u16, word: &str) -> Cow<'_, [u8]>;
|
||||||
|
fn attributes_to_extract<'a>(
|
||||||
|
_rtxn: &'a RoTxn,
|
||||||
|
_index: &'a Index,
|
||||||
|
) -> Result<Option<Vec<&'a str>>>;
|
||||||
|
|
||||||
|
fn attributes_to_skip<'a>(rtxn: &'a RoTxn, index: &'a Index) -> Result<Vec<&'a str>>;
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<T> SearchableExtractor for T
|
||||||
|
where
|
||||||
|
T: ProtoWordDocidsExtractor,
|
||||||
|
{
|
||||||
|
fn extract_document_change(
|
||||||
|
rtxn: &RoTxn,
|
||||||
|
index: &Index,
|
||||||
|
document_tokenizer: &DocumentTokenizer,
|
||||||
|
fields_ids_map: &mut GlobalFieldsIdsMap,
|
||||||
|
cached_sorter: &mut CboCachedSorter<MergeDeladdCboRoaringBitmaps>,
|
||||||
|
document_change: DocumentChange,
|
||||||
|
) -> Result<()> {
|
||||||
|
match document_change {
|
||||||
|
DocumentChange::Deletion(inner) => {
|
||||||
|
let mut token_fn = |fid, pos: u16, word: &str| {
|
||||||
|
let key = Self::build_key(fid, pos, word);
|
||||||
|
cached_sorter.insert_del_u32(&key, inner.docid()).map_err(crate::Error::from)
|
||||||
|
};
|
||||||
|
document_tokenizer.tokenize_document(
|
||||||
|
inner.current(rtxn, index)?.unwrap(),
|
||||||
|
fields_ids_map,
|
||||||
|
&mut token_fn,
|
||||||
|
)?;
|
||||||
|
}
|
||||||
|
DocumentChange::Update(inner) => {
|
||||||
|
let mut token_fn = |fid, pos, word: &str| {
|
||||||
|
let key = Self::build_key(fid, pos, word);
|
||||||
|
cached_sorter.insert_del_u32(&key, inner.docid()).map_err(crate::Error::from)
|
||||||
|
};
|
||||||
|
document_tokenizer.tokenize_document(
|
||||||
|
inner.current(rtxn, index)?.unwrap(),
|
||||||
|
fields_ids_map,
|
||||||
|
&mut token_fn,
|
||||||
|
)?;
|
||||||
|
|
||||||
|
let mut token_fn = |fid, pos, word: &str| {
|
||||||
|
let key = Self::build_key(fid, pos, word);
|
||||||
|
cached_sorter.insert_add_u32(&key, inner.docid()).map_err(crate::Error::from)
|
||||||
|
};
|
||||||
|
document_tokenizer.tokenize_document(inner.new(), fields_ids_map, &mut token_fn)?;
|
||||||
|
}
|
||||||
|
DocumentChange::Insertion(inner) => {
|
||||||
|
let mut token_fn = |fid, pos, word: &str| {
|
||||||
|
let key = Self::build_key(fid, pos, word);
|
||||||
|
cached_sorter.insert_add_u32(&key, inner.docid()).map_err(crate::Error::from)
|
||||||
|
};
|
||||||
|
document_tokenizer.tokenize_document(inner.new(), fields_ids_map, &mut token_fn)?;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
fn attributes_to_extract<'a>(
|
||||||
|
rtxn: &'a RoTxn,
|
||||||
|
index: &'a Index,
|
||||||
|
) -> Result<Option<Vec<&'a str>>> {
|
||||||
|
Self::attributes_to_extract(rtxn, index)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn attributes_to_skip<'a>(rtxn: &'a RoTxn, index: &'a Index) -> Result<Vec<&'a str>> {
|
||||||
|
Self::attributes_to_skip(rtxn, index)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
pub struct WordDocidsExtractor;
|
pub struct WordDocidsExtractor;
|
||||||
impl SearchableExtractor for WordDocidsExtractor {
|
impl ProtoWordDocidsExtractor for WordDocidsExtractor {
|
||||||
fn attributes_to_extract<'a>(
|
fn attributes_to_extract<'a>(
|
||||||
rtxn: &'a RoTxn,
|
rtxn: &'a RoTxn,
|
||||||
index: &'a Index,
|
index: &'a Index,
|
||||||
@ -26,7 +108,7 @@ impl SearchableExtractor for WordDocidsExtractor {
|
|||||||
}
|
}
|
||||||
|
|
||||||
pub struct ExactWordDocidsExtractor;
|
pub struct ExactWordDocidsExtractor;
|
||||||
impl SearchableExtractor for ExactWordDocidsExtractor {
|
impl ProtoWordDocidsExtractor for ExactWordDocidsExtractor {
|
||||||
fn attributes_to_extract<'a>(
|
fn attributes_to_extract<'a>(
|
||||||
rtxn: &'a RoTxn,
|
rtxn: &'a RoTxn,
|
||||||
index: &'a Index,
|
index: &'a Index,
|
||||||
@ -55,7 +137,7 @@ impl SearchableExtractor for ExactWordDocidsExtractor {
|
|||||||
}
|
}
|
||||||
|
|
||||||
pub struct WordFidDocidsExtractor;
|
pub struct WordFidDocidsExtractor;
|
||||||
impl SearchableExtractor for WordFidDocidsExtractor {
|
impl ProtoWordDocidsExtractor for WordFidDocidsExtractor {
|
||||||
fn attributes_to_extract<'a>(
|
fn attributes_to_extract<'a>(
|
||||||
rtxn: &'a RoTxn,
|
rtxn: &'a RoTxn,
|
||||||
index: &'a Index,
|
index: &'a Index,
|
||||||
@ -77,7 +159,7 @@ impl SearchableExtractor for WordFidDocidsExtractor {
|
|||||||
}
|
}
|
||||||
|
|
||||||
pub struct WordPositionDocidsExtractor;
|
pub struct WordPositionDocidsExtractor;
|
||||||
impl SearchableExtractor for WordPositionDocidsExtractor {
|
impl ProtoWordDocidsExtractor for WordPositionDocidsExtractor {
|
||||||
fn attributes_to_extract<'a>(
|
fn attributes_to_extract<'a>(
|
||||||
rtxn: &'a RoTxn,
|
rtxn: &'a RoTxn,
|
||||||
index: &'a Index,
|
index: &'a Index,
|
||||||
|
@ -1,4 +1,3 @@
|
|||||||
use std::borrow::Cow;
|
|
||||||
use std::collections::{BTreeMap, VecDeque};
|
use std::collections::{BTreeMap, VecDeque};
|
||||||
|
|
||||||
use heed::RoTxn;
|
use heed::RoTxn;
|
||||||
@ -26,12 +25,6 @@ impl SearchableExtractor for WordPairProximityDocidsExtractor {
|
|||||||
Ok(vec![])
|
Ok(vec![])
|
||||||
}
|
}
|
||||||
|
|
||||||
/// This case is unreachable because extract_document_change has been reimplemented to not call this function.
|
|
||||||
fn build_key(_field_id: FieldId, _position: u16, _word: &str) -> Cow<[u8]> {
|
|
||||||
/// TODO remove this
|
|
||||||
unreachable!()
|
|
||||||
}
|
|
||||||
|
|
||||||
// This method is reimplemented to count the number of words in the document in each field
|
// This method is reimplemented to count the number of words in the document in each field
|
||||||
// and to store the docids of the documents that have a number of words in a given field less than or equal to MAX_COUNTED_WORDS.
|
// and to store the docids of the documents that have a number of words in a given field less than or equal to MAX_COUNTED_WORDS.
|
||||||
fn extract_document_change(
|
fn extract_document_change(
|
||||||
@ -100,18 +93,18 @@ impl SearchableExtractor for WordPairProximityDocidsExtractor {
|
|||||||
match eob {
|
match eob {
|
||||||
Left(((w1, w2), prox)) => {
|
Left(((w1, w2), prox)) => {
|
||||||
let key = build_key(*prox, w1, w2, &mut key_buffer);
|
let key = build_key(*prox, w1, w2, &mut key_buffer);
|
||||||
cached_sorter.insert_del_u32(key, docid).unwrap();
|
cached_sorter.insert_del_u32(key, docid)?;
|
||||||
}
|
}
|
||||||
Right(((w1, w2), prox)) => {
|
Right(((w1, w2), prox)) => {
|
||||||
let key = build_key(*prox, w1, w2, &mut key_buffer);
|
let key = build_key(*prox, w1, w2, &mut key_buffer);
|
||||||
cached_sorter.insert_add_u32(key, docid).unwrap();
|
cached_sorter.insert_add_u32(key, docid)?;
|
||||||
}
|
}
|
||||||
Both(((w1, w2), del_prox), (_, add_prox)) => {
|
Both(((w1, w2), del_prox), (_, add_prox)) => {
|
||||||
if del_prox != add_prox {
|
if del_prox != add_prox {
|
||||||
let key = build_key(*del_prox, w1, w2, &mut key_buffer);
|
let key = build_key(*del_prox, w1, w2, &mut key_buffer);
|
||||||
cached_sorter.insert_del_u32(key, docid).unwrap();
|
cached_sorter.insert_del_u32(key, docid)?;
|
||||||
let key = build_key(*add_prox, w1, w2, &mut key_buffer);
|
let key = build_key(*add_prox, w1, w2, &mut key_buffer);
|
||||||
cached_sorter.insert_add_u32(key, docid).unwrap();
|
cached_sorter.insert_add_u32(key, docid)?;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
@ -3,7 +3,6 @@ mod extract_word_docids;
|
|||||||
mod extract_word_pair_proximity_docids;
|
mod extract_word_pair_proximity_docids;
|
||||||
mod tokenize_document;
|
mod tokenize_document;
|
||||||
|
|
||||||
use std::borrow::Cow;
|
|
||||||
use std::fs::File;
|
use std::fs::File;
|
||||||
|
|
||||||
pub use extract_fid_word_count_docids::FidWordCountDocidsExtractor;
|
pub use extract_fid_word_count_docids::FidWordCountDocidsExtractor;
|
||||||
@ -20,7 +19,7 @@ use tokenize_document::{tokenizer_builder, DocumentTokenizer};
|
|||||||
use super::cache::CboCachedSorter;
|
use super::cache::CboCachedSorter;
|
||||||
use crate::update::new::{DocumentChange, ItemsPool};
|
use crate::update::new::{DocumentChange, ItemsPool};
|
||||||
use crate::update::{create_sorter, GrenadParameters, MergeDeladdCboRoaringBitmaps};
|
use crate::update::{create_sorter, GrenadParameters, MergeDeladdCboRoaringBitmaps};
|
||||||
use crate::{FieldId, GlobalFieldsIdsMap, Index, Result, MAX_POSITION_PER_ATTRIBUTE};
|
use crate::{GlobalFieldsIdsMap, Index, Result, MAX_POSITION_PER_ATTRIBUTE};
|
||||||
|
|
||||||
pub trait SearchableExtractor {
|
pub trait SearchableExtractor {
|
||||||
fn run_extraction(
|
fn run_extraction(
|
||||||
@ -109,60 +108,10 @@ pub trait SearchableExtractor {
|
|||||||
fields_ids_map: &mut GlobalFieldsIdsMap,
|
fields_ids_map: &mut GlobalFieldsIdsMap,
|
||||||
cached_sorter: &mut CboCachedSorter<MergeDeladdCboRoaringBitmaps>,
|
cached_sorter: &mut CboCachedSorter<MergeDeladdCboRoaringBitmaps>,
|
||||||
document_change: DocumentChange,
|
document_change: DocumentChange,
|
||||||
) -> Result<()> {
|
) -> Result<()>;
|
||||||
match document_change {
|
|
||||||
DocumentChange::Deletion(inner) => {
|
|
||||||
let mut token_fn = |fid, pos: u16, word: &str| {
|
|
||||||
let key = Self::build_key(fid, pos, word);
|
|
||||||
/// TODO manage the error
|
|
||||||
cached_sorter.insert_del_u32(&key, inner.docid()).unwrap();
|
|
||||||
Ok(())
|
|
||||||
};
|
|
||||||
document_tokenizer.tokenize_document(
|
|
||||||
inner.current(rtxn, index)?.unwrap(),
|
|
||||||
fields_ids_map,
|
|
||||||
&mut token_fn,
|
|
||||||
)?;
|
|
||||||
}
|
|
||||||
DocumentChange::Update(inner) => {
|
|
||||||
let mut token_fn = |fid, pos, word: &str| {
|
|
||||||
let key = Self::build_key(fid, pos, word);
|
|
||||||
/// TODO manage the error
|
|
||||||
cached_sorter.insert_del_u32(&key, inner.docid()).unwrap();
|
|
||||||
Ok(())
|
|
||||||
};
|
|
||||||
document_tokenizer.tokenize_document(
|
|
||||||
inner.current(rtxn, index)?.unwrap(),
|
|
||||||
fields_ids_map,
|
|
||||||
&mut token_fn,
|
|
||||||
)?;
|
|
||||||
|
|
||||||
let mut token_fn = |fid, pos, word: &str| {
|
|
||||||
let key = Self::build_key(fid, pos, word);
|
|
||||||
/// TODO manage the error
|
|
||||||
cached_sorter.insert_add_u32(&key, inner.docid()).unwrap();
|
|
||||||
Ok(())
|
|
||||||
};
|
|
||||||
document_tokenizer.tokenize_document(inner.new(), fields_ids_map, &mut token_fn)?;
|
|
||||||
}
|
|
||||||
DocumentChange::Insertion(inner) => {
|
|
||||||
let mut token_fn = |fid, pos, word: &str| {
|
|
||||||
let key = Self::build_key(fid, pos, word);
|
|
||||||
/// TODO manage the error
|
|
||||||
cached_sorter.insert_add_u32(&key, inner.docid()).unwrap();
|
|
||||||
Ok(())
|
|
||||||
};
|
|
||||||
document_tokenizer.tokenize_document(inner.new(), fields_ids_map, &mut token_fn)?;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
|
|
||||||
fn attributes_to_extract<'a>(rtxn: &'a RoTxn, index: &'a Index)
|
fn attributes_to_extract<'a>(rtxn: &'a RoTxn, index: &'a Index)
|
||||||
-> Result<Option<Vec<&'a str>>>;
|
-> Result<Option<Vec<&'a str>>>;
|
||||||
|
|
||||||
fn attributes_to_skip<'a>(rtxn: &'a RoTxn, index: &'a Index) -> Result<Vec<&'a str>>;
|
fn attributes_to_skip<'a>(rtxn: &'a RoTxn, index: &'a Index) -> Result<Vec<&'a str>>;
|
||||||
|
|
||||||
fn build_key(field_id: FieldId, position: u16, word: &str) -> Cow<'_, [u8]>;
|
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user