mirror of https://github.com/meilisearch/MeiliSearch (synced 2024-12-12 14:26:29 +01:00)
Remove dead code

parent 3d8024fb2b
commit 2b51a63418
@@ -1,4 +1,3 @@
-use std::borrow::Cow;
 use std::collections::HashMap;
 use std::fs::File;
 use std::num::NonZero;
@@ -8,7 +7,6 @@ use heed::RoTxn;
 use rayon::iter::{IntoParallelIterator, ParallelIterator};

 use super::tokenize_document::{tokenizer_builder, DocumentTokenizer};
-use super::SearchableExtractor;
 use crate::update::new::extract::cache::CboCachedSorter;
 use crate::update::new::extract::perm_json_p::contained_in;
 use crate::update::new::{DocumentChange, ItemsPool};
@@ -20,178 +18,6 @@ use crate::{

 const MAX_COUNTED_WORDS: usize = 30;

-trait ProtoWordDocidsExtractor {
-    fn build_key(field_id: FieldId, position: u16, word: &str) -> Cow<'_, [u8]>;
-    fn attributes_to_extract<'a>(
-        _rtxn: &'a RoTxn,
-        _index: &'a Index,
-    ) -> Result<Option<Vec<&'a str>>>;
-
-    fn attributes_to_skip<'a>(rtxn: &'a RoTxn, index: &'a Index) -> Result<Vec<&'a str>>;
-}
-
-impl<T> SearchableExtractor for T
-where
-    T: ProtoWordDocidsExtractor,
-{
-    fn extract_document_change(
-        rtxn: &RoTxn,
-        index: &Index,
-        document_tokenizer: &DocumentTokenizer,
-        fields_ids_map: &mut GlobalFieldsIdsMap,
-        cached_sorter: &mut CboCachedSorter<MergeDeladdCboRoaringBitmaps>,
-        document_change: DocumentChange,
-    ) -> Result<()> {
-        match document_change {
-            DocumentChange::Deletion(inner) => {
-                let mut token_fn = |_fname: &str, fid, pos, word: &str| {
-                    let key = Self::build_key(fid, pos, word);
-                    cached_sorter.insert_del_u32(&key, inner.docid()).map_err(crate::Error::from)
-                };
-                document_tokenizer.tokenize_document(
-                    inner.current(rtxn, index)?.unwrap(),
-                    fields_ids_map,
-                    &mut token_fn,
-                )?;
-            }
-            DocumentChange::Update(inner) => {
-                let mut token_fn = |_fname: &str, fid, pos, word: &str| {
-                    let key = Self::build_key(fid, pos, word);
-                    cached_sorter.insert_del_u32(&key, inner.docid()).map_err(crate::Error::from)
-                };
-                document_tokenizer.tokenize_document(
-                    inner.current(rtxn, index)?.unwrap(),
-                    fields_ids_map,
-                    &mut token_fn,
-                )?;
-
-                let mut token_fn = |_fname: &str, fid, pos, word: &str| {
-                    let key = Self::build_key(fid, pos, word);
-                    cached_sorter.insert_add_u32(&key, inner.docid()).map_err(crate::Error::from)
-                };
-                document_tokenizer.tokenize_document(inner.new(), fields_ids_map, &mut token_fn)?;
-            }
-            DocumentChange::Insertion(inner) => {
-                let mut token_fn = |_fname: &str, fid, pos, word: &str| {
-                    let key = Self::build_key(fid, pos, word);
-                    cached_sorter.insert_add_u32(&key, inner.docid()).map_err(crate::Error::from)
-                };
-                document_tokenizer.tokenize_document(inner.new(), fields_ids_map, &mut token_fn)?;
-            }
-        }
-
-        Ok(())
-    }
-
-    fn attributes_to_extract<'a>(
-        rtxn: &'a RoTxn,
-        index: &'a Index,
-    ) -> Result<Option<Vec<&'a str>>> {
-        Self::attributes_to_extract(rtxn, index)
-    }
-
-    fn attributes_to_skip<'a>(rtxn: &'a RoTxn, index: &'a Index) -> Result<Vec<&'a str>> {
-        Self::attributes_to_skip(rtxn, index)
-    }
-}
-
-pub struct WordDocidsExtractor;
-impl ProtoWordDocidsExtractor for WordDocidsExtractor {
-    fn attributes_to_extract<'a>(
-        rtxn: &'a RoTxn,
-        index: &'a Index,
-    ) -> Result<Option<Vec<&'a str>>> {
-        index.user_defined_searchable_fields(rtxn).map_err(Into::into)
-    }
-
-    fn attributes_to_skip<'a>(rtxn: &'a RoTxn, index: &'a Index) -> Result<Vec<&'a str>> {
-        // exact attributes must be skipped and stored in a separate DB, see `ExactWordDocidsExtractor`.
-        index.exact_attributes(rtxn).map_err(Into::into)
-    }
-
-    /// TODO write in an external Vec buffer
-    fn build_key(_field_id: FieldId, _position: u16, word: &str) -> Cow<[u8]> {
-        Cow::Borrowed(word.as_bytes())
-    }
-}
-
-pub struct ExactWordDocidsExtractor;
-impl ProtoWordDocidsExtractor for ExactWordDocidsExtractor {
-    fn attributes_to_extract<'a>(
-        rtxn: &'a RoTxn,
-        index: &'a Index,
-    ) -> Result<Option<Vec<&'a str>>> {
-        let exact_attributes = index.exact_attributes(rtxn)?;
-        // If there are no user-defined searchable fields, we return all exact attributes.
-        // Otherwise, we return the intersection of exact attributes and user-defined searchable fields.
-        if let Some(searchable_attributes) = index.user_defined_searchable_fields(rtxn)? {
-            let attributes = exact_attributes
-                .into_iter()
-                .filter(|attr| searchable_attributes.contains(attr))
-                .collect();
-            Ok(Some(attributes))
-        } else {
-            Ok(Some(exact_attributes))
-        }
-    }
-
-    fn attributes_to_skip<'a>(_rtxn: &'a RoTxn, _index: &'a Index) -> Result<Vec<&'a str>> {
-        Ok(vec![])
-    }
-
-    fn build_key(_field_id: FieldId, _position: u16, word: &str) -> Cow<[u8]> {
-        Cow::Borrowed(word.as_bytes())
-    }
-}
-
-pub struct WordFidDocidsExtractor;
-impl ProtoWordDocidsExtractor for WordFidDocidsExtractor {
-    fn attributes_to_extract<'a>(
-        rtxn: &'a RoTxn,
-        index: &'a Index,
-    ) -> Result<Option<Vec<&'a str>>> {
-        index.user_defined_searchable_fields(rtxn).map_err(Into::into)
-    }
-
-    fn attributes_to_skip<'a>(_rtxn: &'a RoTxn, _index: &'a Index) -> Result<Vec<&'a str>> {
-        Ok(vec![])
-    }
-
-    fn build_key(field_id: FieldId, _position: u16, word: &str) -> Cow<[u8]> {
-        let mut key = Vec::new();
-        key.extend_from_slice(word.as_bytes());
-        key.push(0);
-        key.extend_from_slice(&field_id.to_be_bytes());
-        Cow::Owned(key)
-    }
-}
-
-pub struct WordPositionDocidsExtractor;
-impl ProtoWordDocidsExtractor for WordPositionDocidsExtractor {
-    fn attributes_to_extract<'a>(
-        rtxn: &'a RoTxn,
-        index: &'a Index,
-    ) -> Result<Option<Vec<&'a str>>> {
-        index.user_defined_searchable_fields(rtxn).map_err(Into::into)
-    }
-
-    fn attributes_to_skip<'a>(_rtxn: &'a RoTxn, _index: &'a Index) -> Result<Vec<&'a str>> {
-        Ok(vec![])
-    }
-
-    fn build_key(_field_id: FieldId, position: u16, word: &str) -> Cow<[u8]> {
-        // position must be bucketed to reduce the number of keys in the DB.
-        let position = bucketed_position(position);
-        let mut key = Vec::new();
-        key.extend_from_slice(word.as_bytes());
-        key.push(0);
-        key.extend_from_slice(&position.to_be_bytes());
-        Cow::Owned(key)
-    }
-}
-
-// V2
-
 struct WordDocidsCachedSorters {
     word_fid_docids: CboCachedSorter<MergeDeladdCboRoaringBitmaps>,
     word_docids: CboCachedSorter<MergeDeladdCboRoaringBitmaps>,
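For orientation, the three removed `build_key` layouts above differ only in what they append after the word: nothing at all (word docids and exact-word docids), a big-endian field id (word-fid docids), or a big-endian bucketed position (word-position docids), each separated from the word by a 0x00 byte. A minimal, self-contained sketch of those layouts follows; the helper names and the `FieldId` alias are illustrative stand-ins, not milli's API:

use std::borrow::Cow;

type FieldId = u16; // illustrative alias; milli defines its own FieldId

// Bare word: the word-docids and exact-word-docids layout.
fn word_key(word: &str) -> Cow<'_, [u8]> {
    Cow::Borrowed(word.as_bytes())
}

// word + 0x00 + big-endian field id (the removed WordFidDocidsExtractor layout).
fn word_fid_key(word: &str, field_id: FieldId) -> Cow<'static, [u8]> {
    let mut key = Vec::with_capacity(word.len() + 3);
    key.extend_from_slice(word.as_bytes());
    key.push(0); // separator between the word and the field id
    key.extend_from_slice(&field_id.to_be_bytes());
    Cow::Owned(key)
}

// word + 0x00 + big-endian bucketed position (the removed WordPositionDocidsExtractor layout).
fn word_position_key(word: &str, bucketed_position: u16) -> Cow<'static, [u8]> {
    let mut key = Vec::with_capacity(word.len() + 3);
    key.extend_from_slice(word.as_bytes());
    key.push(0); // separator between the word and the position bucket
    key.extend_from_slice(&bucketed_position.to_be_bytes());
    Cow::Owned(key)
}

fn main() {
    assert_eq!(word_key("hello").as_ref(), &b"hello"[..]);
    assert_eq!(word_fid_key("hello", 1).as_ref(), &b"hello\x00\x00\x01"[..]);
    assert_eq!(word_position_key("hello", 256).as_ref(), &b"hello\x00\x01\x00"[..]);
}

Big-endian encoding keeps keys for the same word with ascending field ids (or positions) adjacent in LMDB's lexicographic key order.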
@@ -340,6 +166,7 @@ impl WordDocidsCachedSorters {
         buffer.extend_from_slice(&field_id.to_be_bytes());
         self.word_fid_docids.insert_del_u32(buffer, docid)?;

+        let position = bucketed_position(position);
         buffer.clear();
         buffer.extend_from_slice(word.as_bytes());
         buffer.push(0);
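The one-line addition above applies position bucketing inside `WordDocidsCachedSorters` before the word-position key is rebuilt, preserving the behavior of the removed `WordPositionDocidsExtractor::build_key`. As the removed comment noted, bucketing collapses large token positions into coarser buckets so distinct positions don't multiply the number of keys in the DB. The bucket boundaries below are an assumption for illustration only, not milli's actual `bucketed_position` implementation:

// Assumed bucketing scheme, for illustration only: keep low positions
// exact, then collapse progressively wider ranges into single buckets.
fn bucketed_position(relative: u16) -> u16 {
    if relative < 16 {
        relative // positions 0..16 are kept exact
    } else if relative < 128 {
        16 + (relative - 16) / 8 // buckets of eight: 16..=29
    } else {
        30 + relative / 256 // coarse tail buckets
    }
}

fn main() {
    assert_eq!(bucketed_position(5), 5);
    assert_eq!(bucketed_position(100), 26); // 16 + (100 - 16) / 8
    assert_eq!(bucketed_position(1_000), 33); // 30 + 1000 / 256
}

The remaining hunks below touch a second file, whose path is not preserved in this capture.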
@@ -2,7 +2,6 @@ use std::collections::VecDeque;
 use std::rc::Rc;

 use heed::RoTxn;
-use itertools::merge_join_by;
 use obkv::KvReader;

 use super::tokenize_document::DocumentTokenizer;
@@ -133,42 +133,6 @@ where
         extractor_sender.send_searchable::<FidWordCountDocids>(fid_word_count_docids).unwrap();
     }

-    // {
-    //     let span = tracing::trace_span!(target: "indexing::documents::extract", "exact_word_docids");
-    //     let _entered = span.enter();
-    //     extract_and_send_docids::<ExactWordDocidsExtractor, ExactWordDocids>(
-    //         index,
-    //         &global_fields_ids_map,
-    //         grenad_parameters,
-    //         document_changes.clone(),
-    //         &extractor_sender,
-    //     )?;
-    // }
-
-    // {
-    //     let span = tracing::trace_span!(target: "indexing::documents::extract", "word_position_docids");
-    //     let _entered = span.enter();
-    //     extract_and_send_docids::<WordPositionDocidsExtractor, WordPositionDocids>(
-    //         index,
-    //         &global_fields_ids_map,
-    //         grenad_parameters,
-    //         document_changes.clone(),
-    //         &extractor_sender,
-    //     )?;
-    // }
-
-    // {
-    //     let span = tracing::trace_span!(target: "indexing::documents::extract", "fid_word_count_docids");
-    //     let _entered = span.enter();
-    //     extract_and_send_docids::<FidWordCountDocidsExtractor, FidWordCountDocids>(
-    //         index,
-    //         &global_fields_ids_map,
-    //         GrenadParameters::default(),
-    //         document_changes.clone(),
-    //         &extractor_sender,
-    //     )?;
-    // }
-
     {
         let span = tracing::trace_span!(target: "indexing::documents::extract", "word_pair_proximity_docids");
         let _entered = span.enter();
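Taken together, the removed blanket impl encoded one uniform rule: a deletion tokenizes the stored version of the document and records del entries, an insertion tokenizes the new version and records add entries, and an update does both in sequence. A stripped-down sketch of that dispatch follows; every type here is an illustrative stand-in for the corresponding milli type (DocumentChange, CboCachedSorter, the tokenizer callback), not the real API:

// Stand-ins for milli's types; only the del/add dispatch is the point.
struct Doc(Vec<String>);

enum DocumentChange {
    Deletion { docid: u32, current: Doc },
    Update { docid: u32, current: Doc, new: Doc },
    Insertion { docid: u32, new: Doc },
}

#[derive(Default)]
struct Sorter {
    dels: Vec<(String, u32)>, // (key, docid) pairs marked for deletion
    adds: Vec<(String, u32)>, // (key, docid) pairs marked for addition
}

impl Sorter {
    fn insert_del_u32(&mut self, key: &str, docid: u32) {
        self.dels.push((key.to_owned(), docid));
    }
    fn insert_add_u32(&mut self, key: &str, docid: u32) {
        self.adds.push((key.to_owned(), docid));
    }
}

fn extract(change: DocumentChange, sorter: &mut Sorter) {
    match change {
        // Deletion: del every word of the stored version.
        DocumentChange::Deletion { docid, current } => {
            for word in &current.0 {
                sorter.insert_del_u32(word, docid);
            }
        }
        // Update: del the old words, then add the new ones.
        DocumentChange::Update { docid, current, new } => {
            for word in &current.0 {
                sorter.insert_del_u32(word, docid);
            }
            for word in &new.0 {
                sorter.insert_add_u32(word, docid);
            }
        }
        // Insertion: add every word of the new version.
        DocumentChange::Insertion { docid, new } => {
            for word in &new.0 {
                sorter.insert_add_u32(word, docid);
            }
        }
    }
}

fn main() {
    let mut sorter = Sorter::default();
    let old = Doc(vec!["hello".into(), "world".into()]);
    let new = Doc(vec!["hello".into(), "rust".into()]);
    extract(DocumentChange::Update { docid: 7, current: old, new }, &mut sorter);
    assert_eq!(sorter.dels.len(), 2); // "hello", "world" deleted
    assert_eq!(sorter.adds.len(), 2); // "hello", "rust" added
}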