mirror of
https://github.com/meilisearch/MeiliSearch
synced 2025-07-04 20:37:15 +02:00
Split position DB into fid and relative position DB
This commit is contained in:
parent
56b7209f26
commit
9b2653427d
11 changed files with 162 additions and 135 deletions
|
@ -7,14 +7,17 @@ use super::helpers::{
|
|||
};
|
||||
use crate::error::SerializationError;
|
||||
use crate::index::db_name::DOCID_WORD_POSITIONS;
|
||||
use crate::{DocumentId, Result};
|
||||
use crate::{
|
||||
absolute_from_relative_position, bucketed_position, relative_from_absolute_position,
|
||||
DocumentId, Result,
|
||||
};
|
||||
|
||||
/// Extracts the word positions and the documents ids where this word appear.
|
||||
///
|
||||
/// Returns a grenad reader with the list of extracted words at positions and
|
||||
/// documents ids from the given chunk of docid word positions.
|
||||
#[logging_timer::time]
|
||||
pub fn extract_word_position_docids<R: io::Read + io::Seek>(
|
||||
pub fn extract_word_fid_and_position_docids<R: io::Read + io::Seek>(
|
||||
docid_word_positions: grenad::Reader<R>,
|
||||
indexer: GrenadParameters,
|
||||
) -> Result<grenad::Reader<File>> {
|
||||
|
@ -39,11 +42,15 @@ pub fn extract_word_position_docids<R: io::Read + io::Seek>(
|
|||
for position in read_u32_ne_bytes(value) {
|
||||
key_buffer.clear();
|
||||
key_buffer.extend_from_slice(word_bytes);
|
||||
let (fid, position) = relative_from_absolute_position(position);
|
||||
let position = bucketed_position(position);
|
||||
let position = absolute_from_relative_position(fid, position);
|
||||
key_buffer.extend_from_slice(&position.to_be_bytes());
|
||||
|
||||
word_position_docids_sorter.insert(&key_buffer, document_id.to_ne_bytes())?;
|
||||
}
|
||||
}
|
||||
|
||||
sorter_into_reader(word_position_docids_sorter, indexer)
|
||||
let word_position_docids_reader = sorter_into_reader(word_position_docids_sorter, indexer)?;
|
||||
|
||||
Ok(word_position_docids_reader)
|
||||
}
|
||||
|
|
|
@ -23,7 +23,7 @@ use self::extract_fid_word_count_docids::extract_fid_word_count_docids;
|
|||
use self::extract_geo_points::extract_geo_points;
|
||||
use self::extract_word_docids::extract_word_docids;
|
||||
use self::extract_word_pair_proximity_docids::extract_word_pair_proximity_docids;
|
||||
use self::extract_word_position_docids::extract_word_position_docids;
|
||||
use self::extract_word_position_docids::extract_word_fid_and_position_docids;
|
||||
use super::helpers::{
|
||||
as_cloneable_grenad, merge_cbo_roaring_bitmaps, merge_roaring_bitmaps, CursorClonableMmap,
|
||||
GrenadParameters, MergeFn, MergeableReader,
|
||||
|
@ -133,7 +133,7 @@ pub(crate) fn data_from_obkv_documents(
|
|||
docid_word_positions_chunks,
|
||||
indexer,
|
||||
lmdb_writer_sx.clone(),
|
||||
extract_word_position_docids,
|
||||
extract_word_fid_and_position_docids,
|
||||
merge_cbo_roaring_bitmaps,
|
||||
TypedChunk::WordPositionDocids,
|
||||
"word-position-docids",
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue