mirror of
https://github.com/meilisearch/MeiliSearch
synced 2024-12-05 02:55:46 +01:00
clean PR warnings
This commit is contained in:
parent
20394fda04
commit
26ef0b3a07
@ -11,9 +11,7 @@ use super::interner::Interned;
|
|||||||
use super::Word;
|
use super::Word;
|
||||||
use crate::heed_codec::{BytesDecodeOwned, StrBEU16Codec};
|
use crate::heed_codec::{BytesDecodeOwned, StrBEU16Codec};
|
||||||
use crate::update::{merge_cbo_roaring_bitmaps, MergeFn};
|
use crate::update::{merge_cbo_roaring_bitmaps, MergeFn};
|
||||||
use crate::{
|
use crate::{CboRoaringBitmapCodec, CboRoaringBitmapLenCodec, Result, SearchContext};
|
||||||
CboRoaringBitmapCodec, CboRoaringBitmapLenCodec, Result, RoaringBitmapCodec, SearchContext,
|
|
||||||
};
|
|
||||||
|
|
||||||
/// A cache storing pointers to values in the LMDB databases.
|
/// A cache storing pointers to values in the LMDB databases.
|
||||||
///
|
///
|
||||||
|
@ -16,9 +16,7 @@ use crate::facet::FacetType;
|
|||||||
use crate::heed_codec::facet::FieldDocIdFacetCodec;
|
use crate::heed_codec::facet::FieldDocIdFacetCodec;
|
||||||
use crate::heed_codec::CboRoaringBitmapCodec;
|
use crate::heed_codec::CboRoaringBitmapCodec;
|
||||||
use crate::index::Hnsw;
|
use crate::index::Hnsw;
|
||||||
use crate::{
|
use crate::{ExternalDocumentsIds, FieldId, FieldIdMapMissingEntry, Index, Result, BEU32};
|
||||||
ExternalDocumentsIds, FieldId, FieldIdMapMissingEntry, Index, Result, RoaringBitmapCodec, BEU32,
|
|
||||||
};
|
|
||||||
|
|
||||||
pub struct DeleteDocuments<'t, 'u, 'i> {
|
pub struct DeleteDocuments<'t, 'u, 'i> {
|
||||||
wtxn: &'t mut heed::RwTxn<'i, 'u>,
|
wtxn: &'t mut heed::RwTxn<'i, 'u>,
|
||||||
|
@ -11,9 +11,7 @@ use serde_json::Value;
|
|||||||
use super::helpers::{create_sorter, keep_latest_obkv, sorter_into_reader, GrenadParameters};
|
use super::helpers::{create_sorter, keep_latest_obkv, sorter_into_reader, GrenadParameters};
|
||||||
use crate::error::{InternalError, SerializationError};
|
use crate::error::{InternalError, SerializationError};
|
||||||
use crate::update::index_documents::MergeFn;
|
use crate::update::index_documents::MergeFn;
|
||||||
use crate::{
|
use crate::{FieldId, Result, MAX_POSITION_PER_ATTRIBUTE, MAX_WORD_LENGTH};
|
||||||
absolute_from_relative_position, FieldId, Result, MAX_POSITION_PER_ATTRIBUTE, MAX_WORD_LENGTH,
|
|
||||||
};
|
|
||||||
|
|
||||||
pub type ScriptLanguageDocidsMap = HashMap<(Script, Language), RoaringBitmap>;
|
pub type ScriptLanguageDocidsMap = HashMap<(Script, Language), RoaringBitmap>;
|
||||||
|
|
||||||
|
@ -1,15 +1,13 @@
|
|||||||
use std::collections::{BTreeSet, HashSet};
|
use std::collections::{BTreeSet, HashSet};
|
||||||
use std::fs::File;
|
use std::fs::File;
|
||||||
use std::io;
|
use std::io;
|
||||||
use std::iter::FromIterator;
|
|
||||||
|
|
||||||
use heed::BytesDecode;
|
use heed::BytesDecode;
|
||||||
use obkv::KvReaderU16;
|
use obkv::KvReaderU16;
|
||||||
use roaring::RoaringBitmap;
|
|
||||||
|
|
||||||
use super::helpers::{
|
use super::helpers::{
|
||||||
create_sorter, create_writer, merge_cbo_roaring_bitmaps, serialize_roaring_bitmap,
|
create_sorter, create_writer, merge_cbo_roaring_bitmaps, sorter_into_reader,
|
||||||
sorter_into_reader, try_split_array_at, writer_into_reader, GrenadParameters,
|
try_split_array_at, writer_into_reader, GrenadParameters,
|
||||||
};
|
};
|
||||||
use crate::error::SerializationError;
|
use crate::error::SerializationError;
|
||||||
use crate::heed_codec::StrBEU16Codec;
|
use crate::heed_codec::StrBEU16Codec;
|
||||||
@ -43,7 +41,6 @@ pub fn extract_word_docids<R: io::Read + io::Seek>(
|
|||||||
max_memory.map(|x| x / 3),
|
max_memory.map(|x| x / 3),
|
||||||
);
|
);
|
||||||
let mut key_buffer = Vec::new();
|
let mut key_buffer = Vec::new();
|
||||||
let mut value_buffer = Vec::new();
|
|
||||||
let mut words = BTreeSet::new();
|
let mut words = BTreeSet::new();
|
||||||
let mut cursor = docid_word_positions.into_cursor()?;
|
let mut cursor = docid_word_positions.into_cursor()?;
|
||||||
while let Some((key, value)) = cursor.move_on_next()? {
|
while let Some((key, value)) = cursor.move_on_next()? {
|
||||||
@ -62,7 +59,6 @@ pub fn extract_word_docids<R: io::Read + io::Seek>(
|
|||||||
document_id,
|
document_id,
|
||||||
fid,
|
fid,
|
||||||
&mut key_buffer,
|
&mut key_buffer,
|
||||||
&mut value_buffer,
|
|
||||||
&mut words,
|
&mut words,
|
||||||
&mut word_fid_docids_sorter,
|
&mut word_fid_docids_sorter,
|
||||||
)?;
|
)?;
|
||||||
@ -120,7 +116,6 @@ fn words_into_sorter(
|
|||||||
document_id: DocumentId,
|
document_id: DocumentId,
|
||||||
fid: FieldId,
|
fid: FieldId,
|
||||||
key_buffer: &mut Vec<u8>,
|
key_buffer: &mut Vec<u8>,
|
||||||
value_buffer: &mut Vec<u8>,
|
|
||||||
words: &mut BTreeSet<Vec<u8>>,
|
words: &mut BTreeSet<Vec<u8>>,
|
||||||
word_fid_docids_sorter: &mut grenad::Sorter<MergeFn>,
|
word_fid_docids_sorter: &mut grenad::Sorter<MergeFn>,
|
||||||
) -> Result<()> {
|
) -> Result<()> {
|
||||||
|
@ -1,53 +0,0 @@
|
|||||||
use std::fs::File;
|
|
||||||
use std::io;
|
|
||||||
|
|
||||||
use super::helpers::{
|
|
||||||
create_sorter, merge_cbo_roaring_bitmaps, read_u32_ne_bytes, sorter_into_reader,
|
|
||||||
try_split_array_at, GrenadParameters,
|
|
||||||
};
|
|
||||||
use crate::error::SerializationError;
|
|
||||||
use crate::index::db_name::DOCID_WORD_POSITIONS;
|
|
||||||
use crate::{relative_from_absolute_position, DocumentId, Result};
|
|
||||||
|
|
||||||
/// Extracts the word, field id, and the documents ids where this word appear at this field id.
|
|
||||||
#[logging_timer::time]
|
|
||||||
pub fn extract_word_fid_docids<R: io::Read + io::Seek>(
|
|
||||||
docid_word_positions: grenad::Reader<R>,
|
|
||||||
indexer: GrenadParameters,
|
|
||||||
) -> Result<grenad::Reader<File>> {
|
|
||||||
puffin::profile_function!();
|
|
||||||
|
|
||||||
todo!("remove me");
|
|
||||||
|
|
||||||
let max_memory = indexer.max_memory_by_thread();
|
|
||||||
|
|
||||||
let mut word_fid_docids_sorter = create_sorter(
|
|
||||||
grenad::SortAlgorithm::Unstable,
|
|
||||||
merge_cbo_roaring_bitmaps,
|
|
||||||
indexer.chunk_compression_type,
|
|
||||||
indexer.chunk_compression_level,
|
|
||||||
indexer.max_nb_chunks,
|
|
||||||
max_memory,
|
|
||||||
);
|
|
||||||
|
|
||||||
let mut key_buffer = Vec::new();
|
|
||||||
let mut cursor = docid_word_positions.into_cursor()?;
|
|
||||||
while let Some((key, value)) = cursor.move_on_next()? {
|
|
||||||
let (document_id_bytes, word_bytes) = try_split_array_at(key)
|
|
||||||
.ok_or(SerializationError::Decoding { db_name: Some(DOCID_WORD_POSITIONS) })?;
|
|
||||||
let document_id = DocumentId::from_be_bytes(document_id_bytes);
|
|
||||||
|
|
||||||
for position in read_u32_ne_bytes(value) {
|
|
||||||
key_buffer.clear();
|
|
||||||
key_buffer.extend_from_slice(word_bytes);
|
|
||||||
key_buffer.push(0);
|
|
||||||
let (fid, _) = relative_from_absolute_position(position);
|
|
||||||
key_buffer.extend_from_slice(&fid.to_be_bytes());
|
|
||||||
word_fid_docids_sorter.insert(&key_buffer, document_id.to_ne_bytes())?;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
let word_fid_docids_reader = sorter_into_reader(word_fid_docids_sorter, indexer)?;
|
|
||||||
|
|
||||||
Ok(word_fid_docids_reader)
|
|
||||||
}
|
|
@ -6,7 +6,6 @@ mod extract_fid_word_count_docids;
|
|||||||
mod extract_geo_points;
|
mod extract_geo_points;
|
||||||
mod extract_vector_points;
|
mod extract_vector_points;
|
||||||
mod extract_word_docids;
|
mod extract_word_docids;
|
||||||
mod extract_word_fid_docids;
|
|
||||||
mod extract_word_pair_proximity_docids;
|
mod extract_word_pair_proximity_docids;
|
||||||
mod extract_word_position_docids;
|
mod extract_word_position_docids;
|
||||||
|
|
||||||
@ -25,12 +24,11 @@ use self::extract_fid_word_count_docids::extract_fid_word_count_docids;
|
|||||||
use self::extract_geo_points::extract_geo_points;
|
use self::extract_geo_points::extract_geo_points;
|
||||||
use self::extract_vector_points::extract_vector_points;
|
use self::extract_vector_points::extract_vector_points;
|
||||||
use self::extract_word_docids::extract_word_docids;
|
use self::extract_word_docids::extract_word_docids;
|
||||||
use self::extract_word_fid_docids::extract_word_fid_docids;
|
|
||||||
use self::extract_word_pair_proximity_docids::extract_word_pair_proximity_docids;
|
use self::extract_word_pair_proximity_docids::extract_word_pair_proximity_docids;
|
||||||
use self::extract_word_position_docids::extract_word_position_docids;
|
use self::extract_word_position_docids::extract_word_position_docids;
|
||||||
use super::helpers::{
|
use super::helpers::{
|
||||||
as_cloneable_grenad, merge_cbo_roaring_bitmaps, merge_roaring_bitmaps, CursorClonableMmap,
|
as_cloneable_grenad, merge_cbo_roaring_bitmaps, CursorClonableMmap, GrenadParameters, MergeFn,
|
||||||
GrenadParameters, MergeFn, MergeableReader,
|
MergeableReader,
|
||||||
};
|
};
|
||||||
use super::{helpers, TypedChunk};
|
use super::{helpers, TypedChunk};
|
||||||
use crate::{FieldId, Result};
|
use crate::{FieldId, Result};
|
||||||
@ -201,15 +199,6 @@ pub(crate) fn data_from_obkv_documents(
|
|||||||
TypedChunk::WordPositionDocids,
|
TypedChunk::WordPositionDocids,
|
||||||
"word-position-docids",
|
"word-position-docids",
|
||||||
);
|
);
|
||||||
// spawn_extraction_task::<_, _, Vec<grenad::Reader<File>>>(
|
|
||||||
// docid_word_positions_chunks,
|
|
||||||
// indexer,
|
|
||||||
// lmdb_writer_sx.clone(),
|
|
||||||
// extract_word_fid_docids,
|
|
||||||
// merge_cbo_roaring_bitmaps,
|
|
||||||
// TypedChunk::WordFidDocids,
|
|
||||||
// "word-fid-docids",
|
|
||||||
// );
|
|
||||||
|
|
||||||
spawn_extraction_task::<_, _, Vec<grenad::Reader<File>>>(
|
spawn_extraction_task::<_, _, Vec<grenad::Reader<File>>>(
|
||||||
docid_fid_facet_strings_chunks,
|
docid_fid_facet_strings_chunks,
|
||||||
|
@ -11,6 +11,7 @@ use crate::Result;
|
|||||||
|
|
||||||
pub type MergeFn = for<'a> fn(&[u8], &[Cow<'a, [u8]>]) -> Result<Cow<'a, [u8]>>;
|
pub type MergeFn = for<'a> fn(&[u8], &[Cow<'a, [u8]>]) -> Result<Cow<'a, [u8]>>;
|
||||||
|
|
||||||
|
#[allow(unused)]
|
||||||
pub fn concat_u32s_array<'a>(_key: &[u8], values: &[Cow<'a, [u8]>]) -> Result<Cow<'a, [u8]>> {
|
pub fn concat_u32s_array<'a>(_key: &[u8], values: &[Cow<'a, [u8]>]) -> Result<Cow<'a, [u8]>> {
|
||||||
if values.len() == 1 {
|
if values.len() == 1 {
|
||||||
Ok(values[0].clone())
|
Ok(values[0].clone())
|
||||||
|
@ -44,6 +44,7 @@ where
|
|||||||
Some((head, tail))
|
Some((head, tail))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[allow(unused)]
|
||||||
pub fn read_u32_ne_bytes(bytes: &[u8]) -> impl Iterator<Item = u32> + '_ {
|
pub fn read_u32_ne_bytes(bytes: &[u8]) -> impl Iterator<Item = u32> + '_ {
|
||||||
bytes.chunks_exact(4).flat_map(TryInto::try_into).map(u32::from_ne_bytes)
|
bytes.chunks_exact(4).flat_map(TryInto::try_into).map(u32::from_ne_bytes)
|
||||||
}
|
}
|
||||||
|
@ -38,7 +38,7 @@ use crate::update::{
|
|||||||
self, DeletionStrategy, IndexerConfig, PrefixWordPairsProximityDocids, UpdateIndexingStep,
|
self, DeletionStrategy, IndexerConfig, PrefixWordPairsProximityDocids, UpdateIndexingStep,
|
||||||
WordPrefixDocids, WordPrefixIntegerDocids, WordsPrefixesFst,
|
WordPrefixDocids, WordPrefixIntegerDocids, WordsPrefixesFst,
|
||||||
};
|
};
|
||||||
use crate::{CboRoaringBitmapCodec, Index, Result, RoaringBitmapCodec};
|
use crate::{CboRoaringBitmapCodec, Index, Result};
|
||||||
|
|
||||||
static MERGED_DATABASE_COUNT: usize = 7;
|
static MERGED_DATABASE_COUNT: usize = 7;
|
||||||
static PREFIX_DATABASE_COUNT: usize = 5;
|
static PREFIX_DATABASE_COUNT: usize = 5;
|
||||||
@ -434,11 +434,6 @@ where
|
|||||||
word_position_docids = Some(cloneable_chunk);
|
word_position_docids = Some(cloneable_chunk);
|
||||||
TypedChunk::WordPositionDocids(chunk)
|
TypedChunk::WordPositionDocids(chunk)
|
||||||
}
|
}
|
||||||
TypedChunk::WordFidDocids(chunk) => {
|
|
||||||
let cloneable_chunk = unsafe { as_cloneable_grenad(&chunk)? };
|
|
||||||
word_fid_docids = Some(cloneable_chunk);
|
|
||||||
TypedChunk::WordFidDocids(chunk)
|
|
||||||
}
|
|
||||||
otherwise => otherwise,
|
otherwise => otherwise,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -35,7 +35,6 @@ pub(crate) enum TypedChunk {
|
|||||||
word_fid_docids_reader: grenad::Reader<File>,
|
word_fid_docids_reader: grenad::Reader<File>,
|
||||||
},
|
},
|
||||||
WordPositionDocids(grenad::Reader<File>),
|
WordPositionDocids(grenad::Reader<File>),
|
||||||
WordFidDocids(grenad::Reader<File>),
|
|
||||||
WordPairProximityDocids(grenad::Reader<File>),
|
WordPairProximityDocids(grenad::Reader<File>),
|
||||||
FieldIdFacetStringDocids(grenad::Reader<File>),
|
FieldIdFacetStringDocids(grenad::Reader<File>),
|
||||||
FieldIdFacetNumberDocids(grenad::Reader<File>),
|
FieldIdFacetNumberDocids(grenad::Reader<File>),
|
||||||
@ -78,9 +77,6 @@ impl TypedChunk {
|
|||||||
TypedChunk::WordPositionDocids(grenad) => {
|
TypedChunk::WordPositionDocids(grenad) => {
|
||||||
format!("WordPositionDocids {{ number_of_entries: {} }}", grenad.len())
|
format!("WordPositionDocids {{ number_of_entries: {} }}", grenad.len())
|
||||||
}
|
}
|
||||||
TypedChunk::WordFidDocids(grenad) => {
|
|
||||||
format!("WordFidDocids {{ number_of_entries: {} }}", grenad.len())
|
|
||||||
}
|
|
||||||
TypedChunk::WordPairProximityDocids(grenad) => {
|
TypedChunk::WordPairProximityDocids(grenad) => {
|
||||||
format!("WordPairProximityDocids {{ number_of_entries: {} }}", grenad.len())
|
format!("WordPairProximityDocids {{ number_of_entries: {} }}", grenad.len())
|
||||||
}
|
}
|
||||||
@ -202,17 +198,6 @@ pub(crate) fn write_typed_chunk_into_index(
|
|||||||
)?;
|
)?;
|
||||||
is_merged_database = true;
|
is_merged_database = true;
|
||||||
}
|
}
|
||||||
TypedChunk::WordFidDocids(word_fid_docids_iter) => {
|
|
||||||
append_entries_into_database(
|
|
||||||
word_fid_docids_iter,
|
|
||||||
&index.word_fid_docids,
|
|
||||||
wtxn,
|
|
||||||
index_is_empty,
|
|
||||||
|value, _buffer| Ok(value),
|
|
||||||
merge_cbo_roaring_bitmaps,
|
|
||||||
)?;
|
|
||||||
is_merged_database = true;
|
|
||||||
}
|
|
||||||
TypedChunk::FieldIdFacetNumberDocids(facet_id_number_docids_iter) => {
|
TypedChunk::FieldIdFacetNumberDocids(facet_id_number_docids_iter) => {
|
||||||
let indexer = FacetsUpdate::new(index, FacetType::Number, facet_id_number_docids_iter);
|
let indexer = FacetsUpdate::new(index, FacetType::Number, facet_id_number_docids_iter);
|
||||||
indexer.execute(wtxn)?;
|
indexer.execute(wtxn)?;
|
||||||
|
@ -8,7 +8,7 @@ use crate::update::index_documents::{
|
|||||||
create_sorter, merge_cbo_roaring_bitmaps, sorter_into_lmdb_database, valid_lmdb_key,
|
create_sorter, merge_cbo_roaring_bitmaps, sorter_into_lmdb_database, valid_lmdb_key,
|
||||||
CursorClonableMmap, MergeFn,
|
CursorClonableMmap, MergeFn,
|
||||||
};
|
};
|
||||||
use crate::{CboRoaringBitmapCodec, Result, RoaringBitmapCodec};
|
use crate::{CboRoaringBitmapCodec, Result};
|
||||||
|
|
||||||
pub struct WordPrefixDocids<'t, 'u, 'i> {
|
pub struct WordPrefixDocids<'t, 'u, 'i> {
|
||||||
wtxn: &'t mut heed::RwTxn<'i, 'u>,
|
wtxn: &'t mut heed::RwTxn<'i, 'u>,
|
||||||
|
Loading…
Reference in New Issue
Block a user