mirror of
https://github.com/meilisearch/MeiliSearch
synced 2025-07-04 20:37:15 +02:00
Make the search and the indexing work
This commit is contained in:
parent
d8d12d5979
commit
9c485f8563
7 changed files with 197 additions and 4 deletions
|
@ -28,6 +28,8 @@ pub fn extract_docid_word_positions<R: io::Read + io::Seek>(
|
|||
indexer: GrenadParameters,
|
||||
searchable_fields: &Option<HashSet<FieldId>>,
|
||||
stop_words: Option<&fst::Set<&[u8]>>,
|
||||
allowed_separators: Option<&Vec<&str>>,
|
||||
dictionary: Option<&Vec<&str>>,
|
||||
max_positions_per_attributes: Option<u32>,
|
||||
) -> Result<(RoaringBitmap, grenad::Reader<File>, ScriptLanguageDocidsMap)> {
|
||||
puffin::profile_function!();
|
||||
|
@ -52,6 +54,14 @@ pub fn extract_docid_word_positions<R: io::Read + io::Seek>(
|
|||
if let Some(stop_words) = stop_words {
|
||||
tokenizer_builder.stop_words(stop_words);
|
||||
}
|
||||
if let Some(dictionary) = dictionary {
|
||||
// let dictionary: Vec<_> = dictionary.iter().map(String::as_str).collect();
|
||||
tokenizer_builder.words_dict(dictionary.as_slice());
|
||||
}
|
||||
if let Some(separators) = allowed_separators {
|
||||
// let separators: Vec<_> = separators.iter().map(String::as_str).collect();
|
||||
tokenizer_builder.separators(separators.as_slice());
|
||||
}
|
||||
let tokenizer = tokenizer_builder.build();
|
||||
|
||||
let mut cursor = obkv_documents.into_cursor()?;
|
||||
|
|
|
@ -49,6 +49,8 @@ pub(crate) fn data_from_obkv_documents(
|
|||
geo_fields_ids: Option<(FieldId, FieldId)>,
|
||||
vectors_field_id: Option<FieldId>,
|
||||
stop_words: Option<fst::Set<&[u8]>>,
|
||||
allowed_separators: Option<Vec<&str>>,
|
||||
dictionary: Option<Vec<&str>>,
|
||||
max_positions_per_attributes: Option<u32>,
|
||||
exact_attributes: HashSet<FieldId>,
|
||||
) -> Result<()> {
|
||||
|
@ -76,6 +78,8 @@ pub(crate) fn data_from_obkv_documents(
|
|||
geo_fields_ids,
|
||||
vectors_field_id,
|
||||
&stop_words,
|
||||
&allowed_separators,
|
||||
&dictionary,
|
||||
max_positions_per_attributes,
|
||||
)
|
||||
})
|
||||
|
@ -289,6 +293,8 @@ fn send_and_extract_flattened_documents_data(
|
|||
geo_fields_ids: Option<(FieldId, FieldId)>,
|
||||
vectors_field_id: Option<FieldId>,
|
||||
stop_words: &Option<fst::Set<&[u8]>>,
|
||||
allowed_separators: &Option<Vec<&str>>,
|
||||
dictionary: &Option<Vec<&str>>,
|
||||
max_positions_per_attributes: Option<u32>,
|
||||
) -> Result<(
|
||||
grenad::Reader<CursorClonableMmap>,
|
||||
|
@ -344,6 +350,8 @@ fn send_and_extract_flattened_documents_data(
|
|||
indexer,
|
||||
searchable_fields,
|
||||
stop_words.as_ref(),
|
||||
allowed_separators.as_ref(),
|
||||
dictionary.as_ref(),
|
||||
max_positions_per_attributes,
|
||||
)?;
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue