mirror of
https://github.com/meilisearch/MeiliSearch
synced 2024-11-26 23:04:26 +01:00
Skip (script, language) insertion if the language is undetected
This commit is contained in:
parent
2d58b28f43
commit
d8207356f4
@@ -71,12 +71,13 @@ pub fn extract_docid_word_positions<R: io::Read + io::Seek>(
|
|||||||
.take_while(|(p, _)| (*p as u32) < max_positions_per_attributes);
|
.take_while(|(p, _)| (*p as u32) < max_positions_per_attributes);
|
||||||
|
|
||||||
for (index, token) in tokens {
|
for (index, token) in tokens {
|
||||||
let script = token.script;
|
if let Some(language) = token.language {
|
||||||
let language = token.language.unwrap_or_default();
|
let script = token.script;
|
||||||
let entry = script_language_pair
|
let entry = script_language_pair
|
||||||
.entry((script, language))
|
.entry((script, language))
|
||||||
.or_insert_with(RoaringBitmap::new);
|
.or_insert_with(RoaringBitmap::new);
|
||||||
entry.push(document_id);
|
entry.push(document_id);
|
||||||
|
}
|
||||||
let token = token.lemma().trim();
|
let token = token.lemma().trim();
|
||||||
if !token.is_empty() && token.len() <= MAX_WORD_LENGTH {
|
if !token.is_empty() && token.len() <= MAX_WORD_LENGTH {
|
||||||
key_buffer.truncate(mem::size_of::<u32>());
|
key_buffer.truncate(mem::size_of::<u32>());
|
||||||
|
Loading…
Reference in New Issue
Block a user