mirror of
https://github.com/meilisearch/MeiliSearch
synced 2024-12-01 17:15:46 +01:00
Fix typos
This commit is contained in:
parent
7e2fd82e41
commit
b4b859ec8c
@@ -375,15 +375,15 @@ pub fn perform_search(
|
|||||||
&displayed_ids,
|
&displayed_ids,
|
||||||
);
|
);
|
||||||
|
|
||||||
let mut tokenizer_buidler = TokenizerBuilder::default();
|
let mut tokenizer_builder = TokenizerBuilder::default();
|
||||||
tokenizer_buidler.create_char_map(true);
|
tokenizer_builder.create_char_map(true);
|
||||||
|
|
||||||
let script_lang_map = index.script_language(&rtxn)?;
|
let script_lang_map = index.script_language(&rtxn)?;
|
||||||
if !script_lang_map.is_empty() {
|
if !script_lang_map.is_empty() {
|
||||||
tokenizer_buidler.allow_list(&script_lang_map);
|
tokenizer_builder.allow_list(&script_lang_map);
|
||||||
}
|
}
|
||||||
|
|
||||||
let mut formatter_builder = MatcherBuilder::new(matching_words, tokenizer_buidler.build());
|
let mut formatter_builder = MatcherBuilder::new(matching_words, tokenizer_builder.build());
|
||||||
formatter_builder.crop_marker(query.crop_marker);
|
formatter_builder.crop_marker(query.crop_marker);
|
||||||
formatter_builder.highlight_prefix(query.highlight_pre_tag);
|
formatter_builder.highlight_prefix(query.highlight_pre_tag);
|
||||||
formatter_builder.highlight_suffix(query.highlight_post_tag);
|
formatter_builder.highlight_suffix(query.highlight_post_tag);
|
||||||
|
@@ -1224,7 +1224,7 @@ impl Index {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
let threshold = total / 20; // 5% (arbitrar)
|
let threshold = total / 20; // 5% (arbitrary)
|
||||||
for (script, language, count) in script_language_doc_count {
|
for (script, language, count) in script_language_doc_count {
|
||||||
if count > threshold {
|
if count > threshold {
|
||||||
if let Some(languages) = script_language.get_mut(&script) {
|
if let Some(languages) = script_language.get_mut(&script) {
|
||||||
|
@@ -89,7 +89,7 @@ pub fn extract_docid_word_positions<R: io::Read + io::Seek>(
|
|||||||
// if the allow list is empty, meaning that no Language is considered frequent,
|
// if the allow list is empty, meaning that no Language is considered frequent,
|
||||||
// then we don't rerun the extraction.
|
// then we don't rerun the extraction.
|
||||||
if !script_language.is_empty() {
|
if !script_language.is_empty() {
|
||||||
// build a new temporar tokenizer including the allow list.
|
// build a new temporary tokenizer including the allow list.
|
||||||
let mut tokenizer_builder = TokenizerBuilder::new();
|
let mut tokenizer_builder = TokenizerBuilder::new();
|
||||||
if let Some(stop_words) = stop_words {
|
if let Some(stop_words) = stop_words {
|
||||||
tokenizer_builder.stop_words(stop_words);
|
tokenizer_builder.stop_words(stop_words);
|
||||||
@@ -260,7 +260,7 @@ fn process_tokens<'a>(
|
|||||||
|
|
||||||
fn potential_language_detection_error(languages_frequency: &Vec<(Language, usize)>) -> bool {
|
fn potential_language_detection_error(languages_frequency: &Vec<(Language, usize)>) -> bool {
|
||||||
if languages_frequency.len() > 1 {
|
if languages_frequency.len() > 1 {
|
||||||
let threshold = compute_laguage_frequency_threshold(languages_frequency);
|
let threshold = compute_language_frequency_threshold(languages_frequency);
|
||||||
languages_frequency.iter().any(|(_, c)| *c <= threshold)
|
languages_frequency.iter().any(|(_, c)| *c <= threshold)
|
||||||
} else {
|
} else {
|
||||||
false
|
false
|
||||||
@@ -271,7 +271,7 @@ fn most_frequent_languages(
|
|||||||
(script, languages_frequency): (&Script, &Vec<(Language, usize)>),
|
(script, languages_frequency): (&Script, &Vec<(Language, usize)>),
|
||||||
) -> Option<(Script, Vec<Language>)> {
|
) -> Option<(Script, Vec<Language>)> {
|
||||||
if languages_frequency.len() > 1 {
|
if languages_frequency.len() > 1 {
|
||||||
let threshold = compute_laguage_frequency_threshold(languages_frequency);
|
let threshold = compute_language_frequency_threshold(languages_frequency);
|
||||||
|
|
||||||
let languages: Vec<_> =
|
let languages: Vec<_> =
|
||||||
languages_frequency.iter().filter(|(_, c)| *c > threshold).map(|(l, _)| *l).collect();
|
languages_frequency.iter().filter(|(_, c)| *c > threshold).map(|(l, _)| *l).collect();
|
||||||
@@ -286,7 +286,7 @@ fn most_frequent_languages(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn compute_laguage_frequency_threshold(languages_frequency: &[(Language, usize)]) -> usize {
|
fn compute_language_frequency_threshold(languages_frequency: &[(Language, usize)]) -> usize {
|
||||||
let total: usize = languages_frequency.iter().map(|(_, c)| c).sum();
|
let total: usize = languages_frequency.iter().map(|(_, c)| c).sum();
|
||||||
total / 10 // 10% is a completely arbitrar value.
|
total / 10 // 10% is a completely arbitrary value.
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user