mirror of
https://github.com/meilisearch/MeiliSearch
synced 2024-11-22 21:04:27 +01:00
Implement localized search
This commit is contained in:
parent
d82f8fd904
commit
90c0a6db7d
@ -256,6 +256,7 @@ InvalidSearchCropLength , InvalidRequest , BAD_REQUEST ;
|
|||||||
InvalidSearchCropMarker , InvalidRequest , BAD_REQUEST ;
|
InvalidSearchCropMarker , InvalidRequest , BAD_REQUEST ;
|
||||||
InvalidSearchFacets , InvalidRequest , BAD_REQUEST ;
|
InvalidSearchFacets , InvalidRequest , BAD_REQUEST ;
|
||||||
InvalidSearchSemanticRatio , InvalidRequest , BAD_REQUEST ;
|
InvalidSearchSemanticRatio , InvalidRequest , BAD_REQUEST ;
|
||||||
|
InvalidSearchLocales , InvalidRequest , BAD_REQUEST ;
|
||||||
InvalidFacetSearchFacetName , InvalidRequest , BAD_REQUEST ;
|
InvalidFacetSearchFacetName , InvalidRequest , BAD_REQUEST ;
|
||||||
InvalidSimilarId , InvalidRequest , BAD_REQUEST ;
|
InvalidSimilarId , InvalidRequest , BAD_REQUEST ;
|
||||||
InvalidSearchFilter , InvalidRequest , BAD_REQUEST ;
|
InvalidSearchFilter , InvalidRequest , BAD_REQUEST ;
|
||||||
|
@ -7,6 +7,7 @@ pub mod features;
|
|||||||
pub mod index_uid;
|
pub mod index_uid;
|
||||||
pub mod index_uid_pattern;
|
pub mod index_uid_pattern;
|
||||||
pub mod keys;
|
pub mod keys;
|
||||||
|
pub mod locales;
|
||||||
pub mod settings;
|
pub mod settings;
|
||||||
pub mod star_or;
|
pub mod star_or;
|
||||||
pub mod task_view;
|
pub mod task_view;
|
||||||
|
132
meilisearch-types/src/locales.rs
Normal file
132
meilisearch-types/src/locales.rs
Normal file
@ -0,0 +1,132 @@
|
|||||||
|
use deserr::Deserr;
|
||||||
|
use serde::{Deserialize, Serialize};
|
||||||
|
use serde_json::json;
|
||||||
|
|
||||||
|
use milli::LocalizedAttributesRule;
|
||||||
|
|
||||||
|
/// Generate a Locale enum and its From and Into implementations for milli::tokenizer::Language.
|
||||||
|
///
|
||||||
|
/// this enum implements `Deserr` in order to be used in the API.
|
||||||
|
macro_rules! make_locale {
|
||||||
|
|
||||||
|
($($language:tt), +) => {
|
||||||
|
#[derive(Debug, Copy, Clone, PartialEq, Eq, Deserr, Serialize, Deserialize, Ord, PartialOrd)]
|
||||||
|
#[deserr(rename_all = camelCase)]
|
||||||
|
#[serde(rename_all = "camelCase")]
|
||||||
|
pub enum Locale {
|
||||||
|
$($language),+,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl From<milli::tokenizer::Language> for Locale {
|
||||||
|
fn from(other: milli::tokenizer::Language) -> Locale {
|
||||||
|
match other {
|
||||||
|
$(milli::tokenizer::Language::$language => Locale::$language), +
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl From<Locale> for milli::tokenizer::Language {
|
||||||
|
fn from(other: Locale) -> milli::tokenizer::Language {
|
||||||
|
match other {
|
||||||
|
$(Locale::$language => milli::tokenizer::Language::$language), +,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Debug)]
|
||||||
|
pub struct LocaleFormatError {
|
||||||
|
pub invalid_locale: String,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl std::fmt::Display for LocaleFormatError {
|
||||||
|
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||||
|
let valid_locales = [$(Locale::$language),+].iter().map(|l| format!("`{}`", json!(l).as_str().unwrap())).collect::<Vec<_>>().join(", ");
|
||||||
|
write!(f, "Unknown value `{}`, expected one of {}", self.invalid_locale, valid_locales)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl std::error::Error for LocaleFormatError {}
|
||||||
|
|
||||||
|
impl std::str::FromStr for Locale {
|
||||||
|
type Err = LocaleFormatError;
|
||||||
|
|
||||||
|
fn from_str(s: &str) -> Result<Self, Self::Err> {
|
||||||
|
milli::tokenizer::Language::from_code(s).map(Self::from).ok_or(LocaleFormatError {
|
||||||
|
invalid_locale: s.to_string(),
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
make_locale! {
|
||||||
|
Epo,
|
||||||
|
Eng,
|
||||||
|
Rus,
|
||||||
|
Cmn,
|
||||||
|
Spa,
|
||||||
|
Por,
|
||||||
|
Ita,
|
||||||
|
Ben,
|
||||||
|
Fra,
|
||||||
|
Deu,
|
||||||
|
Ukr,
|
||||||
|
Kat,
|
||||||
|
Ara,
|
||||||
|
Hin,
|
||||||
|
Jpn,
|
||||||
|
Heb,
|
||||||
|
Yid,
|
||||||
|
Pol,
|
||||||
|
Amh,
|
||||||
|
Jav,
|
||||||
|
Kor,
|
||||||
|
Nob,
|
||||||
|
Dan,
|
||||||
|
Swe,
|
||||||
|
Fin,
|
||||||
|
Tur,
|
||||||
|
Nld,
|
||||||
|
Hun,
|
||||||
|
Ces,
|
||||||
|
Ell,
|
||||||
|
Bul,
|
||||||
|
Bel,
|
||||||
|
Mar,
|
||||||
|
Kan,
|
||||||
|
Ron,
|
||||||
|
Slv,
|
||||||
|
Hrv,
|
||||||
|
Srp,
|
||||||
|
Mkd,
|
||||||
|
Lit,
|
||||||
|
Lav,
|
||||||
|
Est,
|
||||||
|
Tam,
|
||||||
|
Vie,
|
||||||
|
Urd,
|
||||||
|
Tha,
|
||||||
|
Guj,
|
||||||
|
Uzb,
|
||||||
|
Pan,
|
||||||
|
Aze,
|
||||||
|
Ind,
|
||||||
|
Tel,
|
||||||
|
Pes,
|
||||||
|
Mal,
|
||||||
|
Ori,
|
||||||
|
Mya,
|
||||||
|
Nep,
|
||||||
|
Sin,
|
||||||
|
Khm,
|
||||||
|
Tuk,
|
||||||
|
Aka,
|
||||||
|
Zul,
|
||||||
|
Sna,
|
||||||
|
Afr,
|
||||||
|
Lat,
|
||||||
|
Slk,
|
||||||
|
Cat,
|
||||||
|
Tgl,
|
||||||
|
Hye
|
||||||
|
}
|
@ -1,4 +1,4 @@
|
|||||||
use std::collections::{BinaryHeap, HashMap, HashSet};
|
use std::collections::{BTreeSet, BinaryHeap, HashMap, HashSet};
|
||||||
use std::fs;
|
use std::fs;
|
||||||
use std::mem::take;
|
use std::mem::take;
|
||||||
use std::path::{Path, PathBuf};
|
use std::path::{Path, PathBuf};
|
||||||
@ -10,6 +10,7 @@ use actix_web::HttpRequest;
|
|||||||
use byte_unit::Byte;
|
use byte_unit::Byte;
|
||||||
use index_scheduler::IndexScheduler;
|
use index_scheduler::IndexScheduler;
|
||||||
use meilisearch_auth::{AuthController, AuthFilter};
|
use meilisearch_auth::{AuthController, AuthFilter};
|
||||||
|
use meilisearch_types::locales::Locale;
|
||||||
use meilisearch_types::InstanceUid;
|
use meilisearch_types::InstanceUid;
|
||||||
use once_cell::sync::Lazy;
|
use once_cell::sync::Lazy;
|
||||||
use regex::Regex;
|
use regex::Regex;
|
||||||
@ -653,6 +654,9 @@ pub struct SearchAggregator {
|
|||||||
// every time a search is done, we increment the counter linked to the used settings
|
// every time a search is done, we increment the counter linked to the used settings
|
||||||
matching_strategy: HashMap<String, usize>,
|
matching_strategy: HashMap<String, usize>,
|
||||||
|
|
||||||
|
// List of the unique Locales passed as parameter
|
||||||
|
locales: BTreeSet<Locale>,
|
||||||
|
|
||||||
// pagination
|
// pagination
|
||||||
max_limit: usize,
|
max_limit: usize,
|
||||||
max_offset: usize,
|
max_offset: usize,
|
||||||
@ -707,6 +711,7 @@ impl SearchAggregator {
|
|||||||
attributes_to_search_on,
|
attributes_to_search_on,
|
||||||
hybrid,
|
hybrid,
|
||||||
ranking_score_threshold,
|
ranking_score_threshold,
|
||||||
|
locales,
|
||||||
} = query;
|
} = query;
|
||||||
|
|
||||||
let mut ret = Self::default();
|
let mut ret = Self::default();
|
||||||
@ -774,6 +779,10 @@ impl SearchAggregator {
|
|||||||
|
|
||||||
ret.matching_strategy.insert(format!("{:?}", matching_strategy), 1);
|
ret.matching_strategy.insert(format!("{:?}", matching_strategy), 1);
|
||||||
|
|
||||||
|
if let Some(locales) = locales {
|
||||||
|
ret.locales = locales.into_iter().copied().collect();
|
||||||
|
}
|
||||||
|
|
||||||
ret.highlight_pre_tag = *highlight_pre_tag != DEFAULT_HIGHLIGHT_PRE_TAG();
|
ret.highlight_pre_tag = *highlight_pre_tag != DEFAULT_HIGHLIGHT_PRE_TAG();
|
||||||
ret.highlight_post_tag = *highlight_post_tag != DEFAULT_HIGHLIGHT_POST_TAG();
|
ret.highlight_post_tag = *highlight_post_tag != DEFAULT_HIGHLIGHT_POST_TAG();
|
||||||
ret.crop_marker = *crop_marker != DEFAULT_CROP_MARKER();
|
ret.crop_marker = *crop_marker != DEFAULT_CROP_MARKER();
|
||||||
@ -859,6 +868,7 @@ impl SearchAggregator {
|
|||||||
total_degraded,
|
total_degraded,
|
||||||
total_used_negative_operator,
|
total_used_negative_operator,
|
||||||
ranking_score_threshold,
|
ranking_score_threshold,
|
||||||
|
ref mut locales,
|
||||||
} = other;
|
} = other;
|
||||||
|
|
||||||
if self.timestamp.is_none() {
|
if self.timestamp.is_none() {
|
||||||
@ -947,6 +957,9 @@ impl SearchAggregator {
|
|||||||
self.show_ranking_score |= show_ranking_score;
|
self.show_ranking_score |= show_ranking_score;
|
||||||
self.show_ranking_score_details |= show_ranking_score_details;
|
self.show_ranking_score_details |= show_ranking_score_details;
|
||||||
self.ranking_score_threshold |= ranking_score_threshold;
|
self.ranking_score_threshold |= ranking_score_threshold;
|
||||||
|
|
||||||
|
// locales
|
||||||
|
self.locales.append(locales);
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn into_event(self, user: &User, event_name: &str) -> Option<Track> {
|
pub fn into_event(self, user: &User, event_name: &str) -> Option<Track> {
|
||||||
@ -991,6 +1004,7 @@ impl SearchAggregator {
|
|||||||
total_degraded,
|
total_degraded,
|
||||||
total_used_negative_operator,
|
total_used_negative_operator,
|
||||||
ranking_score_threshold,
|
ranking_score_threshold,
|
||||||
|
locales,
|
||||||
} = self;
|
} = self;
|
||||||
|
|
||||||
if total_received == 0 {
|
if total_received == 0 {
|
||||||
@ -1060,6 +1074,7 @@ impl SearchAggregator {
|
|||||||
"matching_strategy": {
|
"matching_strategy": {
|
||||||
"most_used_strategy": matching_strategy.iter().max_by_key(|(_, v)| *v).map(|(k, _)| json!(k)).unwrap_or_else(|| json!(null)),
|
"most_used_strategy": matching_strategy.iter().max_by_key(|(_, v)| *v).map(|(k, _)| json!(k)).unwrap_or_else(|| json!(null)),
|
||||||
},
|
},
|
||||||
|
"locales": locales,
|
||||||
"scoring": {
|
"scoring": {
|
||||||
"show_ranking_score": show_ranking_score,
|
"show_ranking_score": show_ranking_score,
|
||||||
"show_ranking_score_details": show_ranking_score_details,
|
"show_ranking_score_details": show_ranking_score_details,
|
||||||
@ -1150,6 +1165,7 @@ impl MultiSearchAggregator {
|
|||||||
attributes_to_search_on: _,
|
attributes_to_search_on: _,
|
||||||
hybrid: _,
|
hybrid: _,
|
||||||
ranking_score_threshold: _,
|
ranking_score_threshold: _,
|
||||||
|
locales: _,
|
||||||
} = query;
|
} = query;
|
||||||
|
|
||||||
index_uid.as_str()
|
index_uid.as_str()
|
||||||
@ -1307,6 +1323,7 @@ impl FacetSearchAggregator {
|
|||||||
attributes_to_search_on,
|
attributes_to_search_on,
|
||||||
hybrid,
|
hybrid,
|
||||||
ranking_score_threshold,
|
ranking_score_threshold,
|
||||||
|
locales,
|
||||||
} = query;
|
} = query;
|
||||||
|
|
||||||
let mut ret = Self::default();
|
let mut ret = Self::default();
|
||||||
@ -1322,7 +1339,8 @@ impl FacetSearchAggregator {
|
|||||||
|| *matching_strategy != MatchingStrategy::default()
|
|| *matching_strategy != MatchingStrategy::default()
|
||||||
|| attributes_to_search_on.is_some()
|
|| attributes_to_search_on.is_some()
|
||||||
|| hybrid.is_some()
|
|| hybrid.is_some()
|
||||||
|| ranking_score_threshold.is_some();
|
|| ranking_score_threshold.is_some()
|
||||||
|
|| locales.is_some();
|
||||||
|
|
||||||
ret
|
ret
|
||||||
}
|
}
|
||||||
|
@ -7,6 +7,7 @@ use meilisearch_types::deserr::{DeserrJsonError, DeserrQueryParamError};
|
|||||||
use meilisearch_types::error::deserr_codes::*;
|
use meilisearch_types::error::deserr_codes::*;
|
||||||
use meilisearch_types::error::ResponseError;
|
use meilisearch_types::error::ResponseError;
|
||||||
use meilisearch_types::index_uid::IndexUid;
|
use meilisearch_types::index_uid::IndexUid;
|
||||||
|
use meilisearch_types::locales::Locale;
|
||||||
use meilisearch_types::milli;
|
use meilisearch_types::milli;
|
||||||
use meilisearch_types::serde_cs::vec::CS;
|
use meilisearch_types::serde_cs::vec::CS;
|
||||||
use serde_json::Value;
|
use serde_json::Value;
|
||||||
@ -89,6 +90,8 @@ pub struct SearchQueryGet {
|
|||||||
pub hybrid_semantic_ratio: Option<SemanticRatioGet>,
|
pub hybrid_semantic_ratio: Option<SemanticRatioGet>,
|
||||||
#[deserr(default, error = DeserrQueryParamError<InvalidSearchRankingScoreThreshold>)]
|
#[deserr(default, error = DeserrQueryParamError<InvalidSearchRankingScoreThreshold>)]
|
||||||
pub ranking_score_threshold: Option<RankingScoreThresholdGet>,
|
pub ranking_score_threshold: Option<RankingScoreThresholdGet>,
|
||||||
|
#[deserr(default, error = DeserrQueryParamError<InvalidSearchLocales>)]
|
||||||
|
pub locales: Option<CS<Locale>>,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug, Clone, Copy, PartialEq, deserr::Deserr)]
|
#[derive(Debug, Clone, Copy, PartialEq, deserr::Deserr)]
|
||||||
@ -175,6 +178,7 @@ impl From<SearchQueryGet> for SearchQuery {
|
|||||||
attributes_to_search_on: other.attributes_to_search_on.map(|o| o.into_iter().collect()),
|
attributes_to_search_on: other.attributes_to_search_on.map(|o| o.into_iter().collect()),
|
||||||
hybrid,
|
hybrid,
|
||||||
ranking_score_threshold: other.ranking_score_threshold.map(|o| o.0),
|
ranking_score_threshold: other.ranking_score_threshold.map(|o| o.0),
|
||||||
|
locales: other.locales.map(|o| o.into_iter().collect()),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -380,9 +380,6 @@ pub fn perform_federated_search(
|
|||||||
|
|
||||||
let criteria = index.criteria(&rtxn)?;
|
let criteria = index.criteria(&rtxn)?;
|
||||||
|
|
||||||
// stuff we need for the hitmaker
|
|
||||||
let script_lang_map = index.script_language(&rtxn)?;
|
|
||||||
|
|
||||||
let dictionary = index.dictionary(&rtxn)?;
|
let dictionary = index.dictionary(&rtxn)?;
|
||||||
let dictionary: Option<Vec<_>> =
|
let dictionary: Option<Vec<_>> =
|
||||||
dictionary.as_ref().map(|x| x.iter().map(String::as_str).collect());
|
dictionary.as_ref().map(|x| x.iter().map(String::as_str).collect());
|
||||||
@ -494,6 +491,7 @@ pub fn perform_federated_search(
|
|||||||
sort: query.sort,
|
sort: query.sort,
|
||||||
show_ranking_score: query.show_ranking_score,
|
show_ranking_score: query.show_ranking_score,
|
||||||
show_ranking_score_details: query.show_ranking_score_details,
|
show_ranking_score_details: query.show_ranking_score_details,
|
||||||
|
locales: query.locales.map(|l| l.iter().copied().map(Into::into).collect()),
|
||||||
};
|
};
|
||||||
|
|
||||||
let milli::SearchResult {
|
let milli::SearchResult {
|
||||||
@ -509,11 +507,7 @@ pub fn perform_federated_search(
|
|||||||
degraded |= query_degraded;
|
degraded |= query_degraded;
|
||||||
used_negative_operator |= query_used_negative_operator;
|
used_negative_operator |= query_used_negative_operator;
|
||||||
|
|
||||||
let tokenizer = HitMaker::tokenizer(
|
let tokenizer = HitMaker::tokenizer(dictionary.as_deref(), separators.as_deref());
|
||||||
&script_lang_map,
|
|
||||||
dictionary.as_deref(),
|
|
||||||
separators.as_deref(),
|
|
||||||
);
|
|
||||||
|
|
||||||
let formatter_builder = HitMaker::formatter_builder(matching_words, tokenizer);
|
let formatter_builder = HitMaker::formatter_builder(matching_words, tokenizer);
|
||||||
|
|
||||||
|
@ -1,6 +1,6 @@
|
|||||||
use core::fmt;
|
use core::fmt;
|
||||||
use std::cmp::min;
|
use std::cmp::min;
|
||||||
use std::collections::{BTreeMap, BTreeSet, HashMap, HashSet};
|
use std::collections::{BTreeMap, BTreeSet, HashSet};
|
||||||
use std::str::FromStr;
|
use std::str::FromStr;
|
||||||
use std::sync::Arc;
|
use std::sync::Arc;
|
||||||
use std::time::{Duration, Instant};
|
use std::time::{Duration, Instant};
|
||||||
@ -15,16 +15,17 @@ use meilisearch_types::error::deserr_codes::*;
|
|||||||
use meilisearch_types::error::{Code, ResponseError};
|
use meilisearch_types::error::{Code, ResponseError};
|
||||||
use meilisearch_types::heed::RoTxn;
|
use meilisearch_types::heed::RoTxn;
|
||||||
use meilisearch_types::index_uid::IndexUid;
|
use meilisearch_types::index_uid::IndexUid;
|
||||||
|
use meilisearch_types::locales::Locale;
|
||||||
use meilisearch_types::milli::score_details::{ScoreDetails, ScoringStrategy};
|
use meilisearch_types::milli::score_details::{ScoreDetails, ScoringStrategy};
|
||||||
use meilisearch_types::milli::vector::parsed_vectors::ExplicitVectors;
|
use meilisearch_types::milli::vector::parsed_vectors::ExplicitVectors;
|
||||||
use meilisearch_types::milli::vector::Embedder;
|
use meilisearch_types::milli::vector::Embedder;
|
||||||
use meilisearch_types::milli::{FacetValueHit, OrderBy, SearchForFacetValues, TimeBudget};
|
use meilisearch_types::milli::{FacetValueHit, OrderBy, SearchForFacetValues, TimeBudget};
|
||||||
use meilisearch_types::settings::DEFAULT_PAGINATION_MAX_TOTAL_HITS;
|
use meilisearch_types::settings::DEFAULT_PAGINATION_MAX_TOTAL_HITS;
|
||||||
use meilisearch_types::{milli, Document};
|
use meilisearch_types::{milli, Document};
|
||||||
use milli::tokenizer::TokenizerBuilder;
|
use milli::tokenizer::{Language, TokenizerBuilder};
|
||||||
use milli::{
|
use milli::{
|
||||||
AscDesc, FieldId, FieldsIdsMap, Filter, FormatOptions, Index, MatchBounds, MatcherBuilder,
|
AscDesc, FieldId, FieldsIdsMap, Filter, FormatOptions, Index, LocalizedAttributesRule,
|
||||||
SortError, TermsMatchingStrategy, DEFAULT_VALUES_PER_FACET,
|
MatchBounds, MatcherBuilder, SortError, TermsMatchingStrategy, DEFAULT_VALUES_PER_FACET,
|
||||||
};
|
};
|
||||||
use regex::Regex;
|
use regex::Regex;
|
||||||
use serde::Serialize;
|
use serde::Serialize;
|
||||||
@ -100,6 +101,8 @@ pub struct SearchQuery {
|
|||||||
pub attributes_to_search_on: Option<Vec<String>>,
|
pub attributes_to_search_on: Option<Vec<String>>,
|
||||||
#[deserr(default, error = DeserrJsonError<InvalidSearchRankingScoreThreshold>, default)]
|
#[deserr(default, error = DeserrJsonError<InvalidSearchRankingScoreThreshold>, default)]
|
||||||
pub ranking_score_threshold: Option<RankingScoreThreshold>,
|
pub ranking_score_threshold: Option<RankingScoreThreshold>,
|
||||||
|
#[deserr(default, error = DeserrJsonError<InvalidSearchLocales>, default)]
|
||||||
|
pub locales: Option<Vec<Locale>>,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug, Clone, Copy, PartialEq, Deserr)]
|
#[derive(Debug, Clone, Copy, PartialEq, Deserr)]
|
||||||
@ -169,6 +172,7 @@ impl fmt::Debug for SearchQuery {
|
|||||||
matching_strategy,
|
matching_strategy,
|
||||||
attributes_to_search_on,
|
attributes_to_search_on,
|
||||||
ranking_score_threshold,
|
ranking_score_threshold,
|
||||||
|
locales,
|
||||||
} = self;
|
} = self;
|
||||||
|
|
||||||
let mut debug = f.debug_struct("SearchQuery");
|
let mut debug = f.debug_struct("SearchQuery");
|
||||||
@ -250,6 +254,10 @@ impl fmt::Debug for SearchQuery {
|
|||||||
debug.field("ranking_score_threshold", &ranking_score_threshold);
|
debug.field("ranking_score_threshold", &ranking_score_threshold);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if let Some(locales) = locales {
|
||||||
|
debug.field("locales", &locales);
|
||||||
|
}
|
||||||
|
|
||||||
debug.finish()
|
debug.finish()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -425,6 +433,8 @@ pub struct SearchQueryWithIndex {
|
|||||||
pub attributes_to_search_on: Option<Vec<String>>,
|
pub attributes_to_search_on: Option<Vec<String>>,
|
||||||
#[deserr(default, error = DeserrJsonError<InvalidSearchRankingScoreThreshold>, default)]
|
#[deserr(default, error = DeserrJsonError<InvalidSearchRankingScoreThreshold>, default)]
|
||||||
pub ranking_score_threshold: Option<RankingScoreThreshold>,
|
pub ranking_score_threshold: Option<RankingScoreThreshold>,
|
||||||
|
#[deserr(default, error = DeserrJsonError<InvalidSearchLocales>, default)]
|
||||||
|
pub locales: Option<Vec<Locale>>,
|
||||||
|
|
||||||
#[deserr(default)]
|
#[deserr(default)]
|
||||||
pub federation_options: Option<FederationOptions>,
|
pub federation_options: Option<FederationOptions>,
|
||||||
@ -477,6 +487,7 @@ impl SearchQueryWithIndex {
|
|||||||
attributes_to_search_on,
|
attributes_to_search_on,
|
||||||
hybrid,
|
hybrid,
|
||||||
ranking_score_threshold,
|
ranking_score_threshold,
|
||||||
|
locales,
|
||||||
} = self;
|
} = self;
|
||||||
(
|
(
|
||||||
index_uid,
|
index_uid,
|
||||||
@ -506,6 +517,7 @@ impl SearchQueryWithIndex {
|
|||||||
attributes_to_search_on,
|
attributes_to_search_on,
|
||||||
hybrid,
|
hybrid,
|
||||||
ranking_score_threshold,
|
ranking_score_threshold,
|
||||||
|
locales,
|
||||||
// do not use ..Default::default() here,
|
// do not use ..Default::default() here,
|
||||||
// rather add any missing field from `SearchQuery` to `SearchQueryWithIndex`
|
// rather add any missing field from `SearchQuery` to `SearchQueryWithIndex`
|
||||||
},
|
},
|
||||||
@ -866,6 +878,10 @@ fn prepare_search<'t>(
|
|||||||
search.sort_criteria(sort);
|
search.sort_criteria(sort);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if let Some(ref locales) = query.locales {
|
||||||
|
search.locales(locales.iter().copied().map(Into::into).collect());
|
||||||
|
}
|
||||||
|
|
||||||
Ok((search, is_finite_pagination, max_total_hits, offset))
|
Ok((search, is_finite_pagination, max_total_hits, offset))
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -917,6 +933,7 @@ pub fn perform_search(
|
|||||||
highlight_pre_tag,
|
highlight_pre_tag,
|
||||||
highlight_post_tag,
|
highlight_post_tag,
|
||||||
crop_marker,
|
crop_marker,
|
||||||
|
locales,
|
||||||
// already used in prepare_search
|
// already used in prepare_search
|
||||||
vector: _,
|
vector: _,
|
||||||
hybrid: _,
|
hybrid: _,
|
||||||
@ -941,6 +958,7 @@ pub fn perform_search(
|
|||||||
sort,
|
sort,
|
||||||
show_ranking_score,
|
show_ranking_score,
|
||||||
show_ranking_score_details,
|
show_ranking_score_details,
|
||||||
|
locales: locales.map(|l| l.iter().copied().map(Into::into).collect()),
|
||||||
};
|
};
|
||||||
|
|
||||||
let documents = make_hits(
|
let documents = make_hits(
|
||||||
@ -1046,6 +1064,7 @@ struct AttributesFormat {
|
|||||||
sort: Option<Vec<String>>,
|
sort: Option<Vec<String>>,
|
||||||
show_ranking_score: bool,
|
show_ranking_score: bool,
|
||||||
show_ranking_score_details: bool,
|
show_ranking_score_details: bool,
|
||||||
|
locales: Option<Vec<Language>>,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
|
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
|
||||||
@ -1093,19 +1112,16 @@ struct HitMaker<'a> {
|
|||||||
show_ranking_score_details: bool,
|
show_ranking_score_details: bool,
|
||||||
sort: Option<Vec<String>>,
|
sort: Option<Vec<String>>,
|
||||||
show_matches_position: bool,
|
show_matches_position: bool,
|
||||||
|
locales: Option<Vec<Language>>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'a> HitMaker<'a> {
|
impl<'a> HitMaker<'a> {
|
||||||
pub fn tokenizer<'b>(
|
pub fn tokenizer<'b>(
|
||||||
script_lang_map: &'b HashMap<milli::tokenizer::Script, Vec<milli::tokenizer::Language>>,
|
|
||||||
dictionary: Option<&'b [&'b str]>,
|
dictionary: Option<&'b [&'b str]>,
|
||||||
separators: Option<&'b [&'b str]>,
|
separators: Option<&'b [&'b str]>,
|
||||||
) -> milli::tokenizer::Tokenizer<'b> {
|
) -> milli::tokenizer::Tokenizer<'b> {
|
||||||
let mut tokenizer_builder = TokenizerBuilder::default();
|
let mut tokenizer_builder = TokenizerBuilder::default();
|
||||||
tokenizer_builder.create_char_map(true);
|
tokenizer_builder.create_char_map(true);
|
||||||
if !script_lang_map.is_empty() {
|
|
||||||
tokenizer_builder.allow_list(script_lang_map);
|
|
||||||
}
|
|
||||||
|
|
||||||
if let Some(separators) = separators {
|
if let Some(separators) = separators {
|
||||||
tokenizer_builder.separators(separators);
|
tokenizer_builder.separators(separators);
|
||||||
@ -1218,6 +1234,7 @@ impl<'a> HitMaker<'a> {
|
|||||||
show_ranking_score_details: format.show_ranking_score_details,
|
show_ranking_score_details: format.show_ranking_score_details,
|
||||||
show_matches_position: format.show_matches_position,
|
show_matches_position: format.show_matches_position,
|
||||||
sort: format.sort,
|
sort: format.sort,
|
||||||
|
locales: format.locales,
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1280,6 +1297,7 @@ impl<'a> HitMaker<'a> {
|
|||||||
&self.formatted_options,
|
&self.formatted_options,
|
||||||
self.show_matches_position,
|
self.show_matches_position,
|
||||||
&self.displayed_ids,
|
&self.displayed_ids,
|
||||||
|
self.locales.as_deref(),
|
||||||
)?;
|
)?;
|
||||||
|
|
||||||
if let Some(sort) = self.sort.as_ref() {
|
if let Some(sort) = self.sort.as_ref() {
|
||||||
@ -1312,8 +1330,6 @@ fn make_hits<'a>(
|
|||||||
) -> Result<Vec<SearchHit>, MeilisearchHttpError> {
|
) -> Result<Vec<SearchHit>, MeilisearchHttpError> {
|
||||||
let mut documents = Vec::new();
|
let mut documents = Vec::new();
|
||||||
|
|
||||||
let script_lang_map = index.script_language(rtxn)?;
|
|
||||||
|
|
||||||
let dictionary = index.dictionary(rtxn)?;
|
let dictionary = index.dictionary(rtxn)?;
|
||||||
let dictionary: Option<Vec<_>> =
|
let dictionary: Option<Vec<_>> =
|
||||||
dictionary.as_ref().map(|x| x.iter().map(String::as_str).collect());
|
dictionary.as_ref().map(|x| x.iter().map(String::as_str).collect());
|
||||||
@ -1321,8 +1337,7 @@ fn make_hits<'a>(
|
|||||||
let separators: Option<Vec<_>> =
|
let separators: Option<Vec<_>> =
|
||||||
separators.as_ref().map(|x| x.iter().map(String::as_str).collect());
|
separators.as_ref().map(|x| x.iter().map(String::as_str).collect());
|
||||||
|
|
||||||
let tokenizer =
|
let tokenizer = HitMaker::tokenizer(dictionary.as_deref(), separators.as_deref());
|
||||||
HitMaker::tokenizer(&script_lang_map, dictionary.as_deref(), separators.as_deref());
|
|
||||||
|
|
||||||
let formatter_builder = HitMaker::formatter_builder(matching_words, tokenizer);
|
let formatter_builder = HitMaker::formatter_builder(matching_words, tokenizer);
|
||||||
|
|
||||||
@ -1341,6 +1356,7 @@ pub fn perform_facet_search(
|
|||||||
facet_name: String,
|
facet_name: String,
|
||||||
search_kind: SearchKind,
|
search_kind: SearchKind,
|
||||||
features: RoFeatures,
|
features: RoFeatures,
|
||||||
|
locales: Option<Vec<Language>>,
|
||||||
) -> Result<FacetSearchResult, ResponseError> {
|
) -> Result<FacetSearchResult, ResponseError> {
|
||||||
let before_search = Instant::now();
|
let before_search = Instant::now();
|
||||||
let rtxn = index.read_txn()?;
|
let rtxn = index.read_txn()?;
|
||||||
@ -1363,6 +1379,10 @@ pub fn perform_facet_search(
|
|||||||
facet_search.max_values(max_facets as usize);
|
facet_search.max_values(max_facets as usize);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if let Some(locales) = locales {
|
||||||
|
facet_search.locales(locales);
|
||||||
|
}
|
||||||
|
|
||||||
Ok(FacetSearchResult {
|
Ok(FacetSearchResult {
|
||||||
facet_hits: facet_search.execute()?,
|
facet_hits: facet_search.execute()?,
|
||||||
facet_query,
|
facet_query,
|
||||||
@ -1443,6 +1463,7 @@ pub fn perform_similar(
|
|||||||
sort: None,
|
sort: None,
|
||||||
show_ranking_score,
|
show_ranking_score,
|
||||||
show_ranking_score_details,
|
show_ranking_score_details,
|
||||||
|
locales: None,
|
||||||
};
|
};
|
||||||
|
|
||||||
let hits = make_hits(
|
let hits = make_hits(
|
||||||
@ -1631,6 +1652,7 @@ fn format_fields(
|
|||||||
formatted_options: &BTreeMap<FieldId, FormatOptions>,
|
formatted_options: &BTreeMap<FieldId, FormatOptions>,
|
||||||
compute_matches: bool,
|
compute_matches: bool,
|
||||||
displayable_ids: &BTreeSet<FieldId>,
|
displayable_ids: &BTreeSet<FieldId>,
|
||||||
|
locales: Option<&[Language]>,
|
||||||
) -> Result<(Option<MatchesPosition>, Document), MeilisearchHttpError> {
|
) -> Result<(Option<MatchesPosition>, Document), MeilisearchHttpError> {
|
||||||
let mut matches_position = compute_matches.then(BTreeMap::new);
|
let mut matches_position = compute_matches.then(BTreeMap::new);
|
||||||
let mut document = document.clone();
|
let mut document = document.clone();
|
||||||
@ -1664,6 +1686,14 @@ fn format_fields(
|
|||||||
let mut infos = Vec::new();
|
let mut infos = Vec::new();
|
||||||
|
|
||||||
*value = format_value(std::mem::take(value), builder, format, &mut infos, compute_matches);
|
*value = format_value(std::mem::take(value), builder, format, &mut infos, compute_matches);
|
||||||
|
*value = format_value(
|
||||||
|
std::mem::take(value),
|
||||||
|
builder,
|
||||||
|
format,
|
||||||
|
&mut infos,
|
||||||
|
compute_matches,
|
||||||
|
locales,
|
||||||
|
);
|
||||||
|
|
||||||
if let Some(matches) = matches_position.as_mut() {
|
if let Some(matches) = matches_position.as_mut() {
|
||||||
if !infos.is_empty() {
|
if !infos.is_empty() {
|
||||||
@ -1688,10 +1718,11 @@ fn format_value(
|
|||||||
format_options: Option<FormatOptions>,
|
format_options: Option<FormatOptions>,
|
||||||
infos: &mut Vec<MatchBounds>,
|
infos: &mut Vec<MatchBounds>,
|
||||||
compute_matches: bool,
|
compute_matches: bool,
|
||||||
|
locales: Option<&[Language]>,
|
||||||
) -> Value {
|
) -> Value {
|
||||||
match value {
|
match value {
|
||||||
Value::String(old_string) => {
|
Value::String(old_string) => {
|
||||||
let mut matcher = builder.build(&old_string);
|
let mut matcher = builder.build(&old_string, locales);
|
||||||
if compute_matches {
|
if compute_matches {
|
||||||
let matches = matcher.matches();
|
let matches = matcher.matches();
|
||||||
infos.extend_from_slice(&matches[..]);
|
infos.extend_from_slice(&matches[..]);
|
||||||
@ -1718,6 +1749,7 @@ fn format_value(
|
|||||||
}),
|
}),
|
||||||
infos,
|
infos,
|
||||||
compute_matches,
|
compute_matches,
|
||||||
|
locales,
|
||||||
)
|
)
|
||||||
})
|
})
|
||||||
.collect(),
|
.collect(),
|
||||||
@ -1737,6 +1769,7 @@ fn format_value(
|
|||||||
}),
|
}),
|
||||||
infos,
|
infos,
|
||||||
compute_matches,
|
compute_matches,
|
||||||
|
locales,
|
||||||
),
|
),
|
||||||
)
|
)
|
||||||
})
|
})
|
||||||
@ -1745,7 +1778,7 @@ fn format_value(
|
|||||||
Value::Number(number) => {
|
Value::Number(number) => {
|
||||||
let s = number.to_string();
|
let s = number.to_string();
|
||||||
|
|
||||||
let mut matcher = builder.build(&s);
|
let mut matcher = builder.build(&s, locales);
|
||||||
if compute_matches {
|
if compute_matches {
|
||||||
let matches = matcher.matches();
|
let matches = matcher.matches();
|
||||||
infos.extend_from_slice(&matches[..]);
|
infos.extend_from_slice(&matches[..]);
|
||||||
|
@ -68,6 +68,7 @@ fn main() -> Result<(), Box<dyn Error>> {
|
|||||||
logger,
|
logger,
|
||||||
TimeBudget::max(),
|
TimeBudget::max(),
|
||||||
None,
|
None,
|
||||||
|
None,
|
||||||
)?;
|
)?;
|
||||||
if let Some((logger, dir)) = detailed_logger {
|
if let Some((logger, dir)) = detailed_logger {
|
||||||
logger.finish(&mut ctx, Path::new(dir))?;
|
logger.finish(&mut ctx, Path::new(dir))?;
|
||||||
|
@ -3,7 +3,7 @@ use std::collections::BinaryHeap;
|
|||||||
use std::ops::ControlFlow;
|
use std::ops::ControlFlow;
|
||||||
|
|
||||||
use charabia::normalizer::NormalizerOption;
|
use charabia::normalizer::NormalizerOption;
|
||||||
use charabia::Normalize;
|
use charabia::{Language, Normalize, StrDetection, Token};
|
||||||
use fst::automaton::{Automaton, Str};
|
use fst::automaton::{Automaton, Str};
|
||||||
use fst::{IntoStreamer, Streamer};
|
use fst::{IntoStreamer, Streamer};
|
||||||
use roaring::RoaringBitmap;
|
use roaring::RoaringBitmap;
|
||||||
@ -23,6 +23,7 @@ pub struct SearchForFacetValues<'a> {
|
|||||||
search_query: Search<'a>,
|
search_query: Search<'a>,
|
||||||
max_values: usize,
|
max_values: usize,
|
||||||
is_hybrid: bool,
|
is_hybrid: bool,
|
||||||
|
locales: Option<Vec<Language>>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'a> SearchForFacetValues<'a> {
|
impl<'a> SearchForFacetValues<'a> {
|
||||||
@ -37,6 +38,7 @@ impl<'a> SearchForFacetValues<'a> {
|
|||||||
search_query,
|
search_query,
|
||||||
max_values: DEFAULT_MAX_NUMBER_OF_VALUES_PER_FACET,
|
max_values: DEFAULT_MAX_NUMBER_OF_VALUES_PER_FACET,
|
||||||
is_hybrid,
|
is_hybrid,
|
||||||
|
locales: None,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -50,6 +52,11 @@ impl<'a> SearchForFacetValues<'a> {
|
|||||||
self
|
self
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn locales(&mut self, locales: Vec<Language>) -> &mut Self {
|
||||||
|
self.locales = Some(locales);
|
||||||
|
self
|
||||||
|
}
|
||||||
|
|
||||||
fn one_original_value_of(
|
fn one_original_value_of(
|
||||||
&self,
|
&self,
|
||||||
field_id: FieldId,
|
field_id: FieldId,
|
||||||
@ -109,8 +116,7 @@ impl<'a> SearchForFacetValues<'a> {
|
|||||||
|
|
||||||
match self.query.as_ref() {
|
match self.query.as_ref() {
|
||||||
Some(query) => {
|
Some(query) => {
|
||||||
let options = NormalizerOption { lossy: true, ..Default::default() };
|
let query = normalize_facet_string(query, self.locales.as_deref());
|
||||||
let query = query.normalize(&options);
|
|
||||||
let query = query.as_ref();
|
let query = query.as_ref();
|
||||||
|
|
||||||
let authorize_typos = self.search_query.index.authorize_typos(rtxn)?;
|
let authorize_typos = self.search_query.index.authorize_typos(rtxn)?;
|
||||||
@ -330,3 +336,15 @@ impl ValuesCollection {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
fn normalize_facet_string(facet_string: &str, locales: Option<&[Language]>) -> String {
|
||||||
|
let options = NormalizerOption { lossy: true, ..Default::default() };
|
||||||
|
let mut detection = StrDetection::new(facet_string, locales);
|
||||||
|
let token = Token {
|
||||||
|
lemma: std::borrow::Cow::Borrowed(facet_string),
|
||||||
|
script: detection.script(),
|
||||||
|
language: detection.language(),
|
||||||
|
..Default::default()
|
||||||
|
};
|
||||||
|
|
||||||
|
token.normalize(&options).lemma.to_string()
|
||||||
|
}
|
||||||
|
@ -174,6 +174,7 @@ impl<'a> Search<'a> {
|
|||||||
semantic: self.semantic.clone(),
|
semantic: self.semantic.clone(),
|
||||||
time_budget: self.time_budget.clone(),
|
time_budget: self.time_budget.clone(),
|
||||||
ranking_score_threshold: self.ranking_score_threshold,
|
ranking_score_threshold: self.ranking_score_threshold,
|
||||||
|
locales: self.locales.clone(),
|
||||||
};
|
};
|
||||||
|
|
||||||
let semantic = search.semantic.take();
|
let semantic = search.semantic.take();
|
||||||
|
@ -1,6 +1,7 @@
|
|||||||
use std::fmt;
|
use std::fmt;
|
||||||
use std::sync::Arc;
|
use std::sync::Arc;
|
||||||
|
|
||||||
|
use charabia::Language;
|
||||||
use levenshtein_automata::{LevenshteinAutomatonBuilder as LevBuilder, DFA};
|
use levenshtein_automata::{LevenshteinAutomatonBuilder as LevBuilder, DFA};
|
||||||
use once_cell::sync::Lazy;
|
use once_cell::sync::Lazy;
|
||||||
use roaring::bitmap::RoaringBitmap;
|
use roaring::bitmap::RoaringBitmap;
|
||||||
@ -52,6 +53,7 @@ pub struct Search<'a> {
|
|||||||
semantic: Option<SemanticSearch>,
|
semantic: Option<SemanticSearch>,
|
||||||
time_budget: TimeBudget,
|
time_budget: TimeBudget,
|
||||||
ranking_score_threshold: Option<f64>,
|
ranking_score_threshold: Option<f64>,
|
||||||
|
locales: Option<Vec<Language>>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'a> Search<'a> {
|
impl<'a> Search<'a> {
|
||||||
@ -72,6 +74,7 @@ impl<'a> Search<'a> {
|
|||||||
rtxn,
|
rtxn,
|
||||||
index,
|
index,
|
||||||
semantic: None,
|
semantic: None,
|
||||||
|
locales: None,
|
||||||
time_budget: TimeBudget::max(),
|
time_budget: TimeBudget::max(),
|
||||||
ranking_score_threshold: None,
|
ranking_score_threshold: None,
|
||||||
}
|
}
|
||||||
@ -160,6 +163,11 @@ impl<'a> Search<'a> {
|
|||||||
self
|
self
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn locales(&mut self, locales: Vec<Language>) -> &mut Search<'a> {
|
||||||
|
self.locales = Some(locales);
|
||||||
|
self
|
||||||
|
}
|
||||||
|
|
||||||
pub fn execute_for_candidates(&self, has_vector_search: bool) -> Result<RoaringBitmap> {
|
pub fn execute_for_candidates(&self, has_vector_search: bool) -> Result<RoaringBitmap> {
|
||||||
if has_vector_search {
|
if has_vector_search {
|
||||||
let ctx = SearchContext::new(self.index, self.rtxn)?;
|
let ctx = SearchContext::new(self.index, self.rtxn)?;
|
||||||
@ -232,6 +240,7 @@ impl<'a> Search<'a> {
|
|||||||
&mut DefaultSearchLogger,
|
&mut DefaultSearchLogger,
|
||||||
self.time_budget.clone(),
|
self.time_budget.clone(),
|
||||||
self.ranking_score_threshold,
|
self.ranking_score_threshold,
|
||||||
|
self.locales.as_ref(),
|
||||||
)?,
|
)?,
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -272,6 +281,7 @@ impl fmt::Debug for Search<'_> {
|
|||||||
semantic,
|
semantic,
|
||||||
time_budget,
|
time_budget,
|
||||||
ranking_score_threshold,
|
ranking_score_threshold,
|
||||||
|
locales,
|
||||||
} = self;
|
} = self;
|
||||||
f.debug_struct("Search")
|
f.debug_struct("Search")
|
||||||
.field("query", query)
|
.field("query", query)
|
||||||
@ -292,6 +302,7 @@ impl fmt::Debug for Search<'_> {
|
|||||||
)
|
)
|
||||||
.field("time_budget", time_budget)
|
.field("time_budget", time_budget)
|
||||||
.field("ranking_score_threshold", ranking_score_threshold)
|
.field("ranking_score_threshold", ranking_score_threshold)
|
||||||
|
.field("locales", locales)
|
||||||
.finish()
|
.finish()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -1,6 +1,6 @@
|
|||||||
use std::borrow::Cow;
|
use std::borrow::Cow;
|
||||||
|
|
||||||
use charabia::{SeparatorKind, Token, Tokenizer};
|
use charabia::{Language, SeparatorKind, Token, Tokenizer};
|
||||||
pub use matching_words::MatchingWords;
|
pub use matching_words::MatchingWords;
|
||||||
use matching_words::{MatchType, PartialMatch, WordId};
|
use matching_words::{MatchType, PartialMatch, WordId};
|
||||||
use serde::Serialize;
|
use serde::Serialize;
|
||||||
@ -46,7 +46,11 @@ impl<'m> MatcherBuilder<'m> {
|
|||||||
self
|
self
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn build<'t>(&self, text: &'t str) -> Matcher<'t, 'm, '_> {
|
pub fn build<'t, 'lang>(
|
||||||
|
&self,
|
||||||
|
text: &'t str,
|
||||||
|
locales: Option<&'lang [Language]>,
|
||||||
|
) -> Matcher<'t, 'm, '_, 'lang> {
|
||||||
let crop_marker = match &self.crop_marker {
|
let crop_marker = match &self.crop_marker {
|
||||||
Some(marker) => marker.as_str(),
|
Some(marker) => marker.as_str(),
|
||||||
None => DEFAULT_CROP_MARKER,
|
None => DEFAULT_CROP_MARKER,
|
||||||
@ -68,6 +72,7 @@ impl<'m> MatcherBuilder<'m> {
|
|||||||
highlight_prefix,
|
highlight_prefix,
|
||||||
highlight_suffix,
|
highlight_suffix,
|
||||||
matches: None,
|
matches: None,
|
||||||
|
locales,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -107,17 +112,18 @@ pub struct MatchBounds {
|
|||||||
|
|
||||||
/// Structure used to analyze a string, compute words that match,
|
/// Structure used to analyze a string, compute words that match,
|
||||||
/// and format the source string, returning a highlighted and cropped sub-string.
|
/// and format the source string, returning a highlighted and cropped sub-string.
|
||||||
pub struct Matcher<'t, 'tokenizer, 'b> {
|
pub struct Matcher<'t, 'tokenizer, 'b, 'lang> {
|
||||||
text: &'t str,
|
text: &'t str,
|
||||||
matching_words: &'b MatchingWords,
|
matching_words: &'b MatchingWords,
|
||||||
tokenizer: &'b Tokenizer<'tokenizer>,
|
tokenizer: &'b Tokenizer<'tokenizer>,
|
||||||
|
locales: Option<&'lang [Language]>,
|
||||||
crop_marker: &'b str,
|
crop_marker: &'b str,
|
||||||
highlight_prefix: &'b str,
|
highlight_prefix: &'b str,
|
||||||
highlight_suffix: &'b str,
|
highlight_suffix: &'b str,
|
||||||
matches: Option<(Vec<Token<'t>>, Vec<Match>)>,
|
matches: Option<(Vec<Token<'t>>, Vec<Match>)>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'t, 'tokenizer> Matcher<'t, 'tokenizer, '_> {
|
impl<'t, 'tokenizer> Matcher<'t, 'tokenizer, '_, '_> {
|
||||||
/// Iterates over tokens and save any of them that matches the query.
|
/// Iterates over tokens and save any of them that matches the query.
|
||||||
fn compute_matches(&mut self) -> &mut Self {
|
fn compute_matches(&mut self) -> &mut Self {
|
||||||
/// some words are counted as matches only if they are close together and in the good order,
|
/// some words are counted as matches only if they are close together and in the good order,
|
||||||
@ -173,7 +179,8 @@ impl<'t, 'tokenizer> Matcher<'t, 'tokenizer, '_> {
|
|||||||
false
|
false
|
||||||
}
|
}
|
||||||
|
|
||||||
let tokens: Vec<_> = self.tokenizer.tokenize(self.text).collect();
|
let tokens: Vec<_> =
|
||||||
|
self.tokenizer.tokenize_with_allow_list(self.text, self.locales).collect();
|
||||||
let mut matches = Vec::new();
|
let mut matches = Vec::new();
|
||||||
|
|
||||||
let mut words_positions = tokens
|
let mut words_positions = tokens
|
||||||
@ -530,6 +537,7 @@ mod tests {
|
|||||||
&mut crate::DefaultSearchLogger,
|
&mut crate::DefaultSearchLogger,
|
||||||
TimeBudget::max(),
|
TimeBudget::max(),
|
||||||
None,
|
None,
|
||||||
|
None,
|
||||||
)
|
)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
|
||||||
@ -553,19 +561,19 @@ mod tests {
|
|||||||
|
|
||||||
// Text without any match.
|
// Text without any match.
|
||||||
let text = "A quick brown fox can not jump 32 feet, right? Brr, it is cold!";
|
let text = "A quick brown fox can not jump 32 feet, right? Brr, it is cold!";
|
||||||
let mut matcher = builder.build(text);
|
let mut matcher = builder.build(text, None);
|
||||||
// no crop and no highlight should return complete text.
|
// no crop and no highlight should return complete text.
|
||||||
assert_eq!(&matcher.format(format_options), &text);
|
assert_eq!(&matcher.format(format_options), &text);
|
||||||
|
|
||||||
// Text containing all matches.
|
// Text containing all matches.
|
||||||
let text = "Natalie risk her future to build a world with the boy she loves. Emily Henry: The Love That Split The World.";
|
let text = "Natalie risk her future to build a world with the boy she loves. Emily Henry: The Love That Split The World.";
|
||||||
let mut matcher = builder.build(text);
|
let mut matcher = builder.build(text, None);
|
||||||
// no crop and no highlight should return complete text.
|
// no crop and no highlight should return complete text.
|
||||||
assert_eq!(&matcher.format(format_options), &text);
|
assert_eq!(&matcher.format(format_options), &text);
|
||||||
|
|
||||||
// Text containing some matches.
|
// Text containing some matches.
|
||||||
let text = "Natalie risk her future to build a world with the boy she loves.";
|
let text = "Natalie risk her future to build a world with the boy she loves.";
|
||||||
let mut matcher = builder.build(text);
|
let mut matcher = builder.build(text, None);
|
||||||
// no crop and no highlight should return complete text.
|
// no crop and no highlight should return complete text.
|
||||||
assert_eq!(&matcher.format(format_options), &text);
|
assert_eq!(&matcher.format(format_options), &text);
|
||||||
}
|
}
|
||||||
@ -580,23 +588,23 @@ mod tests {
|
|||||||
|
|
||||||
// empty text.
|
// empty text.
|
||||||
let text = "";
|
let text = "";
|
||||||
let mut matcher = builder.build(text);
|
let mut matcher = builder.build(text, None);
|
||||||
assert_eq!(&matcher.format(format_options), "");
|
assert_eq!(&matcher.format(format_options), "");
|
||||||
|
|
||||||
// text containing only separators.
|
// text containing only separators.
|
||||||
let text = ":-)";
|
let text = ":-)";
|
||||||
let mut matcher = builder.build(text);
|
let mut matcher = builder.build(text, None);
|
||||||
assert_eq!(&matcher.format(format_options), ":-)");
|
assert_eq!(&matcher.format(format_options), ":-)");
|
||||||
|
|
||||||
// Text without any match.
|
// Text without any match.
|
||||||
let text = "A quick brown fox can not jump 32 feet, right? Brr, it is cold!";
|
let text = "A quick brown fox can not jump 32 feet, right? Brr, it is cold!";
|
||||||
let mut matcher = builder.build(text);
|
let mut matcher = builder.build(text, None);
|
||||||
// no crop should return complete text, because there is no matches.
|
// no crop should return complete text, because there is no matches.
|
||||||
assert_eq!(&matcher.format(format_options), &text);
|
assert_eq!(&matcher.format(format_options), &text);
|
||||||
|
|
||||||
// Text containing all matches.
|
// Text containing all matches.
|
||||||
let text = "Natalie risk her future to build a world with the boy she loves. Emily Henry: The Love That Split The World.";
|
let text = "Natalie risk her future to build a world with the boy she loves. Emily Henry: The Love That Split The World.";
|
||||||
let mut matcher = builder.build(text);
|
let mut matcher = builder.build(text, None);
|
||||||
// no crop should return complete text with highlighted matches.
|
// no crop should return complete text with highlighted matches.
|
||||||
insta::assert_snapshot!(
|
insta::assert_snapshot!(
|
||||||
matcher.format(format_options),
|
matcher.format(format_options),
|
||||||
@ -605,7 +613,7 @@ mod tests {
|
|||||||
|
|
||||||
// Text containing some matches.
|
// Text containing some matches.
|
||||||
let text = "Natalie risk her future to build a world with the boy she loves.";
|
let text = "Natalie risk her future to build a world with the boy she loves.";
|
||||||
let mut matcher = builder.build(text);
|
let mut matcher = builder.build(text, None);
|
||||||
// no crop should return complete text with highlighted matches.
|
// no crop should return complete text with highlighted matches.
|
||||||
insta::assert_snapshot!(
|
insta::assert_snapshot!(
|
||||||
matcher.format(format_options),
|
matcher.format(format_options),
|
||||||
@ -622,7 +630,7 @@ mod tests {
|
|||||||
|
|
||||||
// Text containing prefix match.
|
// Text containing prefix match.
|
||||||
let text = "Ŵôřlḑôle";
|
let text = "Ŵôřlḑôle";
|
||||||
let mut matcher = builder.build(text);
|
let mut matcher = builder.build(text, None);
|
||||||
// no crop should return complete text with highlighted matches.
|
// no crop should return complete text with highlighted matches.
|
||||||
insta::assert_snapshot!(
|
insta::assert_snapshot!(
|
||||||
matcher.format(format_options),
|
matcher.format(format_options),
|
||||||
@ -631,7 +639,7 @@ mod tests {
|
|||||||
|
|
||||||
// Text containing unicode match.
|
// Text containing unicode match.
|
||||||
let text = "Ŵôřlḑ";
|
let text = "Ŵôřlḑ";
|
||||||
let mut matcher = builder.build(text);
|
let mut matcher = builder.build(text, None);
|
||||||
// no crop should return complete text with highlighted matches.
|
// no crop should return complete text with highlighted matches.
|
||||||
insta::assert_snapshot!(
|
insta::assert_snapshot!(
|
||||||
matcher.format(format_options),
|
matcher.format(format_options),
|
||||||
@ -643,7 +651,7 @@ mod tests {
|
|||||||
|
|
||||||
// Text containing unicode match.
|
// Text containing unicode match.
|
||||||
let text = "Westfália";
|
let text = "Westfália";
|
||||||
let mut matcher = builder.build(text);
|
let mut matcher = builder.build(text, None);
|
||||||
// no crop should return complete text with highlighted matches.
|
// no crop should return complete text with highlighted matches.
|
||||||
insta::assert_snapshot!(
|
insta::assert_snapshot!(
|
||||||
matcher.format(format_options),
|
matcher.format(format_options),
|
||||||
@ -661,7 +669,7 @@ mod tests {
|
|||||||
|
|
||||||
// empty text.
|
// empty text.
|
||||||
let text = "";
|
let text = "";
|
||||||
let mut matcher = builder.build(text);
|
let mut matcher = builder.build(text, None);
|
||||||
insta::assert_snapshot!(
|
insta::assert_snapshot!(
|
||||||
matcher.format(format_options),
|
matcher.format(format_options),
|
||||||
@""
|
@""
|
||||||
@ -669,7 +677,7 @@ mod tests {
|
|||||||
|
|
||||||
// text containing only separators.
|
// text containing only separators.
|
||||||
let text = ":-)";
|
let text = ":-)";
|
||||||
let mut matcher = builder.build(text);
|
let mut matcher = builder.build(text, None);
|
||||||
insta::assert_snapshot!(
|
insta::assert_snapshot!(
|
||||||
matcher.format(format_options),
|
matcher.format(format_options),
|
||||||
@":-)"
|
@":-)"
|
||||||
@ -677,7 +685,7 @@ mod tests {
|
|||||||
|
|
||||||
// Text without any match.
|
// Text without any match.
|
||||||
let text = "A quick brown fox can not jump 32 feet, right? Brr, it is cold!";
|
let text = "A quick brown fox can not jump 32 feet, right? Brr, it is cold!";
|
||||||
let mut matcher = builder.build(text);
|
let mut matcher = builder.build(text, None);
|
||||||
// no highlight should return 10 first words with a marker at the end.
|
// no highlight should return 10 first words with a marker at the end.
|
||||||
insta::assert_snapshot!(
|
insta::assert_snapshot!(
|
||||||
matcher.format(format_options),
|
matcher.format(format_options),
|
||||||
@ -686,7 +694,7 @@ mod tests {
|
|||||||
|
|
||||||
// Text without any match starting by a separator.
|
// Text without any match starting by a separator.
|
||||||
let text = "(A quick brown fox can not jump 32 feet, right? Brr, it is cold!)";
|
let text = "(A quick brown fox can not jump 32 feet, right? Brr, it is cold!)";
|
||||||
let mut matcher = builder.build(text);
|
let mut matcher = builder.build(text, None);
|
||||||
// no highlight should return 10 first words with a marker at the end.
|
// no highlight should return 10 first words with a marker at the end.
|
||||||
insta::assert_snapshot!(
|
insta::assert_snapshot!(
|
||||||
matcher.format(format_options),
|
matcher.format(format_options),
|
||||||
@ -695,7 +703,7 @@ mod tests {
|
|||||||
|
|
||||||
// Test phrase propagation
|
// Test phrase propagation
|
||||||
let text = "Natalie risk her future. Split The World is a book written by Emily Henry. I never read it.";
|
let text = "Natalie risk her future. Split The World is a book written by Emily Henry. I never read it.";
|
||||||
let mut matcher = builder.build(text);
|
let mut matcher = builder.build(text, None);
|
||||||
// should crop the phrase instead of croping around the match.
|
// should crop the phrase instead of croping around the match.
|
||||||
insta::assert_snapshot!(
|
insta::assert_snapshot!(
|
||||||
matcher.format(format_options),
|
matcher.format(format_options),
|
||||||
@ -704,7 +712,7 @@ mod tests {
|
|||||||
|
|
||||||
// Text containing some matches.
|
// Text containing some matches.
|
||||||
let text = "Natalie risk her future to build a world with the boy she loves.";
|
let text = "Natalie risk her future to build a world with the boy she loves.";
|
||||||
let mut matcher = builder.build(text);
|
let mut matcher = builder.build(text, None);
|
||||||
// no highlight should return 10 last words with a marker at the start.
|
// no highlight should return 10 last words with a marker at the start.
|
||||||
insta::assert_snapshot!(
|
insta::assert_snapshot!(
|
||||||
matcher.format(format_options),
|
matcher.format(format_options),
|
||||||
@ -713,7 +721,7 @@ mod tests {
|
|||||||
|
|
||||||
// Text containing all matches.
|
// Text containing all matches.
|
||||||
let text = "Natalie risk her future to build a world with the boy she loves. Emily Henry: The Love That Split The World.";
|
let text = "Natalie risk her future to build a world with the boy she loves. Emily Henry: The Love That Split The World.";
|
||||||
let mut matcher = builder.build(text);
|
let mut matcher = builder.build(text, None);
|
||||||
// no highlight should return 10 last words with a marker at the start.
|
// no highlight should return 10 last words with a marker at the start.
|
||||||
insta::assert_snapshot!(
|
insta::assert_snapshot!(
|
||||||
matcher.format(format_options),
|
matcher.format(format_options),
|
||||||
@ -722,7 +730,7 @@ mod tests {
|
|||||||
|
|
||||||
// Text containing a match unordered and a match ordered.
|
// Text containing a match unordered and a match ordered.
|
||||||
let text = "The world split void void void void void void void void void split the world void void";
|
let text = "The world split void void void void void void void void void split the world void void";
|
||||||
let mut matcher = builder.build(text);
|
let mut matcher = builder.build(text, None);
|
||||||
// crop should return 10 last words with a marker at the start.
|
// crop should return 10 last words with a marker at the start.
|
||||||
insta::assert_snapshot!(
|
insta::assert_snapshot!(
|
||||||
matcher.format(format_options),
|
matcher.format(format_options),
|
||||||
@ -731,7 +739,7 @@ mod tests {
|
|||||||
|
|
||||||
// Text containing matches with different density.
|
// Text containing matches with different density.
|
||||||
let text = "split void the void void world void void void void void void void void void void split the world void void";
|
let text = "split void the void void world void void void void void void void void void void split the world void void";
|
||||||
let mut matcher = builder.build(text);
|
let mut matcher = builder.build(text, None);
|
||||||
// crop should return 10 last words with a marker at the start.
|
// crop should return 10 last words with a marker at the start.
|
||||||
insta::assert_snapshot!(
|
insta::assert_snapshot!(
|
||||||
matcher.format(format_options),
|
matcher.format(format_options),
|
||||||
@ -740,7 +748,7 @@ mod tests {
|
|||||||
|
|
||||||
// Text containing matches with same word.
|
// Text containing matches with same word.
|
||||||
let text = "split split split split split split void void void void void void void void void void split the world void void";
|
let text = "split split split split split split void void void void void void void void void void split the world void void";
|
||||||
let mut matcher = builder.build(text);
|
let mut matcher = builder.build(text, None);
|
||||||
// crop should return 10 last words with a marker at the start.
|
// crop should return 10 last words with a marker at the start.
|
||||||
insta::assert_snapshot!(
|
insta::assert_snapshot!(
|
||||||
matcher.format(format_options),
|
matcher.format(format_options),
|
||||||
@ -758,7 +766,7 @@ mod tests {
|
|||||||
|
|
||||||
// empty text.
|
// empty text.
|
||||||
let text = "";
|
let text = "";
|
||||||
let mut matcher = builder.build(text);
|
let mut matcher = builder.build(text, None);
|
||||||
insta::assert_snapshot!(
|
insta::assert_snapshot!(
|
||||||
matcher.format(format_options),
|
matcher.format(format_options),
|
||||||
@""
|
@""
|
||||||
@ -766,7 +774,7 @@ mod tests {
|
|||||||
|
|
||||||
// text containing only separators.
|
// text containing only separators.
|
||||||
let text = ":-)";
|
let text = ":-)";
|
||||||
let mut matcher = builder.build(text);
|
let mut matcher = builder.build(text, None);
|
||||||
insta::assert_snapshot!(
|
insta::assert_snapshot!(
|
||||||
matcher.format(format_options),
|
matcher.format(format_options),
|
||||||
@":-)"
|
@":-)"
|
||||||
@ -774,7 +782,7 @@ mod tests {
|
|||||||
|
|
||||||
// Text without any match.
|
// Text without any match.
|
||||||
let text = "A quick brown fox can not jump 32 feet, right? Brr, it is cold!";
|
let text = "A quick brown fox can not jump 32 feet, right? Brr, it is cold!";
|
||||||
let mut matcher = builder.build(text);
|
let mut matcher = builder.build(text, None);
|
||||||
// both should return 10 first words with a marker at the end.
|
// both should return 10 first words with a marker at the end.
|
||||||
insta::assert_snapshot!(
|
insta::assert_snapshot!(
|
||||||
matcher.format(format_options),
|
matcher.format(format_options),
|
||||||
@ -783,7 +791,7 @@ mod tests {
|
|||||||
|
|
||||||
// Text containing some matches.
|
// Text containing some matches.
|
||||||
let text = "Natalie risk her future to build a world with the boy she loves.";
|
let text = "Natalie risk her future to build a world with the boy she loves.";
|
||||||
let mut matcher = builder.build(text);
|
let mut matcher = builder.build(text, None);
|
||||||
// both should return 10 last words with a marker at the start and highlighted matches.
|
// both should return 10 last words with a marker at the start and highlighted matches.
|
||||||
insta::assert_snapshot!(
|
insta::assert_snapshot!(
|
||||||
matcher.format(format_options),
|
matcher.format(format_options),
|
||||||
@ -792,7 +800,7 @@ mod tests {
|
|||||||
|
|
||||||
// Text containing all matches.
|
// Text containing all matches.
|
||||||
let text = "Natalie risk her future to build a world with the boy she loves. Emily Henry: The Love That Split The World.";
|
let text = "Natalie risk her future to build a world with the boy she loves. Emily Henry: The Love That Split The World.";
|
||||||
let mut matcher = builder.build(text);
|
let mut matcher = builder.build(text, None);
|
||||||
// both should return 10 last words with a marker at the start and highlighted matches.
|
// both should return 10 last words with a marker at the start and highlighted matches.
|
||||||
insta::assert_snapshot!(
|
insta::assert_snapshot!(
|
||||||
matcher.format(format_options),
|
matcher.format(format_options),
|
||||||
@ -801,7 +809,7 @@ mod tests {
|
|||||||
|
|
||||||
// Text containing a match unordered and a match ordered.
|
// Text containing a match unordered and a match ordered.
|
||||||
let text = "The world split void void void void void void void void void split the world void void";
|
let text = "The world split void void void void void void void void void split the world void void";
|
||||||
let mut matcher = builder.build(text);
|
let mut matcher = builder.build(text, None);
|
||||||
// crop should return 10 last words with a marker at the start.
|
// crop should return 10 last words with a marker at the start.
|
||||||
insta::assert_snapshot!(
|
insta::assert_snapshot!(
|
||||||
matcher.format(format_options),
|
matcher.format(format_options),
|
||||||
@ -824,7 +832,7 @@ mod tests {
|
|||||||
let text = "The groundbreaking invention had the power to split the world between those who embraced progress and those who resisted change!";
|
let text = "The groundbreaking invention had the power to split the world between those who embraced progress and those who resisted change!";
|
||||||
|
|
||||||
let builder = MatcherBuilder::new_test(&rtxn, &temp_index, "\"the world\"");
|
let builder = MatcherBuilder::new_test(&rtxn, &temp_index, "\"the world\"");
|
||||||
let mut matcher = builder.build(text);
|
let mut matcher = builder.build(text, None);
|
||||||
// should return 10 words with a marker at the start as well the end, and the highlighted matches.
|
// should return 10 words with a marker at the start as well the end, and the highlighted matches.
|
||||||
insta::assert_snapshot!(
|
insta::assert_snapshot!(
|
||||||
matcher.format(format_options),
|
matcher.format(format_options),
|
||||||
@ -832,7 +840,7 @@ mod tests {
|
|||||||
);
|
);
|
||||||
|
|
||||||
let builder = MatcherBuilder::new_test(&rtxn, &temp_index, "those \"and those\"");
|
let builder = MatcherBuilder::new_test(&rtxn, &temp_index, "those \"and those\"");
|
||||||
let mut matcher = builder.build(text);
|
let mut matcher = builder.build(text, None);
|
||||||
// should highlight "those" and the phrase "and those".
|
// should highlight "those" and the phrase "and those".
|
||||||
insta::assert_snapshot!(
|
insta::assert_snapshot!(
|
||||||
matcher.format(format_options),
|
matcher.format(format_options),
|
||||||
@ -851,7 +859,7 @@ mod tests {
|
|||||||
|
|
||||||
// set a smaller crop size
|
// set a smaller crop size
|
||||||
let format_options = FormatOptions { highlight: false, crop: Some(2) };
|
let format_options = FormatOptions { highlight: false, crop: Some(2) };
|
||||||
let mut matcher = builder.build(text);
|
let mut matcher = builder.build(text, None);
|
||||||
// because crop size < query size, partially format matches.
|
// because crop size < query size, partially format matches.
|
||||||
insta::assert_snapshot!(
|
insta::assert_snapshot!(
|
||||||
matcher.format(format_options),
|
matcher.format(format_options),
|
||||||
@ -860,7 +868,7 @@ mod tests {
|
|||||||
|
|
||||||
// set a smaller crop size
|
// set a smaller crop size
|
||||||
let format_options = FormatOptions { highlight: false, crop: Some(1) };
|
let format_options = FormatOptions { highlight: false, crop: Some(1) };
|
||||||
let mut matcher = builder.build(text);
|
let mut matcher = builder.build(text, None);
|
||||||
// because crop size < query size, partially format matches.
|
// because crop size < query size, partially format matches.
|
||||||
insta::assert_snapshot!(
|
insta::assert_snapshot!(
|
||||||
matcher.format(format_options),
|
matcher.format(format_options),
|
||||||
@ -869,7 +877,7 @@ mod tests {
|
|||||||
|
|
||||||
// set crop size to 0
|
// set crop size to 0
|
||||||
let format_options = FormatOptions { highlight: false, crop: Some(0) };
|
let format_options = FormatOptions { highlight: false, crop: Some(0) };
|
||||||
let mut matcher = builder.build(text);
|
let mut matcher = builder.build(text, None);
|
||||||
// because crop size is 0, crop is ignored.
|
// because crop size is 0, crop is ignored.
|
||||||
insta::assert_snapshot!(
|
insta::assert_snapshot!(
|
||||||
matcher.format(format_options),
|
matcher.format(format_options),
|
||||||
@ -889,7 +897,7 @@ mod tests {
|
|||||||
let format_options = FormatOptions { highlight: true, crop: None };
|
let format_options = FormatOptions { highlight: true, crop: None };
|
||||||
|
|
||||||
let text = "the do or die can't be he do and or isn't he";
|
let text = "the do or die can't be he do and or isn't he";
|
||||||
let mut matcher = builder.build(text);
|
let mut matcher = builder.build(text, None);
|
||||||
insta::assert_snapshot!(
|
insta::assert_snapshot!(
|
||||||
matcher.format(format_options),
|
matcher.format(format_options),
|
||||||
@"_the_ _do_ _or_ die can't be he do and or isn'_t_ _he_"
|
@"_the_ _do_ _or_ die can't be he do and or isn'_t_ _he_"
|
||||||
|
@ -24,7 +24,7 @@ mod tests;
|
|||||||
use std::collections::HashSet;
|
use std::collections::HashSet;
|
||||||
|
|
||||||
use bucket_sort::{bucket_sort, BucketSortOutput};
|
use bucket_sort::{bucket_sort, BucketSortOutput};
|
||||||
use charabia::TokenizerBuilder;
|
use charabia::{Language, TokenizerBuilder};
|
||||||
use db_cache::DatabaseCache;
|
use db_cache::DatabaseCache;
|
||||||
use exact_attribute::ExactAttribute;
|
use exact_attribute::ExactAttribute;
|
||||||
use graph_based_ranking_rule::{Exactness, Fid, Position, Proximity, Typo};
|
use graph_based_ranking_rule::{Exactness, Fid, Position, Proximity, Typo};
|
||||||
@ -639,6 +639,7 @@ pub fn execute_search(
|
|||||||
query_graph_logger: &mut dyn SearchLogger<QueryGraph>,
|
query_graph_logger: &mut dyn SearchLogger<QueryGraph>,
|
||||||
time_budget: TimeBudget,
|
time_budget: TimeBudget,
|
||||||
ranking_score_threshold: Option<f64>,
|
ranking_score_threshold: Option<f64>,
|
||||||
|
locales: Option<&Vec<Language>>,
|
||||||
) -> Result<PartialSearchResult> {
|
) -> Result<PartialSearchResult> {
|
||||||
check_sort_criteria(ctx, sort_criteria.as_ref())?;
|
check_sort_criteria(ctx, sort_criteria.as_ref())?;
|
||||||
|
|
||||||
@ -670,9 +671,8 @@ pub fn execute_search(
|
|||||||
tokbuilder.words_dict(dictionary);
|
tokbuilder.words_dict(dictionary);
|
||||||
}
|
}
|
||||||
|
|
||||||
let languages = ctx.index.languages(ctx.txn)?;
|
if let Some(locales) = locales {
|
||||||
if !languages.is_empty() {
|
tokbuilder.allow_list(locales);
|
||||||
tokbuilder.allow_list(&languages);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
let tokenizer = tokbuilder.build();
|
let tokenizer = tokbuilder.build();
|
||||||
|
@ -24,7 +24,7 @@ pub struct ExtractedTokens {
|
|||||||
#[tracing::instrument(level = "trace", skip_all, target = "search::query")]
|
#[tracing::instrument(level = "trace", skip_all, target = "search::query")]
|
||||||
pub fn located_query_terms_from_tokens(
|
pub fn located_query_terms_from_tokens(
|
||||||
ctx: &mut SearchContext<'_>,
|
ctx: &mut SearchContext<'_>,
|
||||||
query: NormalizedTokenIter<'_, '_>,
|
query: NormalizedTokenIter<'_, '_, '_, '_>,
|
||||||
words_limit: Option<usize>,
|
words_limit: Option<usize>,
|
||||||
) -> Result<ExtractedTokens> {
|
) -> Result<ExtractedTokens> {
|
||||||
let nbr_typos = number_of_typos_allowed(ctx)?;
|
let nbr_typos = number_of_typos_allowed(ctx)?;
|
||||||
|
Loading…
Reference in New Issue
Block a user