mirror of
https://github.com/meilisearch/MeiliSearch
synced 2025-07-04 20:37:15 +02:00
Use tokenizer on numbers and booleans
This commit is contained in:
parent
8049df125b
commit
ba7f091db3
4 changed files with 40 additions and 34 deletions
|
@ -48,43 +48,43 @@ impl<'a> DocumentTokenizer<'a> {
|
|||
return Ok(());
|
||||
}
|
||||
|
||||
match value {
|
||||
let text;
|
||||
let tokens = match value {
|
||||
Value::Number(n) => {
|
||||
let token = n.to_string();
|
||||
if let Ok(position) = (*position).try_into() {
|
||||
token_fn(field_name, field_id, position, token.as_str())?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
text = n.to_string();
|
||||
self.tokenizer.tokenize(text.as_str())
|
||||
}
|
||||
Value::Bool(b) => {
|
||||
text = b.to_string();
|
||||
self.tokenizer.tokenize(text.as_str())
|
||||
}
|
||||
Value::String(text) => {
|
||||
// create an iterator of token with their positions.
|
||||
let locales = self
|
||||
.localized_attributes_rules
|
||||
.iter()
|
||||
.find(|rule| rule.match_str(field_name))
|
||||
.map(|rule| rule.locales());
|
||||
let tokens = process_tokens(
|
||||
*position,
|
||||
self.tokenizer.tokenize_with_allow_list(text.as_str(), locales),
|
||||
)
|
||||
.take_while(|(p, _)| *p < self.max_positions_per_attributes);
|
||||
|
||||
for (index, token) in tokens {
|
||||
// keep a word only if it is not empty and fit in a LMDB key.
|
||||
let token = token.lemma().trim();
|
||||
if !token.is_empty() && token.len() <= MAX_WORD_LENGTH {
|
||||
*position = index;
|
||||
if let Ok(position) = (*position).try_into() {
|
||||
token_fn(field_name, field_id, position, token)?;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
self.tokenizer.tokenize_with_allow_list(text.as_str(), locales)
|
||||
}
|
||||
_ => return Ok(()),
|
||||
};
|
||||
|
||||
// create an iterator of token with their positions.
|
||||
let tokens = process_tokens(*position, tokens)
|
||||
.take_while(|(p, _)| *p < self.max_positions_per_attributes);
|
||||
|
||||
for (index, token) in tokens {
|
||||
// keep a word only if it is not empty and fit in a LMDB key.
|
||||
let token = token.lemma().trim();
|
||||
if !token.is_empty() && token.len() <= MAX_WORD_LENGTH {
|
||||
*position = index;
|
||||
if let Ok(position) = (*position).try_into() {
|
||||
token_fn(field_name, field_id, position, token)?;
|
||||
}
|
||||
}
|
||||
_ => Ok(()),
|
||||
}
|
||||
|
||||
Ok(())
|
||||
};
|
||||
|
||||
// if the current field is searchable or contains a searchable attribute
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue