Update charabia

This commit is contained in:
ManyTheFish 2022-12-12 14:53:08 +01:00
parent 5114686394
commit 96d4242b93
3 changed files with 12 additions and 8 deletions

View File

@ -9,7 +9,7 @@ bimap = { version = "0.6.2", features = ["serde"] }
bincode = "1.3.3"
bstr = "1.0.1"
byteorder = "1.4.3"
charabia = { version = "0.6.0", default-features = false }
charabia = { version = "0.7.0", default-features = false }
concat-arrays = "0.1.2"
crossbeam-channel = "0.5.6"
either = "1.8.0"
@ -70,6 +70,10 @@ hebrew = ["charabia/hebrew"]
# allow japanese specialized tokenization
japanese = ["charabia/japanese"]
japanese-transliteration = ["charabia/japanese-transliteration"]
# allow korean specialized tokenization
korean = ["charabia/korean"]
# allow thai specialized tokenization
thai = ["charabia/thai"]

View File

@ -14,14 +14,14 @@ const DEFAULT_HIGHLIGHT_SUFFIX: &str = "</em>";
/// Structure used to build a Matcher allowing to customize formatting tags.
pub struct MatcherBuilder<'a, A> {
matching_words: MatchingWords,
tokenizer: Tokenizer<'a, A>,
tokenizer: Tokenizer<'a, 'a, A>,
crop_marker: Option<String>,
highlight_prefix: Option<String>,
highlight_suffix: Option<String>,
}
impl<'a, A> MatcherBuilder<'a, A> {
pub fn new(matching_words: MatchingWords, tokenizer: Tokenizer<'a, A>) -> Self {
pub fn new(matching_words: MatchingWords, tokenizer: Tokenizer<'a, 'a, A>) -> Self {
Self {
matching_words,
tokenizer,
@ -106,7 +106,7 @@ pub struct MatchBounds {
pub struct Matcher<'t, 'm, A> {
text: &'t str,
matching_words: &'m MatchingWords,
tokenizer: &'m Tokenizer<'m, A>,
tokenizer: &'m Tokenizer<'m, 'm, A>,
crop_marker: &'m str,
highlight_prefix: &'m str,
highlight_suffix: &'m str,

View File

@ -6,7 +6,7 @@ use std::hash::Hash;
use std::rc::Rc;
use std::{fmt, mem};
use charabia::classifier::ClassifiedTokenIter;
use charabia::normalizer::NormalizedTokenIter;
use charabia::{SeparatorKind, TokenKind};
use roaring::RoaringBitmap;
use slice_group_by::GroupBy;
@ -270,7 +270,7 @@ impl<'a> QueryTreeBuilder<'a> {
/// (the criterion `typo` will be ignored)
pub fn build<A: AsRef<[u8]>>(
&self,
query: ClassifiedTokenIter<A>,
query: NormalizedTokenIter<A>,
) -> Result<Option<(Operation, PrimitiveQuery, MatchingWords)>> {
let primitive_query = create_primitive_query(query, self.words_limit);
if !primitive_query.is_empty() {
@ -778,7 +778,7 @@ impl PrimitiveQueryPart {
/// Create primitive query from tokenized query string,
/// the primitive query is an intermediate state to build the query tree.
fn create_primitive_query<A>(
query: ClassifiedTokenIter<A>,
query: NormalizedTokenIter<A>,
words_limit: Option<usize>,
) -> PrimitiveQuery
where
@ -892,7 +892,7 @@ mod test {
terms_matching_strategy: TermsMatchingStrategy,
authorize_typos: bool,
words_limit: Option<usize>,
query: ClassifiedTokenIter<A>,
query: NormalizedTokenIter<A>,
) -> Result<Option<(Operation, PrimitiveQuery)>> {
let primitive_query = create_primitive_query(query, words_limit);
if !primitive_query.is_empty() {