diff --git a/benchmarks/benches/formatting.rs b/benchmarks/benches/formatting.rs
index 5045df268..25e88ffeb 100644
--- a/benchmarks/benches/formatting.rs
+++ b/benchmarks/benches/formatting.rs
@@ -1,5 +1,5 @@
 use criterion::{criterion_group, criterion_main};
-use milli::tokenizer::{Analyzer, AnalyzerConfig};
+use milli::tokenizer::Tokenize;
 use milli::{FormatOptions, MatcherBuilder, MatchingWord, MatchingWords};
 
 #[cfg(target_os = "linux")]
@@ -52,9 +52,7 @@ fn bench_formatting(c: &mut criterion::Criterion) {
         for conf in confs {
             group.bench_function(conf.name, |b| {
                 b.iter(|| {
-                    let analyzer = Analyzer::new(AnalyzerConfig::<Vec<u8>>::default());
-                    let analyzed = analyzer.analyze(&conf.text);
-                    let tokens: Vec<_> = analyzed.tokens().collect();
+                    let tokens: Vec<_> = conf.text.tokenize().collect();
                     let mut matcher = conf.matching_words.build(&tokens[..], conf.text);
                     matcher.format(option.clone());
                 })
diff --git a/http-ui/src/main.rs b/http-ui/src/main.rs
index 641f82046..57a78b41e 100644
--- a/http-ui/src/main.rs
+++ b/http-ui/src/main.rs
@@ -19,7 +19,7 @@ use flate2::read::GzDecoder;
 use futures::{stream, FutureExt, StreamExt};
 use heed::EnvOpenOptions;
 use milli::documents::DocumentBatchReader;
-use milli::tokenizer::{Analyzer, AnalyzerConfig};
+use milli::tokenizer::{Tokenizer, TokenizerBuilder};
 use milli::update::UpdateIndexingStep::*;
 use milli::update::{
     ClearDocuments, IndexDocumentsConfig, IndexDocumentsMethod, IndexerConfig, Setting,
@@ -139,17 +139,16 @@ pub struct IndexerOpt {
     pub max_positions_per_attributes: Option<u32>,
 }
 
-struct Highlighter<'a, A> {
-    analyzer: Analyzer<'a, A>,
+struct Highlighter<'s, A> {
+    tokenizer: Tokenizer<'s, A>,
 }
 
-impl<'a, A: AsRef<[u8]>> Highlighter<'a, A> {
-    fn new(stop_words: &'a fst::Set<A>) -> Self {
-        let mut config = AnalyzerConfig::default();
-        config.stop_words(stop_words);
-        let analyzer = Analyzer::new(config);
+impl<'s, A: AsRef<[u8]>> Highlighter<'s, A> {
+    fn new(stop_words: &'s fst::Set<A>) -> Self {
+        let mut builder = TokenizerBuilder::new();
+        builder.stop_words(stop_words);
 
-        Self { analyzer }
+        Self { tokenizer: builder.build() }
     }
 
     fn highlight_value(&self, value: Value, matcher_builder: &MatcherBuilder) -> Value {
@@ -158,9 +157,8 @@ impl<'a, A: AsRef<[u8]>> Highlighter<'a, A> {
             Value::Bool(boolean) => Value::Bool(boolean),
             Value::Number(number) => Value::Number(number),
             Value::String(old_string) => {
-                let analyzed = self.analyzer.analyze(&old_string);
-                let analyzed: Vec<_> = analyzed.tokens().collect();
-                let mut matcher = matcher_builder.build(&analyzed[..], &old_string);
+                let tokens: Vec<_> = self.tokenizer.tokenize(&old_string).collect();
+                let mut matcher = matcher_builder.build(&tokens[..], &old_string);
 
                 let format_options = FormatOptions { highlight: true, crop: Some(10) };
diff --git a/milli/Cargo.toml b/milli/Cargo.toml
index 696384a01..d19ff03a9 100644
--- a/milli/Cargo.toml
+++ b/milli/Cargo.toml
@@ -9,18 +9,18 @@ bimap = { version = "0.6.2", features = ["serde"] }
 bincode = "1.3.3"
 bstr = "0.2.17"
 byteorder = "1.4.3"
+charabia = "0.5.0"
 concat-arrays = "0.1.2"
 crossbeam-channel = "0.5.2"
 either = "1.6.1"
+flatten-serde-json = { path = "../flatten-serde-json" }
 fst = "0.4.7"
 fxhash = "0.2.1"
-flatten-serde-json = { path = "../flatten-serde-json" }
-grenad = { version = "0.4.1", default-features = false, features = ["tempfile"] }
 geoutils = "0.4.1"
+grenad = { version = "0.4.1", default-features = false, features = ["tempfile"] }
 heed = { git = "https://github.com/meilisearch/heed", tag = "v0.12.1", default-features = false, features = ["lmdb", "sync-read-txn"] }
= ["lmdb", "sync-read-txn"] } json-depth-checker = { path = "../json-depth-checker" } levenshtein_automata = { version = "0.2.1", features = ["fst_automaton"] } -meilisearch-tokenizer = { git = "https://github.com/meilisearch/tokenizer.git", tag = "v0.2.9" } memmap2 = "0.5.3" obkv = "0.2.0" once_cell = "1.10.0" diff --git a/milli/src/lib.rs b/milli/src/lib.rs index e718dccae..f28677ed8 100644 --- a/milli/src/lib.rs +++ b/milli/src/lib.rs @@ -21,7 +21,7 @@ pub use filter_parser::{Condition, FilterCondition}; use fxhash::{FxHasher32, FxHasher64}; pub use grenad::CompressionType; use serde_json::{Map, Value}; -pub use {heed, meilisearch_tokenizer as tokenizer}; +pub use {charabia as tokenizer, heed}; pub use self::asc_desc::{AscDesc, AscDescError, Member, SortError}; pub use self::criterion::{default_criteria, Criterion, CriterionError}; diff --git a/milli/src/search/matches/matching_words.rs b/milli/src/search/matches/matching_words.rs index 84b47bba5..71fbfd794 100644 --- a/milli/src/search/matches/matching_words.rs +++ b/milli/src/search/matches/matching_words.rs @@ -3,8 +3,8 @@ use std::collections::BTreeMap; use std::fmt; use std::ops::{Index, IndexMut}; +use charabia::Token; use levenshtein_automata::{Distance, DFA}; -use meilisearch_tokenizer::Token; use crate::search::build_dfa; @@ -99,13 +99,13 @@ impl MatchingWord { /// Returns the lenght in chars of the match in case of the token matches the term. pub fn match_token(&self, token: &Token) -> Option { - match self.dfa.eval(token.text()) { + match self.dfa.eval(token.lemma()) { Distance::Exact(t) if t <= self.typo => { if self.prefix { - let len = bytes_to_highlight(token.text(), &self.word); - Some(token.num_chars_from_bytes(len)) + let len = bytes_to_highlight(token.lemma(), &self.word); + Some(token.original_lengths(len).0) } else { - Some(token.num_chars_from_bytes(token.text().len())) + Some(token.original_lengths(token.lemma().len()).0) } } _otherwise => None, @@ -262,7 +262,7 @@ mod tests { use std::borrow::Cow; use std::str::from_utf8; - use meilisearch_tokenizer::TokenKind; + use charabia::TokenKind; use super::*; use crate::MatchingWords; @@ -344,11 +344,10 @@ mod tests { matching_words .match_token(&Token { kind: TokenKind::Word, - word: Cow::Borrowed("word"), - byte_start: 0, - char_index: 0, + lemma: Cow::Borrowed("word"), + char_end: "word".chars().count(), byte_end: "word".len(), - char_map: None, + ..Default::default() }) .next(), Some(MatchType::Full { char_len: 3, ids: &[2] }) @@ -357,11 +356,10 @@ mod tests { matching_words .match_token(&Token { kind: TokenKind::Word, - word: Cow::Borrowed("nyc"), - byte_start: 0, - char_index: 0, + lemma: Cow::Borrowed("nyc"), + char_end: "nyc".chars().count(), byte_end: "nyc".len(), - char_map: None, + ..Default::default() }) .next(), None @@ -370,11 +368,10 @@ mod tests { matching_words .match_token(&Token { kind: TokenKind::Word, - word: Cow::Borrowed("world"), - byte_start: 0, - char_index: 0, + lemma: Cow::Borrowed("world"), + char_end: "world".chars().count(), byte_end: "world".len(), - char_map: None, + ..Default::default() }) .next(), Some(MatchType::Full { char_len: 5, ids: &[2] }) @@ -383,11 +380,10 @@ mod tests { matching_words .match_token(&Token { kind: TokenKind::Word, - word: Cow::Borrowed("splitted"), - byte_start: 0, - char_index: 0, + lemma: Cow::Borrowed("splitted"), + char_end: "splitted".chars().count(), byte_end: "splitted".len(), - char_map: None, + ..Default::default() }) .next(), Some(MatchType::Full { char_len: 5, ids: &[0] }) @@ -396,11 +392,10 @@ mod 
             matching_words
                 .match_token(&Token {
                     kind: TokenKind::Word,
-                    word: Cow::Borrowed("thisnew"),
-                    byte_start: 0,
-                    char_index: 0,
+                    lemma: Cow::Borrowed("thisnew"),
+                    char_end: "thisnew".chars().count(),
                     byte_end: "thisnew".len(),
-                    char_map: None,
+                    ..Default::default()
                 })
                 .next(),
             None
@@ -409,11 +404,10 @@ mod tests {
             matching_words
                 .match_token(&Token {
                     kind: TokenKind::Word,
-                    word: Cow::Borrowed("borld"),
-                    byte_start: 0,
-                    char_index: 0,
+                    lemma: Cow::Borrowed("borld"),
+                    char_end: "borld".chars().count(),
                     byte_end: "borld".len(),
-                    char_map: None,
+                    ..Default::default()
                 })
                 .next(),
             Some(MatchType::Full { char_len: 5, ids: &[2] })
@@ -422,11 +416,10 @@ mod tests {
             matching_words
                 .match_token(&Token {
                     kind: TokenKind::Word,
-                    word: Cow::Borrowed("wordsplit"),
-                    byte_start: 0,
-                    char_index: 0,
+                    lemma: Cow::Borrowed("wordsplit"),
+                    char_end: "wordsplit".chars().count(),
                     byte_end: "wordsplit".len(),
-                    char_map: None,
+                    ..Default::default()
                 })
                 .next(),
             Some(MatchType::Full { char_len: 4, ids: &[2] })
diff --git a/milli/src/search/matches/mod.rs b/milli/src/search/matches/mod.rs
index d89e7dcb6..85e77e15b 100644
--- a/milli/src/search/matches/mod.rs
+++ b/milli/src/search/matches/mod.rs
@@ -1,8 +1,8 @@
 use std::borrow::Cow;
 
+use charabia::{SeparatorKind, Token};
 use matching_words::{MatchType, PartialMatch, PrimitiveWordId};
 pub use matching_words::{MatchingWord, MatchingWords};
-use meilisearch_tokenizer::token::{SeparatorKind, Token};
 use serde::Serialize;
 
 pub mod matching_words;
@@ -168,13 +168,13 @@ impl<'t> Matcher<'t, '_> {
                 let current_token_position = *token_position;
                 let current_word_position = *word_position;
                 *token_position += 1;
-                if token.is_separator().is_none() {
+                if !token.is_separator() {
                     *word_position += 1;
                 }
 
                 Some((current_token_position, current_word_position, token))
             })
-            .filter(|(_, _, token)| token.is_separator().is_none());
+            .filter(|(_, _, token)| !token.is_separator());
 
         while let Some((token_position, word_position, word)) = words_positions.next() {
             for match_type in self.matching_words.match_token(word) {
@@ -243,8 +243,8 @@ impl<'t> Matcher<'t, '_> {
         let mut after_tokens = self.tokens[last_match_token_position..].iter().peekable();
 
         while remaining_words > 0 {
-            let before_token = before_tokens.peek().map(|t| t.is_separator());
-            let after_token = after_tokens.peek().map(|t| t.is_separator());
+            let before_token = before_tokens.peek().map(|t| t.separator_kind());
+            let after_token = after_tokens.peek().map(|t| t.separator_kind());
 
             match (before_token, after_token) {
                 // we can expand both sides.
@@ -470,7 +470,7 @@ impl<'t> Matcher<'t, '_> {
 #[cfg(test)]
 mod tests {
-    use meilisearch_tokenizer::{Analyzer, AnalyzerConfig};
+    use charabia::Tokenize;
 
     use super::*;
     use crate::search::matches::matching_words::MatchingWord;
@@ -490,30 +490,26 @@ mod tests {
         let matching_words = matching_words();
 
         let builder = MatcherBuilder::from_matching_words(matching_words);
-        let analyzer = Analyzer::new(AnalyzerConfig::<Vec<u8>>::default());
 
         let format_options = FormatOptions { highlight: false, crop: None };
 
         // Text without any match.
         let text = "A quick brown fox can not jump 32 feet, right? Brr, it is cold!";
-        let analyzed = analyzer.analyze(&text);
-        let tokens: Vec<_> = analyzed.tokens().collect();
+        let tokens: Vec<_> = text.tokenize().collect();
         let mut matcher = builder.build(&tokens[..], text);
         // no crop and no highlight should return complete text.
         assert_eq!(&matcher.format(format_options), &text);
 
         // Text containing all matches.
let text = "Natalie risk her future to build a world with the boy she loves. Emily Henry: The Love That Split The World."; - let analyzed = analyzer.analyze(&text); - let tokens: Vec<_> = analyzed.tokens().collect(); + let tokens: Vec<_> = text.tokenize().collect(); let mut matcher = builder.build(&tokens[..], text); // no crop and no highlight should return complete text. assert_eq!(&matcher.format(format_options), &text); // Text containing some matches. let text = "Natalie risk her future to build a world with the boy she loves."; - let analyzed = analyzer.analyze(&text); - let tokens: Vec<_> = analyzed.tokens().collect(); + let tokens: Vec<_> = text.tokenize().collect(); let mut matcher = builder.build(&tokens[..], text); // no crop and no highlight should return complete text. assert_eq!(&matcher.format(format_options), &text); @@ -524,44 +520,38 @@ mod tests { let matching_words = matching_words(); let builder = MatcherBuilder::from_matching_words(matching_words); - let analyzer = Analyzer::new(AnalyzerConfig::>::default()); let format_options = FormatOptions { highlight: true, crop: None }; // empty text. let text = ""; - let analyzed = analyzer.analyze(&text); - let tokens: Vec<_> = analyzed.tokens().collect(); + let tokens: Vec<_> = text.tokenize().collect(); let mut matcher = builder.build(&tokens[..], text); assert_eq!(&matcher.format(format_options), ""); // text containing only separators. let text = ":-)"; - let analyzed = analyzer.analyze(&text); - let tokens: Vec<_> = analyzed.tokens().collect(); + let tokens: Vec<_> = text.tokenize().collect(); let mut matcher = builder.build(&tokens[..], text); assert_eq!(&matcher.format(format_options), ":-)"); // Text without any match. let text = "A quick brown fox can not jump 32 feet, right? Brr, it is cold!"; - let analyzed = analyzer.analyze(&text); - let tokens: Vec<_> = analyzed.tokens().collect(); + let tokens: Vec<_> = text.tokenize().collect(); let mut matcher = builder.build(&tokens[..], text); // no crop should return complete text, because there is no matches. assert_eq!(&matcher.format(format_options), &text); // Text containing all matches. let text = "Natalie risk her future to build a world with the boy she loves. Emily Henry: The Love That Split The World."; - let analyzed = analyzer.analyze(&text); - let tokens: Vec<_> = analyzed.tokens().collect(); + let tokens: Vec<_> = text.tokenize().collect(); let mut matcher = builder.build(&tokens[..], text); // no crop should return complete text with highlighted matches. assert_eq!(&matcher.format(format_options), "Natalie risk her future to build a world with the boy she loves. Emily Henry: The Love That Split The World."); // Text containing some matches. let text = "Natalie risk her future to build a world with the boy she loves."; - let analyzed = analyzer.analyze(&text); - let tokens: Vec<_> = analyzed.tokens().collect(); + let tokens: Vec<_> = text.tokenize().collect(); let mut matcher = builder.build(&tokens[..], text); // no crop should return complete text with highlighted matches. assert_eq!( @@ -580,30 +570,26 @@ mod tests { let matching_words = MatchingWords::new(matching_words); let builder = MatcherBuilder::from_matching_words(matching_words); - let analyzer = Analyzer::new(AnalyzerConfig::>::default()); let format_options = FormatOptions { highlight: true, crop: None }; // Text containing prefix match. 
let text = "Ŵôřlḑôle"; - let analyzed = analyzer.analyze(&text); - let tokens: Vec<_> = analyzed.tokens().collect(); + let tokens: Vec<_> = text.tokenize().collect(); let mut matcher = builder.build(&tokens[..], text); // no crop should return complete text with highlighted matches. assert_eq!(&matcher.format(format_options), "Ŵôřlḑôle"); // Text containing unicode match. let text = "Ŵôřlḑ"; - let analyzed = analyzer.analyze(&text); - let tokens: Vec<_> = analyzed.tokens().collect(); + let tokens: Vec<_> = text.tokenize().collect(); let mut matcher = builder.build(&tokens[..], text); // no crop should return complete text with highlighted matches. assert_eq!(&matcher.format(format_options), "Ŵôřlḑ"); // Text containing unicode match. let text = "Westfália"; - let analyzed = analyzer.analyze(&text); - let tokens: Vec<_> = analyzed.tokens().collect(); + let tokens: Vec<_> = text.tokenize().collect(); let mut matcher = builder.build(&tokens[..], text); // no crop should return complete text with highlighted matches. assert_eq!(&matcher.format(format_options), "Westfália"); @@ -614,28 +600,24 @@ mod tests { let matching_words = matching_words(); let builder = MatcherBuilder::from_matching_words(matching_words); - let analyzer = Analyzer::new(AnalyzerConfig::>::default()); let format_options = FormatOptions { highlight: false, crop: Some(10) }; // empty text. let text = ""; - let analyzed = analyzer.analyze(&text); - let tokens: Vec<_> = analyzed.tokens().collect(); + let tokens: Vec<_> = text.tokenize().collect(); let mut matcher = builder.build(&tokens[..], text); assert_eq!(&matcher.format(format_options), ""); // text containing only separators. let text = ":-)"; - let analyzed = analyzer.analyze(&text); - let tokens: Vec<_> = analyzed.tokens().collect(); + let tokens: Vec<_> = text.tokenize().collect(); let mut matcher = builder.build(&tokens[..], text); assert_eq!(&matcher.format(format_options), ":-)"); // Text without any match. let text = "A quick brown fox can not jump 32 feet, right? Brr, it is cold!"; - let analyzed = analyzer.analyze(&text); - let tokens: Vec<_> = analyzed.tokens().collect(); + let tokens: Vec<_> = text.tokenize().collect(); let mut matcher = builder.build(&tokens[..], text); // no highlight should return 10 first words with a marker at the end. assert_eq!( @@ -645,8 +627,7 @@ mod tests { // Text without any match starting by a separator. let text = "(A quick brown fox can not jump 32 feet, right? Brr, it is cold!)"; - let analyzed = analyzer.analyze(&text); - let tokens: Vec<_> = analyzed.tokens().collect(); + let tokens: Vec<_> = text.tokenize().collect(); let mut matcher = builder.build(&tokens[..], text); // no highlight should return 10 first words with a marker at the end. assert_eq!( @@ -656,19 +637,17 @@ mod tests { // Test phrase propagation let text = "Natalie risk her future. Split The World is a book written by Emily Henry. I never read it."; - let analyzed = analyzer.analyze(&text); - let tokens: Vec<_> = analyzed.tokens().collect(); + let tokens: Vec<_> = text.tokenize().collect(); let mut matcher = builder.build(&tokens[..], text); // should crop the phrase instead of croping around the match. assert_eq!( &matcher.format(format_options), - "…Split The World is a book written by Emily Henry…" + "… Split The World is a book written by Emily Henry…", ); // Text containing some matches. 
let text = "Natalie risk her future to build a world with the boy she loves."; - let analyzed = analyzer.analyze(&text); - let tokens: Vec<_> = analyzed.tokens().collect(); + let tokens: Vec<_> = text.tokenize().collect(); let mut matcher = builder.build(&tokens[..], text); // no highlight should return 10 last words with a marker at the start. assert_eq!( @@ -678,8 +657,7 @@ mod tests { // Text containing all matches. let text = "Natalie risk her future to build a world with the boy she loves. Emily Henry: The Love That Split The World."; - let analyzed = analyzer.analyze(&text); - let tokens: Vec<_> = analyzed.tokens().collect(); + let tokens: Vec<_> = text.tokenize().collect(); let mut matcher = builder.build(&tokens[..], text); // no highlight should return 10 last words with a marker at the start. assert_eq!( @@ -689,8 +667,7 @@ mod tests { // Text containing a match unordered and a match ordered. let text = "The world split void void void void void void void void void split the world void void"; - let analyzed = analyzer.analyze(&text); - let tokens: Vec<_> = analyzed.tokens().collect(); + let tokens: Vec<_> = text.tokenize().collect(); let mut matcher = builder.build(&tokens[..], text); // crop should return 10 last words with a marker at the start. assert_eq!( @@ -700,8 +677,7 @@ mod tests { // Text containing matches with diferent density. let text = "split void the void void world void void void void void void void void void void split the world void void"; - let analyzed = analyzer.analyze(&text); - let tokens: Vec<_> = analyzed.tokens().collect(); + let tokens: Vec<_> = text.tokenize().collect(); let mut matcher = builder.build(&tokens[..], text); // crop should return 10 last words with a marker at the start. assert_eq!( @@ -711,8 +687,7 @@ mod tests { // Text containing matches with same word. let text = "split split split split split split void void void void void void void void void void split the world void void"; - let analyzed = analyzer.analyze(&text); - let tokens: Vec<_> = analyzed.tokens().collect(); + let tokens: Vec<_> = text.tokenize().collect(); let mut matcher = builder.build(&tokens[..], text); // crop should return 10 last words with a marker at the start. assert_eq!( @@ -726,28 +701,24 @@ mod tests { let matching_words = matching_words(); let builder = MatcherBuilder::from_matching_words(matching_words); - let analyzer = Analyzer::new(AnalyzerConfig::>::default()); let format_options = FormatOptions { highlight: true, crop: Some(10) }; // empty text. let text = ""; - let analyzed = analyzer.analyze(&text); - let tokens: Vec<_> = analyzed.tokens().collect(); + let tokens: Vec<_> = text.tokenize().collect(); let mut matcher = builder.build(&tokens[..], text); assert_eq!(&matcher.format(format_options), ""); // text containing only separators. let text = ":-)"; - let analyzed = analyzer.analyze(&text); - let tokens: Vec<_> = analyzed.tokens().collect(); + let tokens: Vec<_> = text.tokenize().collect(); let mut matcher = builder.build(&tokens[..], text); assert_eq!(&matcher.format(format_options), ":-)"); // Text without any match. let text = "A quick brown fox can not jump 32 feet, right? Brr, it is cold!"; - let analyzed = analyzer.analyze(&text); - let tokens: Vec<_> = analyzed.tokens().collect(); + let tokens: Vec<_> = text.tokenize().collect(); let mut matcher = builder.build(&tokens[..], text); // both should return 10 first words with a marker at the end. assert_eq!( @@ -757,8 +728,7 @@ mod tests { // Text containing some matches. 
let text = "Natalie risk her future to build a world with the boy she loves."; - let analyzed = analyzer.analyze(&text); - let tokens: Vec<_> = analyzed.tokens().collect(); + let tokens: Vec<_> = text.tokenize().collect(); let mut matcher = builder.build(&tokens[..], text); // both should return 10 last words with a marker at the start and highlighted matches. assert_eq!( @@ -768,16 +738,14 @@ mod tests { // Text containing all matches. let text = "Natalie risk her future to build a world with the boy she loves. Emily Henry: The Love That Split The World."; - let analyzed = analyzer.analyze(&text); - let tokens: Vec<_> = analyzed.tokens().collect(); + let tokens: Vec<_> = text.tokenize().collect(); let mut matcher = builder.build(&tokens[..], text); // both should return 10 last words with a marker at the start and highlighted matches. assert_eq!(&matcher.format(format_options), "…she loves. Emily Henry: The Love That Split The World."); // Text containing a match unordered and a match ordered. let text = "The world split void void void void void void void void void split the world void void"; - let analyzed = analyzer.analyze(&text); - let tokens: Vec<_> = analyzed.tokens().collect(); + let tokens: Vec<_> = text.tokenize().collect(); let mut matcher = builder.build(&tokens[..], text); // crop should return 10 last words with a marker at the start. assert_eq!( @@ -792,11 +760,9 @@ mod tests { let matching_words = matching_words(); let builder = MatcherBuilder::from_matching_words(matching_words); - let analyzer = Analyzer::new(AnalyzerConfig::>::default()); let text = "void void split the world void void."; - let analyzed = analyzer.analyze(&text); - let tokens: Vec<_> = analyzed.tokens().collect(); + let tokens: Vec<_> = text.tokenize().collect(); // set a smaller crop size let format_options = FormatOptions { highlight: false, crop: Some(2) }; @@ -847,13 +813,11 @@ mod tests { let mut builder = MatcherBuilder::from_matching_words(matching_words); builder.highlight_prefix("_".to_string()); builder.highlight_suffix("_".to_string()); - let analyzer = Analyzer::new(AnalyzerConfig::>::default()); let format_options = FormatOptions { highlight: true, crop: None }; let text = "the do or die can't be he do and or isn't he"; - let analyzed = analyzer.analyze(&text); - let tokens: Vec<_> = analyzed.tokens().collect(); + let tokens: Vec<_> = text.tokenize().collect(); let mut matcher = builder.build(&tokens[..], text); assert_eq!( diff --git a/milli/src/search/mod.rs b/milli/src/search/mod.rs index f3f852a48..62a7815b0 100644 --- a/milli/src/search/mod.rs +++ b/milli/src/search/mod.rs @@ -6,12 +6,12 @@ use std::result::Result as StdResult; use std::str::Utf8Error; use std::time::Instant; +use charabia::TokenizerBuilder; use distinct::{Distinct, DocIter, FacetDistinct, NoopDistinct}; use fst::automaton::Str; use fst::{Automaton, IntoStreamer, Streamer}; use levenshtein_automata::{LevenshteinAutomatonBuilder as LevBuilder, DFA}; use log::debug; -use meilisearch_tokenizer::{Analyzer, AnalyzerConfig}; use once_cell::sync::Lazy; use roaring::bitmap::RoaringBitmap; @@ -126,14 +126,14 @@ impl<'a> Search<'a> { builder.words_limit(self.words_limit); // We make sure that the analyzer is aware of the stop words // this ensures that the query builder is able to properly remove them. 
-        let mut config = AnalyzerConfig::default();
+        let mut tokbuilder = TokenizerBuilder::new();
         let stop_words = self.index.stop_words(self.rtxn)?;
         if let Some(ref stop_words) = stop_words {
-            config.stop_words(stop_words);
+            tokbuilder.stop_words(stop_words);
         }
-        let analyzer = Analyzer::new(config);
-        let result = analyzer.analyze(query);
-        let tokens = result.tokens();
+
+        let tokenizer = tokbuilder.build();
+        let tokens = tokenizer.tokenize(query);
         builder
             .build(tokens)?
             .map_or((None, None, None), |(qt, pq, mw)| (Some(qt), Some(pq), Some(mw)))
diff --git a/milli/src/search/query_tree.rs b/milli/src/search/query_tree.rs
index 76748179b..e0fac0f43 100644
--- a/milli/src/search/query_tree.rs
+++ b/milli/src/search/query_tree.rs
@@ -1,10 +1,9 @@
 use std::borrow::Cow;
 use std::{cmp, fmt, mem};
 
+use charabia::classifier::ClassifiedTokenIter;
+use charabia::{SeparatorKind, TokenKind};
 use fst::Set;
-use meilisearch_tokenizer::token::SeparatorKind;
-use meilisearch_tokenizer::tokenizer::TokenStream;
-use meilisearch_tokenizer::TokenKind;
 use roaring::RoaringBitmap;
 use slice_group_by::GroupBy;
 
@@ -235,9 +234,9 @@ impl<'a> QueryTreeBuilder<'a> {
     /// - if `authorize_typos` is set to `false` the query tree will be generated
     /// forcing all query words to match documents without any typo
     /// (the criterion `typo` will be ignored)
-    pub fn build(
+    pub fn build<A: AsRef<[u8]>>(
         &self,
-        query: TokenStream,
+        query: ClassifiedTokenIter<A>,
     ) -> Result<Option<(Operation, PrimitiveQuery, MatchingWords)>> {
         let stop_words = self.index.stop_words(self.rtxn)?;
         let primitive_query = create_primitive_query(query, stop_words, self.words_limit);
@@ -649,11 +648,14 @@ impl PrimitiveQueryPart {
 
 /// Create primitive query from tokenized query string,
 /// the primitive query is an intermediate state to build the query tree.
-fn create_primitive_query(
-    query: TokenStream,
+fn create_primitive_query<A>(
+    query: ClassifiedTokenIter<A>,
     stop_words: Option<Set<&[u8]>>,
     words_limit: Option<usize>,
-) -> PrimitiveQuery {
+) -> PrimitiveQuery
+where
+    A: AsRef<[u8]>,
+{
     let mut primitive_query = Vec::new();
     let mut phrase = Vec::new();
     let mut quoted = false;
@@ -673,21 +675,18 @@ fn create_primitive_query(
                 // 2. if the word is not the last token of the query and is not a stop_word we push it as a non-prefix word,
                 // 3. if the word is the last token of the query we push it as a prefix word.
                 if quoted {
-                    phrase.push(token.word.to_string());
+                    phrase.push(token.lemma().to_string());
                 } else if peekable.peek().is_some() {
-                    if !stop_words
-                        .as_ref()
-                        .map_or(false, |swords| swords.contains(token.word.as_ref()))
-                    {
+                    if !stop_words.as_ref().map_or(false, |swords| swords.contains(token.lemma())) {
                         primitive_query
-                            .push(PrimitiveQueryPart::Word(token.word.to_string(), false));
+                            .push(PrimitiveQueryPart::Word(token.lemma().to_string(), false));
                     }
                 } else {
-                    primitive_query.push(PrimitiveQueryPart::Word(token.word.to_string(), true));
+                    primitive_query.push(PrimitiveQueryPart::Word(token.lemma().to_string(), true));
                 }
             }
             TokenKind::Separator(separator_kind) => {
-                let quote_count = token.word.chars().filter(|&s| s == '"').count();
+                let quote_count = token.lemma().chars().filter(|&s| s == '"').count();
                 // swap quoted state if we encounter a double quote
                 if quote_count % 2 != 0 {
                     quoted = !quoted;
@@ -738,8 +737,8 @@ pub fn maximum_proximity(operation: &Operation) -> usize {
 mod test {
     use std::collections::HashMap;
 
+    use charabia::Tokenize;
     use maplit::hashmap;
-    use meilisearch_tokenizer::{Analyzer, AnalyzerConfig};
     use rand::rngs::StdRng;
     use rand::{Rng, SeedableRng};
 
@@ -754,12 +753,12 @@ mod test {
     }
 
     impl TestContext {
-        fn build(
+        fn build<A: AsRef<[u8]>>(
             &self,
             optional_words: bool,
             authorize_typos: bool,
             words_limit: Option<usize>,
-            query: TokenStream,
+            query: ClassifiedTokenIter<A>,
         ) -> Result<Option<(Operation, PrimitiveQuery)>> {
             let primitive_query = create_primitive_query(query, None, words_limit);
             if !primitive_query.is_empty() {
@@ -856,9 +855,7 @@ mod test {
     #[test]
     fn prefix() {
         let query = "hey friends";
-        let analyzer = Analyzer::new(AnalyzerConfig::<Vec<u8>>::default());
-        let result = analyzer.analyze(query);
-        let tokens = result.tokens();
+        let tokens = query.tokenize();
 
         let expected = Operation::Or(
             false,
@@ -889,9 +886,7 @@
     #[test]
     fn no_prefix() {
         let query = "hey friends ";
-        let analyzer = Analyzer::new(AnalyzerConfig::<Vec<u8>>::default());
-        let result = analyzer.analyze(query);
-        let tokens = result.tokens();
+        let tokens = query.tokenize();
 
         let expected = Operation::Or(
             false,
@@ -922,9 +917,7 @@
     #[test]
     fn synonyms() {
         let query = "hello world ";
-        let analyzer = Analyzer::new(AnalyzerConfig::<Vec<u8>>::default());
-        let result = analyzer.analyze(query);
-        let tokens = result.tokens();
+        let tokens = query.tokenize();
 
         let expected = Operation::Or(
             false,
@@ -987,9 +980,7 @@
     #[test]
     fn complex_synonyms() {
         let query = "new york city ";
-        let analyzer = Analyzer::new(AnalyzerConfig::<Vec<u8>>::default());
-        let result = analyzer.analyze(query);
-        let tokens = result.tokens();
+        let tokens = query.tokenize();
 
         let expected = Operation::Or(
             false,
@@ -1087,9 +1078,7 @@
     #[test]
     fn ngrams() {
         let query = "n grams ";
-        let analyzer = Analyzer::new(AnalyzerConfig::<Vec<u8>>::default());
-        let result = analyzer.analyze(query);
-        let tokens = result.tokens();
+        let tokens = query.tokenize();
 
         let expected = Operation::Or(
             false,
@@ -1120,9 +1109,7 @@
     #[test]
     fn word_split() {
         let query = "wordsplit fish ";
-        let analyzer = Analyzer::new(AnalyzerConfig::<Vec<u8>>::default());
-        let result = analyzer.analyze(query);
-        let tokens = result.tokens();
+        let tokens = query.tokenize();
 
         let expected = Operation::Or(
             false,
@@ -1159,9 +1146,7 @@
     #[test]
     fn phrase() {
         let query = "\"hey friends\" \" \" \"wooop";
-        let analyzer = Analyzer::new(AnalyzerConfig::<Vec<u8>>::default());
-        let result = analyzer.analyze(query);
-        let tokens = result.tokens();
+        let tokens = query.tokenize();
 
         let expected = Operation::And(vec![
             Operation::Phrase(vec!["hey".to_string(), "friends".to_string()]),
@@ -1177,9 +1162,7 @@
     #[test]
     fn phrase_with_hard_separator() {
         let query = "\"hey friends. wooop wooop\"";
-        let analyzer = Analyzer::new(AnalyzerConfig::<Vec<u8>>::default());
-        let result = analyzer.analyze(query);
-        let tokens = result.tokens();
+        let tokens = query.tokenize();
 
         let expected = Operation::And(vec![
             Operation::Phrase(vec!["hey".to_string(), "friends".to_string()]),
@@ -1195,9 +1178,7 @@
     #[test]
     fn optional_word() {
         let query = "hey my friend ";
-        let analyzer = Analyzer::new(AnalyzerConfig::<Vec<u8>>::default());
-        let result = analyzer.analyze(query);
-        let tokens = result.tokens();
+        let tokens = query.tokenize();
 
         let expected = Operation::Or(
             true,
@@ -1280,9 +1261,7 @@
     #[test]
     fn optional_word_phrase() {
         let query = "\"hey my\"";
-        let analyzer = Analyzer::new(AnalyzerConfig::<Vec<u8>>::default());
-        let result = analyzer.analyze(query);
-        let tokens = result.tokens();
+        let tokens = query.tokenize();
 
         let expected = Operation::Phrase(vec!["hey".to_string(), "my".to_string()]);
         let (query_tree, _) =
@@ -1294,9 +1273,7 @@
     #[test]
     fn optional_word_multiple_phrases() {
         let query = r#""hey" my good "friend""#;
-        let analyzer = Analyzer::new(AnalyzerConfig::<Vec<u8>>::default());
-        let result = analyzer.analyze(query);
-        let tokens = result.tokens();
+        let tokens = query.tokenize();
 
         let expected = Operation::Or(
             true,
@@ -1365,9 +1342,7 @@
     #[test]
     fn no_typo() {
         let query = "hey friends ";
-        let analyzer = Analyzer::new(AnalyzerConfig::<Vec<u8>>::default());
-        let result = analyzer.analyze(query);
-        let tokens = result.tokens();
+        let tokens = query.tokenize();
 
         let expected = Operation::Or(
             false,
@@ -1397,9 +1372,7 @@
     #[test]
     fn words_limit() {
         let query = "\"hey my\" good friend";
-        let analyzer = Analyzer::new(AnalyzerConfig::<Vec<u8>>::default());
-        let result = analyzer.analyze(query);
-        let tokens = result.tokens();
+        let tokens = query.tokenize();
 
         let expected = Operation::And(vec![
             Operation::Phrase(vec!["hey".to_string(), "my".to_string()]),
@@ -1441,10 +1414,8 @@
     #[test]
     fn disable_typo_on_word() {
         let query = "goodbye";
-        let analyzer = Analyzer::new(AnalyzerConfig::<Vec<u8>>::default());
-        let result = analyzer.analyze(query);
+        let tokens = query.tokenize();
 
-        let tokens = result.tokens();
         let exact_words = fst::Set::from_iter(Some("goodbye")).unwrap().into_fst().into_inner();
         let exact_words = Some(fst::Set::new(exact_words).unwrap().map_data(Cow::Owned).unwrap());
         let context = TestContext { exact_words, ..Default::default() };
diff --git a/milli/src/update/index_documents/extract/extract_docid_word_positions.rs b/milli/src/update/index_documents/extract/extract_docid_word_positions.rs
index 44bf9dbf7..9a6060805 100644
--- a/milli/src/update/index_documents/extract/extract_docid_word_positions.rs
+++ b/milli/src/update/index_documents/extract/extract_docid_word_positions.rs
@@ -3,8 +3,7 @@
 use std::convert::TryInto;
 use std::fs::File;
 use std::{io, mem, str};
 
-use meilisearch_tokenizer::token::SeparatorKind;
-use meilisearch_tokenizer::{Analyzer, AnalyzerConfig, Token, TokenKind};
+use charabia::{SeparatorKind, Token, TokenKind, TokenizerBuilder};
 use roaring::RoaringBitmap;
 use serde_json::Value;
 
@@ -40,11 +39,11 @@ pub fn extract_docid_word_positions(
     let mut key_buffer = Vec::new();
     let mut field_buffer = String::new();
-    let mut config = AnalyzerConfig::default();
+    let mut builder = TokenizerBuilder::new();
     if let Some(stop_words) = stop_words {
-        config.stop_words(stop_words);
+        builder.stop_words(stop_words);
     }
-    let analyzer = Analyzer::<Vec<u8>>::new(AnalyzerConfig::default());
+    let tokenizer = builder.build();
 
     let mut cursor = obkv_documents.into_cursor()?;
     while let Some((key, value)) = cursor.move_on_next()? {
@@ -64,12 +63,11 @@ pub fn extract_docid_word_positions(
                 serde_json::from_slice(field_bytes).map_err(InternalError::SerdeJson)?;
             field_buffer.clear();
             if let Some(field) = json_to_string(&value, &mut field_buffer) {
-                let analyzed = analyzer.analyze(field);
-                let tokens = process_tokens(analyzed.tokens())
+                let tokens = process_tokens(tokenizer.tokenize(field))
                     .take_while(|(p, _)| (*p as u32) < max_positions_per_attributes);
 
                 for (index, token) in tokens {
-                    let token = token.text().trim();
+                    let token = token.lemma().trim();
                     if !token.is_empty() {
                         key_buffer.truncate(mem::size_of::<u32>());
                         key_buffer.extend_from_slice(token.as_bytes());
@@ -146,7 +144,7 @@ fn process_tokens<'a>(
     tokens: impl Iterator<Item = Token<'a>>,
 ) -> impl Iterator<Item = (usize, Token<'a>)> {
     tokens
-        .skip_while(|token| token.is_separator().is_some())
+        .skip_while(|token| token.is_separator())
         .scan((0, None), |(offset, prev_kind), token| {
             match token.kind {
                 TokenKind::Word | TokenKind::StopWord | TokenKind::Unknown => {
diff --git a/milli/src/update/settings.rs b/milli/src/update/settings.rs
index 829932d5c..9363d8eb6 100644
--- a/milli/src/update/settings.rs
+++ b/milli/src/update/settings.rs
@@ -1,8 +1,8 @@
 use std::collections::{BTreeSet, HashMap, HashSet};
 use std::result::Result as StdResult;
 
+use charabia::{Tokenizer, TokenizerBuilder};
 use itertools::Itertools;
-use meilisearch_tokenizer::{Analyzer, AnalyzerConfig};
 use serde::{Deserialize, Deserializer, Serialize, Serializer};
 use time::OffsetDateTime;
 
@@ -385,13 +385,12 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
     fn update_synonyms(&mut self) -> Result<bool> {
         match self.synonyms {
             Setting::Set(ref synonyms) => {
-                fn normalize(analyzer: &Analyzer<&[u8]>, text: &str) -> Vec<String> {
-                    analyzer
-                        .analyze(text)
-                        .tokens()
+                fn normalize(tokenizer: &Tokenizer<&[u8]>, text: &str) -> Vec<String> {
+                    tokenizer
+                        .tokenize(text)
                         .filter_map(|token| {
                             if token.is_word() {
-                                Some(token.text().to_string())
+                                Some(token.lemma().to_string())
                             } else {
                                 None
                             }
                         })
                         .collect::<Vec<_>>()
                 }
 
-                let mut config = AnalyzerConfig::default();
+                let mut builder = TokenizerBuilder::new();
                 let stop_words = self.index.stop_words(self.wtxn)?;
-                if let Some(stop_words) = &stop_words {
-                    config.stop_words(stop_words);
+                if let Some(ref stop_words) = stop_words {
+                    builder.stop_words(stop_words);
                 }
-                let analyzer = Analyzer::new(config);
+                let tokenizer = builder.build();
 
                 let mut new_synonyms = HashMap::new();
                 for (word, synonyms) in synonyms {
                     // Normalize both the word and associated synonyms.
-                    let normalized_word = normalize(&analyzer, word);
+                    let normalized_word = normalize(&tokenizer, word);
                     let normalized_synonyms =
-                        synonyms.iter().map(|synonym| normalize(&analyzer, synonym));
+                        synonyms.iter().map(|synonym| normalize(&tokenizer, synonym));
 
                     // Store the normalized synonyms under the normalized word,
                     // merging the possible duplicate words.
@@ -584,19 +583,19 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
     fn update_exact_words(&mut self) -> Result<()> {
         match self.exact_words {
             Setting::Set(ref mut words) => {
-                fn normalize(analyzer: &Analyzer<&[u8]>, text: &str) -> String {
-                    analyzer.analyze(text).tokens().map(|token| token.text().to_string()).collect()
+                fn normalize(tokenizer: &Tokenizer<&[u8]>, text: &str) -> String {
+                    tokenizer.tokenize(text).map(|token| token.lemma().to_string()).collect()
                 }
 
-                let mut config = AnalyzerConfig::default();
+                let mut builder = TokenizerBuilder::new();
                 let stop_words = self.index.stop_words(self.wtxn)?;
-                if let Some(stop_words) = &stop_words {
-                    config.stop_words(stop_words);
+                if let Some(ref stop_words) = stop_words {
+                    builder.stop_words(stop_words);
                 }
-                let analyzer = Analyzer::new(config);
+                let tokenizer = builder.build();
 
                 let mut words: Vec<_> =
-                    words.iter().map(|word| normalize(&analyzer, word)).collect();
+                    words.iter().map(|word| normalize(&tokenizer, word)).collect();
 
                 // normalization could reorder words
                 words.sort_unstable();
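
Every call site touched above follows the same replacement pattern: build a charabia tokenizer (optionally wired with the stop-word `fst::Set`), iterate tokens, and read `lemma()` / `is_separator()` instead of the old `text()` / `is_separator().is_some()`. The snippet below is a minimal sketch of that pattern, not code from the PR; it assumes charabia 0.5 and fst 0.4 as pinned in milli/Cargo.toml, and the sample text and stop-word list are invented.

```rust
// Sketch of the charabia API this diff migrates to (assumes charabia 0.5, fst 0.4).
use charabia::{Tokenize, TokenizerBuilder};

fn main() {
    // One-off tokenization through the `Tokenize` trait, as in the updated tests:
    // `text.tokenize()` replaces the old `Analyzer::analyze(text).tokens()`.
    let tokens: Vec<_> = "The quick brown fox".tokenize().collect();
    for token in &tokens {
        // `lemma()` replaces `text()`; `is_separator()` now returns a bool.
        println!("{:?} (separator: {})", token.lemma(), token.is_separator());
    }

    // Reusable tokenizer carrying a stop-word set, as in `Highlighter::new` and
    // `Search::execute`: `TokenizerBuilder` replaces `AnalyzerConfig` + `Analyzer`.
    // Example stop-word set (entries must be in lexicographic order for fst).
    let stop_words = fst::Set::from_iter(["a", "the"]).unwrap();
    let mut builder = TokenizerBuilder::new();
    builder.stop_words(&stop_words);
    let tokenizer = builder.build();

    let lemmas: Vec<String> =
        tokenizer.tokenize("The quick brown fox").map(|t| t.lemma().to_string()).collect();
    println!("{:?}", lemmas);
}
```

Note that, as in the settings and extraction code above, configuring stop words classifies those tokens (so callers can skip them); it does not remove them from the token stream.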