diff --git a/examples/query-database.rs b/examples/query-database.rs index e61e2d0ab..0a8771a51 100644 --- a/examples/query-database.rs +++ b/examples/query-database.rs @@ -48,6 +48,24 @@ fn display_highlights(text: &str, ranges: &[usize]) -> io::Result<()> { Ok(()) } +fn char_to_byte_range(index: usize, length: usize, text: &str) -> (usize, usize) { + let mut byte_index = 0; + let mut byte_length = 0; + + for (n, (i, c)) in text.char_indices().enumerate() { + if n == index { + byte_index = i; + } + + if n + 1 == index + length { + byte_length = i - byte_index + c.len_utf8(); + break; + } + } + + (byte_index, byte_length) +} + fn create_highlight_areas(text: &str, matches: &[Match], attribute: SchemaAttr) -> Vec { let mut byte_indexes = BTreeMap::new(); @@ -55,11 +73,18 @@ fn create_highlight_areas(text: &str, matches: &[Match], attribute: SchemaAttr) let match_attribute = match_.attribute.attribute(); if SchemaAttr::new(match_attribute) == attribute { let word_area = match_.word_area; - let byte_index = word_area.byte_index() as usize; - let length = word_area.length() as usize; + + let char_index = word_area.char_index() as usize; + let char_length = word_area.length() as usize; + let (byte_index, byte_length) = char_to_byte_range(char_index, char_length, text); + match byte_indexes.entry(byte_index) { - Entry::Vacant(entry) => { entry.insert(length); }, - Entry::Occupied(mut entry) => if *entry.get() < length { entry.insert(length); }, + Entry::Vacant(entry) => { entry.insert(byte_length); }, + Entry::Occupied(mut entry) => { + if *entry.get() < byte_length { + entry.insert(byte_length); + } + }, } } } diff --git a/src/database/serde/indexer_serializer.rs b/src/database/serde/indexer_serializer.rs index 7bbcca7e7..ae3eba436 100644 --- a/src/database/serde/indexer_serializer.rs +++ b/src/database/serde/indexer_serializer.rs @@ -51,24 +51,14 @@ where B: TokenizerBuilder fn serialize_str(self, v: &str) -> Result { for Token { word, word_index, char_index } in 
self.tokenizer_builder.build(v) { + let document_id = self.document_id; + // FIXME must u32::try_from instead let attribute = match Attribute::new(self.attribute.0, word_index as u32) { Ok(attribute) => attribute, Err(_) => return Ok(()), }; - // FIXME must u16/u32::try_from instead - let word_area = match WordArea::new(char_index as u32, word.len() as u16) { - Ok(word_area) => word_area, - Err(_) => return Ok(()), - }; - - let doc_index = DocIndex { - document_id: self.document_id, - attribute, - word_area - }; - // insert the exact representation let word_lower = word.to_lowercase(); @@ -77,9 +67,26 @@ where B: TokenizerBuilder // and the unidecoded lowercased version let word_unidecoded = unidecode::unidecode(word).to_lowercase(); if word_lower != word_unidecoded { + + // FIXME must u16/u32::try_from instead + let length = word_unidecoded.chars().count() as u16; + let word_area = match WordArea::new(char_index as u32, length) { + Ok(word_area) => word_area, + Err(_) => return Ok(()), + }; + + let doc_index = DocIndex { document_id, attribute, word_area }; self.update.insert_doc_index(word_unidecoded.into_bytes(), doc_index); } + // FIXME must u16/u32::try_from instead + let length = word.chars().count() as u16; + let word_area = match WordArea::new(char_index as u32, length) { + Ok(word_area) => word_area, + Err(_) => return Ok(()), + }; + + let doc_index = DocIndex { document_id, attribute, word_area }; self.update.insert_doc_index(word_lower.into_bytes(), doc_index); } Ok(()) diff --git a/src/lib.rs b/src/lib.rs index 2a241a2ac..03c2a200d 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -97,15 +97,15 @@ enum AttributeError { pub struct WordArea(u32); impl WordArea { - /// Construct a `WordArea` from a word position in bytes - /// and the length of it. + /// Construct a `WordArea` from a word position expressed as + /// a number of characters and the length of it. 
/// /// # Panics /// - /// The byte index must not be greater than 2^22 + /// The char index must not be greater than 2^22 /// and the length not greater than 1024. - fn new(byte_index: u32, length: u16) -> Result { - if byte_index & 0b1111_1111_1100_0000_0000_0000_0000 != 0 { + fn new(char_index: u32, length: u16) -> Result { + if char_index & 0b1111_1111_1100_0000_0000_0000_0000 != 0 { return Err(WordAreaError::ByteIndexTooBig) } @@ -113,12 +113,12 @@ impl WordArea { return Err(WordAreaError::LengthTooBig) } - let byte_index = byte_index << 10; - Ok(WordArea(byte_index | u32::from(length))) + let char_index = char_index << 10; + Ok(WordArea(char_index | u32::from(length))) } - fn new_faillible(byte_index: u32, length: u16) -> WordArea { - match WordArea::new(byte_index, length) { + fn new_faillible(char_index: u32, length: u16) -> WordArea { + match WordArea::new(char_index, length) { Ok(word_area) => word_area, Err(WordAreaError::ByteIndexTooBig) => { panic!("word area byte index must not be greater than 2^22") @@ -130,7 +130,7 @@ impl WordArea { } #[inline] - pub fn byte_index(self) -> u32 { + pub fn char_index(self) -> u32 { self.0 >> 10 } @@ -143,7 +143,7 @@ impl WordArea { impl fmt::Debug for WordArea { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { f.debug_struct("WordArea") - .field("byte_index", &self.byte_index()) + .field("char_index", &self.char_index()) .field("length", &self.length()) .finish() } @@ -270,26 +270,26 @@ mod tests { TestResult::from_bool(a < b) } - fn qc_word_area(gen_byte_index: u32, gen_length: u16) -> TestResult { - if gen_byte_index > 2_u32.pow(22) || gen_length > 2_u16.pow(10) { + fn qc_word_area(gen_char_index: u32, gen_length: u16) -> TestResult { + if gen_char_index > 2_u32.pow(22) || gen_length > 2_u16.pow(10) { return TestResult::discard() } - let word_area = WordArea::new_faillible(gen_byte_index, gen_length); + let word_area = WordArea::new_faillible(gen_char_index, gen_length); - let valid_char_index = 
word_area.byte_index() == gen_byte_index; + let valid_char_index = word_area.char_index() == gen_char_index; let valid_length = word_area.length() == gen_length; TestResult::from_bool(valid_char_index && valid_length) } - fn qc_word_area_ord(gen_byte_index: u32, gen_length: u16) -> TestResult { - if gen_byte_index >= 2_u32.pow(22) || gen_length >= 2_u16.pow(10) { + fn qc_word_area_ord(gen_char_index: u32, gen_length: u16) -> TestResult { + if gen_char_index >= 2_u32.pow(22) || gen_length >= 2_u16.pow(10) { return TestResult::discard() } - let a = WordArea::new_faillible(gen_byte_index, gen_length); - let b = WordArea::new_faillible(gen_byte_index + 1, gen_length + 1); + let a = WordArea::new_faillible(gen_char_index, gen_length); + let b = WordArea::new_faillible(gen_char_index + 1, gen_length + 1); TestResult::from_bool(a < b) } diff --git a/src/tokenizer/mod.rs b/src/tokenizer/mod.rs index 79794f6d8..a2910728d 100644 --- a/src/tokenizer/mod.rs +++ b/src/tokenizer/mod.rs @@ -96,7 +96,7 @@ impl<'a> Iterator for Tokenizer<'a> { let (spaces, word) = prefix.split_at(start_word); self.inner = tail; - self.char_index += spaces.len(); + self.char_index += spaces.chars().count(); self.word_index += distance.map(Separator::to_usize).unwrap_or(0); let token = Token { @@ -105,7 +105,7 @@ impl<'a> Iterator for Tokenizer<'a> { char_index: self.char_index, }; - self.char_index += word.len(); + self.char_index += word.chars().count(); return Some(token) } @@ -122,7 +122,7 @@ impl<'a> Iterator for Tokenizer<'a> { let token = Token { word: word, word_index: self.word_index + distance.map(Separator::to_usize).unwrap_or(0), - char_index: self.char_index + spaces.len(), + char_index: self.char_index + spaces.chars().count(), }; return Some(token) } @@ -173,7 +173,7 @@ mod tests { assert_eq!(tokenizer.next(), Some(Token { word: "yo", word_index: 0, char_index: 4 })); assert_eq!(tokenizer.next(), Some(Token { word: "😂", word_index: 1, char_index: 7 })); - assert_eq!(tokenizer.next(), 
Some(Token { word: "aïe", word_index: 9, char_index: 13 })); + assert_eq!(tokenizer.next(), Some(Token { word: "aïe", word_index: 9, char_index: 10 })); assert_eq!(tokenizer.next(), None); let mut tokenizer = Tokenizer::new("yo ! lolo ? 😱 - lol . 😣 ,"); @@ -181,8 +181,8 @@ mod tests { assert_eq!(tokenizer.next(), Some(Token { word: "yo", word_index: 0, char_index: 0 })); assert_eq!(tokenizer.next(), Some(Token { word: "lolo", word_index: 8, char_index: 5 })); assert_eq!(tokenizer.next(), Some(Token { word: "😱", word_index: 16, char_index: 12 })); - assert_eq!(tokenizer.next(), Some(Token { word: "lol", word_index: 24, char_index: 19 })); - assert_eq!(tokenizer.next(), Some(Token { word: "😣", word_index: 32, char_index: 25 })); + assert_eq!(tokenizer.next(), Some(Token { word: "lol", word_index: 24, char_index: 16 })); + assert_eq!(tokenizer.next(), Some(Token { word: "😣", word_index: 32, char_index: 22 })); assert_eq!(tokenizer.next(), None); } }