From ac347d788c356b5ce9c181e0eb4ac45a6b125ad3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Thu, 10 Jan 2019 15:23:52 +0100 Subject: [PATCH] feat: Make multiple document updates shadow themselves --- src/database/serde/indexer_serializer.rs | 10 +++------- src/database/update/raw_builder.rs | 14 ++++++++++++-- 2 files changed, 15 insertions(+), 9 deletions(-) diff --git a/src/database/serde/indexer_serializer.rs b/src/database/serde/indexer_serializer.rs index ae3eba436..bdbfb281d 100644 --- a/src/database/serde/indexer_serializer.rs +++ b/src/database/serde/indexer_serializer.rs @@ -49,8 +49,8 @@ where B: TokenizerBuilder } fn serialize_str(self, v: &str) -> Result { - for Token { word, word_index, char_index } in self.tokenizer_builder.build(v) { - + for token in self.tokenizer_builder.build(v) { + let Token { word, word_index, char_index } = token; let document_id = self.document_id; // FIXME must u32::try_from instead @@ -61,15 +61,13 @@ where B: TokenizerBuilder // insert the exact representation let word_lower = word.to_lowercase(); + let length = word.chars().count() as u16; if self.stop_words.contains(&word_lower) { continue } // and the unidecoded lowercased version let word_unidecoded = unidecode::unidecode(word).to_lowercase(); if word_lower != word_unidecoded { - - // FIXME must u16/u32::try_from instead - let length = word_unidecoded.chars().count() as u16; let word_area = match WordArea::new(char_index as u32, length) { Ok(word_area) => word_area, Err(_) => return Ok(()), @@ -79,8 +77,6 @@ where B: TokenizerBuilder self.update.insert_doc_index(word_unidecoded.into_bytes(), doc_index); } - // FIXME must u16/u32::try_from instead - let length = word.chars().count() as u16; let word_area = match WordArea::new(char_index as u32, length) { Ok(word_area) => word_area, Err(_) => return Ok(()), diff --git a/src/database/update/raw_builder.rs b/src/database/update/raw_builder.rs index d116e05fe..323f55eb5 100644 --- a/src/database/update/raw_builder.rs +++ b/src/database/update/raw_builder.rs @@ -1,4 +1,4 @@ -use std::collections::BTreeMap; +use std::collections::btree_map::{BTreeMap, Entry}; use std::path::PathBuf; use std::error::Error; @@ -39,6 +39,10 @@ impl DocumentUpdate { pub fn remove(&mut self) { self.cleared = true; + self.clear(); + } + + pub fn clear(&mut self) { self.words_indexes.clear(); self.attributes.clear(); } @@ -61,7 +65,13 @@ impl RawUpdateBuilder { } pub fn document_update(&mut self, document_id: DocumentId) -> &mut DocumentUpdate { - self.document_updates.entry(document_id).or_insert_with(DocumentUpdate::new) + match self.document_updates.entry(document_id) { + Entry::Occupied(mut occupied) => { + occupied.get_mut().clear(); + occupied.into_mut() + }, + Entry::Vacant(vacant) => vacant.insert(DocumentUpdate::new()), + } } pub fn build(mut self) -> Result> {