Merge pull request #155 from meilisearch/update-sdset

Use the safer SetBuf constructor instead of new_unchecked
This commit is contained in:
Clément Renault 2019-05-21 18:23:39 +02:00 committed by GitHub
commit 34ba520f44
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 9 additions and 18 deletions

View File

@@ -11,7 +11,7 @@ lazy_static = "1.2.0"
log = "0.4.6" log = "0.4.6"
meilidb-tokenizer = { path = "../meilidb-tokenizer", version = "0.1.0" } meilidb-tokenizer = { path = "../meilidb-tokenizer", version = "0.1.0" }
rayon = "1.0.3" rayon = "1.0.3"
sdset = "0.3.1" sdset = "0.3.2"
serde = { version = "1.0.88", features = ["derive"] } serde = { version = "1.0.88", features = ["derive"] }
slice-group-by = "0.2.4" slice-group-by = "0.2.4"
zerocopy = "0.2.2" zerocopy = "0.2.2"

View File

@@ -13,7 +13,7 @@ linked-hash-map = { version = "0.5.2", features = ["serde_impl"] }
meilidb-core = { path = "../meilidb-core", version = "0.1.0" } meilidb-core = { path = "../meilidb-core", version = "0.1.0" }
meilidb-tokenizer = { path = "../meilidb-tokenizer", version = "0.1.0" } meilidb-tokenizer = { path = "../meilidb-tokenizer", version = "0.1.0" }
ordered-float = { version = "1.0.2", features = ["serde"] } ordered-float = { version = "1.0.2", features = ["serde"] }
sdset = "0.3.1" sdset = "0.3.2"
serde = { version = "1.0.91", features = ["derive"] } serde = { version = "1.0.91", features = ["derive"] }
serde_json = { version = "1.0.39", features = ["preserve_order"] } serde_json = { version = "1.0.39", features = ["preserve_order"] }
sled = "0.23.0" sled = "0.23.0"

View File

@@ -49,11 +49,7 @@ impl<'a> DocumentsDeletion<'a> {
let schema = &lease_inner.schema; let schema = &lease_inner.schema;
let words = &lease_inner.raw.words; let words = &lease_inner.raw.words;
let idset = { let idset = SetBuf::from_dirty(self.documents);
self.documents.sort_unstable();
self.documents.dedup();
SetBuf::new_unchecked(self.documents)
};
// collect the ranked attributes according to the schema // collect the ranked attributes according to the schema
let ranked_attrs: Vec<_> = schema.iter() let ranked_attrs: Vec<_> = schema.iter()
@@ -63,7 +59,7 @@ impl<'a> DocumentsDeletion<'a> {
.collect(); .collect();
let mut words_document_ids = HashMap::new(); let mut words_document_ids = HashMap::new();
for id in idset.into_vec() { for id in idset {
// remove all the ranked attributes from the ranked_map // remove all the ranked attributes from the ranked_map
for ranked_attr in &ranked_attrs { for ranked_attr in &ranked_attrs {
self.ranked_map.remove(id, *ranked_attr); self.ranked_map.remove(id, *ranked_attr);
@@ -79,10 +75,8 @@ impl<'a> DocumentsDeletion<'a> {
} }
let mut removed_words = BTreeSet::new(); let mut removed_words = BTreeSet::new();
for (word, mut document_ids) in words_document_ids { for (word, document_ids) in words_document_ids {
document_ids.sort_unstable(); let document_ids = SetBuf::from_dirty(document_ids);
document_ids.dedup();
let document_ids = SetBuf::new_unchecked(document_ids);
if let Some(doc_indexes) = words.doc_indexes(&word)? { if let Some(doc_indexes) = words.doc_indexes(&word)? {
let op = DifferenceByKey::new(&doc_indexes, &document_ids, |d| d.document_id, |id| *id); let op = DifferenceByKey::new(&doc_indexes, &document_ids, |d| d.document_id, |id| *id);
@@ -96,7 +90,7 @@ impl<'a> DocumentsDeletion<'a> {
} }
} }
for id in document_ids.into_vec() { for id in document_ids {
documents.del_all_document_fields(id)?; documents.del_all_document_fields(id)?;
docs_words.del_doc_words(id)?; docs_words.del_doc_words(id)?;
} }

View File

@@ -70,11 +70,8 @@ impl Indexer {
pub fn build(self) -> Indexed { pub fn build(self) -> Indexed {
let words_doc_indexes = self.words_doc_indexes let words_doc_indexes = self.words_doc_indexes
.into_iter() .into_iter()
.map(|(word, mut indexes)| { .map(|(word, indexes)| (word, SetBuf::from_dirty(indexes)))
indexes.sort_unstable(); .collect();
indexes.dedup();
(word, SetBuf::new_unchecked(indexes))
}).collect();
let docs_words = self.docs_words let docs_words = self.docs_words
.into_iter() .into_iter()