From 4ebae7784cc9872eec268e170ed83de05936de99 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Sat, 22 Dec 2018 12:00:24 +0100 Subject: [PATCH] feat: Create a strong DocumentId type Making it a distinct internal type prevents accidental comparisons with values of other types. --- examples/create-database.rs | 3 ++- src/data/doc_indexes.rs | 15 +++++++++------ src/database/blob/positive/blob.rs | 15 +++++++++------ src/database/database_view.rs | 2 +- src/database/document_key.rs | 8 +++++--- src/database/mod.rs | 9 +++++---- src/database/update/negative/unordered_builder.rs | 2 +- src/lib.rs | 7 ++++++- src/rank/criterion/sum_of_typos.rs | 14 ++++++++------ 9 files changed, 46 insertions(+), 29 deletions(-) diff --git a/examples/create-database.rs b/examples/create-database.rs index b66bf7f5b..e7a8e72e4 100644 --- a/examples/create-database.rs +++ b/examples/create-database.rs @@ -10,6 +10,7 @@ use meilidb::database::schema::{Schema, SchemaBuilder, STORED, INDEXED}; use meilidb::database::update::PositiveUpdateBuilder; use meilidb::tokenizer::DefaultBuilder; use meilidb::database::Database; +use meilidb::DocumentId; #[derive(Debug, StructOpt)] pub struct Opt { @@ -67,7 +68,7 @@ fn index(schema: Schema, database_path: &Path, csv_data_path: &Path) -> Result(slice: &[T]) -> &[u8] { #[cfg(test)] mod tests { use super::*; + use std::error::Error; + use crate::DocumentId; + #[test] fn builder_serialize_deserialize() -> Result<(), Box> { - let a = DocIndex { document_id: 0, attribute: 3, attribute_index: 11 }; - let b = DocIndex { document_id: 1, attribute: 4, attribute_index: 21 }; - let c = DocIndex { document_id: 2, attribute: 8, attribute_index: 2 }; + let a = DocIndex { document_id: DocumentId(0), attribute: 3, attribute_index: 11 }; + let b = DocIndex { document_id: DocumentId(1), attribute: 4, attribute_index: 21 }; + let c = DocIndex { document_id: DocumentId(2), attribute: 8, attribute_index: 2 }; let mut builder = 
DocIndexesBuilder::memory(); @@ -183,9 +186,9 @@ mod tests { #[test] fn serialize_deserialize() -> Result<(), Box> { - let a = DocIndex { document_id: 0, attribute: 3, attribute_index: 11 }; - let b = DocIndex { document_id: 1, attribute: 4, attribute_index: 21 }; - let c = DocIndex { document_id: 2, attribute: 8, attribute_index: 2 }; + let a = DocIndex { document_id: DocumentId(0), attribute: 3, attribute_index: 11 }; + let b = DocIndex { document_id: DocumentId(1), attribute: 4, attribute_index: 21 }; + let c = DocIndex { document_id: DocumentId(2), attribute: 8, attribute_index: 2 }; let mut builder = DocIndexesBuilder::memory(); diff --git a/src/database/blob/positive/blob.rs b/src/database/blob/positive/blob.rs index fd6f2251d..3687bc1bb 100644 --- a/src/database/blob/positive/blob.rs +++ b/src/database/blob/positive/blob.rs @@ -201,13 +201,16 @@ impl PositiveBlobBuilder { #[cfg(test)] mod tests { use super::*; + use std::error::Error; + use crate::DocumentId; + #[test] fn serialize_deserialize() -> Result<(), Box> { - let a = DocIndex { document_id: 0, attribute: 3, attribute_index: 11 }; - let b = DocIndex { document_id: 1, attribute: 4, attribute_index: 21 }; - let c = DocIndex { document_id: 2, attribute: 8, attribute_index: 2 }; + let a = DocIndex { document_id: DocumentId(0), attribute: 3, attribute_index: 11 }; + let b = DocIndex { document_id: DocumentId(1), attribute: 4, attribute_index: 21 }; + let c = DocIndex { document_id: DocumentId(2), attribute: 8, attribute_index: 2 }; let mut builder = PositiveBlobBuilder::memory(); @@ -228,9 +231,9 @@ mod tests { #[test] fn serde_serialize_deserialize() -> Result<(), Box> { - let a = DocIndex { document_id: 0, attribute: 3, attribute_index: 11 }; - let b = DocIndex { document_id: 1, attribute: 4, attribute_index: 21 }; - let c = DocIndex { document_id: 2, attribute: 8, attribute_index: 2 }; + let a = DocIndex { document_id: DocumentId(0), attribute: 3, attribute_index: 11 }; + let b = DocIndex { 
document_id: DocumentId(1), attribute: 4, attribute_index: 21 }; + let c = DocIndex { document_id: DocumentId(2), attribute: 8, attribute_index: 2 }; let mut builder = PositiveBlobBuilder::memory(); diff --git a/src/database/database_view.rs b/src/database/database_view.rs index f43d65439..c8eed37c6 100644 --- a/src/database/database_view.rs +++ b/src/database/database_view.rs @@ -100,7 +100,7 @@ where D: Deref { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { let mut options = ReadOptions::new(); - let lower = DocumentKey::new(0); + let lower = DocumentKey::new(DocumentId(0)); options.set_iterate_lower_bound(lower.as_ref()); let mut iter = self.snapshot.iter_opt(options); diff --git a/src/database/document_key.rs b/src/database/document_key.rs index 529bc1b75..9104df5f6 100644 --- a/src/database/document_key.rs +++ b/src/database/document_key.rs @@ -19,7 +19,7 @@ impl DocumentKey { let mut wtr = Cursor::new(&mut buffer[..]); wtr.write_all(b"doc-").unwrap(); - wtr.write_u64::(id).unwrap(); + wtr.write_u64::(id.0).unwrap(); DocumentKey(buffer) } @@ -43,7 +43,8 @@ impl DocumentKey { } pub fn document_id(&self) -> DocumentId { - (&self.0[4..]).read_u64::().unwrap() + let id = (&self.0[4..]).read_u64::().unwrap(); + DocumentId(id) } } @@ -88,7 +89,8 @@ impl DocumentKeyAttr { } pub fn document_id(&self) -> DocumentId { - (&self.0[4..]).read_u64::().unwrap() + let id = (&self.0[4..]).read_u64::().unwrap(); + DocumentId(id) } pub fn attribute(&self) -> SchemaAttr { diff --git a/src/database/mod.rs b/src/database/mod.rs index 99fc5228e..2351c658c 100644 --- a/src/database/mod.rs +++ b/src/database/mod.rs @@ -194,6 +194,7 @@ mod tests { use serde_derive::{Serialize, Deserialize}; use tempfile::tempdir; + use crate::DocumentId; use crate::tokenizer::DefaultBuilder; use crate::database::update::PositiveUpdateBuilder; use crate::database::schema::{SchemaBuilder, STORED, INDEXED}; @@ -238,8 +239,8 @@ mod tests { let mut update = { let mut builder = 
PositiveUpdateBuilder::new(update_path, schema, tokenizer_builder); - builder.update(0, &doc0).unwrap(); - builder.update(1, &doc1).unwrap(); + builder.update(DocumentId(0), &doc0).unwrap(); + builder.update(DocumentId(1), &doc1).unwrap(); builder.build()? }; @@ -248,8 +249,8 @@ mod tests { database.ingest_update_file(update)?; let view = database.view(); - let de_doc0: SimpleDoc = view.retrieve_document(0)?; - let de_doc1: SimpleDoc = view.retrieve_document(1)?; + let de_doc0: SimpleDoc = view.retrieve_document(DocumentId(0))?; + let de_doc1: SimpleDoc = view.retrieve_document(DocumentId(1))?; assert_eq!(doc0, de_doc0); assert_eq!(doc1, de_doc1); diff --git a/src/database/update/negative/unordered_builder.rs b/src/database/update/negative/unordered_builder.rs index b73ecd2e3..4278e6974 100644 --- a/src/database/update/negative/unordered_builder.rs +++ b/src/database/update/negative/unordered_builder.rs @@ -30,7 +30,7 @@ impl UnorderedNegativeBlobBuilder { pub fn into_inner(mut self) -> io::Result { for id in self.doc_ids { - self.wrt.write_u64::(id)?; + self.wrt.write_u64::(id.0)?; } Ok(self.wrt) } diff --git a/src/lib.rs b/src/lib.rs index 655a234ca..d95dcc2ae 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -11,7 +11,12 @@ pub use rocksdb; pub use self::tokenizer::Tokenizer; pub use self::common_words::CommonWords; -pub type DocumentId = u64; +/// Represents an internally generated unique document identifier. +/// +/// It is used to tell the database which document you want to deserialize. +/// Helpful for custom ranking. +#[derive(Debug, Copy, Clone, Eq, PartialEq, PartialOrd, Ord, Hash)] +pub struct DocumentId(pub u64); /// This structure represent the position of a word /// in a document and its attributes. 
diff --git a/src/rank/criterion/sum_of_typos.rs b/src/rank/criterion/sum_of_typos.rs index a7074bd86..3af339233 100644 --- a/src/rank/criterion/sum_of_typos.rs +++ b/src/rank/criterion/sum_of_typos.rs @@ -44,6 +44,8 @@ where D: Deref mod tests { use super::*; + use crate::DocumentId; + // typing: "Geox CEO" // // doc0: "Geox SpA: CEO and Executive" @@ -56,7 +58,7 @@ mod tests { Match { query_index: 1, distance: 0, attribute: 0, attribute_index: 2, is_exact: false }, ]; Document { - id: 0, + id: DocumentId(0), matches: matches, } }; @@ -67,7 +69,7 @@ mod tests { Match { query_index: 1, distance: 0, attribute: 0, attribute_index: 2, is_exact: false }, ]; Document { - id: 1, + id: DocumentId(1), matches: matches, } }; @@ -89,7 +91,7 @@ mod tests { Match { query_index: 1, distance: 0, attribute: 0, attribute_index: 1, is_exact: false }, ]; Document { - id: 0, + id: DocumentId(0), matches: matches, } }; @@ -99,7 +101,7 @@ mod tests { Match { query_index: 0, distance: 0, attribute: 0, attribute_index: 0, is_exact: false }, ]; Document { - id: 1, + id: DocumentId(1), matches: matches, } }; @@ -121,7 +123,7 @@ mod tests { Match { query_index: 1, distance: 1, attribute: 0, attribute_index: 1, is_exact: false }, ]; Document { - id: 0, + id: DocumentId(0), matches: matches, } }; @@ -131,7 +133,7 @@ mod tests { Match { query_index: 0, distance: 0, attribute: 0, attribute_index: 0, is_exact: false }, ]; Document { - id: 1, + id: DocumentId(1), matches: matches, } };