Merge pull request #44 from Kerollmops/real-document-id-type

Create a real DocumentId type
This commit is contained in:
Clément Renault 2018-12-24 15:41:47 +01:00 committed by GitHub
commit 62521262e8
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
9 changed files with 46 additions and 29 deletions

View File

@ -10,6 +10,7 @@ use meilidb::database::schema::{Schema, SchemaBuilder, STORED, INDEXED};
use meilidb::database::update::PositiveUpdateBuilder; use meilidb::database::update::PositiveUpdateBuilder;
use meilidb::tokenizer::DefaultBuilder; use meilidb::tokenizer::DefaultBuilder;
use meilidb::database::Database; use meilidb::database::Database;
use meilidb::DocumentId;
#[derive(Debug, StructOpt)] #[derive(Debug, StructOpt)]
pub struct Opt { pub struct Opt {
@ -67,7 +68,7 @@ fn index(schema: Schema, database_path: &Path, csv_data_path: &Path) -> Result<D
} }
}; };
let document_id = calculate_hash(&document.id); let document_id = DocumentId(calculate_hash(&document.id));
update.update(document_id, &document).unwrap(); update.update(document_id, &document).unwrap();
} }

View File

@ -156,13 +156,16 @@ unsafe fn into_u8_slice<T>(slice: &[T]) -> &[u8] {
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
use super::*; use super::*;
use std::error::Error; use std::error::Error;
use crate::DocumentId;
#[test] #[test]
fn builder_serialize_deserialize() -> Result<(), Box<Error>> { fn builder_serialize_deserialize() -> Result<(), Box<Error>> {
let a = DocIndex { document_id: 0, attribute: 3, attribute_index: 11 }; let a = DocIndex { document_id: DocumentId(0), attribute: 3, attribute_index: 11 };
let b = DocIndex { document_id: 1, attribute: 4, attribute_index: 21 }; let b = DocIndex { document_id: DocumentId(1), attribute: 4, attribute_index: 21 };
let c = DocIndex { document_id: 2, attribute: 8, attribute_index: 2 }; let c = DocIndex { document_id: DocumentId(2), attribute: 8, attribute_index: 2 };
let mut builder = DocIndexesBuilder::memory(); let mut builder = DocIndexesBuilder::memory();
@ -183,9 +186,9 @@ mod tests {
#[test] #[test]
fn serialize_deserialize() -> Result<(), Box<Error>> { fn serialize_deserialize() -> Result<(), Box<Error>> {
let a = DocIndex { document_id: 0, attribute: 3, attribute_index: 11 }; let a = DocIndex { document_id: DocumentId(0), attribute: 3, attribute_index: 11 };
let b = DocIndex { document_id: 1, attribute: 4, attribute_index: 21 }; let b = DocIndex { document_id: DocumentId(1), attribute: 4, attribute_index: 21 };
let c = DocIndex { document_id: 2, attribute: 8, attribute_index: 2 }; let c = DocIndex { document_id: DocumentId(2), attribute: 8, attribute_index: 2 };
let mut builder = DocIndexesBuilder::memory(); let mut builder = DocIndexesBuilder::memory();

View File

@ -201,13 +201,16 @@ impl<W: Write, X: Write> PositiveBlobBuilder<W, X> {
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
use super::*; use super::*;
use std::error::Error; use std::error::Error;
use crate::DocumentId;
#[test] #[test]
fn serialize_deserialize() -> Result<(), Box<Error>> { fn serialize_deserialize() -> Result<(), Box<Error>> {
let a = DocIndex { document_id: 0, attribute: 3, attribute_index: 11 }; let a = DocIndex { document_id: DocumentId(0), attribute: 3, attribute_index: 11 };
let b = DocIndex { document_id: 1, attribute: 4, attribute_index: 21 }; let b = DocIndex { document_id: DocumentId(1), attribute: 4, attribute_index: 21 };
let c = DocIndex { document_id: 2, attribute: 8, attribute_index: 2 }; let c = DocIndex { document_id: DocumentId(2), attribute: 8, attribute_index: 2 };
let mut builder = PositiveBlobBuilder::memory(); let mut builder = PositiveBlobBuilder::memory();
@ -228,9 +231,9 @@ mod tests {
#[test] #[test]
fn serde_serialize_deserialize() -> Result<(), Box<Error>> { fn serde_serialize_deserialize() -> Result<(), Box<Error>> {
let a = DocIndex { document_id: 0, attribute: 3, attribute_index: 11 }; let a = DocIndex { document_id: DocumentId(0), attribute: 3, attribute_index: 11 };
let b = DocIndex { document_id: 1, attribute: 4, attribute_index: 21 }; let b = DocIndex { document_id: DocumentId(1), attribute: 4, attribute_index: 21 };
let c = DocIndex { document_id: 2, attribute: 8, attribute_index: 2 }; let c = DocIndex { document_id: DocumentId(2), attribute: 8, attribute_index: 2 };
let mut builder = PositiveBlobBuilder::memory(); let mut builder = PositiveBlobBuilder::memory();

View File

@ -100,7 +100,7 @@ where D: Deref<Target=DB>
{ {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
let mut options = ReadOptions::new(); let mut options = ReadOptions::new();
let lower = DocumentKey::new(0); let lower = DocumentKey::new(DocumentId(0));
options.set_iterate_lower_bound(lower.as_ref()); options.set_iterate_lower_bound(lower.as_ref());
let mut iter = self.snapshot.iter_opt(options); let mut iter = self.snapshot.iter_opt(options);

View File

@ -19,7 +19,7 @@ impl DocumentKey {
let mut wtr = Cursor::new(&mut buffer[..]); let mut wtr = Cursor::new(&mut buffer[..]);
wtr.write_all(b"doc-").unwrap(); wtr.write_all(b"doc-").unwrap();
wtr.write_u64::<NativeEndian>(id).unwrap(); wtr.write_u64::<NativeEndian>(id.0).unwrap();
DocumentKey(buffer) DocumentKey(buffer)
} }
@ -43,7 +43,8 @@ impl DocumentKey {
} }
pub fn document_id(&self) -> DocumentId { pub fn document_id(&self) -> DocumentId {
(&self.0[4..]).read_u64::<NativeEndian>().unwrap() let id = (&self.0[4..]).read_u64::<NativeEndian>().unwrap();
DocumentId(id)
} }
} }
@ -88,7 +89,8 @@ impl DocumentKeyAttr {
} }
pub fn document_id(&self) -> DocumentId { pub fn document_id(&self) -> DocumentId {
(&self.0[4..]).read_u64::<NativeEndian>().unwrap() let id = (&self.0[4..]).read_u64::<NativeEndian>().unwrap();
DocumentId(id)
} }
pub fn attribute(&self) -> SchemaAttr { pub fn attribute(&self) -> SchemaAttr {

View File

@ -194,6 +194,7 @@ mod tests {
use serde_derive::{Serialize, Deserialize}; use serde_derive::{Serialize, Deserialize};
use tempfile::tempdir; use tempfile::tempdir;
use crate::DocumentId;
use crate::tokenizer::DefaultBuilder; use crate::tokenizer::DefaultBuilder;
use crate::database::update::PositiveUpdateBuilder; use crate::database::update::PositiveUpdateBuilder;
use crate::database::schema::{SchemaBuilder, STORED, INDEXED}; use crate::database::schema::{SchemaBuilder, STORED, INDEXED};
@ -238,8 +239,8 @@ mod tests {
let mut update = { let mut update = {
let mut builder = PositiveUpdateBuilder::new(update_path, schema, tokenizer_builder); let mut builder = PositiveUpdateBuilder::new(update_path, schema, tokenizer_builder);
builder.update(0, &doc0).unwrap(); builder.update(DocumentId(0), &doc0).unwrap();
builder.update(1, &doc1).unwrap(); builder.update(DocumentId(1), &doc1).unwrap();
builder.build()? builder.build()?
}; };
@ -248,8 +249,8 @@ mod tests {
database.ingest_update_file(update)?; database.ingest_update_file(update)?;
let view = database.view(); let view = database.view();
let de_doc0: SimpleDoc = view.retrieve_document(0)?; let de_doc0: SimpleDoc = view.retrieve_document(DocumentId(0))?;
let de_doc1: SimpleDoc = view.retrieve_document(1)?; let de_doc1: SimpleDoc = view.retrieve_document(DocumentId(1))?;
assert_eq!(doc0, de_doc0); assert_eq!(doc0, de_doc0);
assert_eq!(doc1, de_doc1); assert_eq!(doc1, de_doc1);

View File

@ -30,7 +30,7 @@ impl<W: io::Write> UnorderedNegativeBlobBuilder<W> {
pub fn into_inner(mut self) -> io::Result<W> { pub fn into_inner(mut self) -> io::Result<W> {
for id in self.doc_ids { for id in self.doc_ids {
self.wrt.write_u64::<NativeEndian>(id)?; self.wrt.write_u64::<NativeEndian>(id.0)?;
} }
Ok(self.wrt) Ok(self.wrt)
} }

View File

@ -11,7 +11,12 @@ pub use rocksdb;
pub use self::tokenizer::Tokenizer; pub use self::tokenizer::Tokenizer;
pub use self::common_words::CommonWords; pub use self::common_words::CommonWords;
pub type DocumentId = u64; /// Represents an internally generated unique document identifier.
///
/// It is used to tell the database which document you want to deserialize.
/// Helpful for custom ranking.
#[derive(Debug, Copy, Clone, Eq, PartialEq, PartialOrd, Ord, Hash)]
pub struct DocumentId(pub u64);
/// This structure represents the position of a word /// This structure represents the position of a word
/// in a document and its attributes. /// in a document and its attributes.

View File

@ -44,6 +44,8 @@ where D: Deref<Target=DB>
mod tests { mod tests {
use super::*; use super::*;
use crate::DocumentId;
// typing: "Geox CEO" // typing: "Geox CEO"
// //
// doc0: "Geox SpA: CEO and Executive" // doc0: "Geox SpA: CEO and Executive"
@ -56,7 +58,7 @@ mod tests {
Match { query_index: 1, distance: 0, attribute: 0, attribute_index: 2, is_exact: false }, Match { query_index: 1, distance: 0, attribute: 0, attribute_index: 2, is_exact: false },
]; ];
Document { Document {
id: 0, id: DocumentId(0),
matches: matches, matches: matches,
} }
}; };
@ -67,7 +69,7 @@ mod tests {
Match { query_index: 1, distance: 0, attribute: 0, attribute_index: 2, is_exact: false }, Match { query_index: 1, distance: 0, attribute: 0, attribute_index: 2, is_exact: false },
]; ];
Document { Document {
id: 1, id: DocumentId(1),
matches: matches, matches: matches,
} }
}; };
@ -89,7 +91,7 @@ mod tests {
Match { query_index: 1, distance: 0, attribute: 0, attribute_index: 1, is_exact: false }, Match { query_index: 1, distance: 0, attribute: 0, attribute_index: 1, is_exact: false },
]; ];
Document { Document {
id: 0, id: DocumentId(0),
matches: matches, matches: matches,
} }
}; };
@ -99,7 +101,7 @@ mod tests {
Match { query_index: 0, distance: 0, attribute: 0, attribute_index: 0, is_exact: false }, Match { query_index: 0, distance: 0, attribute: 0, attribute_index: 0, is_exact: false },
]; ];
Document { Document {
id: 1, id: DocumentId(1),
matches: matches, matches: matches,
} }
}; };
@ -121,7 +123,7 @@ mod tests {
Match { query_index: 1, distance: 1, attribute: 0, attribute_index: 1, is_exact: false }, Match { query_index: 1, distance: 1, attribute: 0, attribute_index: 1, is_exact: false },
]; ];
Document { Document {
id: 0, id: DocumentId(0),
matches: matches, matches: matches,
} }
}; };
@ -131,7 +133,7 @@ mod tests {
Match { query_index: 0, distance: 0, attribute: 0, attribute_index: 0, is_exact: false }, Match { query_index: 0, distance: 0, attribute: 0, attribute_index: 0, is_exact: false },
]; ];
Document { Document {
id: 1, id: DocumentId(1),
matches: matches, matches: matches,
} }
}; };