mirror of
https://github.com/meilisearch/MeiliSearch
synced 2024-11-22 21:04:27 +01:00
Merge pull request #44 from Kerollmops/real-document-id-type
Create a real DocumentId type
This commit is contained in:
commit
62521262e8
@ -10,6 +10,7 @@ use meilidb::database::schema::{Schema, SchemaBuilder, STORED, INDEXED};
|
||||
use meilidb::database::update::PositiveUpdateBuilder;
|
||||
use meilidb::tokenizer::DefaultBuilder;
|
||||
use meilidb::database::Database;
|
||||
use meilidb::DocumentId;
|
||||
|
||||
#[derive(Debug, StructOpt)]
|
||||
pub struct Opt {
|
||||
@ -67,7 +68,7 @@ fn index(schema: Schema, database_path: &Path, csv_data_path: &Path) -> Result<D
|
||||
}
|
||||
};
|
||||
|
||||
let document_id = calculate_hash(&document.id);
|
||||
let document_id = DocumentId(calculate_hash(&document.id));
|
||||
update.update(document_id, &document).unwrap();
|
||||
}
|
||||
|
||||
|
@ -156,13 +156,16 @@ unsafe fn into_u8_slice<T>(slice: &[T]) -> &[u8] {
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
use std::error::Error;
|
||||
|
||||
use crate::DocumentId;
|
||||
|
||||
#[test]
|
||||
fn builder_serialize_deserialize() -> Result<(), Box<Error>> {
|
||||
let a = DocIndex { document_id: 0, attribute: 3, attribute_index: 11 };
|
||||
let b = DocIndex { document_id: 1, attribute: 4, attribute_index: 21 };
|
||||
let c = DocIndex { document_id: 2, attribute: 8, attribute_index: 2 };
|
||||
let a = DocIndex { document_id: DocumentId(0), attribute: 3, attribute_index: 11 };
|
||||
let b = DocIndex { document_id: DocumentId(1), attribute: 4, attribute_index: 21 };
|
||||
let c = DocIndex { document_id: DocumentId(2), attribute: 8, attribute_index: 2 };
|
||||
|
||||
let mut builder = DocIndexesBuilder::memory();
|
||||
|
||||
@ -183,9 +186,9 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
fn serialize_deserialize() -> Result<(), Box<Error>> {
|
||||
let a = DocIndex { document_id: 0, attribute: 3, attribute_index: 11 };
|
||||
let b = DocIndex { document_id: 1, attribute: 4, attribute_index: 21 };
|
||||
let c = DocIndex { document_id: 2, attribute: 8, attribute_index: 2 };
|
||||
let a = DocIndex { document_id: DocumentId(0), attribute: 3, attribute_index: 11 };
|
||||
let b = DocIndex { document_id: DocumentId(1), attribute: 4, attribute_index: 21 };
|
||||
let c = DocIndex { document_id: DocumentId(2), attribute: 8, attribute_index: 2 };
|
||||
|
||||
let mut builder = DocIndexesBuilder::memory();
|
||||
|
||||
|
@ -201,13 +201,16 @@ impl<W: Write, X: Write> PositiveBlobBuilder<W, X> {
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
use std::error::Error;
|
||||
|
||||
use crate::DocumentId;
|
||||
|
||||
#[test]
|
||||
fn serialize_deserialize() -> Result<(), Box<Error>> {
|
||||
let a = DocIndex { document_id: 0, attribute: 3, attribute_index: 11 };
|
||||
let b = DocIndex { document_id: 1, attribute: 4, attribute_index: 21 };
|
||||
let c = DocIndex { document_id: 2, attribute: 8, attribute_index: 2 };
|
||||
let a = DocIndex { document_id: DocumentId(0), attribute: 3, attribute_index: 11 };
|
||||
let b = DocIndex { document_id: DocumentId(1), attribute: 4, attribute_index: 21 };
|
||||
let c = DocIndex { document_id: DocumentId(2), attribute: 8, attribute_index: 2 };
|
||||
|
||||
let mut builder = PositiveBlobBuilder::memory();
|
||||
|
||||
@ -228,9 +231,9 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
fn serde_serialize_deserialize() -> Result<(), Box<Error>> {
|
||||
let a = DocIndex { document_id: 0, attribute: 3, attribute_index: 11 };
|
||||
let b = DocIndex { document_id: 1, attribute: 4, attribute_index: 21 };
|
||||
let c = DocIndex { document_id: 2, attribute: 8, attribute_index: 2 };
|
||||
let a = DocIndex { document_id: DocumentId(0), attribute: 3, attribute_index: 11 };
|
||||
let b = DocIndex { document_id: DocumentId(1), attribute: 4, attribute_index: 21 };
|
||||
let c = DocIndex { document_id: DocumentId(2), attribute: 8, attribute_index: 2 };
|
||||
|
||||
let mut builder = PositiveBlobBuilder::memory();
|
||||
|
||||
|
@ -100,7 +100,7 @@ where D: Deref<Target=DB>
|
||||
{
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
let mut options = ReadOptions::new();
|
||||
let lower = DocumentKey::new(0);
|
||||
let lower = DocumentKey::new(DocumentId(0));
|
||||
options.set_iterate_lower_bound(lower.as_ref());
|
||||
|
||||
let mut iter = self.snapshot.iter_opt(options);
|
||||
|
@ -19,7 +19,7 @@ impl DocumentKey {
|
||||
|
||||
let mut wtr = Cursor::new(&mut buffer[..]);
|
||||
wtr.write_all(b"doc-").unwrap();
|
||||
wtr.write_u64::<NativeEndian>(id).unwrap();
|
||||
wtr.write_u64::<NativeEndian>(id.0).unwrap();
|
||||
|
||||
DocumentKey(buffer)
|
||||
}
|
||||
@ -43,7 +43,8 @@ impl DocumentKey {
|
||||
}
|
||||
|
||||
pub fn document_id(&self) -> DocumentId {
|
||||
(&self.0[4..]).read_u64::<NativeEndian>().unwrap()
|
||||
let id = (&self.0[4..]).read_u64::<NativeEndian>().unwrap();
|
||||
DocumentId(id)
|
||||
}
|
||||
}
|
||||
|
||||
@ -88,7 +89,8 @@ impl DocumentKeyAttr {
|
||||
}
|
||||
|
||||
pub fn document_id(&self) -> DocumentId {
|
||||
(&self.0[4..]).read_u64::<NativeEndian>().unwrap()
|
||||
let id = (&self.0[4..]).read_u64::<NativeEndian>().unwrap();
|
||||
DocumentId(id)
|
||||
}
|
||||
|
||||
pub fn attribute(&self) -> SchemaAttr {
|
||||
|
@ -194,6 +194,7 @@ mod tests {
|
||||
use serde_derive::{Serialize, Deserialize};
|
||||
use tempfile::tempdir;
|
||||
|
||||
use crate::DocumentId;
|
||||
use crate::tokenizer::DefaultBuilder;
|
||||
use crate::database::update::PositiveUpdateBuilder;
|
||||
use crate::database::schema::{SchemaBuilder, STORED, INDEXED};
|
||||
@ -238,8 +239,8 @@ mod tests {
|
||||
let mut update = {
|
||||
let mut builder = PositiveUpdateBuilder::new(update_path, schema, tokenizer_builder);
|
||||
|
||||
builder.update(0, &doc0).unwrap();
|
||||
builder.update(1, &doc1).unwrap();
|
||||
builder.update(DocumentId(0), &doc0).unwrap();
|
||||
builder.update(DocumentId(1), &doc1).unwrap();
|
||||
|
||||
builder.build()?
|
||||
};
|
||||
@ -248,8 +249,8 @@ mod tests {
|
||||
database.ingest_update_file(update)?;
|
||||
let view = database.view();
|
||||
|
||||
let de_doc0: SimpleDoc = view.retrieve_document(0)?;
|
||||
let de_doc1: SimpleDoc = view.retrieve_document(1)?;
|
||||
let de_doc0: SimpleDoc = view.retrieve_document(DocumentId(0))?;
|
||||
let de_doc1: SimpleDoc = view.retrieve_document(DocumentId(1))?;
|
||||
|
||||
assert_eq!(doc0, de_doc0);
|
||||
assert_eq!(doc1, de_doc1);
|
||||
|
@ -30,7 +30,7 @@ impl<W: io::Write> UnorderedNegativeBlobBuilder<W> {
|
||||
|
||||
pub fn into_inner(mut self) -> io::Result<W> {
|
||||
for id in self.doc_ids {
|
||||
self.wrt.write_u64::<NativeEndian>(id)?;
|
||||
self.wrt.write_u64::<NativeEndian>(id.0)?;
|
||||
}
|
||||
Ok(self.wrt)
|
||||
}
|
||||
|
@ -11,7 +11,12 @@ pub use rocksdb;
|
||||
pub use self::tokenizer::Tokenizer;
|
||||
pub use self::common_words::CommonWords;
|
||||
|
||||
pub type DocumentId = u64;
|
||||
/// Represents an internally generated unique document identifier.
|
||||
///
|
||||
/// It is used to tell the database which document you want to deserialize.
|
||||
/// Helpful for custom ranking.
|
||||
#[derive(Debug, Copy, Clone, Eq, PartialEq, PartialOrd, Ord, Hash)]
|
||||
pub struct DocumentId(pub u64);
|
||||
|
||||
/// This structure represents the position of a word
|
||||
/// in a document and its attributes.
|
||||
|
@ -44,6 +44,8 @@ where D: Deref<Target=DB>
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
use crate::DocumentId;
|
||||
|
||||
// typing: "Geox CEO"
|
||||
//
|
||||
// doc0: "Geox SpA: CEO and Executive"
|
||||
@ -56,7 +58,7 @@ mod tests {
|
||||
Match { query_index: 1, distance: 0, attribute: 0, attribute_index: 2, is_exact: false },
|
||||
];
|
||||
Document {
|
||||
id: 0,
|
||||
id: DocumentId(0),
|
||||
matches: matches,
|
||||
}
|
||||
};
|
||||
@ -67,7 +69,7 @@ mod tests {
|
||||
Match { query_index: 1, distance: 0, attribute: 0, attribute_index: 2, is_exact: false },
|
||||
];
|
||||
Document {
|
||||
id: 1,
|
||||
id: DocumentId(1),
|
||||
matches: matches,
|
||||
}
|
||||
};
|
||||
@ -89,7 +91,7 @@ mod tests {
|
||||
Match { query_index: 1, distance: 0, attribute: 0, attribute_index: 1, is_exact: false },
|
||||
];
|
||||
Document {
|
||||
id: 0,
|
||||
id: DocumentId(0),
|
||||
matches: matches,
|
||||
}
|
||||
};
|
||||
@ -99,7 +101,7 @@ mod tests {
|
||||
Match { query_index: 0, distance: 0, attribute: 0, attribute_index: 0, is_exact: false },
|
||||
];
|
||||
Document {
|
||||
id: 1,
|
||||
id: DocumentId(1),
|
||||
matches: matches,
|
||||
}
|
||||
};
|
||||
@ -121,7 +123,7 @@ mod tests {
|
||||
Match { query_index: 1, distance: 1, attribute: 0, attribute_index: 1, is_exact: false },
|
||||
];
|
||||
Document {
|
||||
id: 0,
|
||||
id: DocumentId(0),
|
||||
matches: matches,
|
||||
}
|
||||
};
|
||||
@ -131,7 +133,7 @@ mod tests {
|
||||
Match { query_index: 0, distance: 0, attribute: 0, attribute_index: 0, is_exact: false },
|
||||
];
|
||||
Document {
|
||||
id: 1,
|
||||
id: DocumentId(1),
|
||||
matches: matches,
|
||||
}
|
||||
};
|
||||
|
Loading…
Reference in New Issue
Block a user