mirror of
https://github.com/meilisearch/MeiliSearch
synced 2024-11-23 05:14:27 +01:00
Merge pull request #44 from Kerollmops/real-document-id-type
Create a real DocumentId type
This commit is contained in:
commit
62521262e8
@ -10,6 +10,7 @@ use meilidb::database::schema::{Schema, SchemaBuilder, STORED, INDEXED};
|
|||||||
use meilidb::database::update::PositiveUpdateBuilder;
|
use meilidb::database::update::PositiveUpdateBuilder;
|
||||||
use meilidb::tokenizer::DefaultBuilder;
|
use meilidb::tokenizer::DefaultBuilder;
|
||||||
use meilidb::database::Database;
|
use meilidb::database::Database;
|
||||||
|
use meilidb::DocumentId;
|
||||||
|
|
||||||
#[derive(Debug, StructOpt)]
|
#[derive(Debug, StructOpt)]
|
||||||
pub struct Opt {
|
pub struct Opt {
|
||||||
@ -67,7 +68,7 @@ fn index(schema: Schema, database_path: &Path, csv_data_path: &Path) -> Result<D
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
let document_id = calculate_hash(&document.id);
|
let document_id = DocumentId(calculate_hash(&document.id));
|
||||||
update.update(document_id, &document).unwrap();
|
update.update(document_id, &document).unwrap();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -156,13 +156,16 @@ unsafe fn into_u8_slice<T>(slice: &[T]) -> &[u8] {
|
|||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod tests {
|
mod tests {
|
||||||
use super::*;
|
use super::*;
|
||||||
|
|
||||||
use std::error::Error;
|
use std::error::Error;
|
||||||
|
|
||||||
|
use crate::DocumentId;
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn builder_serialize_deserialize() -> Result<(), Box<Error>> {
|
fn builder_serialize_deserialize() -> Result<(), Box<Error>> {
|
||||||
let a = DocIndex { document_id: 0, attribute: 3, attribute_index: 11 };
|
let a = DocIndex { document_id: DocumentId(0), attribute: 3, attribute_index: 11 };
|
||||||
let b = DocIndex { document_id: 1, attribute: 4, attribute_index: 21 };
|
let b = DocIndex { document_id: DocumentId(1), attribute: 4, attribute_index: 21 };
|
||||||
let c = DocIndex { document_id: 2, attribute: 8, attribute_index: 2 };
|
let c = DocIndex { document_id: DocumentId(2), attribute: 8, attribute_index: 2 };
|
||||||
|
|
||||||
let mut builder = DocIndexesBuilder::memory();
|
let mut builder = DocIndexesBuilder::memory();
|
||||||
|
|
||||||
@ -183,9 +186,9 @@ mod tests {
|
|||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn serialize_deserialize() -> Result<(), Box<Error>> {
|
fn serialize_deserialize() -> Result<(), Box<Error>> {
|
||||||
let a = DocIndex { document_id: 0, attribute: 3, attribute_index: 11 };
|
let a = DocIndex { document_id: DocumentId(0), attribute: 3, attribute_index: 11 };
|
||||||
let b = DocIndex { document_id: 1, attribute: 4, attribute_index: 21 };
|
let b = DocIndex { document_id: DocumentId(1), attribute: 4, attribute_index: 21 };
|
||||||
let c = DocIndex { document_id: 2, attribute: 8, attribute_index: 2 };
|
let c = DocIndex { document_id: DocumentId(2), attribute: 8, attribute_index: 2 };
|
||||||
|
|
||||||
let mut builder = DocIndexesBuilder::memory();
|
let mut builder = DocIndexesBuilder::memory();
|
||||||
|
|
||||||
|
@ -201,13 +201,16 @@ impl<W: Write, X: Write> PositiveBlobBuilder<W, X> {
|
|||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod tests {
|
mod tests {
|
||||||
use super::*;
|
use super::*;
|
||||||
|
|
||||||
use std::error::Error;
|
use std::error::Error;
|
||||||
|
|
||||||
|
use crate::DocumentId;
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn serialize_deserialize() -> Result<(), Box<Error>> {
|
fn serialize_deserialize() -> Result<(), Box<Error>> {
|
||||||
let a = DocIndex { document_id: 0, attribute: 3, attribute_index: 11 };
|
let a = DocIndex { document_id: DocumentId(0), attribute: 3, attribute_index: 11 };
|
||||||
let b = DocIndex { document_id: 1, attribute: 4, attribute_index: 21 };
|
let b = DocIndex { document_id: DocumentId(1), attribute: 4, attribute_index: 21 };
|
||||||
let c = DocIndex { document_id: 2, attribute: 8, attribute_index: 2 };
|
let c = DocIndex { document_id: DocumentId(2), attribute: 8, attribute_index: 2 };
|
||||||
|
|
||||||
let mut builder = PositiveBlobBuilder::memory();
|
let mut builder = PositiveBlobBuilder::memory();
|
||||||
|
|
||||||
@ -228,9 +231,9 @@ mod tests {
|
|||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn serde_serialize_deserialize() -> Result<(), Box<Error>> {
|
fn serde_serialize_deserialize() -> Result<(), Box<Error>> {
|
||||||
let a = DocIndex { document_id: 0, attribute: 3, attribute_index: 11 };
|
let a = DocIndex { document_id: DocumentId(0), attribute: 3, attribute_index: 11 };
|
||||||
let b = DocIndex { document_id: 1, attribute: 4, attribute_index: 21 };
|
let b = DocIndex { document_id: DocumentId(1), attribute: 4, attribute_index: 21 };
|
||||||
let c = DocIndex { document_id: 2, attribute: 8, attribute_index: 2 };
|
let c = DocIndex { document_id: DocumentId(2), attribute: 8, attribute_index: 2 };
|
||||||
|
|
||||||
let mut builder = PositiveBlobBuilder::memory();
|
let mut builder = PositiveBlobBuilder::memory();
|
||||||
|
|
||||||
|
@ -100,7 +100,7 @@ where D: Deref<Target=DB>
|
|||||||
{
|
{
|
||||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||||
let mut options = ReadOptions::new();
|
let mut options = ReadOptions::new();
|
||||||
let lower = DocumentKey::new(0);
|
let lower = DocumentKey::new(DocumentId(0));
|
||||||
options.set_iterate_lower_bound(lower.as_ref());
|
options.set_iterate_lower_bound(lower.as_ref());
|
||||||
|
|
||||||
let mut iter = self.snapshot.iter_opt(options);
|
let mut iter = self.snapshot.iter_opt(options);
|
||||||
|
@ -19,7 +19,7 @@ impl DocumentKey {
|
|||||||
|
|
||||||
let mut wtr = Cursor::new(&mut buffer[..]);
|
let mut wtr = Cursor::new(&mut buffer[..]);
|
||||||
wtr.write_all(b"doc-").unwrap();
|
wtr.write_all(b"doc-").unwrap();
|
||||||
wtr.write_u64::<NativeEndian>(id).unwrap();
|
wtr.write_u64::<NativeEndian>(id.0).unwrap();
|
||||||
|
|
||||||
DocumentKey(buffer)
|
DocumentKey(buffer)
|
||||||
}
|
}
|
||||||
@ -43,7 +43,8 @@ impl DocumentKey {
|
|||||||
}
|
}
|
||||||
|
|
||||||
pub fn document_id(&self) -> DocumentId {
|
pub fn document_id(&self) -> DocumentId {
|
||||||
(&self.0[4..]).read_u64::<NativeEndian>().unwrap()
|
let id = (&self.0[4..]).read_u64::<NativeEndian>().unwrap();
|
||||||
|
DocumentId(id)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -88,7 +89,8 @@ impl DocumentKeyAttr {
|
|||||||
}
|
}
|
||||||
|
|
||||||
pub fn document_id(&self) -> DocumentId {
|
pub fn document_id(&self) -> DocumentId {
|
||||||
(&self.0[4..]).read_u64::<NativeEndian>().unwrap()
|
let id = (&self.0[4..]).read_u64::<NativeEndian>().unwrap();
|
||||||
|
DocumentId(id)
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn attribute(&self) -> SchemaAttr {
|
pub fn attribute(&self) -> SchemaAttr {
|
||||||
|
@ -194,6 +194,7 @@ mod tests {
|
|||||||
use serde_derive::{Serialize, Deserialize};
|
use serde_derive::{Serialize, Deserialize};
|
||||||
use tempfile::tempdir;
|
use tempfile::tempdir;
|
||||||
|
|
||||||
|
use crate::DocumentId;
|
||||||
use crate::tokenizer::DefaultBuilder;
|
use crate::tokenizer::DefaultBuilder;
|
||||||
use crate::database::update::PositiveUpdateBuilder;
|
use crate::database::update::PositiveUpdateBuilder;
|
||||||
use crate::database::schema::{SchemaBuilder, STORED, INDEXED};
|
use crate::database::schema::{SchemaBuilder, STORED, INDEXED};
|
||||||
@ -238,8 +239,8 @@ mod tests {
|
|||||||
let mut update = {
|
let mut update = {
|
||||||
let mut builder = PositiveUpdateBuilder::new(update_path, schema, tokenizer_builder);
|
let mut builder = PositiveUpdateBuilder::new(update_path, schema, tokenizer_builder);
|
||||||
|
|
||||||
builder.update(0, &doc0).unwrap();
|
builder.update(DocumentId(0), &doc0).unwrap();
|
||||||
builder.update(1, &doc1).unwrap();
|
builder.update(DocumentId(1), &doc1).unwrap();
|
||||||
|
|
||||||
builder.build()?
|
builder.build()?
|
||||||
};
|
};
|
||||||
@ -248,8 +249,8 @@ mod tests {
|
|||||||
database.ingest_update_file(update)?;
|
database.ingest_update_file(update)?;
|
||||||
let view = database.view();
|
let view = database.view();
|
||||||
|
|
||||||
let de_doc0: SimpleDoc = view.retrieve_document(0)?;
|
let de_doc0: SimpleDoc = view.retrieve_document(DocumentId(0))?;
|
||||||
let de_doc1: SimpleDoc = view.retrieve_document(1)?;
|
let de_doc1: SimpleDoc = view.retrieve_document(DocumentId(1))?;
|
||||||
|
|
||||||
assert_eq!(doc0, de_doc0);
|
assert_eq!(doc0, de_doc0);
|
||||||
assert_eq!(doc1, de_doc1);
|
assert_eq!(doc1, de_doc1);
|
||||||
|
@ -30,7 +30,7 @@ impl<W: io::Write> UnorderedNegativeBlobBuilder<W> {
|
|||||||
|
|
||||||
pub fn into_inner(mut self) -> io::Result<W> {
|
pub fn into_inner(mut self) -> io::Result<W> {
|
||||||
for id in self.doc_ids {
|
for id in self.doc_ids {
|
||||||
self.wrt.write_u64::<NativeEndian>(id)?;
|
self.wrt.write_u64::<NativeEndian>(id.0)?;
|
||||||
}
|
}
|
||||||
Ok(self.wrt)
|
Ok(self.wrt)
|
||||||
}
|
}
|
||||||
|
@ -11,7 +11,12 @@ pub use rocksdb;
|
|||||||
pub use self::tokenizer::Tokenizer;
|
pub use self::tokenizer::Tokenizer;
|
||||||
pub use self::common_words::CommonWords;
|
pub use self::common_words::CommonWords;
|
||||||
|
|
||||||
pub type DocumentId = u64;
|
/// Represents an internally generated unique document identifier.
|
||||||
|
///
|
||||||
|
/// It is used to tell the database which document you want to deserialize.
|
||||||
|
/// Helpful for custom ranking.
|
||||||
|
#[derive(Debug, Copy, Clone, Eq, PartialEq, PartialOrd, Ord, Hash)]
|
||||||
|
pub struct DocumentId(pub u64);
|
||||||
|
|
||||||
/// This structure represents the position of a word
|
/// This structure represents the position of a word
|
||||||
/// in a document and its attributes.
|
/// in a document and its attributes.
|
||||||
|
@ -44,6 +44,8 @@ where D: Deref<Target=DB>
|
|||||||
mod tests {
|
mod tests {
|
||||||
use super::*;
|
use super::*;
|
||||||
|
|
||||||
|
use crate::DocumentId;
|
||||||
|
|
||||||
// typing: "Geox CEO"
|
// typing: "Geox CEO"
|
||||||
//
|
//
|
||||||
// doc0: "Geox SpA: CEO and Executive"
|
// doc0: "Geox SpA: CEO and Executive"
|
||||||
@ -56,7 +58,7 @@ mod tests {
|
|||||||
Match { query_index: 1, distance: 0, attribute: 0, attribute_index: 2, is_exact: false },
|
Match { query_index: 1, distance: 0, attribute: 0, attribute_index: 2, is_exact: false },
|
||||||
];
|
];
|
||||||
Document {
|
Document {
|
||||||
id: 0,
|
id: DocumentId(0),
|
||||||
matches: matches,
|
matches: matches,
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
@ -67,7 +69,7 @@ mod tests {
|
|||||||
Match { query_index: 1, distance: 0, attribute: 0, attribute_index: 2, is_exact: false },
|
Match { query_index: 1, distance: 0, attribute: 0, attribute_index: 2, is_exact: false },
|
||||||
];
|
];
|
||||||
Document {
|
Document {
|
||||||
id: 1,
|
id: DocumentId(1),
|
||||||
matches: matches,
|
matches: matches,
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
@ -89,7 +91,7 @@ mod tests {
|
|||||||
Match { query_index: 1, distance: 0, attribute: 0, attribute_index: 1, is_exact: false },
|
Match { query_index: 1, distance: 0, attribute: 0, attribute_index: 1, is_exact: false },
|
||||||
];
|
];
|
||||||
Document {
|
Document {
|
||||||
id: 0,
|
id: DocumentId(0),
|
||||||
matches: matches,
|
matches: matches,
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
@ -99,7 +101,7 @@ mod tests {
|
|||||||
Match { query_index: 0, distance: 0, attribute: 0, attribute_index: 0, is_exact: false },
|
Match { query_index: 0, distance: 0, attribute: 0, attribute_index: 0, is_exact: false },
|
||||||
];
|
];
|
||||||
Document {
|
Document {
|
||||||
id: 1,
|
id: DocumentId(1),
|
||||||
matches: matches,
|
matches: matches,
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
@ -121,7 +123,7 @@ mod tests {
|
|||||||
Match { query_index: 1, distance: 1, attribute: 0, attribute_index: 1, is_exact: false },
|
Match { query_index: 1, distance: 1, attribute: 0, attribute_index: 1, is_exact: false },
|
||||||
];
|
];
|
||||||
Document {
|
Document {
|
||||||
id: 0,
|
id: DocumentId(0),
|
||||||
matches: matches,
|
matches: matches,
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
@ -131,7 +133,7 @@ mod tests {
|
|||||||
Match { query_index: 0, distance: 0, attribute: 0, attribute_index: 0, is_exact: false },
|
Match { query_index: 0, distance: 0, attribute: 0, attribute_index: 0, is_exact: false },
|
||||||
];
|
];
|
||||||
Document {
|
Document {
|
||||||
id: 1,
|
id: DocumentId(1),
|
||||||
matches: matches,
|
matches: matches,
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
Loading…
Reference in New Issue
Block a user