chore: Move index related things to the meilidb-core workspace member

2025-07-04 12:27:13 +02:00 · 2019-02-24 19:44:24 +01:00 · 2019-02-24 19:44:24 +01:00 · 14790eeae3
commit 14790eeae3
parent 3056b351fa
44 changed files with 1343 additions and 252 deletions
--- a/Cargo.toml
+++ b/Cargo.toml
@ -1,55 +1,5 @@
-[package]
-edition = "2018"
-name = "meilidb"
-version = "0.3.2"
-authors = ["Kerollmops <renault.cle@gmail.com>"]
-
-[dependencies]
-arc-swap = "0.3.7"
-bincode = "1.1.2"
-byteorder = "1.3.1"
-fst = "0.3.3"
-hashbrown = { version = "0.1.8", features = ["serde"] }
-lazy_static = "1.2.0"
-levenshtein_automata = { version = "0.1.1", features = ["fst_automaton"] }
-linked-hash-map = { version = "0.5.1", features = ["serde_impl"] }
-lockfree = "0.5.1"
-log = "0.4.6"
-rayon = "1.0.3"
-sdset = "0.3.1"
-serde = "1.0.88"
-serde_derive = "1.0.88"
-serde_json = { version = "1.0.38", features = ["preserve_order"] }
-size_format = "1.0.2"
-slice-group-by = "0.2.4"
-unidecode = "0.3.0"
-
-[dependencies.toml]
-git = "https://github.com/Kerollmops/toml-rs.git"
-features = ["preserve_order"]
-rev = "0372ba6"
-
-[dependencies.rocksdb]
-git = "https://github.com/pingcap/rust-rocksdb.git"
-rev = "306e201"
-
-[features]
-default = ["simd"]
-i128 = ["bincode/i128", "byteorder/i128"]
-portable = ["rocksdb/portable"]
-simd = ["rocksdb/sse"]
-nightly = ["hashbrown/nightly", "slice-group-by/nightly"]
-
-[dev-dependencies]
-csv = "1.0.5"
-env_logger = "0.6.0"
-jemallocator = "0.1.9"
-quickcheck = "0.8.2"
-rand = "0.6.5"
-rand_xorshift = "0.1.1"
-structopt = "0.2.14"
-tempfile = "3.0.7"
-termcolor = "1.0.4"
-
-[profile.release]
-debug = true
+[workspace]
+members = [
+    "meilidb",
+    "meilidb-core",
+]
--- a/meilidb-core/Cargo.toml
+++ b/meilidb-core/Cargo.toml
@ -0,0 +1,21 @@
+[package]
+name = "meilidb-core"
+version = "0.1.0"
+authors = ["Kerollmops <renault.cle@gmail.com>"]
+edition = "2018"
+
+[dependencies]
+byteorder = "1.3.1"
+fst = "0.3.3"
+hashbrown = "0.1.8"
+lazy_static = "1.2.0"
+levenshtein_automata = { version = "0.1.1", features = ["fst_automaton"] }
+log = "0.4.6"
+rayon = "1.0.3"
+sdset = "0.3.1"
+serde = "1.0.88"
+serde_derive = "1.0.88"
+slice-group-by = "0.2.4"
+
+[features]
+i128 = ["byteorder/i128"]
--- a/meilidb-core/src/automaton.rs
+++ b/meilidb-core/src/automaton.rs
--- a/meilidb-core/src/criterion/document_id.rs
+++ b/meilidb-core/src/criterion/document_id.rs
@ -1,7 +1,6 @@
 use std::cmp::Ordering;
-
-use crate::rank::criterion::Criterion;
-use crate::rank::RawDocument;
+use crate::criterion::Criterion;
+use crate::RawDocument;

 #[derive(Debug, Clone, Copy)]
 pub struct DocumentId;
--- a/meilidb-core/src/criterion/exact.rs
+++ b/meilidb-core/src/criterion/exact.rs
@ -1,9 +1,7 @@
 use std::cmp::Ordering;
-
 use slice_group_by::GroupBy;
-
-use crate::rank::criterion::Criterion;
-use crate::rank::RawDocument;
+use crate::criterion::Criterion;
+use crate::RawDocument;

 #[inline]
 fn number_exact_matches(query_index: &[u32], is_exact: &[bool]) -> usize {
--- a/meilidb-core/src/criterion/mod.rs
+++ b/meilidb-core/src/criterion/mod.rs
@ -4,11 +4,11 @@ mod words_proximity;
 mod sum_of_words_attribute;
 mod sum_of_words_position;
 mod exact;
-mod sort_by_attr;
+// mod sort_by_attr;
 mod document_id;

 use std::cmp::Ordering;
-use crate::rank::RawDocument;
+use crate::RawDocument;

 pub use self::{
    sum_of_typos::SumOfTypos,
@ -17,7 +17,7 @@ pub use self::{
    sum_of_words_attribute::SumOfWordsAttribute,
    sum_of_words_position::SumOfWordsPosition,
    exact::Exact,
-    sort_by_attr::SortByAttr,
+    // sort_by_attr::SortByAttr,
    document_id::DocumentId,
 };

--- a/meilidb-core/src/criterion/number_of_words.rs
+++ b/meilidb-core/src/criterion/number_of_words.rs
@ -1,9 +1,7 @@
 use std::cmp::Ordering;
-
 use slice_group_by::GroupBy;
-
-use crate::rank::criterion::Criterion;
-use crate::rank::RawDocument;
+use crate::criterion::Criterion;
+use crate::RawDocument;

 #[inline]
 fn number_of_query_words(query_index: &[u32]) -> usize {
--- a/meilidb-core/src/criterion/sort_by_attr.rs
+++ b/meilidb-core/src/criterion/sort_by_attr.rs
@ -3,9 +3,9 @@ use std::error::Error;
 use std::fmt;

 use crate::database::schema::{Schema, SchemaAttr};
-use crate::rank::criterion::Criterion;
+use crate::criterion::Criterion;
 use crate::database::RankedMap;
-use crate::rank::RawDocument;
+use crate::RawDocument;

 /// An helper struct that permit to sort documents by
 /// some of their stored attributes.
--- a/meilidb-core/src/criterion/sum_of_typos.rs
+++ b/meilidb-core/src/criterion/sum_of_typos.rs
@ -2,8 +2,8 @@ use std::cmp::Ordering;

 use slice_group_by::GroupBy;

-use crate::rank::criterion::Criterion;
-use crate::rank::RawDocument;
+use crate::criterion::Criterion;
+use crate::RawDocument;

 // This function is a wrong logarithmic 10 function.
 // It is safe to panic on input number higher than 3,
--- a/meilidb-core/src/criterion/sum_of_words_attribute.rs
+++ b/meilidb-core/src/criterion/sum_of_words_attribute.rs
@ -1,9 +1,7 @@
 use std::cmp::Ordering;
-
 use slice_group_by::GroupBy;
-
-use crate::rank::criterion::Criterion;
-use crate::rank::RawDocument;
+use crate::criterion::Criterion;
+use crate::RawDocument;

 #[inline]
 fn sum_matches_attributes(query_index: &[u32], attribute: &[u16]) -> usize {
--- a/meilidb-core/src/criterion/sum_of_words_position.rs
+++ b/meilidb-core/src/criterion/sum_of_words_position.rs
@ -1,9 +1,7 @@
 use std::cmp::Ordering;
-
 use slice_group_by::GroupBy;
-
-use crate::rank::criterion::Criterion;
-use crate::rank::RawDocument;
+use crate::criterion::Criterion;
+use crate::RawDocument;

 #[inline]
 fn sum_matches_attribute_index(query_index: &[u32], word_index: &[u16]) -> usize {
--- a/meilidb-core/src/criterion/words_proximity.rs
+++ b/meilidb-core/src/criterion/words_proximity.rs
@ -1,9 +1,7 @@
 use std::cmp::{self, Ordering};
-
 use slice_group_by::GroupBy;
-
-use crate::rank::criterion::Criterion;
-use crate::rank::RawDocument;
+use crate::criterion::Criterion;
+use crate::RawDocument;

 const MAX_DISTANCE: u16 = 8;

--- a/meilidb-core/src/data/doc_ids.rs
+++ b/meilidb-core/src/data/doc_ids.rs
--- a/meilidb-core/src/data/doc_indexes.rs
+++ b/meilidb-core/src/data/doc_indexes.rs
--- a/meilidb-core/src/data/mod.rs
+++ b/meilidb-core/src/data/mod.rs
--- a/meilidb-core/src/data/shared_data.rs
+++ b/meilidb-core/src/data/shared_data.rs
--- a/meilidb-core/src/distinct_map.rs
+++ b/meilidb-core/src/distinct_map.rs
--- a/meilidb-core/src/index.rs
+++ b/meilidb-core/src/index.rs
--- a/meilidb-core/src/lib.rs
+++ b/meilidb-core/src/lib.rs
@ -1,16 +1,118 @@
 pub mod criterion;
+pub mod data;
+mod index;
+mod automaton;
 mod query_builder;
 mod distinct_map;

+pub mod shared_data_cursor;
+pub mod write_to_bytes;
+
 use std::sync::Arc;
+use serde_derive::{Serialize, Deserialize};

 use slice_group_by::GroupBy;
 use rayon::slice::ParallelSliceMut;

-use crate::{Match, DocumentId};
-
+pub use self::index::{Index, IndexBuilder};
 pub use self::query_builder::{FilterFunc, QueryBuilder, DistinctQueryBuilder};

+/// Represents an internally generated unique document identifier.
+///
+/// It is used to tell the database which document you want to deserialize.
+/// Helpful for custom ranking.
+#[derive(Serialize, Deserialize)]
+#[derive(Debug, Copy, Clone, Eq, PartialEq, PartialOrd, Ord, Hash)]
+pub struct DocumentId(pub u64);
+
+/// This structure represents the position of a word
+/// in a document and its attributes.
+///
+/// This is stored in the map, generated at index time,
+/// extracted and interpreted at search time.
+#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
+#[repr(C)]
+pub struct DocIndex {
+    /// The document identifier where the word was found.
+    pub document_id: DocumentId,
+
+    /// The attribute in the document where the word was found
+    /// along with the index in it.
+    pub attribute: u16,
+    pub word_index: u16, // index of the word inside `attribute`
+
+    /// The position in bytes where the word was found
+    /// along with the length of it.
+    ///
+    /// It gives the location of the original word in the indexed text
+    /// without needing to run the tokenizer again.
+    pub char_index: u16,
+    pub char_length: u16, // length of the word starting at `char_index`
+}
+
+/// This structure represents a matching word with information
+/// on the location of the word in the document.
+///
+/// The order of the fields is important because it defines
+/// the way these structures are ordered between themselves.
+///
+/// The word in itself is not important.
+// TODO do data oriented programming ? very arrays ?
+#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
+pub struct Match {
+    /// The word index in the query sentence.
+    /// Same as the `attribute_index` but for the query words.
+    ///
+    /// Used to retrieve the automaton that matches this word.
+    pub query_index: u32,
+
+    /// The distance the word has with the query word
+    /// (i.e. the Levenshtein distance).
+    pub distance: u8,
+
+    /// The attribute in the document where the word was found
+    /// along with the index in it.
+    pub attribute: u16,
+    pub word_index: u16, // index of the word inside `attribute`
+
+    /// Whether the word that matches is an exact match or a prefix.
+    pub is_exact: bool,
+
+    /// The position in bytes where the word was found
+    /// along with the length of it.
+    ///
+    /// It gives the location of the original word in the indexed text
+    /// without needing to run the tokenizer again.
+    pub char_index: u16,
+    pub char_length: u16, // length of the word starting at `char_index`
+}
+
+impl Match {
+    pub fn zero() -> Self { // every field set to its minimum value
+        Match {
+            query_index: 0,
+            distance: 0,
+            attribute: 0,
+            word_index: 0,
+            is_exact: false,
+            char_index: 0,
+            char_length: 0,
+        }
+    }
+
+    pub fn max() -> Self { // every field set to its maximum value
+        Match {
+            query_index: u32::max_value(),
+            distance: u8::max_value(),
+            attribute: u16::max_value(),
+            word_index: u16::max_value(),
+            is_exact: true,
+            char_index: u16::max_value(),
+            char_length: u16::max_value(),
+        }
+    }
+}
+
 #[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
 pub struct Document {
    pub id: DocumentId,
@ -181,3 +283,15 @@ impl Matches {
        }
    }
 }
+
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use std::mem;
+
+    #[test]
+    fn docindex_mem_size() { // guards the #[repr(C)] layout of DocIndex
+        assert_eq!(mem::size_of::<DocIndex>(), 16); // u64 id (8) + 4 x u16 (8)
+    }
+}
--- a/meilidb-core/src/query_builder.rs
+++ b/meilidb-core/src/query_builder.rs
@ -11,11 +11,23 @@ use fst::Streamer;
 use log::info;

 use crate::automaton::{self, DfaExt, AutomatonExt};
-use crate::rank::distinct_map::{DistinctMap, BufferedDistinctMap};
-use crate::rank::criterion::Criteria;
-use crate::database::Index;
-use crate::rank::{raw_documents_from_matches, RawDocument, Document};
-use crate::{is_cjk, Match, DocumentId};
+use crate::distinct_map::{DistinctMap, BufferedDistinctMap};
+use crate::criterion::Criteria;
+use crate::{raw_documents_from_matches, RawDocument, Document};
+use crate::{Index, Match, DocumentId};
+
+// TODO: query splitting (and this helper, also present in the `meilidb` crate) must move out of this crate
+pub fn is_cjk(c: char) -> bool {
+    (c >= '\u{2e80}' && c <= '\u{2eff}') || // CJK Radicals Supplement
+    (c >= '\u{2f00}' && c <= '\u{2fdf}') || // Kangxi Radicals
+    (c >= '\u{3040}' && c <= '\u{309f}') || // Hiragana
+    (c >= '\u{30a0}' && c <= '\u{30ff}') || // Katakana
+    (c >= '\u{3100}' && c <= '\u{312f}') || // Bopomofo
+    (c >= '\u{3200}' && c <= '\u{32ff}') || // Enclosed CJK Letters and Months
+    (c >= '\u{3400}' && c <= '\u{4dbf}') || // CJK Unified Ideographs Extension A
+    (c >= '\u{4e00}' && c <= '\u{9fff}') || // CJK Unified Ideographs
+    (c >= '\u{f900}' && c <= '\u{faff}') // CJK Compatibility Ideographs
+}

 #[derive(Debug, PartialEq, Eq)]
 enum CharCategory {
--- a/meilidb-core/src/shared_data_cursor.rs
+++ b/meilidb-core/src/shared_data_cursor.rs
--- a/meilidb-core/src/write_to_bytes.rs
+++ b/meilidb-core/src/write_to_bytes.rs
--- a/meilidb/Cargo.lock
+++ b/meilidb/Cargo.lock
--- a/meilidb/Cargo.toml
+++ b/meilidb/Cargo.toml
@ -0,0 +1,50 @@
+[package]
+edition = "2018"
+name = "meilidb"
+version = "0.3.1"
+authors = ["Kerollmops <renault.cle@gmail.com>"]
+
+[dependencies]
+arc-swap = "0.3.7"
+bincode = "1.1.2"
+byteorder = "1.3.1"
+fst = "0.3.3"
+hashbrown = { version = "0.1.8", features = ["serde"] }
+linked-hash-map = { version = "0.5.1", features = ["serde_impl"] }
+lockfree = "0.5.1"
+log = "0.4.6"
+sdset = "0.3.1"
+serde = "1.0.88"
+serde_derive = "1.0.88"
+serde_json = { version = "1.0.38", features = ["preserve_order"] }
+size_format = "1.0.2"
+slice-group-by = "0.2.4"
+unidecode = "0.3.0"
+meilidb-core = { path = "../meilidb-core", version = "0.1.0" }
+
+[dependencies.toml]
+git = "https://github.com/Kerollmops/toml-rs.git"
+features = ["preserve_order"]
+rev = "0372ba6"
+
+[dependencies.rocksdb]
+git = "https://github.com/pingcap/rust-rocksdb.git"
+rev = "306e201"
+
+[features]
+default = ["simd"]
+i128 = ["bincode/i128"]
+portable = ["rocksdb/portable"]
+simd = ["rocksdb/sse"]
+nightly = ["hashbrown/nightly", "slice-group-by/nightly"]
+
+[dev-dependencies]
+csv = "1.0.5"
+env_logger = "0.6.0"
+jemallocator = "0.1.9"
+quickcheck = "0.8.2"
+rand = "0.6.5"
+rand_xorshift = "0.1.1"
+structopt = "0.2.14"
+tempfile = "3.0.7"
+termcolor = "1.0.4"
--- a/meilidb/src/common_words.rs
+++ b/meilidb/src/common_words.rs
--- a/meilidb/src/database/config.rs
+++ b/meilidb/src/database/config.rs
--- a/meilidb/src/database/document_key.rs
+++ b/meilidb/src/database/document_key.rs
@ -5,7 +5,7 @@ use std::fmt;
 use byteorder::{BigEndian, WriteBytesExt, ReadBytesExt};

 use crate::database::schema::SchemaAttr;
-use crate::DocumentId;
+use meilidb_core::DocumentId;

 const DOC_KEY_LEN:      usize = 4 + size_of::<u64>();
 const DOC_KEY_ATTR_LEN: usize = DOC_KEY_LEN + 1 + size_of::<u16>();
--- a/meilidb/src/database/mod.rs
+++ b/meilidb/src/database/mod.rs
@ -17,9 +17,9 @@ use hashbrown::HashMap;
 use log::{info, error, warn};

 use crate::database::schema::SchemaAttr;
-use crate::shared_data_cursor::FromSharedDataCursor;
-use crate::write_to_bytes::WriteToBytes;
-use crate::DocumentId;
+use meilidb_core::shared_data_cursor::FromSharedDataCursor;
+use meilidb_core::write_to_bytes::WriteToBytes;
+use meilidb_core::{Index, DocumentId};

 use self::update::{ReadIndexEvent, ReadRankedMapEvent};

@ -29,7 +29,6 @@ pub use self::view::{DatabaseView, DocumentIter};
 pub use self::update::Update;
 pub use self::serde::SerializerError;
 pub use self::schema::Schema;
-pub use self::index::Index;
 pub use self::number::{Number, ParseNumberError};

 pub type RankedMap = HashMap<(DocumentId, SchemaAttr), Number>;
@ -41,7 +40,6 @@ const CONFIG:          &[u8] = b"config";

 pub mod config;
 pub mod schema;
-pub(crate) mod index;
 mod number;
 mod document_key;
 mod serde;
--- a/meilidb/src/database/number.rs
+++ b/meilidb/src/database/number.rs
--- a/meilidb/src/database/schema.rs
+++ b/meilidb/src/database/schema.rs
@ -10,7 +10,7 @@ use linked_hash_map::LinkedHashMap;

 use crate::database::serde::find_id::FindDocumentIdSerializer;
 use crate::database::serde::SerializerError;
-use crate::DocumentId;
+use meilidb_core::DocumentId;

 pub const STORED: SchemaProps  = SchemaProps { stored: true,  indexed: false, ranked: false };
 pub const INDEXED: SchemaProps = SchemaProps { stored: false, indexed: true,  ranked: false };
--- a/meilidb/src/database/serde/deserializer.rs
+++ b/meilidb/src/database/serde/deserializer.rs
@ -10,7 +10,7 @@ use serde::de::{self, Visitor, IntoDeserializer};

 use crate::database::document_key::{DocumentKey, DocumentKeyAttr};
 use crate::database::schema::Schema;
-use crate::DocumentId;
+use meilidb_core::DocumentId;

 pub struct Deserializer<'a, D>
 where D: Deref<Target=DB>
--- a/meilidb/src/database/serde/find_id.rs
+++ b/meilidb/src/database/serde/find_id.rs
@ -3,7 +3,7 @@ use serde::ser;

 use crate::database::serde::key_to_string::KeyToStringSerializer;
 use crate::database::serde::{SerializerError, calculate_hash};
-use crate::DocumentId;
+use meilidb_core::DocumentId;

 pub struct FindDocumentIdSerializer<'a> {
    pub id_attribute_name: &'a str,
--- a/meilidb/src/database/serde/indexer_serializer.rs
+++ b/meilidb/src/database/serde/indexer_serializer.rs
@ -2,13 +2,14 @@ use std::collections::HashSet;

 use serde::Serialize;
 use serde::ser;
+use meilidb_core::{DocumentId, DocIndex};

 use crate::database::update::DocumentUpdate;
 use crate::database::serde::SerializerError;
 use crate::database::schema::SchemaAttr;
 use crate::tokenizer::TokenizerBuilder;
 use crate::tokenizer::Token;
-use crate::{is_cjk, DocumentId, DocIndex};
+use crate::is_cjk;

 pub struct IndexerSerializer<'a, 'b, B> {
    pub tokenizer_builder: &'a B,
--- a/meilidb/src/database/serde/key_to_string.rs
+++ b/meilidb/src/database/serde/key_to_string.rs
--- a/meilidb/src/database/serde/mod.rs
+++ b/meilidb/src/database/serde/mod.rs
--- a/meilidb/src/database/serde/serializer.rs
+++ b/meilidb/src/database/serde/serializer.rs
@ -10,7 +10,7 @@ use crate::database::update::DocumentUpdate;
 use crate::database::serde::SerializerError;
 use crate::tokenizer::TokenizerBuilder;
 use crate::database::schema::Schema;
-use crate::DocumentId;
+use meilidb_core::DocumentId;

 pub struct Serializer<'a, 'b, B> {
    pub schema: &'a Schema,
--- a/meilidb/src/database/serde/value_to_number.rs
+++ b/meilidb/src/database/serde/value_to_number.rs
--- a/meilidb/src/database/update/index_event.rs
+++ b/meilidb/src/database/update/index_event.rs
@ -1,11 +1,11 @@
 use std::error::Error;

 use byteorder::{ReadBytesExt, WriteBytesExt};
+use meilidb_core::shared_data_cursor::{SharedDataCursor, FromSharedDataCursor};
+use meilidb_core::write_to_bytes::WriteToBytes;
+use meilidb_core::data::DocIds;

-use crate::shared_data_cursor::{SharedDataCursor, FromSharedDataCursor};
-use crate::write_to_bytes::WriteToBytes;
 use crate::database::Index;
-use crate::data::DocIds;

 pub enum WriteIndexEvent<'a> {
    RemovedDocuments(&'a DocIds),
--- a/meilidb/src/database/update/mod.rs
+++ b/meilidb/src/database/update/mod.rs
@ -5,19 +5,18 @@ use rocksdb::rocksdb::{Writable, WriteBatch};
 use hashbrown::hash_map::HashMap;
 use sdset::{Set, SetBuf};
 use serde::Serialize;
+use meilidb_core::write_to_bytes::WriteToBytes;
+use meilidb_core::data::DocIds;
+use meilidb_core::{IndexBuilder, DocumentId, DocIndex};

 use crate::database::document_key::{DocumentKey, DocumentKeyAttr};
 use crate::database::serde::serializer::Serializer;
 use crate::database::serde::SerializerError;
 use crate::database::schema::SchemaAttr;
 use crate::database::schema::Schema;
-use crate::database::index::IndexBuilder;
 use crate::database::{DATA_INDEX, DATA_RANKED_MAP};
 use crate::database::{RankedMap, Number};
 use crate::tokenizer::TokenizerBuilder;
-use crate::write_to_bytes::WriteToBytes;
-use crate::data::DocIds;
-use crate::{DocumentId, DocIndex};

 pub use self::index_event::{ReadIndexEvent, WriteIndexEvent};
 pub use self::ranked_map_event::{ReadRankedMapEvent, WriteRankedMapEvent};
--- a/meilidb/src/database/update/ranked_map_event.rs
+++ b/meilidb/src/database/update/ranked_map_event.rs
@ -1,11 +1,11 @@
 use std::error::Error;

 use byteorder::{ReadBytesExt, WriteBytesExt};
+use meilidb_core::shared_data_cursor::{SharedDataCursor, FromSharedDataCursor};
+use meilidb_core::write_to_bytes::WriteToBytes;
+use meilidb_core::data::DocIds;

-use crate::shared_data_cursor::{SharedDataCursor, FromSharedDataCursor};
-use crate::write_to_bytes::WriteToBytes;
 use crate::database::RankedMap;
-use crate::data::DocIds;

 pub enum WriteRankedMapEvent<'a> {
    RemovedDocuments(&'a DocIds),
--- a/meilidb/src/database/view.rs
+++ b/meilidb/src/database/view.rs
@ -6,16 +6,15 @@ use std::{fmt, marker};
 use rocksdb::rocksdb_options::{ReadOptions, EnvOptions, ColumnFamilyOptions};
 use rocksdb::rocksdb::{DB, DBVector, Snapshot, SeekKey, SstFileWriter};
 use serde::de::DeserializeOwned;
+use meilidb_core::{Index, QueryBuilder, FilterFunc};
+use meilidb_core::DocumentId;

 use crate::database::{retrieve_data_schema, retrieve_data_index, retrieve_data_ranked_map, retrieve_config};
 use crate::database::serde::deserializer::Deserializer;
 use crate::database::{DocumentKey, DocumentKeyAttr};
-use crate::rank::{QueryBuilder, FilterFunc};
 use crate::database::schema::Schema;
-use crate::database::index::Index;
 use crate::database::RankedMap;
 use crate::database::Config;
-use crate::DocumentId;

 pub struct DatabaseView<D>
 where D: Deref<Target=DB>
--- a/meilidb/src/lib.rs
+++ b/meilidb/src/lib.rs
@ -0,0 +1,22 @@
+#![cfg_attr(feature = "nightly", feature(test))]
+
+pub mod database;
+pub mod tokenizer;
+mod common_words;
+
+pub use rocksdb;
+
+pub use self::tokenizer::Tokenizer;
+pub use self::common_words::CommonWords;
+
+pub fn is_cjk(c: char) -> bool { // true when `c` lies in one of the Unicode blocks listed below
+    (c >= '\u{2e80}' && c <= '\u{2eff}') || // CJK Radicals Supplement
+    (c >= '\u{2f00}' && c <= '\u{2fdf}') || // Kangxi Radicals
+    (c >= '\u{3040}' && c <= '\u{309f}') || // Hiragana
+    (c >= '\u{30a0}' && c <= '\u{30ff}') || // Katakana
+    (c >= '\u{3100}' && c <= '\u{312f}') || // Bopomofo
+    (c >= '\u{3200}' && c <= '\u{32ff}') || // Enclosed CJK Letters and Months
+    (c >= '\u{3400}' && c <= '\u{4dbf}') || // CJK Unified Ideographs Extension A
+    (c >= '\u{4e00}' && c <= '\u{9fff}') || // CJK Unified Ideographs
+    (c >= '\u{f900}' && c <= '\u{faff}') // CJK Compatibility Ideographs
+}
--- a/meilidb/src/tokenizer/mod.rs
+++ b/meilidb/src/tokenizer/mod.rs
--- a/src/lib.rs
+++ b/src/lib.rs
@ -1,136 +0,0 @@
-#![cfg_attr(feature = "nightly", feature(test))]
-
-pub mod automaton;
-pub mod database;
-pub mod data;
-pub mod rank;
-pub mod tokenizer;
-mod common_words;
-mod shared_data_cursor;
-mod write_to_bytes;
-
-use serde_derive::{Serialize, Deserialize};
-
-pub use rocksdb;
-
-pub use self::tokenizer::Tokenizer;
-pub use self::common_words::CommonWords;
-
-pub fn is_cjk(c: char) -> bool {
-    (c >= '\u{2e80}' && c <= '\u{2eff}') ||
-    (c >= '\u{2f00}' && c <= '\u{2fdf}') ||
-    (c >= '\u{3040}' && c <= '\u{309f}') ||
-    (c >= '\u{30a0}' && c <= '\u{30ff}') ||
-    (c >= '\u{3100}' && c <= '\u{312f}') ||
-    (c >= '\u{3200}' && c <= '\u{32ff}') ||
-    (c >= '\u{3400}' && c <= '\u{4dbf}') ||
-    (c >= '\u{4e00}' && c <= '\u{9fff}') ||
-    (c >= '\u{f900}' && c <= '\u{faff}')
-}
-
-/// Represent an internally generated document unique identifier.
-///
-/// It is used to inform the database the document you want to deserialize.
-/// Helpful for custom ranking.
-#[derive(Serialize, Deserialize)]
-#[derive(Debug, Copy, Clone, Eq, PartialEq, PartialOrd, Ord, Hash)]
-pub struct DocumentId(u64);
-
-/// This structure represent the position of a word
-/// in a document and its attributes.
-///
-/// This is stored in the map, generated at index time,
-/// extracted and interpreted at search time.
-#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
-#[repr(C)]
-pub struct DocIndex {
-    /// The document identifier where the word was found.
-    pub document_id: DocumentId,
-
-    /// The attribute in the document where the word was found
-    /// along with the index in it.
-    pub attribute: u16,
-    pub word_index: u16,
-
-    /// The position in bytes where the word was found
-    /// along with the length of it.
-    ///
-    /// It informs on the original word area in the text indexed
-    /// without needing to run the tokenizer again.
-    pub char_index: u16,
-    pub char_length: u16,
-}
-
-/// This structure represent a matching word with informations
-/// on the location of the word in the document.
-///
-/// The order of the field is important because it defines
-/// the way these structures are ordered between themselves.
-///
-/// The word in itself is not important.
-// TODO do data oriented programming ? very arrays ?
-#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
-pub struct Match {
-    /// The word index in the query sentence.
-    /// Same as the `attribute_index` but for the query words.
-    ///
-    /// Used to retrieve the automaton that match this word.
-    pub query_index: u32,
-
-    /// The distance the word has with the query word
-    /// (i.e. the Levenshtein distance).
-    pub distance: u8,
-
-    /// The attribute in the document where the word was found
-    /// along with the index in it.
-    pub attribute: u16,
-    pub word_index: u16,
-
-    /// Whether the word that match is an exact match or a prefix.
-    pub is_exact: bool,
-
-    /// The position in bytes where the word was found
-    /// along with the length of it.
-    ///
-    /// It informs on the original word area in the text indexed
-    /// without needing to run the tokenizer again.
-    pub char_index: u16,
-    pub char_length: u16,
-}
-
-impl Match {
-    pub fn zero() -> Self {
-        Match {
-            query_index: 0,
-            distance: 0,
-            attribute: 0,
-            word_index: 0,
-            is_exact: false,
-            char_index: 0,
-            char_length: 0,
-        }
-    }
-
-    pub fn max() -> Self {
-        Match {
-            query_index: u32::max_value(),
-            distance: u8::max_value(),
-            attribute: u16::max_value(),
-            word_index: u16::max_value(),
-            is_exact: true,
-            char_index: u16::max_value(),
-            char_length: u16::max_value(),
-        }
-    }
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-    use std::mem;
-
-    #[test]
-    fn docindex_mem_size() {
-        assert_eq!(mem::size_of::<DocIndex>(), 16);
-    }
-}