chore: Move index related things to the meilidb-core workspace member

2025-06-14 20:11:38 +02:00 · 2019-02-24 19:44:24 +01:00 · 2019-02-24 19:44:24 +01:00 · 14790eeae3
commit 14790eeae3
parent 3056b351fa
44 changed files with 1343 additions and 252 deletions
--- a/Cargo.toml
+++ b/Cargo.toml
@ -1,55 +1,5 @@
-[package]
+[workspace]
-edition = "2018"
+members = [
-name = "meilidb"
+    "meilidb",
-version = "0.3.2"
+    "meilidb-core",
-authors = ["Kerollmops <renault.cle@gmail.com>"]
+]
 [dependencies]
 arc-swap = "0.3.7"
 bincode = "1.1.2"
 byteorder = "1.3.1"
 fst = "0.3.3"
 hashbrown = { version = "0.1.8", features = ["serde"] }
 lazy_static = "1.2.0"
 levenshtein_automata = { version = "0.1.1", features = ["fst_automaton"] }
 linked-hash-map = { version = "0.5.1", features = ["serde_impl"] }
 lockfree = "0.5.1"
 log = "0.4.6"
 rayon = "1.0.3"
 sdset = "0.3.1"
 serde = "1.0.88"
 serde_derive = "1.0.88"
 serde_json = { version = "1.0.38", features = ["preserve_order"] }
 size_format = "1.0.2"
 slice-group-by = "0.2.4"
 unidecode = "0.3.0"
 [dependencies.toml]
 git = "https://github.com/Kerollmops/toml-rs.git"
 features = ["preserve_order"]
 rev = "0372ba6"
 [dependencies.rocksdb]
 git = "https://github.com/pingcap/rust-rocksdb.git"
 rev = "306e201"
 [features]
 default = ["simd"]
 i128 = ["bincode/i128", "byteorder/i128"]
 portable = ["rocksdb/portable"]
 simd = ["rocksdb/sse"]
 nightly = ["hashbrown/nightly", "slice-group-by/nightly"]
 [dev-dependencies]
 csv = "1.0.5"
 env_logger = "0.6.0"
 jemallocator = "0.1.9"
 quickcheck = "0.8.2"
 rand = "0.6.5"
 rand_xorshift = "0.1.1"
 structopt = "0.2.14"
 tempfile = "3.0.7"
 termcolor = "1.0.4"
 [profile.release]
 debug = true
--- a/meilidb-core/Cargo.toml
+++ b/meilidb-core/Cargo.toml
@ -0,0 +1,21 @@
 [package]
 name = "meilidb-core"
 version = "0.1.0"
 authors = ["Kerollmops <renault.cle@gmail.com>"]
 edition = "2018"
 [dependencies]
 byteorder = "1.3.1"
 fst = "0.3.3"
 hashbrown = "0.1.8"
 lazy_static = "1.2.0"
 levenshtein_automata = { version = "0.1.1", features = ["fst_automaton"] }
 log = "0.4.6"
 rayon = "1.0.3"
 sdset = "0.3.1"
 serde = "1.0.88"
 serde_derive = "1.0.88"
 slice-group-by = "0.2.4"
 [features]
 i128 = ["byteorder/i128"]
--- a/meilidb-core/src/automaton.rs
+++ b/meilidb-core/src/automaton.rs
--- a/meilidb-core/src/criterion/document_id.rs
+++ b/meilidb-core/src/criterion/document_id.rs
@ -1,7 +1,6 @@
 use std::cmp::Ordering;
-
+use crate::criterion::Criterion;
-use crate::rank::criterion::Criterion;
+use crate::RawDocument;
 use crate::rank::RawDocument;
 /// Field-less marker type declared in `criterion/document_id.rs`.
 ///
 /// Not to be confused with the crate-root `DocumentId(pub u64)` identifier.
 // NOTE(review): presumably the `Criterion` implementation for this type
 // follows further down the file — it is not visible in this hunk.
 #[derive(Debug, Clone, Copy)]
 pub struct DocumentId;
--- a/meilidb-core/src/criterion/exact.rs
+++ b/meilidb-core/src/criterion/exact.rs
@ -1,9 +1,7 @@
 use std::cmp::Ordering;
 use slice_group_by::GroupBy;
-
+use crate::criterion::Criterion;
-use crate::rank::criterion::Criterion;
+use crate::RawDocument;
 use crate::rank::RawDocument;
 #[inline]
 fn number_exact_matches(query_index: &[u32], is_exact: &[bool]) -> usize {
--- a/meilidb-core/src/criterion/mod.rs
+++ b/meilidb-core/src/criterion/mod.rs
@ -4,11 +4,11 @@ mod words_proximity;
 mod sum_of_words_attribute;
 mod sum_of_words_position;
 mod exact;
-mod sort_by_attr;
+// mod sort_by_attr;
 mod document_id;
 use std::cmp::Ordering;
-use crate::rank::RawDocument;
+use crate::RawDocument;
 pub use self::{
    sum_of_typos::SumOfTypos,
@ -17,7 +17,7 @@ pub use self::{
    sum_of_words_attribute::SumOfWordsAttribute,
    sum_of_words_position::SumOfWordsPosition,
    exact::Exact,
-    sort_by_attr::SortByAttr,
+    // sort_by_attr::SortByAttr,
    document_id::DocumentId,
 };
--- a/meilidb-core/src/criterion/number_of_words.rs
+++ b/meilidb-core/src/criterion/number_of_words.rs
@ -1,9 +1,7 @@
 use std::cmp::Ordering;
 use slice_group_by::GroupBy;
-
+use crate::criterion::Criterion;
-use crate::rank::criterion::Criterion;
+use crate::RawDocument;
 use crate::rank::RawDocument;
 #[inline]
 fn number_of_query_words(query_index: &[u32]) -> usize {
--- a/meilidb-core/src/criterion/sort_by_attr.rs
+++ b/meilidb-core/src/criterion/sort_by_attr.rs
@ -3,9 +3,9 @@ use std::error::Error;
 use std::fmt;
 use crate::database::schema::{Schema, SchemaAttr};
-use crate::rank::criterion::Criterion;
+use crate::criterion::Criterion;
 use crate::database::RankedMap;
-use crate::rank::RawDocument;
+use crate::RawDocument;
 /// A helper struct that permits sorting documents by
 /// some of their stored attributes.
--- a/meilidb-core/src/criterion/sum_of_typos.rs
+++ b/meilidb-core/src/criterion/sum_of_typos.rs
@ -2,8 +2,8 @@ use std::cmp::Ordering;
 use slice_group_by::GroupBy;
-use crate::rank::criterion::Criterion;
+use crate::criterion::Criterion;
-use crate::rank::RawDocument;
+use crate::RawDocument;
 // This function is a wrong logarithmic 10 function.
 // It is safe to panic on input number higher than 3,
--- a/meilidb-core/src/criterion/sum_of_words_attribute.rs
+++ b/meilidb-core/src/criterion/sum_of_words_attribute.rs
@ -1,9 +1,7 @@
 use std::cmp::Ordering;
 use slice_group_by::GroupBy;
-
+use crate::criterion::Criterion;
-use crate::rank::criterion::Criterion;
+use crate::RawDocument;
 use crate::rank::RawDocument;
 #[inline]
 fn sum_matches_attributes(query_index: &[u32], attribute: &[u16]) -> usize {
--- a/meilidb-core/src/criterion/sum_of_words_position.rs
+++ b/meilidb-core/src/criterion/sum_of_words_position.rs
@ -1,9 +1,7 @@
 use std::cmp::Ordering;
 use slice_group_by::GroupBy;
-
+use crate::criterion::Criterion;
-use crate::rank::criterion::Criterion;
+use crate::RawDocument;
 use crate::rank::RawDocument;
 #[inline]
 fn sum_matches_attribute_index(query_index: &[u32], word_index: &[u16]) -> usize {
--- a/meilidb-core/src/criterion/words_proximity.rs
+++ b/meilidb-core/src/criterion/words_proximity.rs
@ -1,9 +1,7 @@
 use std::cmp::{self, Ordering};
 use slice_group_by::GroupBy;
-
+use crate::criterion::Criterion;
-use crate::rank::criterion::Criterion;
+use crate::RawDocument;
 use crate::rank::RawDocument;
 const MAX_DISTANCE: u16 = 8;
--- a/meilidb-core/src/data/doc_ids.rs
+++ b/meilidb-core/src/data/doc_ids.rs
--- a/meilidb-core/src/data/doc_indexes.rs
+++ b/meilidb-core/src/data/doc_indexes.rs
--- a/meilidb-core/src/data/mod.rs
+++ b/meilidb-core/src/data/mod.rs
--- a/meilidb-core/src/data/shared_data.rs
+++ b/meilidb-core/src/data/shared_data.rs
--- a/meilidb-core/src/distinct_map.rs
+++ b/meilidb-core/src/distinct_map.rs
--- a/meilidb-core/src/index.rs
+++ b/meilidb-core/src/index.rs
--- a/meilidb-core/src/lib.rs
+++ b/meilidb-core/src/lib.rs
@ -1,16 +1,118 @@
 pub mod criterion;
 pub mod data;
 mod index;
 mod automaton;
 mod query_builder;
 mod distinct_map;
 pub mod shared_data_cursor;
 pub mod write_to_bytes;
 use std::sync::Arc;
 use serde_derive::{Serialize, Deserialize};
 use slice_group_by::GroupBy;
 use rayon::slice::ParallelSliceMut;
-use crate::{Match, DocumentId};
+pub use self::index::{Index, IndexBuilder};
 pub use self::query_builder::{FilterFunc, QueryBuilder, DistinctQueryBuilder};
 /// Represents an internally generated unique document identifier.
 ///
 /// It tells the database which document you want to deserialize and is
 /// helpful for custom ranking.
 ///
 /// The wrapped `u64` is public, so an identifier can be built from and
 /// read back as a raw integer.
 #[derive(Serialize, Deserialize)]
 #[derive(Debug, Copy, Clone, Eq, PartialEq, PartialOrd, Ord, Hash)]
 pub struct DocumentId(pub u64);
 /// Describes where a word occurs in a document: which document, which
 /// attribute, and at which position inside that attribute.
 ///
 /// This is stored in the map, generated at index time,
 /// extracted and interpreted at search time.
 ///
 /// The struct is `#[repr(C)]`, so fields are laid out in declaration
 /// order: a `u64`-backed `DocumentId` followed by four `u16`s, i.e.
 /// 8 + 4 * 2 = 16 bytes without padding. The derived `Ord` also compares
 /// fields in declaration order, so reordering them changes both the
 /// layout and the sort order.
 #[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
 #[repr(C)]
 pub struct DocIndex {
    /// The identifier of the document where the word was found.
    pub document_id: DocumentId,
    /// The attribute of the document in which the word was found.
    pub attribute: u16,
    /// The index of the word inside that attribute.
    pub word_index: u16,
    /// The position at which the word starts in the indexed text.
    ///
    /// Together with `char_length` it locates the original word in the
    /// indexed text without needing to run the tokenizer again.
    // NOTE(review): the original comment says "position in bytes" while the
    // field is named `char_index` — confirm the unit against the tokenizer.
    pub char_index: u16,
    /// The length of the word, presumably in the same unit as `char_index`.
    pub char_length: u16,
 }
 /// A document word that matched one of the query words, together with
 /// information on where that word sits in the document.
 ///
 /// Field order matters: the derived `PartialOrd`/`Ord` compare fields
 /// from top to bottom, which defines how matches sort between themselves.
 ///
 /// The matched word itself is not kept in this structure.
 // TODO do data oriented programming ? very arrays ?
 #[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
 pub struct Match {
    /// The index of the word in the query sentence; it plays for query
    /// words the role `word_index` plays for document words.
    ///
    /// Used to retrieve the automaton that matches this word.
    pub query_index: u32,
    /// The distance between the document word and the query word
    /// (i.e. the Levenshtein distance).
    pub distance: u8,
    /// The attribute of the document in which the word was found.
    pub attribute: u16,
    /// The index of the word inside that attribute.
    pub word_index: u16,
    /// Whether the matching word is an exact match or only a prefix.
    pub is_exact: bool,
    /// Where the word starts in the indexed text.
    ///
    /// Together with `char_length` it locates the original word in the
    /// indexed text without needing to run the tokenizer again.
    pub char_index: u16,
    /// The length of the word in the indexed text.
    pub char_length: u16,
 }
 impl Match {
    /// Builds the `Match` whose every field holds its minimum value
    /// (numbers at zero, `is_exact` at `false`).
    pub fn zero() -> Self {
        Self {
            query_index: 0,
            distance: 0,
            attribute: 0,
            word_index: 0,
            is_exact: false,
            char_index: 0,
            char_length: 0,
        }
    }
    /// Builds the `Match` whose every field holds its maximum value
    /// (numbers at their type maximum, `is_exact` at `true`).
    pub fn max() -> Self {
        // All four 16-bit fields share the same upper bound.
        let widest = std::u16::MAX;
        Self {
            query_index: std::u32::MAX,
            distance: std::u8::MAX,
            attribute: widest,
            word_index: widest,
            is_exact: true,
            char_index: widest,
            char_length: widest,
        }
    }
 }
 #[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
 pub struct Document {
    pub id: DocumentId,
@ -181,3 +283,15 @@ impl Matches {
        }
    }
 }
 #[cfg(test)]
 mod tests {
    use super::*;
    use std::mem;
    /// Guards the in-memory size of `DocIndex`.
    ///
    /// `DocIndex` is `#[repr(C)]` and made of a `u64`-backed `DocumentId`
    /// followed by four `u16` fields: 8 + 4 * 2 = 16 bytes, already a
    /// multiple of the 8-byte alignment, so no padding is added.
    #[test]
    fn docindex_mem_size() {
        assert_eq!(mem::size_of::<DocIndex>(), 16);
    }
 }
--- a/meilidb-core/src/query_builder.rs
+++ b/meilidb-core/src/query_builder.rs
@ -11,11 +11,23 @@ use fst::Streamer;
 use log::info;
 use crate::automaton::{self, DfaExt, AutomatonExt};
-use crate::rank::distinct_map::{DistinctMap, BufferedDistinctMap};
+use crate::distinct_map::{DistinctMap, BufferedDistinctMap};
-use crate::rank::criterion::Criteria;
+use crate::criterion::Criteria;
-use crate::database::Index;
+use crate::{raw_documents_from_matches, RawDocument, Document};
-use crate::rank::{raw_documents_from_matches, RawDocument, Document};
+use crate::{Index, Match, DocumentId};
-use crate::{is_cjk, Match, DocumentId};
+
 // query splitting must move out of this crate
 /// Tells whether `c` lies in one of the CJK code-point ranges below.
 ///
 /// Every range is inclusive on both ends.
 pub fn is_cjk(c: char) -> bool {
    match c {
        // CJK Radicals Supplement
        '\u{2e80}'..='\u{2eff}' => true,
        // Kangxi Radicals
        '\u{2f00}'..='\u{2fdf}' => true,
        // Hiragana
        '\u{3040}'..='\u{309f}' => true,
        // Katakana
        '\u{30a0}'..='\u{30ff}' => true,
        // Bopomofo
        '\u{3100}'..='\u{312f}' => true,
        // Enclosed CJK Letters and Months
        '\u{3200}'..='\u{32ff}' => true,
        // CJK Unified Ideographs Extension A
        '\u{3400}'..='\u{4dbf}' => true,
        // CJK Unified Ideographs
        '\u{4e00}'..='\u{9fff}' => true,
        // CJK Compatibility Ideographs
        '\u{f900}'..='\u{faff}' => true,
        _ => false,
    }
 }
 #[derive(Debug, PartialEq, Eq)]
 enum CharCategory {
--- a/meilidb-core/src/shared_data_cursor.rs
+++ b/meilidb-core/src/shared_data_cursor.rs
--- a/meilidb-core/src/write_to_bytes.rs
+++ b/meilidb-core/src/write_to_bytes.rs
--- a/meilidb/Cargo.lock
+++ b/meilidb/Cargo.lock
--- a/meilidb/Cargo.toml
+++ b/meilidb/Cargo.toml
@ -0,0 +1,50 @@
 [package]
 edition = "2018"
 name = "meilidb"
 version = "0.3.1"
 authors = ["Kerollmops <renault.cle@gmail.com>"]
 [dependencies]
 arc-swap = "0.3.7"
 bincode = "1.1.2"
 byteorder = "1.3.1"
 fst = "0.3.3"
 hashbrown = { version = "0.1.8", features = ["serde"] }
 linked-hash-map = { version = "0.5.1", features = ["serde_impl"] }
 lockfree = "0.5.1"
 log = "0.4.6"
 sdset = "0.3.1"
 serde = "1.0.88"
 serde_derive = "1.0.88"
 serde_json = { version = "1.0.38", features = ["preserve_order"] }
 size_format = "1.0.2"
 slice-group-by = "0.2.4"
 unidecode = "0.3.0"
 meilidb-core = { path = "../meilidb-core", version = "0.1.0" }
 [dependencies.toml]
 git = "https://github.com/Kerollmops/toml-rs.git"
 features = ["preserve_order"]
 rev = "0372ba6"
 [dependencies.rocksdb]
 git = "https://github.com/pingcap/rust-rocksdb.git"
 rev = "306e201"
 [features]
 default = ["simd"]
 i128 = ["bincode/i128"]
 portable = ["rocksdb/portable"]
 simd = ["rocksdb/sse"]
 nightly = ["hashbrown/nightly", "slice-group-by/nightly"]
 [dev-dependencies]
 csv = "1.0.5"
 env_logger = "0.6.0"
 jemallocator = "0.1.9"
 quickcheck = "0.8.2"
 rand = "0.6.5"
 rand_xorshift = "0.1.1"
 structopt = "0.2.14"
 tempfile = "3.0.7"
 termcolor = "1.0.4"
--- a/meilidb/src/common_words.rs
+++ b/meilidb/src/common_words.rs
--- a/meilidb/src/database/config.rs
+++ b/meilidb/src/database/config.rs
--- a/meilidb/src/database/document_key.rs
+++ b/meilidb/src/database/document_key.rs
@ -5,7 +5,7 @@ use std::fmt;
 use byteorder::{BigEndian, WriteBytesExt, ReadBytesExt};
 use crate::database::schema::SchemaAttr;
-use crate::DocumentId;
+use meilidb_core::DocumentId;
 const DOC_KEY_LEN:      usize = 4 + size_of::<u64>();
 const DOC_KEY_ATTR_LEN: usize = DOC_KEY_LEN + 1 + size_of::<u16>();
--- a/meilidb/src/database/mod.rs
+++ b/meilidb/src/database/mod.rs
@ -17,9 +17,9 @@ use hashbrown::HashMap;
 use log::{info, error, warn};
 use crate::database::schema::SchemaAttr;
-use crate::shared_data_cursor::FromSharedDataCursor;
+use meilidb_core::shared_data_cursor::FromSharedDataCursor;
-use crate::write_to_bytes::WriteToBytes;
+use meilidb_core::write_to_bytes::WriteToBytes;
-use crate::DocumentId;
+use meilidb_core::{Index, DocumentId};
 use self::update::{ReadIndexEvent, ReadRankedMapEvent};
@ -29,7 +29,6 @@ pub use self::view::{DatabaseView, DocumentIter};
 pub use self::update::Update;
 pub use self::serde::SerializerError;
 pub use self::schema::Schema;
 pub use self::index::Index;
 pub use self::number::{Number, ParseNumberError};
 pub type RankedMap = HashMap<(DocumentId, SchemaAttr), Number>;
@ -41,7 +40,6 @@ const CONFIG:          &[u8] = b"config";
 pub mod config;
 pub mod schema;
 pub(crate) mod index;
 mod number;
 mod document_key;
 mod serde;
--- a/meilidb/src/database/number.rs
+++ b/meilidb/src/database/number.rs
--- a/meilidb/src/database/schema.rs
+++ b/meilidb/src/database/schema.rs
@ -10,7 +10,7 @@ use linked_hash_map::LinkedHashMap;
 use crate::database::serde::find_id::FindDocumentIdSerializer;
 use crate::database::serde::SerializerError;
-use crate::DocumentId;
+use meilidb_core::DocumentId;
 pub const STORED: SchemaProps  = SchemaProps { stored: true,  indexed: false, ranked: false };
 pub const INDEXED: SchemaProps = SchemaProps { stored: false, indexed: true,  ranked: false };
--- a/meilidb/src/database/serde/deserializer.rs
+++ b/meilidb/src/database/serde/deserializer.rs
@ -10,7 +10,7 @@ use serde::de::{self, Visitor, IntoDeserializer};
 use crate::database::document_key::{DocumentKey, DocumentKeyAttr};
 use crate::database::schema::Schema;
-use crate::DocumentId;
+use meilidb_core::DocumentId;
 pub struct Deserializer<'a, D>
 where D: Deref<Target=DB>
--- a/meilidb/src/database/serde/find_id.rs
+++ b/meilidb/src/database/serde/find_id.rs
@ -3,7 +3,7 @@ use serde::ser;
 use crate::database::serde::key_to_string::KeyToStringSerializer;
 use crate::database::serde::{SerializerError, calculate_hash};
-use crate::DocumentId;
+use meilidb_core::DocumentId;
 pub struct FindDocumentIdSerializer<'a> {
    pub id_attribute_name: &'a str,
--- a/meilidb/src/database/serde/indexer_serializer.rs
+++ b/meilidb/src/database/serde/indexer_serializer.rs
@ -2,13 +2,14 @@ use std::collections::HashSet;
 use serde::Serialize;
 use serde::ser;
 use meilidb_core::{DocumentId, DocIndex};
 use crate::database::update::DocumentUpdate;
 use crate::database::serde::SerializerError;
 use crate::database::schema::SchemaAttr;
 use crate::tokenizer::TokenizerBuilder;
 use crate::tokenizer::Token;
-use crate::{is_cjk, DocumentId, DocIndex};
+use crate::is_cjk;
 pub struct IndexerSerializer<'a, 'b, B> {
    pub tokenizer_builder: &'a B,
--- a/meilidb/src/database/serde/key_to_string.rs
+++ b/meilidb/src/database/serde/key_to_string.rs
--- a/meilidb/src/database/serde/mod.rs
+++ b/meilidb/src/database/serde/mod.rs
--- a/meilidb/src/database/serde/serializer.rs
+++ b/meilidb/src/database/serde/serializer.rs
@ -10,7 +10,7 @@ use crate::database::update::DocumentUpdate;
 use crate::database::serde::SerializerError;
 use crate::tokenizer::TokenizerBuilder;
 use crate::database::schema::Schema;
-use crate::DocumentId;
+use meilidb_core::DocumentId;
 pub struct Serializer<'a, 'b, B> {
    pub schema: &'a Schema,
--- a/meilidb/src/database/serde/value_to_number.rs
+++ b/meilidb/src/database/serde/value_to_number.rs
--- a/meilidb/src/database/update/index_event.rs
+++ b/meilidb/src/database/update/index_event.rs
@ -1,11 +1,11 @@
 use std::error::Error;
 use byteorder::{ReadBytesExt, WriteBytesExt};
 use meilidb_core::shared_data_cursor::{SharedDataCursor, FromSharedDataCursor};
 use meilidb_core::write_to_bytes::WriteToBytes;
 use meilidb_core::data::DocIds;
 use crate::shared_data_cursor::{SharedDataCursor, FromSharedDataCursor};
 use crate::write_to_bytes::WriteToBytes;
 use crate::database::Index;
 use crate::data::DocIds;
 pub enum WriteIndexEvent<'a> {
    RemovedDocuments(&'a DocIds),
--- a/meilidb/src/database/update/mod.rs
+++ b/meilidb/src/database/update/mod.rs
@ -5,19 +5,18 @@ use rocksdb::rocksdb::{Writable, WriteBatch};
 use hashbrown::hash_map::HashMap;
 use sdset::{Set, SetBuf};
 use serde::Serialize;
 use meilidb_core::write_to_bytes::WriteToBytes;
 use meilidb_core::data::DocIds;
 use meilidb_core::{IndexBuilder, DocumentId, DocIndex};
 use crate::database::document_key::{DocumentKey, DocumentKeyAttr};
 use crate::database::serde::serializer::Serializer;
 use crate::database::serde::SerializerError;
 use crate::database::schema::SchemaAttr;
 use crate::database::schema::Schema;
 use crate::database::index::IndexBuilder;
 use crate::database::{DATA_INDEX, DATA_RANKED_MAP};
 use crate::database::{RankedMap, Number};
 use crate::tokenizer::TokenizerBuilder;
 use crate::write_to_bytes::WriteToBytes;
 use crate::data::DocIds;
 use crate::{DocumentId, DocIndex};
 pub use self::index_event::{ReadIndexEvent, WriteIndexEvent};
 pub use self::ranked_map_event::{ReadRankedMapEvent, WriteRankedMapEvent};
--- a/meilidb/src/database/update/ranked_map_event.rs
+++ b/meilidb/src/database/update/ranked_map_event.rs
@ -1,11 +1,11 @@
 use std::error::Error;
 use byteorder::{ReadBytesExt, WriteBytesExt};
 use meilidb_core::shared_data_cursor::{SharedDataCursor, FromSharedDataCursor};
 use meilidb_core::write_to_bytes::WriteToBytes;
 use meilidb_core::data::DocIds;
 use crate::shared_data_cursor::{SharedDataCursor, FromSharedDataCursor};
 use crate::write_to_bytes::WriteToBytes;
 use crate::database::RankedMap;
 use crate::data::DocIds;
 pub enum WriteRankedMapEvent<'a> {
    RemovedDocuments(&'a DocIds),
--- a/meilidb/src/database/view.rs
+++ b/meilidb/src/database/view.rs
@ -6,16 +6,15 @@ use std::{fmt, marker};
 use rocksdb::rocksdb_options::{ReadOptions, EnvOptions, ColumnFamilyOptions};
 use rocksdb::rocksdb::{DB, DBVector, Snapshot, SeekKey, SstFileWriter};
 use serde::de::DeserializeOwned;
 use meilidb_core::{Index, QueryBuilder, FilterFunc};
 use meilidb_core::DocumentId;
 use crate::database::{retrieve_data_schema, retrieve_data_index, retrieve_data_ranked_map, retrieve_config};
 use crate::database::serde::deserializer::Deserializer;
 use crate::database::{DocumentKey, DocumentKeyAttr};
 use crate::rank::{QueryBuilder, FilterFunc};
 use crate::database::schema::Schema;
 use crate::database::index::Index;
 use crate::database::RankedMap;
 use crate::database::Config;
 use crate::DocumentId;
 pub struct DatabaseView<D>
 where D: Deref<Target=DB>
--- a/meilidb/src/lib.rs
+++ b/meilidb/src/lib.rs
@ -0,0 +1,22 @@
 #![cfg_attr(feature = "nightly", feature(test))]
 pub mod database;
 pub mod tokenizer;
 mod common_words;
 pub use rocksdb;
 pub use self::tokenizer::Tokenizer;
 pub use self::common_words::CommonWords;
 /// Tells whether `c` lies in one of the CJK code-point ranges below.
 ///
 /// Every range is inclusive on both ends.
 pub fn is_cjk(c: char) -> bool {
    match c {
        // CJK Radicals Supplement
        '\u{2e80}'..='\u{2eff}' => true,
        // Kangxi Radicals
        '\u{2f00}'..='\u{2fdf}' => true,
        // Hiragana
        '\u{3040}'..='\u{309f}' => true,
        // Katakana
        '\u{30a0}'..='\u{30ff}' => true,
        // Bopomofo
        '\u{3100}'..='\u{312f}' => true,
        // Enclosed CJK Letters and Months
        '\u{3200}'..='\u{32ff}' => true,
        // CJK Unified Ideographs Extension A
        '\u{3400}'..='\u{4dbf}' => true,
        // CJK Unified Ideographs
        '\u{4e00}'..='\u{9fff}' => true,
        // CJK Compatibility Ideographs
        '\u{f900}'..='\u{faff}' => true,
        _ => false,
    }
 }
--- a/meilidb/src/tokenizer/mod.rs
+++ b/meilidb/src/tokenizer/mod.rs
--- a/src/lib.rs
+++ b/src/lib.rs
@ -1,136 +0,0 @@
 #![cfg_attr(feature = "nightly", feature(test))]
 pub mod automaton;
 pub mod database;
 pub mod data;
 pub mod rank;
 pub mod tokenizer;
 mod common_words;
 mod shared_data_cursor;
 mod write_to_bytes;
 use serde_derive::{Serialize, Deserialize};
 pub use rocksdb;
 pub use self::tokenizer::Tokenizer;
 pub use self::common_words::CommonWords;
 /// Tells whether `c` lies in one of the CJK code-point ranges below.
 ///
 /// Every range is inclusive on both ends.
 pub fn is_cjk(c: char) -> bool {
    match c {
        // CJK Radicals Supplement
        '\u{2e80}'..='\u{2eff}' => true,
        // Kangxi Radicals
        '\u{2f00}'..='\u{2fdf}' => true,
        // Hiragana
        '\u{3040}'..='\u{309f}' => true,
        // Katakana
        '\u{30a0}'..='\u{30ff}' => true,
        // Bopomofo
        '\u{3100}'..='\u{312f}' => true,
        // Enclosed CJK Letters and Months
        '\u{3200}'..='\u{32ff}' => true,
        // CJK Unified Ideographs Extension A
        '\u{3400}'..='\u{4dbf}' => true,
        // CJK Unified Ideographs
        '\u{4e00}'..='\u{9fff}' => true,
        // CJK Compatibility Ideographs
        '\u{f900}'..='\u{faff}' => true,
        _ => false,
    }
 }
 /// Represents an internally generated unique document identifier.
 ///
 /// It tells the database which document you want to deserialize and is
 /// helpful for custom ranking.
 ///
 /// The wrapped `u64` is not `pub`, so it is only reachable from the
 /// module that declares this type.
 #[derive(Serialize, Deserialize)]
 #[derive(Debug, Copy, Clone, Eq, PartialEq, PartialOrd, Ord, Hash)]
 pub struct DocumentId(u64);
 /// Describes where a word occurs in a document: which document, which
 /// attribute, and at which position inside that attribute.
 ///
 /// This is stored in the map, generated at index time,
 /// extracted and interpreted at search time.
 ///
 /// The struct is `#[repr(C)]`, so fields are laid out in declaration
 /// order: a `u64`-backed `DocumentId` followed by four `u16`s, i.e.
 /// 8 + 4 * 2 = 16 bytes without padding. The derived `Ord` also compares
 /// fields in declaration order, so reordering them changes both the
 /// layout and the sort order.
 #[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
 #[repr(C)]
 pub struct DocIndex {
    /// The identifier of the document where the word was found.
    pub document_id: DocumentId,
    /// The attribute of the document in which the word was found.
    pub attribute: u16,
    /// The index of the word inside that attribute.
    pub word_index: u16,
    /// The position at which the word starts in the indexed text.
    ///
    /// Together with `char_length` it locates the original word in the
    /// indexed text without needing to run the tokenizer again.
    // NOTE(review): the original comment says "position in bytes" while the
    // field is named `char_index` — confirm the unit against the tokenizer.
    pub char_index: u16,
    /// The length of the word, presumably in the same unit as `char_index`.
    pub char_length: u16,
 }
 /// A document word that matched one of the query words, together with
 /// information on where that word sits in the document.
 ///
 /// Field order matters: the derived `PartialOrd`/`Ord` compare fields
 /// from top to bottom, which defines how matches sort between themselves.
 ///
 /// The matched word itself is not kept in this structure.
 // TODO do data oriented programming ? very arrays ?
 #[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
 pub struct Match {
    /// The index of the word in the query sentence; it plays for query
    /// words the role `word_index` plays for document words.
    ///
    /// Used to retrieve the automaton that matches this word.
    pub query_index: u32,
    /// The distance between the document word and the query word
    /// (i.e. the Levenshtein distance).
    pub distance: u8,
    /// The attribute of the document in which the word was found.
    pub attribute: u16,
    /// The index of the word inside that attribute.
    pub word_index: u16,
    /// Whether the matching word is an exact match or only a prefix.
    pub is_exact: bool,
    /// Where the word starts in the indexed text.
    ///
    /// Together with `char_length` it locates the original word in the
    /// indexed text without needing to run the tokenizer again.
    pub char_index: u16,
    /// The length of the word in the indexed text.
    pub char_length: u16,
 }
 impl Match {
    /// Builds the `Match` whose every field holds its minimum value
    /// (numbers at zero, `is_exact` at `false`).
    pub fn zero() -> Self {
        Self {
            query_index: 0,
            distance: 0,
            attribute: 0,
            word_index: 0,
            is_exact: false,
            char_index: 0,
            char_length: 0,
        }
    }
    /// Builds the `Match` whose every field holds its maximum value
    /// (numbers at their type maximum, `is_exact` at `true`).
    pub fn max() -> Self {
        // All four 16-bit fields share the same upper bound.
        let widest = std::u16::MAX;
        Self {
            query_index: std::u32::MAX,
            distance: std::u8::MAX,
            attribute: widest,
            word_index: widest,
            is_exact: true,
            char_index: widest,
            char_length: widest,
        }
    }
 }
 #[cfg(test)]
 mod tests {
    use super::DocIndex;
    use std::mem::size_of;
    /// Guards the in-memory size of `DocIndex`: a `u64`-backed identifier
    /// plus four `u16` fields under `#[repr(C)]`.
    #[test]
    fn docindex_mem_size() {
        let expected_bytes = 16;
        assert_eq!(size_of::<DocIndex>(), expected_bytes);
    }
 }