mirror of
https://github.com/meilisearch/MeiliSearch
synced 2024-11-30 08:44:27 +01:00
Document the Index types and the internal LMDB databases
This commit is contained in:
parent
2f0e1afd16
commit
ba3e805981
@ -329,22 +329,22 @@ fn writer(wtxn: &mut heed::RwTxn, index: &Index, key: &[u8], val: &[u8]) -> anyh
|
|||||||
}
|
}
|
||||||
else if key.starts_with(&[1]) {
|
else if key.starts_with(&[1]) {
|
||||||
// Write the postings lists
|
// Write the postings lists
|
||||||
index.postings_attrs.as_polymorph()
|
index.word_positions.as_polymorph()
|
||||||
.put::<_, ByteSlice, ByteSlice>(wtxn, &key[1..], val)?;
|
.put::<_, ByteSlice, ByteSlice>(wtxn, &key[1..], val)?;
|
||||||
}
|
}
|
||||||
else if key.starts_with(&[2]) {
|
else if key.starts_with(&[2]) {
|
||||||
// Write the prefix postings lists
|
// Write the prefix postings lists
|
||||||
index.prefix_postings_attrs.as_polymorph()
|
index.prefix_word_positions.as_polymorph()
|
||||||
.put::<_, ByteSlice, ByteSlice>(wtxn, &key[1..], val)?;
|
.put::<_, ByteSlice, ByteSlice>(wtxn, &key[1..], val)?;
|
||||||
}
|
}
|
||||||
else if key.starts_with(&[3]) {
|
else if key.starts_with(&[3]) {
|
||||||
// Write the postings lists
|
// Write the postings lists
|
||||||
index.postings_ids.as_polymorph()
|
index.word_position_docids.as_polymorph()
|
||||||
.put::<_, ByteSlice, ByteSlice>(wtxn, &key[1..], val)?;
|
.put::<_, ByteSlice, ByteSlice>(wtxn, &key[1..], val)?;
|
||||||
}
|
}
|
||||||
else if key.starts_with(&[4]) {
|
else if key.starts_with(&[4]) {
|
||||||
// Write the prefix postings lists
|
// Write the prefix postings lists
|
||||||
index.prefix_postings_ids.as_polymorph()
|
index.prefix_word_position_docids.as_polymorph()
|
||||||
.put::<_, ByteSlice, ByteSlice>(wtxn, &key[1..], val)?;
|
.put::<_, ByteSlice, ByteSlice>(wtxn, &key[1..], val)?;
|
||||||
}
|
}
|
||||||
else if key.starts_with(&[5]) {
|
else if key.starts_with(&[5]) {
|
||||||
|
32
src/lib.rs
32
src/lib.rs
@ -36,24 +36,28 @@ pub type AttributeId = u32;
|
|||||||
|
|
||||||
#[derive(Clone)]
|
#[derive(Clone)]
|
||||||
pub struct Index {
|
pub struct Index {
|
||||||
|
/// Contains many different types (e.g. the documents CSV headers).
|
||||||
pub main: PolyDatabase,
|
pub main: PolyDatabase,
|
||||||
pub postings_attrs: Database<Str, RoaringBitmapCodec>,
|
/// A word and all the positions where it appears in the whole dataset.
|
||||||
pub prefix_postings_attrs: Database<ByteSlice, RoaringBitmapCodec>,
|
pub word_positions: Database<Str, RoaringBitmapCodec>,
|
||||||
pub postings_ids: Database<ByteSlice, RoaringBitmapCodec>,
|
pub prefix_word_positions: Database<Str, RoaringBitmapCodec>,
|
||||||
pub prefix_postings_ids: Database<ByteSlice, RoaringBitmapCodec>,
|
/// Maps a word at a position (u32) and all the documents ids where it appears.
|
||||||
|
pub word_position_docids: Database<ByteSlice, RoaringBitmapCodec>,
|
||||||
|
pub prefix_word_position_docids: Database<ByteSlice, RoaringBitmapCodec>,
|
||||||
|
/// Maps an internal document to the content of the document in CSV.
|
||||||
pub documents: Database<OwnedType<BEU32>, ByteSlice>,
|
pub documents: Database<OwnedType<BEU32>, ByteSlice>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Index {
|
impl Index {
|
||||||
pub fn new(env: &heed::Env) -> heed::Result<Index> {
|
pub fn new(env: &heed::Env) -> heed::Result<Index> {
|
||||||
let main = env.create_poly_database(None)?;
|
Ok(Index {
|
||||||
let postings_attrs = env.create_database(Some("postings-attrs"))?;
|
main: env.create_poly_database(None)?,
|
||||||
let prefix_postings_attrs = env.create_database(Some("prefix-postings-attrs"))?;
|
word_positions: env.create_database(Some("word-positions"))?,
|
||||||
let postings_ids = env.create_database(Some("postings-ids"))?;
|
prefix_word_positions: env.create_database(Some("prefix-word-positions"))?,
|
||||||
let prefix_postings_ids = env.create_database(Some("prefix-postings-ids"))?;
|
word_position_docids: env.create_database(Some("word-position-docids"))?,
|
||||||
let documents = env.create_database(Some("documents"))?;
|
prefix_word_position_docids: env.create_database(Some("prefix-word-position-docids"))?,
|
||||||
|
documents: env.create_database(Some("documents"))?,
|
||||||
Ok(Index { main, postings_attrs, prefix_postings_attrs, postings_ids, prefix_postings_ids, documents })
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn headers<'t>(&self, rtxn: &'t heed::RoTxn) -> heed::Result<Option<&'t [u8]>> {
|
pub fn headers<'t>(&self, rtxn: &'t heed::RoTxn) -> heed::Result<Option<&'t [u8]>> {
|
||||||
@ -107,7 +111,7 @@ impl Index {
|
|||||||
let mut stream = fst.search(&dfa).into_stream();
|
let mut stream = fst.search(&dfa).into_stream();
|
||||||
while let Some(word) = stream.next() {
|
while let Some(word) = stream.next() {
|
||||||
let word = std::str::from_utf8(word)?;
|
let word = std::str::from_utf8(word)?;
|
||||||
if let Some(right) = self.postings_attrs.get(rtxn, word)? {
|
if let Some(right) = self.word_positions.get(rtxn, word)? {
|
||||||
union_positions.union_with(&right);
|
union_positions.union_with(&right);
|
||||||
derived_words.push((word.as_bytes().to_vec(), right));
|
derived_words.push((word.as_bytes().to_vec(), right));
|
||||||
count += 1;
|
count += 1;
|
||||||
@ -131,7 +135,7 @@ impl Index {
|
|||||||
if attrs.contains(pos) {
|
if attrs.contains(pos) {
|
||||||
let mut key = word.clone();
|
let mut key = word.clone();
|
||||||
key.extend_from_slice(&pos.to_be_bytes());
|
key.extend_from_slice(&pos.to_be_bytes());
|
||||||
if let Some(right) = self.postings_ids.get(rtxn, &key).unwrap() {
|
if let Some(right) = self.word_position_docids.get(rtxn, &key).unwrap() {
|
||||||
union_docids.union_with(&right);
|
union_docids.union_with(&right);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user