Add the raw document IDs to the postings lists

This commit is contained in:
Clément Renault 2020-01-08 15:30:43 +01:00
parent 9420edadf4
commit 81c573ec92
No known key found for this signature in database
GPG key ID: 92ADA4E935E71FA4
7 changed files with 54 additions and 59 deletions

View file

@@ -1,8 +1,7 @@
use std::collections::HashMap;
use std::borrow::Cow;
use fst::{set::OpBuilder, SetBuilder, IntoStreamer, Streamer};
use sdset::{duo::Union, SetOperation, Set, SetBuf};
use sdset::{duo::Union, SetOperation, Set};
use serde::{Deserialize, Serialize};
use log::debug;
@@ -201,7 +200,7 @@ pub fn apply_documents_addition<'a, 'b>(
// compute prefixes and store those in the PrefixPostingsListsCache.
let mut stream = words_fst.into_stream();
while let Some(input) = stream.next() {
if let Some(postings_list) = postings_lists_store.postings_list(writer, input)?.map(Cow::into_owned) {
if let Some(postings_list) = postings_lists_store.postings_list(writer, input)?.map(|p| p.matches.into_owned()) {
let prefix = &input[..1];
let mut arr = [0; 4];
@@ -453,7 +452,7 @@ pub fn write_documents_addition_index(
delta_words_builder.insert(&word).unwrap();
let set = match postings_lists_store.postings_list(writer, &word)? {
Some(set) => Union::new(&set, &delta_set).into_set_buf(),
Some(postings) => Union::new(&postings.matches, &delta_set).into_set_buf(),
None => delta_set,
};

View file

@@ -142,8 +142,8 @@ pub fn apply_documents_deletion(
for (word, document_ids) in words_document_ids {
let document_ids = SetBuf::from_dirty(document_ids);
if let Some(doc_indexes) = postings_lists_store.postings_list(writer, &word)? {
let op = DifferenceByKey::new(&doc_indexes, &document_ids, |d| d.document_id, |id| *id);
if let Some(postings) = postings_lists_store.postings_list(writer, &word)? {
let op = DifferenceByKey::new(&postings.matches, &document_ids, |d| d.document_id, |id| *id);
let doc_indexes = op.into_set_buf();
if !doc_indexes.is_empty() {