mirror of
https://github.com/meilisearch/MeiliSearch
synced 2024-11-22 21:04:27 +01:00
feat: Replace the HashMap by a simple Vec in stream ops
This commit is contained in:
parent
31a83eae4d
commit
f6a40ed7e4
@ -5,8 +5,8 @@ use std::path::Path;
|
||||
use std::ops::Deref;
|
||||
use std::sync::Arc;
|
||||
use std::mem;
|
||||
use byteorder::{LittleEndian, ReadBytesExt, WriteBytesExt};
|
||||
use fst::raw::MmapReadOnly;
|
||||
use byteorder::{LittleEndian, ReadBytesExt, WriteBytesExt};
|
||||
use crate::DocIndex;
|
||||
|
||||
#[repr(C)]
|
||||
@ -180,9 +180,6 @@ fn into_sliced_ranges<T>(vecs: Vec<Vec<T>>, number_docs: usize) -> (Vec<Range>,
|
||||
let mut ranges = Vec::with_capacity(cap);
|
||||
let mut values = Vec::with_capacity(number_docs);
|
||||
|
||||
// @Improvement: remove bounds duplications: the left bound of a range
|
||||
// is already the right bound of the previous range,
|
||||
// we could use a slice window of size 2.
|
||||
for v in &vecs {
|
||||
let len = v.len() as u64;
|
||||
let start = ranges.last().map(|&Range { end, .. }| end).unwrap_or(0);
|
||||
|
@ -1,15 +1,13 @@
|
||||
// pub mod difference;
|
||||
// pub mod stream_ops;
|
||||
mod ops_indexed_value;
|
||||
pub mod ops;
|
||||
pub mod doc_indexes;
|
||||
|
||||
use fst::{Map, MapBuilder};
|
||||
use self::doc_indexes::{DocIndexes, DocIndexesBuilder};
|
||||
use std::error::Error;
|
||||
use std::path::Path;
|
||||
use std::io::Write;
|
||||
use crate::DocIndex;
|
||||
use self::doc_indexes::{DocIndexes, DocIndexesBuilder};
|
||||
|
||||
pub struct Metadata {
|
||||
map: Map,
|
||||
@ -87,8 +85,6 @@ impl<W: Write, X: Write> MetadataBuilder<W, X> {
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use crate::vec_read_only::VecReadOnly;
|
||||
use crate::metadata::ops::IndexedDocIndexes;
|
||||
|
||||
#[test]
|
||||
fn empty_serialize_deserialize() {
|
||||
|
@ -1,5 +1,4 @@
|
||||
use std::hash::{Hash, Hasher};
|
||||
use std::collections::{HashMap, BTreeMap};
|
||||
use std::collections::BTreeMap;
|
||||
use fst::{map, Streamer, Automaton};
|
||||
use fst::automaton::AlwaysMatch;
|
||||
use sdset::multi::OpBuilder as SdOpBuilder;
|
||||
@ -9,7 +8,6 @@ use crate::metadata::ops_indexed_value::{
|
||||
};
|
||||
use crate::metadata::doc_indexes::DocIndexes;
|
||||
use crate::metadata::Metadata;
|
||||
use crate::automaton::AutomatonExt;
|
||||
use crate::vec_read_only::VecReadOnly;
|
||||
use crate::DocIndex;
|
||||
|
||||
@ -60,19 +58,19 @@ impl<'m, A: 'm + Automaton> OpBuilder<'m, A> {
|
||||
}
|
||||
|
||||
pub fn union(self) -> Union<'m> {
|
||||
Union::new(self.maps, self.indexes)
|
||||
Union::new(self.maps, self.indexes, self.automatons.len())
|
||||
}
|
||||
|
||||
pub fn intersection(self) -> Intersection<'m> {
|
||||
Intersection::new(self.maps, self.indexes)
|
||||
Intersection::new(self.maps, self.indexes, self.automatons.len())
|
||||
}
|
||||
|
||||
pub fn difference(self) -> Difference<'m> {
|
||||
Difference::new(self.maps, self.indexes)
|
||||
Difference::new(self.maps, self.indexes, self.automatons.len())
|
||||
}
|
||||
|
||||
pub fn symmetric_difference(self) -> SymmetricDifference<'m> {
|
||||
SymmetricDifference::new(self.maps, self.indexes)
|
||||
SymmetricDifference::new(self.maps, self.indexes, self.automatons.len())
|
||||
}
|
||||
}
|
||||
|
||||
@ -94,15 +92,16 @@ macro_rules! logical_operation {
|
||||
pub struct $name<'m> {
|
||||
maps: UnionIndexedValue<'m>,
|
||||
indexes: Vec<&'m DocIndexes>,
|
||||
number_automatons: usize,
|
||||
outs: Vec<IndexedDocIndexes>,
|
||||
}
|
||||
|
||||
impl<'m> $name<'m> {
|
||||
fn new(maps: OpIndexedValueBuilder<'m>, indexes: Vec<&'m DocIndexes>) -> Self
|
||||
{
|
||||
fn new(maps: OpIndexedValueBuilder<'m>, indexes: Vec<&'m DocIndexes>, number_automatons: usize) -> Self {
|
||||
$name {
|
||||
maps: maps.union(),
|
||||
indexes: indexes,
|
||||
number_automatons: number_automatons,
|
||||
outs: Vec::new(),
|
||||
}
|
||||
}
|
||||
@ -116,17 +115,15 @@ impl<'m, 'a> fst::Streamer<'a> for $name<'m> {
|
||||
Some((input, ivalues)) => {
|
||||
self.outs.clear();
|
||||
|
||||
// @Improvement: better use a `Vec` instead,
|
||||
// `aut indexes` follow them selfs
|
||||
let mut builders = HashMap::new();
|
||||
let mut builders = vec![BTreeMap::new(); self.number_automatons];
|
||||
for iv in ivalues {
|
||||
let builder = builders.entry(iv.aut_index).or_insert_with(BTreeMap::new);
|
||||
let builder = &mut builders[iv.aut_index];
|
||||
builder.insert(iv.rdr_index, iv.value);
|
||||
}
|
||||
|
||||
let mut doc_indexes = Vec::new();
|
||||
let mut doc_indexes_slots = Vec::with_capacity(builders.len());
|
||||
for (aut_index, values) in builders.into_iter() {
|
||||
for (aut_index, values) in builders.into_iter().enumerate() {
|
||||
let mut builder = SdOpBuilder::with_capacity(values.len());
|
||||
for (rdr_index, value) in values {
|
||||
let indexes = self.indexes[rdr_index].get(value).expect("could not find indexes");
|
||||
@ -137,14 +134,14 @@ impl<'m, 'a> fst::Streamer<'a> for $name<'m> {
|
||||
let start = doc_indexes.len();
|
||||
builder.$operation().extend_vec(&mut doc_indexes);
|
||||
let len = doc_indexes.len() - start;
|
||||
if len == 0 { continue }
|
||||
|
||||
let slot = SlotIndexedDocIndexes {
|
||||
index: aut_index,
|
||||
start: start,
|
||||
len: len,
|
||||
};
|
||||
doc_indexes_slots.push(slot);
|
||||
if len != 0 {
|
||||
let slot = SlotIndexedDocIndexes {
|
||||
index: aut_index,
|
||||
start: start,
|
||||
len: len,
|
||||
};
|
||||
doc_indexes_slots.push(slot);
|
||||
}
|
||||
}
|
||||
|
||||
let read_only = VecReadOnly::new(doc_indexes);
|
||||
|
@ -38,6 +38,12 @@ pub struct UnionIndexedValue<'f> {
|
||||
cur_slot: Option<SlotIndexedValue>,
|
||||
}
|
||||
|
||||
impl<'f> UnionIndexedValue<'f> {
|
||||
pub fn len(&self) -> usize {
|
||||
self.heap.num_slots()
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a, 'm> fst::Streamer<'a> for UnionIndexedValue<'m> {
|
||||
type Item = (&'a [u8], &'a [IndexedValue]);
|
||||
|
||||
@ -54,7 +60,7 @@ impl<'a, 'm> fst::Streamer<'a> for UnionIndexedValue<'m> {
|
||||
};
|
||||
self.outs.clear();
|
||||
self.outs.push(slot.indexed_value());
|
||||
while let Some(mut slot2) = self.heap.pop_if_equal(slot.input()) {
|
||||
while let Some(slot2) = self.heap.pop_if_equal(slot.input()) {
|
||||
self.outs.push(slot2.indexed_value());
|
||||
self.heap.refill(slot2);
|
||||
}
|
||||
|
@ -14,7 +14,6 @@ use group_by::GroupByMut;
|
||||
use crate::automaton::{DfaExt, AutomatonExt};
|
||||
use crate::metadata::Metadata;
|
||||
use crate::metadata::ops::{OpBuilder, Union};
|
||||
use crate::metadata::doc_indexes::DocIndexes;
|
||||
use crate::{Match, DocumentId};
|
||||
|
||||
use self::{
|
||||
|
Loading…
Reference in New Issue
Block a user