mirror of
https://github.com/meilisearch/MeiliSearch
synced 2024-12-24 21:50:07 +01:00
Fixes for clippy, bringing us down to 18 remaining issues.
This brings us a step closer to enforcing clippy on each build.
This commit is contained in:
parent
004c09a8e2
commit
6b2fe94192
@ -70,7 +70,7 @@ impl FromStr for Member {
|
|||||||
type Err = AscDescError;
|
type Err = AscDescError;
|
||||||
|
|
||||||
fn from_str(text: &str) -> Result<Member, Self::Err> {
|
fn from_str(text: &str) -> Result<Member, Self::Err> {
|
||||||
match text.strip_prefix("_geoPoint(").and_then(|text| text.strip_suffix(")")) {
|
match text.strip_prefix("_geoPoint(").and_then(|text| text.strip_suffix(')')) {
|
||||||
Some(point) => {
|
Some(point) => {
|
||||||
let (lat, lng) = point
|
let (lat, lng) = point
|
||||||
.split_once(',')
|
.split_once(',')
|
||||||
|
@ -60,7 +60,7 @@ impl<W: Write> DocumentsBatchBuilder<W> {
|
|||||||
/// Appends a new JSON object into the batch and updates the `DocumentsBatchIndex` accordingly.
|
/// Appends a new JSON object into the batch and updates the `DocumentsBatchIndex` accordingly.
|
||||||
pub fn append_json_object(&mut self, object: &Object) -> io::Result<()> {
|
pub fn append_json_object(&mut self, object: &Object) -> io::Result<()> {
|
||||||
// Make sure that we insert the fields ids in order as the obkv writer has this requirement.
|
// Make sure that we insert the fields ids in order as the obkv writer has this requirement.
|
||||||
let mut fields_ids: Vec<_> = object.keys().map(|k| self.fields_index.insert(&k)).collect();
|
let mut fields_ids: Vec<_> = object.keys().map(|k| self.fields_index.insert(k)).collect();
|
||||||
fields_ids.sort_unstable();
|
fields_ids.sort_unstable();
|
||||||
|
|
||||||
self.obkv_buffer.clear();
|
self.obkv_buffer.clear();
|
||||||
|
@ -25,9 +25,9 @@ const DOCUMENTS_BATCH_INDEX_KEY: [u8; 8] = u64::MAX.to_be_bytes();
|
|||||||
pub fn obkv_to_object(obkv: &KvReader<FieldId>, index: &DocumentsBatchIndex) -> Result<Object> {
|
pub fn obkv_to_object(obkv: &KvReader<FieldId>, index: &DocumentsBatchIndex) -> Result<Object> {
|
||||||
obkv.iter()
|
obkv.iter()
|
||||||
.map(|(field_id, value)| {
|
.map(|(field_id, value)| {
|
||||||
let field_name = index.name(field_id).ok_or_else(|| {
|
let field_name = index
|
||||||
FieldIdMapMissingEntry::FieldId { field_id, process: "obkv_to_object" }
|
.name(field_id)
|
||||||
})?;
|
.ok_or(FieldIdMapMissingEntry::FieldId { field_id, process: "obkv_to_object" })?;
|
||||||
let value = serde_json::from_slice(value).map_err(InternalError::SerdeJson)?;
|
let value = serde_json::from_slice(value).map_err(InternalError::SerdeJson)?;
|
||||||
Ok((field_name.to_string(), value))
|
Ok((field_name.to_string(), value))
|
||||||
})
|
})
|
||||||
|
@ -65,7 +65,7 @@ impl FieldsIdsMap {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/// Iterate over the ids in the order of the ids.
|
/// Iterate over the ids in the order of the ids.
|
||||||
pub fn ids<'a>(&'a self) -> impl Iterator<Item = FieldId> + 'a {
|
pub fn ids(&'_ self) -> impl Iterator<Item = FieldId> + '_ {
|
||||||
self.ids_names.keys().copied()
|
self.ids_names.keys().copied()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -34,7 +34,7 @@ where
|
|||||||
type EItem = (&'a str, C::EItem);
|
type EItem = (&'a str, C::EItem);
|
||||||
|
|
||||||
fn bytes_encode((string, value): &'a Self::EItem) -> Option<Cow<[u8]>> {
|
fn bytes_encode((string, value): &'a Self::EItem) -> Option<Cow<[u8]>> {
|
||||||
let value_bytes = C::bytes_encode(&value)?;
|
let value_bytes = C::bytes_encode(value)?;
|
||||||
|
|
||||||
let mut bytes = Vec::with_capacity(2 + string.len() + value_bytes.len());
|
let mut bytes = Vec::with_capacity(2 + string.len() + value_bytes.len());
|
||||||
encode_prefix_string(string, &mut bytes).ok()?;
|
encode_prefix_string(string, &mut bytes).ok()?;
|
||||||
|
@ -66,14 +66,14 @@ where
|
|||||||
bytes.extend_from_slice(left.as_bytes());
|
bytes.extend_from_slice(left.as_bytes());
|
||||||
bytes.extend_from_slice(right.as_bytes());
|
bytes.extend_from_slice(right.as_bytes());
|
||||||
|
|
||||||
let value_bytes = C::bytes_encode(&value)?;
|
let value_bytes = C::bytes_encode(value)?;
|
||||||
bytes.extend_from_slice(&value_bytes[..]);
|
bytes.extend_from_slice(&value_bytes[..]);
|
||||||
|
|
||||||
Some(Cow::Owned(bytes))
|
Some(Cow::Owned(bytes))
|
||||||
}
|
}
|
||||||
None => {
|
None => {
|
||||||
bytes.push(0);
|
bytes.push(0);
|
||||||
let value_bytes = C::bytes_encode(&value)?;
|
let value_bytes = C::bytes_encode(value)?;
|
||||||
bytes.extend_from_slice(&value_bytes[..]);
|
bytes.extend_from_slice(&value_bytes[..]);
|
||||||
Some(Cow::Owned(bytes))
|
Some(Cow::Owned(bytes))
|
||||||
}
|
}
|
||||||
|
@ -320,7 +320,7 @@ impl Index {
|
|||||||
/// Writes the documents primary key, this is the field name that is used to store the id.
|
/// Writes the documents primary key, this is the field name that is used to store the id.
|
||||||
pub(crate) fn put_primary_key(&self, wtxn: &mut RwTxn, primary_key: &str) -> heed::Result<()> {
|
pub(crate) fn put_primary_key(&self, wtxn: &mut RwTxn, primary_key: &str) -> heed::Result<()> {
|
||||||
self.set_updated_at(wtxn, &OffsetDateTime::now_utc())?;
|
self.set_updated_at(wtxn, &OffsetDateTime::now_utc())?;
|
||||||
self.main.put::<_, Str, Str>(wtxn, main_key::PRIMARY_KEY_KEY, &primary_key)
|
self.main.put::<_, Str, Str>(wtxn, main_key::PRIMARY_KEY_KEY, primary_key)
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Deletes the primary key of the documents, this can be done to reset indexes settings.
|
/// Deletes the primary key of the documents, this can be done to reset indexes settings.
|
||||||
@ -1013,7 +1013,7 @@ impl Index {
|
|||||||
let kv = self
|
let kv = self
|
||||||
.documents
|
.documents
|
||||||
.get(rtxn, &BEU32::new(id))?
|
.get(rtxn, &BEU32::new(id))?
|
||||||
.ok_or_else(|| UserError::UnknownInternalDocumentId { document_id: id })?;
|
.ok_or(UserError::UnknownInternalDocumentId { document_id: id })?;
|
||||||
documents.push((id, kv));
|
documents.push((id, kv));
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1072,7 +1072,7 @@ impl Index {
|
|||||||
wtxn: &mut RwTxn,
|
wtxn: &mut RwTxn,
|
||||||
time: &OffsetDateTime,
|
time: &OffsetDateTime,
|
||||||
) -> heed::Result<()> {
|
) -> heed::Result<()> {
|
||||||
self.main.put::<_, Str, SerdeJson<OffsetDateTime>>(wtxn, main_key::UPDATED_AT_KEY, &time)
|
self.main.put::<_, Str, SerdeJson<OffsetDateTime>>(wtxn, main_key::UPDATED_AT_KEY, time)
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn authorize_typos(&self, txn: &RoTxn) -> heed::Result<bool> {
|
pub fn authorize_typos(&self, txn: &RoTxn) -> heed::Result<bool> {
|
||||||
|
@ -115,7 +115,7 @@ impl<'t> Criterion for AscDesc<'t> {
|
|||||||
let mut candidates = match (&self.query_tree, candidates) {
|
let mut candidates = match (&self.query_tree, candidates) {
|
||||||
(_, Some(candidates)) => candidates,
|
(_, Some(candidates)) => candidates,
|
||||||
(Some(qt), None) => {
|
(Some(qt), None) => {
|
||||||
let context = CriteriaBuilder::new(&self.rtxn, &self.index)?;
|
let context = CriteriaBuilder::new(self.rtxn, self.index)?;
|
||||||
resolve_query_tree(&context, qt, params.wdcache)?
|
resolve_query_tree(&context, qt, params.wdcache)?
|
||||||
}
|
}
|
||||||
(None, None) => self.index.documents_ids(self.rtxn)?,
|
(None, None) => self.index.documents_ids(self.rtxn)?,
|
||||||
|
@ -89,7 +89,7 @@ impl<'t> Criterion for Attribute<'t> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
let mut set_buckets = match self.set_buckets.as_mut() {
|
let set_buckets = match self.set_buckets.as_mut() {
|
||||||
Some(set_buckets) => set_buckets,
|
Some(set_buckets) => set_buckets,
|
||||||
None => {
|
None => {
|
||||||
let new_buckets = initialize_set_buckets(
|
let new_buckets = initialize_set_buckets(
|
||||||
@ -102,7 +102,7 @@ impl<'t> Criterion for Attribute<'t> {
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
match set_compute_candidates(&mut set_buckets, &allowed_candidates)? {
|
match set_compute_candidates(set_buckets, &allowed_candidates)? {
|
||||||
Some((_score, candidates)) => candidates,
|
Some((_score, candidates)) => candidates,
|
||||||
None => {
|
None => {
|
||||||
return Ok(Some(CriterionResult {
|
return Ok(Some(CriterionResult {
|
||||||
@ -199,18 +199,18 @@ impl<'t> QueryPositionIterator<'t> {
|
|||||||
let iter = ctx.word_position_iterator(word, in_prefix_cache)?;
|
let iter = ctx.word_position_iterator(word, in_prefix_cache)?;
|
||||||
inner.push(iter.peekable());
|
inner.push(iter.peekable());
|
||||||
} else {
|
} else {
|
||||||
for (word, _) in word_derivations(&word, true, 0, ctx.words_fst(), wdcache)?
|
for (word, _) in word_derivations(word, true, 0, ctx.words_fst(), wdcache)?
|
||||||
{
|
{
|
||||||
let iter = ctx.word_position_iterator(&word, in_prefix_cache)?;
|
let iter = ctx.word_position_iterator(word, in_prefix_cache)?;
|
||||||
inner.push(iter.peekable());
|
inner.push(iter.peekable());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
QueryKind::Tolerant { typo, word } => {
|
QueryKind::Tolerant { typo, word } => {
|
||||||
for (word, _) in
|
for (word, _) in
|
||||||
word_derivations(&word, query.prefix, *typo, ctx.words_fst(), wdcache)?
|
word_derivations(word, query.prefix, *typo, ctx.words_fst(), wdcache)?
|
||||||
{
|
{
|
||||||
let iter = ctx.word_position_iterator(&word, in_prefix_cache)?;
|
let iter = ctx.word_position_iterator(word, in_prefix_cache)?;
|
||||||
inner.push(iter.peekable());
|
inner.push(iter.peekable());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -476,8 +476,7 @@ fn initialize_linear_buckets(
|
|||||||
} else {
|
} else {
|
||||||
words_positions
|
words_positions
|
||||||
.get(word)
|
.get(word)
|
||||||
.map(|positions| positions.iter().next())
|
.and_then(|positions| positions.iter().next())
|
||||||
.flatten()
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
QueryKind::Tolerant { typo, word } => {
|
QueryKind::Tolerant { typo, word } => {
|
||||||
@ -574,7 +573,7 @@ fn flatten_query_tree(query_tree: &Operation) -> FlattenedQueryTree {
|
|||||||
if ops.iter().all(|op| op.query().is_some()) {
|
if ops.iter().all(|op| op.query().is_some()) {
|
||||||
vec![vec![ops.iter().flat_map(|op| op.query()).cloned().collect()]]
|
vec![vec![ops.iter().flat_map(|op| op.query()).cloned().collect()]]
|
||||||
} else {
|
} else {
|
||||||
ops.iter().map(recurse).flatten().collect()
|
ops.iter().flat_map(recurse).collect()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
Phrase(words) => {
|
Phrase(words) => {
|
||||||
|
@ -90,7 +90,7 @@ impl Criterion for Geo<'_> {
|
|||||||
let mut candidates = match (&query_tree, candidates) {
|
let mut candidates = match (&query_tree, candidates) {
|
||||||
(_, Some(candidates)) => candidates,
|
(_, Some(candidates)) => candidates,
|
||||||
(Some(qt), None) => {
|
(Some(qt), None) => {
|
||||||
let context = CriteriaBuilder::new(&self.rtxn, &self.index)?;
|
let context = CriteriaBuilder::new(self.rtxn, self.index)?;
|
||||||
resolve_query_tree(&context, qt, params.wdcache)?
|
resolve_query_tree(&context, qt, params.wdcache)?
|
||||||
}
|
}
|
||||||
(None, None) => self.index.documents_ids(self.rtxn)?,
|
(None, None) => self.index.documents_ids(self.rtxn)?,
|
||||||
|
@ -44,7 +44,7 @@ impl<D: Distinct> Criterion for Initial<'_, D> {
|
|||||||
let mut candidates = resolve_query_tree(
|
let mut candidates = resolve_query_tree(
|
||||||
self.ctx,
|
self.ctx,
|
||||||
answer.query_tree.as_ref().unwrap(),
|
answer.query_tree.as_ref().unwrap(),
|
||||||
&mut params.wdcache,
|
params.wdcache,
|
||||||
)?;
|
)?;
|
||||||
|
|
||||||
// Apply the filters on the documents retrieved with the query tree.
|
// Apply the filters on the documents retrieved with the query tree.
|
||||||
|
@ -186,19 +186,19 @@ impl<'c> Context<'c> for CriteriaBuilder<'c> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
fn word_docids(&self, word: &str) -> heed::Result<Option<RoaringBitmap>> {
|
fn word_docids(&self, word: &str) -> heed::Result<Option<RoaringBitmap>> {
|
||||||
self.index.word_docids.get(self.rtxn, &word)
|
self.index.word_docids.get(self.rtxn, word)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn exact_word_docids(&self, word: &str) -> heed::Result<Option<RoaringBitmap>> {
|
fn exact_word_docids(&self, word: &str) -> heed::Result<Option<RoaringBitmap>> {
|
||||||
self.index.exact_word_docids.get(self.rtxn, &word)
|
self.index.exact_word_docids.get(self.rtxn, word)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn word_prefix_docids(&self, word: &str) -> heed::Result<Option<RoaringBitmap>> {
|
fn word_prefix_docids(&self, word: &str) -> heed::Result<Option<RoaringBitmap>> {
|
||||||
self.index.word_prefix_docids.get(self.rtxn, &word)
|
self.index.word_prefix_docids.get(self.rtxn, word)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn exact_word_prefix_docids(&self, word: &str) -> heed::Result<Option<RoaringBitmap>> {
|
fn exact_word_prefix_docids(&self, word: &str) -> heed::Result<Option<RoaringBitmap>> {
|
||||||
self.index.exact_word_prefix_docids.get(self.rtxn, &word)
|
self.index.exact_word_prefix_docids.get(self.rtxn, word)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn word_pair_proximity_docids(
|
fn word_pair_proximity_docids(
|
||||||
@ -321,7 +321,7 @@ impl<'t> CriteriaBuilder<'t> {
|
|||||||
exhaustive_number_hits,
|
exhaustive_number_hits,
|
||||||
distinct,
|
distinct,
|
||||||
)) as Box<dyn Criterion>;
|
)) as Box<dyn Criterion>;
|
||||||
for name in self.index.criteria(&self.rtxn)? {
|
for name in self.index.criteria(self.rtxn)? {
|
||||||
criterion = match name {
|
criterion = match name {
|
||||||
Name::Words => Box::new(Words::new(self, criterion)),
|
Name::Words => Box::new(Words::new(self, criterion)),
|
||||||
Name::Typo => Box::new(Typo::new(self, criterion)),
|
Name::Typo => Box::new(Typo::new(self, criterion)),
|
||||||
@ -330,29 +330,23 @@ impl<'t> CriteriaBuilder<'t> {
|
|||||||
for asc_desc in sort_criteria {
|
for asc_desc in sort_criteria {
|
||||||
criterion = match asc_desc {
|
criterion = match asc_desc {
|
||||||
AscDescName::Asc(Member::Field(field)) => Box::new(AscDesc::asc(
|
AscDescName::Asc(Member::Field(field)) => Box::new(AscDesc::asc(
|
||||||
&self.index,
|
self.index,
|
||||||
&self.rtxn,
|
self.rtxn,
|
||||||
criterion,
|
criterion,
|
||||||
field.to_string(),
|
field.to_string(),
|
||||||
)?),
|
)?),
|
||||||
AscDescName::Desc(Member::Field(field)) => Box::new(AscDesc::desc(
|
AscDescName::Desc(Member::Field(field)) => Box::new(AscDesc::desc(
|
||||||
&self.index,
|
self.index,
|
||||||
&self.rtxn,
|
self.rtxn,
|
||||||
criterion,
|
criterion,
|
||||||
field.to_string(),
|
field.to_string(),
|
||||||
)?),
|
)?),
|
||||||
AscDescName::Asc(Member::Geo(point)) => Box::new(Geo::asc(
|
AscDescName::Asc(Member::Geo(point)) => {
|
||||||
&self.index,
|
Box::new(Geo::asc(self.index, self.rtxn, criterion, *point)?)
|
||||||
&self.rtxn,
|
}
|
||||||
criterion,
|
AscDescName::Desc(Member::Geo(point)) => {
|
||||||
point.clone(),
|
Box::new(Geo::desc(self.index, self.rtxn, criterion, *point)?)
|
||||||
)?),
|
}
|
||||||
AscDescName::Desc(Member::Geo(point)) => Box::new(Geo::desc(
|
|
||||||
&self.index,
|
|
||||||
&self.rtxn,
|
|
||||||
criterion,
|
|
||||||
point.clone(),
|
|
||||||
)?),
|
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
criterion
|
criterion
|
||||||
@ -363,10 +357,10 @@ impl<'t> CriteriaBuilder<'t> {
|
|||||||
Name::Attribute => Box::new(Attribute::new(self, criterion)),
|
Name::Attribute => Box::new(Attribute::new(self, criterion)),
|
||||||
Name::Exactness => Box::new(Exactness::new(self, criterion, &primitive_query)?),
|
Name::Exactness => Box::new(Exactness::new(self, criterion, &primitive_query)?),
|
||||||
Name::Asc(field) => {
|
Name::Asc(field) => {
|
||||||
Box::new(AscDesc::asc(&self.index, &self.rtxn, criterion, field)?)
|
Box::new(AscDesc::asc(self.index, self.rtxn, criterion, field)?)
|
||||||
}
|
}
|
||||||
Name::Desc(field) => {
|
Name::Desc(field) => {
|
||||||
Box::new(AscDesc::desc(&self.index, &self.rtxn, criterion, field)?)
|
Box::new(AscDesc::desc(self.index, self.rtxn, criterion, field)?)
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
@ -408,7 +402,7 @@ pub fn resolve_query_tree(
|
|||||||
}
|
}
|
||||||
Ok(candidates)
|
Ok(candidates)
|
||||||
}
|
}
|
||||||
Phrase(words) => resolve_phrase(ctx, &words),
|
Phrase(words) => resolve_phrase(ctx, words),
|
||||||
Or(_, ops) => {
|
Or(_, ops) => {
|
||||||
let mut candidates = RoaringBitmap::new();
|
let mut candidates = RoaringBitmap::new();
|
||||||
for op in ops {
|
for op in ops {
|
||||||
@ -457,7 +451,7 @@ pub fn resolve_phrase(ctx: &dyn Context, phrase: &[String]) -> Result<RoaringBit
|
|||||||
}
|
}
|
||||||
|
|
||||||
// We sort the bitmaps so that we perform the small intersections first, which is faster.
|
// We sort the bitmaps so that we perform the small intersections first, which is faster.
|
||||||
bitmaps.sort_unstable_by(|a, b| a.len().cmp(&b.len()));
|
bitmaps.sort_unstable_by_key(|a| a.len());
|
||||||
|
|
||||||
for bitmap in bitmaps {
|
for bitmap in bitmaps {
|
||||||
if first_iter {
|
if first_iter {
|
||||||
@ -500,40 +494,40 @@ fn query_docids(
|
|||||||
) -> Result<RoaringBitmap> {
|
) -> Result<RoaringBitmap> {
|
||||||
match &query.kind {
|
match &query.kind {
|
||||||
QueryKind::Exact { word, original_typo } => {
|
QueryKind::Exact { word, original_typo } => {
|
||||||
if query.prefix && ctx.in_prefix_cache(&word) {
|
if query.prefix && ctx.in_prefix_cache(word) {
|
||||||
let mut docids = ctx.word_prefix_docids(&word)?.unwrap_or_default();
|
let mut docids = ctx.word_prefix_docids(word)?.unwrap_or_default();
|
||||||
// only add the exact docids if the word hasn't been derived
|
// only add the exact docids if the word hasn't been derived
|
||||||
if *original_typo == 0 {
|
if *original_typo == 0 {
|
||||||
docids |= ctx.exact_word_prefix_docids(&word)?.unwrap_or_default();
|
docids |= ctx.exact_word_prefix_docids(word)?.unwrap_or_default();
|
||||||
}
|
}
|
||||||
Ok(docids)
|
Ok(docids)
|
||||||
} else if query.prefix {
|
} else if query.prefix {
|
||||||
let words = word_derivations(&word, true, 0, ctx.words_fst(), wdcache)?;
|
let words = word_derivations(word, true, 0, ctx.words_fst(), wdcache)?;
|
||||||
let mut docids = RoaringBitmap::new();
|
let mut docids = RoaringBitmap::new();
|
||||||
for (word, _typo) in words {
|
for (word, _typo) in words {
|
||||||
docids |= ctx.word_docids(&word)?.unwrap_or_default();
|
docids |= ctx.word_docids(word)?.unwrap_or_default();
|
||||||
// only add the exact docids if the word hasn't been derived
|
// only add the exact docids if the word hasn't been derived
|
||||||
if *original_typo == 0 {
|
if *original_typo == 0 {
|
||||||
docids |= ctx.exact_word_docids(&word)?.unwrap_or_default();
|
docids |= ctx.exact_word_docids(word)?.unwrap_or_default();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
Ok(docids)
|
Ok(docids)
|
||||||
} else {
|
} else {
|
||||||
let mut docids = ctx.word_docids(&word)?.unwrap_or_default();
|
let mut docids = ctx.word_docids(word)?.unwrap_or_default();
|
||||||
// only add the exact docids if the word hasn't been derived
|
// only add the exact docids if the word hasn't been derived
|
||||||
if *original_typo == 0 {
|
if *original_typo == 0 {
|
||||||
docids |= ctx.exact_word_docids(&word)?.unwrap_or_default();
|
docids |= ctx.exact_word_docids(word)?.unwrap_or_default();
|
||||||
}
|
}
|
||||||
Ok(docids)
|
Ok(docids)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
QueryKind::Tolerant { typo, word } => {
|
QueryKind::Tolerant { typo, word } => {
|
||||||
let words = word_derivations(&word, query.prefix, *typo, ctx.words_fst(), wdcache)?;
|
let words = word_derivations(word, query.prefix, *typo, ctx.words_fst(), wdcache)?;
|
||||||
let mut docids = RoaringBitmap::new();
|
let mut docids = RoaringBitmap::new();
|
||||||
for (word, typo) in words {
|
for (word, typo) in words {
|
||||||
let mut current_docids = ctx.word_docids(&word)?.unwrap_or_default();
|
let mut current_docids = ctx.word_docids(word)?.unwrap_or_default();
|
||||||
if *typo == 0 {
|
if *typo == 0 {
|
||||||
current_docids |= ctx.exact_word_docids(&word)?.unwrap_or_default()
|
current_docids |= ctx.exact_word_docids(word)?.unwrap_or_default()
|
||||||
}
|
}
|
||||||
docids |= current_docids;
|
docids |= current_docids;
|
||||||
}
|
}
|
||||||
@ -585,7 +579,7 @@ fn query_pair_proximity_docids(
|
|||||||
}
|
}
|
||||||
(QueryKind::Tolerant { typo, word: left }, QueryKind::Exact { word: right, .. }) => {
|
(QueryKind::Tolerant { typo, word: left }, QueryKind::Exact { word: right, .. }) => {
|
||||||
let l_words =
|
let l_words =
|
||||||
word_derivations(&left, false, *typo, ctx.words_fst(), wdcache)?.to_owned();
|
word_derivations(left, false, *typo, ctx.words_fst(), wdcache)?.to_owned();
|
||||||
if prefix {
|
if prefix {
|
||||||
let mut docids = RoaringBitmap::new();
|
let mut docids = RoaringBitmap::new();
|
||||||
for (left, _) in l_words {
|
for (left, _) in l_words {
|
||||||
|
@ -99,7 +99,7 @@ impl<'t> Criterion for Proximity<'t> {
|
|||||||
// use set theory based algorithm
|
// use set theory based algorithm
|
||||||
resolve_candidates(
|
resolve_candidates(
|
||||||
self.ctx,
|
self.ctx,
|
||||||
&query_tree,
|
query_tree,
|
||||||
self.proximity,
|
self.proximity,
|
||||||
&mut self.candidates_cache,
|
&mut self.candidates_cache,
|
||||||
params.wdcache,
|
params.wdcache,
|
||||||
@ -194,7 +194,7 @@ fn resolve_candidates<'t>(
|
|||||||
.map(|w| Query { prefix: false, kind: QueryKind::exact(w.clone()) });
|
.map(|w| Query { prefix: false, kind: QueryKind::exact(w.clone()) });
|
||||||
|
|
||||||
match (most_left, most_right) {
|
match (most_left, most_right) {
|
||||||
(Some(l), Some(r)) => vec![(l, r, resolve_phrase(ctx, &words)?)],
|
(Some(l), Some(r)) => vec![(l, r, resolve_phrase(ctx, words)?)],
|
||||||
_otherwise => Default::default(),
|
_otherwise => Default::default(),
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
@ -496,7 +496,7 @@ fn resolve_plane_sweep_candidates(
|
|||||||
match kind {
|
match kind {
|
||||||
QueryKind::Exact { word, .. } => {
|
QueryKind::Exact { word, .. } => {
|
||||||
if *prefix {
|
if *prefix {
|
||||||
let iter = word_derivations(word, true, 0, &words_positions)
|
let iter = word_derivations(word, true, 0, words_positions)
|
||||||
.flat_map(|positions| positions.iter().map(|p| (p, 0, p)));
|
.flat_map(|positions| positions.iter().map(|p| (p, 0, p)));
|
||||||
result.extend(iter);
|
result.extend(iter);
|
||||||
} else if let Some(positions) = words_positions.get(word) {
|
} else if let Some(positions) = words_positions.get(word) {
|
||||||
@ -504,7 +504,7 @@ fn resolve_plane_sweep_candidates(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
QueryKind::Tolerant { typo, word } => {
|
QueryKind::Tolerant { typo, word } => {
|
||||||
let iter = word_derivations(word, *prefix, *typo, &words_positions)
|
let iter = word_derivations(word, *prefix, *typo, words_positions)
|
||||||
.flat_map(|positions| positions.iter().map(|p| (p, 0, p)));
|
.flat_map(|positions| positions.iter().map(|p| (p, 0, p)));
|
||||||
result.extend(iter);
|
result.extend(iter);
|
||||||
}
|
}
|
||||||
|
@ -69,7 +69,7 @@ impl<'t> Criterion for Typo<'t> {
|
|||||||
let fst = self.ctx.words_fst();
|
let fst = self.ctx.words_fst();
|
||||||
let new_query_tree = match self.typos {
|
let new_query_tree = match self.typos {
|
||||||
typos if typos < MAX_TYPOS_PER_WORD => alterate_query_tree(
|
typos if typos < MAX_TYPOS_PER_WORD => alterate_query_tree(
|
||||||
&fst,
|
fst,
|
||||||
query_tree.clone(),
|
query_tree.clone(),
|
||||||
self.typos,
|
self.typos,
|
||||||
params.wdcache,
|
params.wdcache,
|
||||||
@ -78,7 +78,7 @@ impl<'t> Criterion for Typo<'t> {
|
|||||||
// When typos >= MAX_TYPOS_PER_WORD, no more alteration of the query tree is possible,
|
// When typos >= MAX_TYPOS_PER_WORD, no more alteration of the query tree is possible,
|
||||||
// we keep the altered query tree
|
// we keep the altered query tree
|
||||||
*query_tree = alterate_query_tree(
|
*query_tree = alterate_query_tree(
|
||||||
&fst,
|
fst,
|
||||||
query_tree.clone(),
|
query_tree.clone(),
|
||||||
self.typos,
|
self.typos,
|
||||||
params.wdcache,
|
params.wdcache,
|
||||||
@ -199,7 +199,7 @@ fn alterate_query_tree(
|
|||||||
ops.iter_mut().try_for_each(|op| recurse(words_fst, op, number_typos, wdcache))
|
ops.iter_mut().try_for_each(|op| recurse(words_fst, op, number_typos, wdcache))
|
||||||
}
|
}
|
||||||
// Because Phrases don't allow typos, no alteration can be done.
|
// Because Phrases don't allow typos, no alteration can be done.
|
||||||
Phrase(_words) => return Ok(()),
|
Phrase(_words) => Ok(()),
|
||||||
Operation::Query(q) => {
|
Operation::Query(q) => {
|
||||||
if let QueryKind::Tolerant { typo, word } = &q.kind {
|
if let QueryKind::Tolerant { typo, word } = &q.kind {
|
||||||
// if no typo is allowed we don't call word_derivations function,
|
// if no typo is allowed we don't call word_derivations function,
|
||||||
|
@ -53,10 +53,7 @@ impl<'t> Criterion for Words<'t> {
|
|||||||
None => None,
|
None => None,
|
||||||
};
|
};
|
||||||
|
|
||||||
let bucket_candidates = match self.bucket_candidates.as_mut() {
|
let bucket_candidates = self.bucket_candidates.as_mut().map(take);
|
||||||
Some(bucket_candidates) => Some(take(bucket_candidates)),
|
|
||||||
None => None,
|
|
||||||
};
|
|
||||||
|
|
||||||
return Ok(Some(CriterionResult {
|
return Ok(Some(CriterionResult {
|
||||||
query_tree: Some(query_tree),
|
query_tree: Some(query_tree),
|
||||||
|
@ -66,7 +66,7 @@ impl<'a> FacetDistribution<'a> {
|
|||||||
) -> heed::Result<()> {
|
) -> heed::Result<()> {
|
||||||
match facet_type {
|
match facet_type {
|
||||||
FacetType::Number => {
|
FacetType::Number => {
|
||||||
let mut key_buffer: Vec<_> = field_id.to_be_bytes().iter().copied().collect();
|
let mut key_buffer: Vec<_> = field_id.to_be_bytes().to_vec();
|
||||||
|
|
||||||
let distribution_prelength = distribution.len();
|
let distribution_prelength = distribution.len();
|
||||||
let db = self.index.field_id_docid_facet_f64s;
|
let db = self.index.field_id_docid_facet_f64s;
|
||||||
@ -91,7 +91,7 @@ impl<'a> FacetDistribution<'a> {
|
|||||||
}
|
}
|
||||||
FacetType::String => {
|
FacetType::String => {
|
||||||
let mut normalized_distribution = BTreeMap::new();
|
let mut normalized_distribution = BTreeMap::new();
|
||||||
let mut key_buffer: Vec<_> = field_id.to_be_bytes().iter().copied().collect();
|
let mut key_buffer: Vec<_> = field_id.to_be_bytes().to_vec();
|
||||||
|
|
||||||
let db = self.index.field_id_docid_facet_strings;
|
let db = self.index.field_id_docid_facet_strings;
|
||||||
for docid in candidates.into_iter() {
|
for docid in candidates.into_iter() {
|
||||||
|
@ -96,7 +96,7 @@ impl<'a> Filter<'a> {
|
|||||||
Either::Left(array) => {
|
Either::Left(array) => {
|
||||||
let mut ors = vec![];
|
let mut ors = vec![];
|
||||||
for rule in array {
|
for rule in array {
|
||||||
if let Some(filter) = Self::from_str(rule.as_ref())? {
|
if let Some(filter) = Self::from_str(rule)? {
|
||||||
ors.push(filter.condition);
|
ors.push(filter.condition);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -108,7 +108,7 @@ impl<'a> Filter<'a> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
Either::Right(rule) => {
|
Either::Right(rule) => {
|
||||||
if let Some(filter) = Self::from_str(rule.as_ref())? {
|
if let Some(filter) = Self::from_str(rule)? {
|
||||||
ands.push(filter.condition);
|
ands.push(filter.condition);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -358,7 +358,7 @@ impl<'a> Filter<'a> {
|
|||||||
index,
|
index,
|
||||||
filterable_fields,
|
filterable_fields,
|
||||||
)?;
|
)?;
|
||||||
return Ok(all_ids - selected);
|
Ok(all_ids - selected)
|
||||||
}
|
}
|
||||||
FilterCondition::In { fid, els } => {
|
FilterCondition::In { fid, els } => {
|
||||||
if crate::is_faceted(fid.value(), filterable_fields) {
|
if crate::is_faceted(fid.value(), filterable_fields) {
|
||||||
@ -377,38 +377,36 @@ impl<'a> Filter<'a> {
|
|||||||
Ok(RoaringBitmap::new())
|
Ok(RoaringBitmap::new())
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
return Err(fid.as_external_error(FilterError::AttributeNotFilterable {
|
Err(fid.as_external_error(FilterError::AttributeNotFilterable {
|
||||||
attribute: fid.value(),
|
attribute: fid.value(),
|
||||||
filterable_fields: filterable_fields.clone(),
|
filterable_fields: filterable_fields.clone(),
|
||||||
}))?;
|
}))?
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
FilterCondition::Condition { fid, op } => {
|
FilterCondition::Condition { fid, op } => {
|
||||||
if crate::is_faceted(fid.value(), filterable_fields) {
|
if crate::is_faceted(fid.value(), filterable_fields) {
|
||||||
let field_ids_map = index.fields_ids_map(rtxn)?;
|
let field_ids_map = index.fields_ids_map(rtxn)?;
|
||||||
if let Some(fid) = field_ids_map.id(fid.value()) {
|
if let Some(fid) = field_ids_map.id(fid.value()) {
|
||||||
Self::evaluate_operator(rtxn, index, fid, &op)
|
Self::evaluate_operator(rtxn, index, fid, op)
|
||||||
} else {
|
} else {
|
||||||
return Ok(RoaringBitmap::new());
|
Ok(RoaringBitmap::new())
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
match fid.lexeme() {
|
match fid.lexeme() {
|
||||||
attribute @ "_geo" => {
|
attribute @ "_geo" => {
|
||||||
return Err(fid.as_external_error(FilterError::BadGeo(attribute)))?;
|
Err(fid.as_external_error(FilterError::BadGeo(attribute)))?
|
||||||
}
|
}
|
||||||
attribute if attribute.starts_with("_geoPoint(") => {
|
attribute if attribute.starts_with("_geoPoint(") => {
|
||||||
return Err(fid.as_external_error(FilterError::BadGeo("_geoPoint")))?;
|
Err(fid.as_external_error(FilterError::BadGeo("_geoPoint")))?
|
||||||
}
|
}
|
||||||
attribute @ "_geoDistance" => {
|
attribute @ "_geoDistance" => {
|
||||||
return Err(fid.as_external_error(FilterError::Reserved(attribute)))?;
|
Err(fid.as_external_error(FilterError::Reserved(attribute)))?
|
||||||
}
|
}
|
||||||
attribute => {
|
attribute => {
|
||||||
return Err(fid.as_external_error(
|
Err(fid.as_external_error(FilterError::AttributeNotFilterable {
|
||||||
FilterError::AttributeNotFilterable {
|
|
||||||
attribute,
|
attribute,
|
||||||
filterable_fields: filterable_fields.clone(),
|
filterable_fields: filterable_fields.clone(),
|
||||||
},
|
}))?
|
||||||
))?;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -477,10 +475,10 @@ impl<'a> Filter<'a> {
|
|||||||
|
|
||||||
Ok(result)
|
Ok(result)
|
||||||
} else {
|
} else {
|
||||||
return Err(point[0].as_external_error(FilterError::AttributeNotFilterable {
|
Err(point[0].as_external_error(FilterError::AttributeNotFilterable {
|
||||||
attribute: "_geo",
|
attribute: "_geo",
|
||||||
filterable_fields: filterable_fields.clone(),
|
filterable_fields: filterable_fields.clone(),
|
||||||
}))?;
|
}))?
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -44,7 +44,7 @@ impl<'a> Iterator for MatchesIter<'a, '_> {
|
|||||||
|
|
||||||
fn next(&mut self) -> Option<Self::Item> {
|
fn next(&mut self) -> Option<Self::Item> {
|
||||||
match self.inner.next() {
|
match self.inner.next() {
|
||||||
Some((matching_words, ids)) => match matching_words[0].match_token(&self.token) {
|
Some((matching_words, ids)) => match matching_words[0].match_token(self.token) {
|
||||||
Some(char_len) => {
|
Some(char_len) => {
|
||||||
if matching_words.len() > 1 {
|
if matching_words.len() > 1 {
|
||||||
Some(MatchType::Partial(PartialMatch {
|
Some(MatchType::Partial(PartialMatch {
|
||||||
|
@ -49,16 +49,16 @@ impl<'a, A> MatcherBuilder<'a, A> {
|
|||||||
pub fn build<'t, 'm>(&'m self, text: &'t str) -> Matcher<'t, 'm, A> {
|
pub fn build<'t, 'm>(&'m self, text: &'t str) -> Matcher<'t, 'm, A> {
|
||||||
let crop_marker = match &self.crop_marker {
|
let crop_marker = match &self.crop_marker {
|
||||||
Some(marker) => marker.as_str(),
|
Some(marker) => marker.as_str(),
|
||||||
None => &DEFAULT_CROP_MARKER,
|
None => DEFAULT_CROP_MARKER,
|
||||||
};
|
};
|
||||||
|
|
||||||
let highlight_prefix = match &self.highlight_prefix {
|
let highlight_prefix = match &self.highlight_prefix {
|
||||||
Some(marker) => marker.as_str(),
|
Some(marker) => marker.as_str(),
|
||||||
None => &DEFAULT_HIGHLIGHT_PREFIX,
|
None => DEFAULT_HIGHLIGHT_PREFIX,
|
||||||
};
|
};
|
||||||
let highlight_suffix = match &self.highlight_suffix {
|
let highlight_suffix = match &self.highlight_suffix {
|
||||||
Some(marker) => marker.as_str(),
|
Some(marker) => marker.as_str(),
|
||||||
None => &DEFAULT_HIGHLIGHT_SUFFIX,
|
None => DEFAULT_HIGHLIGHT_SUFFIX,
|
||||||
};
|
};
|
||||||
Matcher {
|
Matcher {
|
||||||
text,
|
text,
|
||||||
@ -95,7 +95,7 @@ pub struct Match {
|
|||||||
token_position: usize,
|
token_position: usize,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Serialize, Debug, Clone, PartialEq)]
|
#[derive(Serialize, Debug, Clone, PartialEq, Eq)]
|
||||||
pub struct MatchBounds {
|
pub struct MatchBounds {
|
||||||
pub start: usize,
|
pub start: usize,
|
||||||
pub length: usize,
|
pub length: usize,
|
||||||
@ -131,7 +131,7 @@ impl<'t, A: AsRef<[u8]>> Matcher<'t, '_, A> {
|
|||||||
potential_matches.push((token_position, word_position, partial.char_len()));
|
potential_matches.push((token_position, word_position, partial.char_len()));
|
||||||
|
|
||||||
for (token_position, word_position, word) in words_positions {
|
for (token_position, word_position, word) in words_positions {
|
||||||
partial = match partial.match_token(&word) {
|
partial = match partial.match_token(word) {
|
||||||
// token matches the partial match, but the match is not full,
|
// token matches the partial match, but the match is not full,
|
||||||
// we temporarly save the current token then we try to match the next one.
|
// we temporarly save the current token then we try to match the next one.
|
||||||
Some(MatchType::Partial(partial)) => {
|
Some(MatchType::Partial(partial)) => {
|
||||||
|
@ -188,8 +188,8 @@ impl<'a> Context for QueryTreeBuilder<'a> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
fn min_word_len_for_typo(&self) -> heed::Result<(u8, u8)> {
|
fn min_word_len_for_typo(&self) -> heed::Result<(u8, u8)> {
|
||||||
let one = self.index.min_word_len_one_typo(&self.rtxn)?;
|
let one = self.index.min_word_len_one_typo(self.rtxn)?;
|
||||||
let two = self.index.min_word_len_two_typos(&self.rtxn)?;
|
let two = self.index.min_word_len_two_typos(self.rtxn)?;
|
||||||
Ok((one, two))
|
Ok((one, two))
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -207,7 +207,7 @@ impl<'a> Context for QueryTreeBuilder<'a> {
|
|||||||
self.index
|
self.index
|
||||||
.word_pair_proximity_docids
|
.word_pair_proximity_docids
|
||||||
.remap_data_type::<CboRoaringBitmapLenCodec>()
|
.remap_data_type::<CboRoaringBitmapLenCodec>()
|
||||||
.get(&self.rtxn, &key)
|
.get(self.rtxn, &key)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -313,7 +313,7 @@ pub struct TypoConfig<'a> {
|
|||||||
|
|
||||||
/// Return the `QueryKind` of a word depending on `authorize_typos`
|
/// Return the `QueryKind` of a word depending on `authorize_typos`
|
||||||
/// and the provided word length.
|
/// and the provided word length.
|
||||||
fn typos<'a>(word: String, authorize_typos: bool, config: TypoConfig<'a>) -> QueryKind {
|
fn typos(word: String, authorize_typos: bool, config: TypoConfig<'_>) -> QueryKind {
|
||||||
if authorize_typos && !config.exact_words.map_or(false, |s| s.contains(&word)) {
|
if authorize_typos && !config.exact_words.map_or(false, |s| s.contains(&word)) {
|
||||||
let count = word.chars().count().min(u8::MAX as usize) as u8;
|
let count = word.chars().count().min(u8::MAX as usize) as u8;
|
||||||
if count < config.word_len_one_typo {
|
if count < config.word_len_one_typo {
|
||||||
@ -556,7 +556,7 @@ fn create_matching_words(
|
|||||||
for synonym in synonyms {
|
for synonym in synonyms {
|
||||||
let synonym = synonym
|
let synonym = synonym
|
||||||
.into_iter()
|
.into_iter()
|
||||||
.map(|syn| MatchingWord::new(syn.to_string(), 0, false))
|
.map(|syn| MatchingWord::new(syn, 0, false))
|
||||||
.collect();
|
.collect();
|
||||||
matching_words.push((synonym, vec![id]));
|
matching_words.push((synonym, vec![id]));
|
||||||
}
|
}
|
||||||
@ -583,8 +583,7 @@ fn create_matching_words(
|
|||||||
PrimitiveQueryPart::Phrase(words) => {
|
PrimitiveQueryPart::Phrase(words) => {
|
||||||
let ids: Vec<_> =
|
let ids: Vec<_> =
|
||||||
(0..words.len()).into_iter().map(|i| id + i as PrimitiveWordId).collect();
|
(0..words.len()).into_iter().map(|i| id + i as PrimitiveWordId).collect();
|
||||||
let words =
|
let words = words.into_iter().map(|w| MatchingWord::new(w, 0, false)).collect();
|
||||||
words.into_iter().map(|w| MatchingWord::new(w.to_string(), 0, false)).collect();
|
|
||||||
matching_words.push((words, ids));
|
matching_words.push((words, ids));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -639,7 +638,7 @@ fn create_matching_words(
|
|||||||
for synonym in synonyms {
|
for synonym in synonyms {
|
||||||
let synonym = synonym
|
let synonym = synonym
|
||||||
.into_iter()
|
.into_iter()
|
||||||
.map(|syn| MatchingWord::new(syn.to_string(), 0, false))
|
.map(|syn| MatchingWord::new(syn, 0, false))
|
||||||
.collect();
|
.collect();
|
||||||
matching_words.push((synonym, ids.clone()));
|
matching_words.push((synonym, ids.clone()));
|
||||||
}
|
}
|
||||||
|
@ -127,7 +127,7 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> {
|
|||||||
// the `soft_deleted_documents_ids` bitmap and early exit.
|
// the `soft_deleted_documents_ids` bitmap and early exit.
|
||||||
let size_used = self.index.used_size()?;
|
let size_used = self.index.used_size()?;
|
||||||
let map_size = self.index.env.map_size()? as u64;
|
let map_size = self.index.env.map_size()? as u64;
|
||||||
let nb_documents = self.index.number_of_documents(&self.wtxn)?;
|
let nb_documents = self.index.number_of_documents(self.wtxn)?;
|
||||||
let nb_soft_deleted = soft_deleted_docids.len();
|
let nb_soft_deleted = soft_deleted_docids.len();
|
||||||
|
|
||||||
let percentage_available = 100 - (size_used * 100 / map_size);
|
let percentage_available = 100 - (size_used * 100 / map_size);
|
||||||
@ -158,11 +158,10 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> {
|
|||||||
// and we can reset the soft deleted bitmap
|
// and we can reset the soft deleted bitmap
|
||||||
self.index.put_soft_deleted_documents_ids(self.wtxn, &RoaringBitmap::new())?;
|
self.index.put_soft_deleted_documents_ids(self.wtxn, &RoaringBitmap::new())?;
|
||||||
|
|
||||||
let primary_key = self.index.primary_key(self.wtxn)?.ok_or_else(|| {
|
let primary_key =
|
||||||
InternalError::DatabaseMissingEntry {
|
self.index.primary_key(self.wtxn)?.ok_or(InternalError::DatabaseMissingEntry {
|
||||||
db_name: db_name::MAIN,
|
db_name: db_name::MAIN,
|
||||||
key: Some(main_key::PRIMARY_KEY_KEY),
|
key: Some(main_key::PRIMARY_KEY_KEY),
|
||||||
}
|
|
||||||
})?;
|
})?;
|
||||||
|
|
||||||
// Since we already checked if the DB was empty, if we can't find the primary key, then
|
// Since we already checked if the DB was empty, if we can't find the primary key, then
|
||||||
@ -433,7 +432,7 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> {
|
|||||||
.map(|point| (point, point.data.0))
|
.map(|point| (point, point.data.0))
|
||||||
.unzip();
|
.unzip();
|
||||||
points_to_remove.iter().for_each(|point| {
|
points_to_remove.iter().for_each(|point| {
|
||||||
rtree.remove(&point);
|
rtree.remove(point);
|
||||||
});
|
});
|
||||||
geo_faceted_doc_ids -= docids_to_remove;
|
geo_faceted_doc_ids -= docids_to_remove;
|
||||||
|
|
||||||
@ -534,7 +533,7 @@ fn remove_from_word_docids(
|
|||||||
// We create an iterator to be able to get the content and delete the word docids.
|
// We create an iterator to be able to get the content and delete the word docids.
|
||||||
// It's faster to acquire a cursor to get and delete or put, as we avoid traversing
|
// It's faster to acquire a cursor to get and delete or put, as we avoid traversing
|
||||||
// the LMDB B-Tree two times but only once.
|
// the LMDB B-Tree two times but only once.
|
||||||
let mut iter = db.prefix_iter_mut(txn, &word)?;
|
let mut iter = db.prefix_iter_mut(txn, word)?;
|
||||||
if let Some((key, mut docids)) = iter.next().transpose()? {
|
if let Some((key, mut docids)) = iter.next().transpose()? {
|
||||||
if key == word {
|
if key == word {
|
||||||
let previous_len = docids.len();
|
let previous_len = docids.len();
|
||||||
@ -597,7 +596,7 @@ fn remove_docids_from_facet_field_id_string_docids<'a, C, D>(
|
|||||||
// level key. We must then parse the value using the appropriate codec.
|
// level key. We must then parse the value using the appropriate codec.
|
||||||
let (group, mut docids) =
|
let (group, mut docids) =
|
||||||
FacetStringZeroBoundsValueCodec::<CboRoaringBitmapCodec>::bytes_decode(val)
|
FacetStringZeroBoundsValueCodec::<CboRoaringBitmapCodec>::bytes_decode(val)
|
||||||
.ok_or_else(|| SerializationError::Decoding { db_name })?;
|
.ok_or(SerializationError::Decoding { db_name })?;
|
||||||
|
|
||||||
let previous_len = docids.len();
|
let previous_len = docids.len();
|
||||||
docids -= to_remove;
|
docids -= to_remove;
|
||||||
@ -609,7 +608,7 @@ fn remove_docids_from_facet_field_id_string_docids<'a, C, D>(
|
|||||||
let val = &(group, docids);
|
let val = &(group, docids);
|
||||||
let value_bytes =
|
let value_bytes =
|
||||||
FacetStringZeroBoundsValueCodec::<CboRoaringBitmapCodec>::bytes_encode(val)
|
FacetStringZeroBoundsValueCodec::<CboRoaringBitmapCodec>::bytes_encode(val)
|
||||||
.ok_or_else(|| SerializationError::Encoding { db_name })?;
|
.ok_or(SerializationError::Encoding { db_name })?;
|
||||||
|
|
||||||
// safety: we don't keep references from inside the LMDB database.
|
// safety: we don't keep references from inside the LMDB database.
|
||||||
unsafe { iter.put_current(&key, &value_bytes)? };
|
unsafe { iter.put_current(&key, &value_bytes)? };
|
||||||
@ -619,7 +618,7 @@ fn remove_docids_from_facet_field_id_string_docids<'a, C, D>(
|
|||||||
// The key corresponds to a level zero facet string.
|
// The key corresponds to a level zero facet string.
|
||||||
let (original_value, mut docids) =
|
let (original_value, mut docids) =
|
||||||
FacetStringLevelZeroValueCodec::bytes_decode(val)
|
FacetStringLevelZeroValueCodec::bytes_decode(val)
|
||||||
.ok_or_else(|| SerializationError::Decoding { db_name })?;
|
.ok_or(SerializationError::Decoding { db_name })?;
|
||||||
|
|
||||||
let previous_len = docids.len();
|
let previous_len = docids.len();
|
||||||
docids -= to_remove;
|
docids -= to_remove;
|
||||||
@ -630,7 +629,7 @@ fn remove_docids_from_facet_field_id_string_docids<'a, C, D>(
|
|||||||
let key = key.to_owned();
|
let key = key.to_owned();
|
||||||
let val = &(original_value, docids);
|
let val = &(original_value, docids);
|
||||||
let value_bytes = FacetStringLevelZeroValueCodec::bytes_encode(val)
|
let value_bytes = FacetStringLevelZeroValueCodec::bytes_encode(val)
|
||||||
.ok_or_else(|| SerializationError::Encoding { db_name })?;
|
.ok_or(SerializationError::Encoding { db_name })?;
|
||||||
|
|
||||||
// safety: we don't keep references from inside the LMDB database.
|
// safety: we don't keep references from inside the LMDB database.
|
||||||
unsafe { iter.put_current(&key, &value_bytes)? };
|
unsafe { iter.put_current(&key, &value_bytes)? };
|
||||||
|
@ -262,8 +262,8 @@ impl<'t, 'u, 'i> Facets<'t, 'u, 'i> {
|
|||||||
/// 1. a vector of grenad::Reader. The reader at index `i` corresponds to the elements of level `i + 1`
|
/// 1. a vector of grenad::Reader. The reader at index `i` corresponds to the elements of level `i + 1`
|
||||||
/// that must be inserted into the database.
|
/// that must be inserted into the database.
|
||||||
/// 2. a roaring bitmap of all the document ids present in the database
|
/// 2. a roaring bitmap of all the document ids present in the database
|
||||||
fn compute_facet_number_levels<'t>(
|
fn compute_facet_number_levels(
|
||||||
rtxn: &'t heed::RoTxn,
|
rtxn: &'_ heed::RoTxn,
|
||||||
db: heed::Database<FacetLevelValueF64Codec, CboRoaringBitmapCodec>,
|
db: heed::Database<FacetLevelValueF64Codec, CboRoaringBitmapCodec>,
|
||||||
compression_type: CompressionType,
|
compression_type: CompressionType,
|
||||||
compression_level: Option<u32>,
|
compression_level: Option<u32>,
|
||||||
@ -496,7 +496,7 @@ where
|
|||||||
bitmaps.clear();
|
bitmaps.clear();
|
||||||
}
|
}
|
||||||
// level 0 is already stored in the DB
|
// level 0 is already stored in the DB
|
||||||
return Ok(vec![]);
|
Ok(vec![])
|
||||||
} else {
|
} else {
|
||||||
// level >= 1
|
// level >= 1
|
||||||
// we compute each element of this level based on the elements of the level below it
|
// we compute each element of this level based on the elements of the level below it
|
||||||
@ -562,7 +562,7 @@ where
|
|||||||
}
|
}
|
||||||
|
|
||||||
sub_writers.push(writer_into_reader(cur_writer)?);
|
sub_writers.push(writer_into_reader(cur_writer)?);
|
||||||
return Ok(sub_writers);
|
Ok(sub_writers)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -598,7 +598,7 @@ fn write_number_entry(
|
|||||||
) -> Result<()> {
|
) -> Result<()> {
|
||||||
let key = (field_id, level, left, right);
|
let key = (field_id, level, left, right);
|
||||||
let key = FacetLevelValueF64Codec::bytes_encode(&key).ok_or(Error::Encoding)?;
|
let key = FacetLevelValueF64Codec::bytes_encode(&key).ok_or(Error::Encoding)?;
|
||||||
let data = CboRoaringBitmapCodec::bytes_encode(&ids).ok_or(Error::Encoding)?;
|
let data = CboRoaringBitmapCodec::bytes_encode(ids).ok_or(Error::Encoding)?;
|
||||||
writer.insert(&key, &data)?;
|
writer.insert(&key, &data)?;
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
@ -140,7 +140,7 @@ fn fetch_or_generate_document_id(
|
|||||||
}
|
}
|
||||||
None => Ok(Err(UserError::MissingDocumentId {
|
None => Ok(Err(UserError::MissingDocumentId {
|
||||||
primary_key: primary_key.to_string(),
|
primary_key: primary_key.to_string(),
|
||||||
document: obkv_to_object(&document, &documents_batch_index)?,
|
document: obkv_to_object(document, documents_batch_index)?,
|
||||||
})),
|
})),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -156,7 +156,7 @@ fn fetch_or_generate_document_id(
|
|||||||
if matching_documents_ids.len() >= 2 {
|
if matching_documents_ids.len() >= 2 {
|
||||||
return Ok(Err(UserError::TooManyDocumentIds {
|
return Ok(Err(UserError::TooManyDocumentIds {
|
||||||
primary_key: nested.name().to_string(),
|
primary_key: nested.name().to_string(),
|
||||||
document: obkv_to_object(&document, &documents_batch_index)?,
|
document: obkv_to_object(document, documents_batch_index)?,
|
||||||
}));
|
}));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -170,7 +170,7 @@ fn fetch_or_generate_document_id(
|
|||||||
},
|
},
|
||||||
None => Ok(Err(UserError::MissingDocumentId {
|
None => Ok(Err(UserError::MissingDocumentId {
|
||||||
primary_key: nested.name().to_string(),
|
primary_key: nested.name().to_string(),
|
||||||
document: obkv_to_object(&document, &documents_batch_index)?,
|
document: obkv_to_object(document, documents_batch_index)?,
|
||||||
})),
|
})),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -313,7 +313,7 @@ pub fn validate_document_id_value(document_id: Value) -> Result<StdResult<String
|
|||||||
None => Ok(Err(UserError::InvalidDocumentId { document_id: Value::String(string) })),
|
None => Ok(Err(UserError::InvalidDocumentId { document_id: Value::String(string) })),
|
||||||
},
|
},
|
||||||
Value::Number(number) if number.is_i64() => Ok(Ok(number.to_string())),
|
Value::Number(number) if number.is_i64() => Ok(Ok(number.to_string())),
|
||||||
content => Ok(Err(UserError::InvalidDocumentId { document_id: content.clone() })),
|
content => Ok(Err(UserError::InvalidDocumentId { document_id: content })),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -132,7 +132,7 @@ fn json_to_string<'a>(value: &'a Value, buffer: &'a mut String) -> Option<&'a st
|
|||||||
}
|
}
|
||||||
|
|
||||||
if let Value::String(string) = value {
|
if let Value::String(string) = value {
|
||||||
Some(&string)
|
Some(string)
|
||||||
} else if inner(value, buffer) {
|
} else if inner(value, buffer) {
|
||||||
Some(buffer)
|
Some(buffer)
|
||||||
} else {
|
} else {
|
||||||
|
@ -67,7 +67,7 @@ pub fn extract_fid_docid_facet_values<R: io::Read + io::Seek>(
|
|||||||
facet_exists_docids.entry(field_id).or_default().insert(document);
|
facet_exists_docids.entry(field_id).or_default().insert(document);
|
||||||
|
|
||||||
// For the other extraction tasks, prefix the key with the field_id and the document_id
|
// For the other extraction tasks, prefix the key with the field_id and the document_id
|
||||||
key_buffer.extend_from_slice(&docid_bytes);
|
key_buffer.extend_from_slice(docid_bytes);
|
||||||
|
|
||||||
let value =
|
let value =
|
||||||
serde_json::from_slice(field_bytes).map_err(InternalError::SerdeJson)?;
|
serde_json::from_slice(field_bytes).map_err(InternalError::SerdeJson)?;
|
||||||
@ -107,8 +107,8 @@ pub fn extract_fid_docid_facet_values<R: io::Read + io::Seek>(
|
|||||||
let facet_exists_docids_reader = writer_into_reader(facet_exists_docids_writer)?;
|
let facet_exists_docids_reader = writer_into_reader(facet_exists_docids_writer)?;
|
||||||
|
|
||||||
Ok((
|
Ok((
|
||||||
sorter_into_reader(fid_docid_facet_numbers_sorter, indexer.clone())?,
|
sorter_into_reader(fid_docid_facet_numbers_sorter, indexer)?,
|
||||||
sorter_into_reader(fid_docid_facet_strings_sorter, indexer.clone())?,
|
sorter_into_reader(fid_docid_facet_strings_sorter, indexer)?,
|
||||||
facet_exists_docids_reader,
|
facet_exists_docids_reader,
|
||||||
))
|
))
|
||||||
}
|
}
|
||||||
|
@ -150,7 +150,7 @@ pub(crate) fn data_from_obkv_documents(
|
|||||||
spawn_extraction_task::<_, _, Vec<grenad::Reader<File>>>(
|
spawn_extraction_task::<_, _, Vec<grenad::Reader<File>>>(
|
||||||
docid_fid_facet_numbers_chunks,
|
docid_fid_facet_numbers_chunks,
|
||||||
indexer,
|
indexer,
|
||||||
lmdb_writer_sx.clone(),
|
lmdb_writer_sx,
|
||||||
extract_facet_number_docids,
|
extract_facet_number_docids,
|
||||||
merge_cbo_roaring_bitmaps,
|
merge_cbo_roaring_bitmaps,
|
||||||
TypedChunk::FieldIdFacetNumberDocids,
|
TypedChunk::FieldIdFacetNumberDocids,
|
||||||
|
Loading…
x
Reference in New Issue
Block a user