Fix the tests and remaining warnings

This commit is contained in:
Clément Renault 2021-05-03 15:58:47 +02:00 committed by Kerollmops
parent 02c655ff1a
commit 3a4a150ef0
No known key found for this signature in database
GPG Key ID: 92ADA4E935E71FA4
9 changed files with 88 additions and 89 deletions

View File

@ -1,4 +1,3 @@
use std::collections::HashMap;
use std::mem::take;
use anyhow::Context;
@ -7,11 +6,10 @@ use log::debug;
use ordered_float::OrderedFloat;
use roaring::RoaringBitmap;
use crate::facet::FacetType;
use crate::search::criteria::{resolve_query_tree, CriteriaBuilder};
use crate::search::facet::FacetIter;
use crate::search::query_tree::Operation;
use crate::{FieldsIdsMap, FieldId, Index};
use crate::{FieldId, Index};
use super::{Criterion, CriterionParameters, CriterionResult};
/// Threshold on the number of candidates that will make
@ -119,7 +117,6 @@ impl<'t> Criterion for AscDesc<'t> {
self.index,
self.rtxn,
self.field_id,
self.facet_type,
self.ascending,
candidates,
)?;
@ -141,20 +138,6 @@ impl<'t> Criterion for AscDesc<'t> {
}
}
/// Resolves the id and the facet type of the field named `field`.
///
/// The id is looked up in `fields_ids_map` and the facet type in `faceted_fields`.
///
/// # Errors
///
/// Returns an error with the context `field ... isn't registered` when
/// `fields_ids_map` has no id for `field`, and with the context
/// `field ... isn't faceted` when `faceted_fields` has no entry for it.
fn field_id_facet_type(
fields_ids_map: &FieldsIdsMap,
faceted_fields: &HashMap<String, FacetType>,
field: &str,
) -> anyhow::Result<(FieldId, FacetType)> {
let id = fields_ids_map
.id(field)
.with_context(|| format!("field {:?} isn't registered", field))?;
let facet_type = faceted_fields
.get(field)
.with_context(|| format!("field {:?} isn't faceted", field))?;
Ok((id, *facet_type))
}
/// Returns an iterator over groups of the given candidates in ascending or descending order.
///
/// It will either use an iterative or a recursive method on the whole facet database depending

View File

@ -189,23 +189,21 @@ impl<'a> Distinct<'_> for FacetDistinct<'a> {
#[cfg(test)]
mod test {
use std::collections::HashMap;
use std::collections::HashSet;
use super::super::test::{generate_index, validate_distinct_candidates};
use super::*;
use crate::facet::FacetType;
macro_rules! test_facet_distinct {
($name:ident, $distinct:literal, $facet_type:expr) => {
($name:ident, $distinct:literal) => {
#[test]
fn $name() {
use std::iter::FromIterator;
let facets =
HashMap::from_iter(Some(($distinct.to_string(), $facet_type.to_string())));
let facets = HashSet::from_iter(Some(($distinct.to_string())));
let (index, fid, candidates) = generate_index($distinct, facets);
let txn = index.read_txn().unwrap();
let mut map_distinct = FacetDistinct::new(fid, &index, &txn, $facet_type);
let mut map_distinct = FacetDistinct::new(fid, &index, &txn);
let excluded = RoaringBitmap::new();
let mut iter = map_distinct.distinct(candidates.clone(), excluded);
let count = validate_distinct_candidates(iter.by_ref(), fid, &index);
@ -215,7 +213,7 @@ mod test {
};
}
test_facet_distinct!(test_string, "txt", FacetType::String);
test_facet_distinct!(test_strings, "txts", FacetType::String);
test_facet_distinct!(test_number, "cat-int", FacetType::Number);
test_facet_distinct!(test_string, "txt");
test_facet_distinct!(test_strings, "txts");
test_facet_distinct!(test_number, "cat-int");
}

View File

@ -110,7 +110,7 @@ impl<'a, 'b> Distinct<'b> for MapDistinct<'a> {
#[cfg(test)]
mod test {
use std::collections::HashMap;
use std::collections::HashSet;
use super::*;
use super::super::test::{generate_index, validate_distinct_candidates};
@ -119,7 +119,7 @@ mod test {
($name:ident, $distinct:literal) => {
#[test]
fn $name() {
let (index, fid, candidates) = generate_index($distinct, HashMap::new());
let (index, fid, candidates) = generate_index($distinct, HashSet::new());
let txn = index.read_txn().unwrap();
let mut map_distinct = MapDistinct::new(fid, &index, &txn);
let excluded = RoaringBitmap::new();

View File

@ -28,7 +28,7 @@ pub trait Distinct<'a> {
#[cfg(test)]
mod test {
use std::collections::{HashMap, HashSet};
use std::collections::HashSet;
use once_cell::sync::Lazy;
use rand::{seq::SliceRandom, Rng};
@ -74,7 +74,7 @@ mod test {
/// Returns a temporary index populated with random test documents, the FieldId for the
/// distinct attribute, and the RoaringBitmap with the document ids.
pub(crate) fn generate_index(distinct: &str, facets: HashMap<String, String>) -> (TempIndex, FieldId, RoaringBitmap) {
pub(crate) fn generate_index(distinct: &str, facets: HashSet<String>) -> (TempIndex, FieldId, RoaringBitmap) {
let index = TempIndex::new();
let mut txn = index.write_txn().unwrap();

View File

@ -240,7 +240,10 @@ impl FacetCondition {
let value = items.next().unwrap();
let (result, svalue) = pest_parse(value);
Ok(Operator(fid, Equal(Some(result?), svalue)))
// TODO we must normalize instead of lowercase.
let svalue = svalue.to_lowercase();
Ok(Operator(fid, Equal(result.ok(), svalue)))
}
fn greater_than(
@ -473,7 +476,8 @@ mod tests {
use super::*;
use crate::update::Settings;
use heed::EnvOpenOptions;
use maplit::hashmap;
use maplit::hashset;
use big_s::S;
#[test]
fn string() {
@ -485,22 +489,22 @@ mod tests {
// Set the faceted fields to be the channel.
let mut wtxn = index.write_txn().unwrap();
let mut builder = Settings::new(&mut wtxn, &index, 0);
builder.set_faceted_fields(hashmap!{ "channel".into() => "string".into() });
builder.set_faceted_fields(hashset!{ S("channel") });
builder.execute(|_, _| ()).unwrap();
wtxn.commit().unwrap();
// Test that the facet condition is correctly generated.
let rtxn = index.read_txn().unwrap();
let condition = FacetCondition::from_str(&rtxn, &index, "channel = ponce").unwrap();
let expected = OperatorString(0, FacetStringOperator::equal("Ponce"));
let condition = FacetCondition::from_str(&rtxn, &index, "channel = Ponce").unwrap();
let expected = Operator(0, Operator::Equal(None, S("ponce")));
assert_eq!(condition, expected);
let condition = FacetCondition::from_str(&rtxn, &index, "channel != ponce").unwrap();
let expected = OperatorString(0, FacetStringOperator::not_equal("ponce"));
let expected = Operator(0, Operator::NotEqual(None, S("ponce")));
assert_eq!(condition, expected);
let condition = FacetCondition::from_str(&rtxn, &index, "NOT channel = ponce").unwrap();
let expected = OperatorString(0, FacetStringOperator::not_equal("ponce"));
let expected = Operator(0, Operator::NotEqual(None, S("ponce")));
assert_eq!(condition, expected);
}
@ -514,20 +518,20 @@ mod tests {
// Set the faceted fields to be the timestamp.
let mut wtxn = index.write_txn().unwrap();
let mut builder = Settings::new(&mut wtxn, &index, 0);
builder.set_faceted_fields(hashmap!{ "timestamp".into() => "number".into() });
builder.set_faceted_fields(hashset!{ "timestamp".into() });
builder.execute(|_, _| ()).unwrap();
wtxn.commit().unwrap();
// Test that the facet condition is correctly generated.
let rtxn = index.read_txn().unwrap();
let condition = FacetCondition::from_str(&rtxn, &index, "timestamp 22 TO 44").unwrap();
let expected = OperatorNumber(0, Between(22.0, 44.0));
let expected = Operator(0, Between(22.0, 44.0));
assert_eq!(condition, expected);
let condition = FacetCondition::from_str(&rtxn, &index, "NOT timestamp 22 TO 44").unwrap();
let expected = Or(
Box::new(OperatorNumber(0, LowerThan(22.0))),
Box::new(OperatorNumber(0, GreaterThan(44.0))),
Box::new(Operator(0, LowerThan(22.0))),
Box::new(Operator(0, GreaterThan(44.0))),
);
assert_eq!(condition, expected);
}
@ -542,11 +546,8 @@ mod tests {
// Set the faceted fields to be the channel and the timestamp.
let mut wtxn = index.write_txn().unwrap();
let mut builder = Settings::new(&mut wtxn, &index, 0);
builder.set_searchable_fields(vec!["channel".into(), "timestamp".into()]); // to keep the fields order
builder.set_faceted_fields(hashmap!{
"channel".into() => "string".into(),
"timestamp".into() => "number".into(),
});
builder.set_searchable_fields(vec![S("channel"), S("timestamp")]); // to keep the fields order
builder.set_faceted_fields(hashset!{ S("channel"), S("timestamp") });
builder.execute(|_, _| ()).unwrap();
wtxn.commit().unwrap();
@ -557,10 +558,10 @@ mod tests {
"channel = gotaga OR (timestamp 22 TO 44 AND channel != ponce)",
).unwrap();
let expected = Or(
Box::new(OperatorString(0, FacetStringOperator::equal("gotaga"))),
Box::new(Operator(0, Operator::Equal(None, S("gotaga")))),
Box::new(And(
Box::new(OperatorNumber(1, Between(22.0, 44.0))),
Box::new(OperatorString(0, FacetStringOperator::not_equal("ponce"))),
Box::new(Operator(1, Between(22.0, 44.0))),
Box::new(Operator(0, Operator::NotEqual(None, S("ponce")))),
))
);
assert_eq!(condition, expected);
@ -570,13 +571,13 @@ mod tests {
"channel = gotaga OR NOT (timestamp 22 TO 44 AND channel != ponce)",
).unwrap();
let expected = Or(
Box::new(OperatorString(0, FacetStringOperator::equal("gotaga"))),
Box::new(Operator(0, Operator::Equal(None, S("gotaga")))),
Box::new(Or(
Box::new(Or(
Box::new(OperatorNumber(1, LowerThan(22.0))),
Box::new(OperatorNumber(1, GreaterThan(44.0))),
Box::new(Operator(1, LowerThan(22.0))),
Box::new(Operator(1, GreaterThan(44.0))),
)),
Box::new(OperatorString(0, FacetStringOperator::equal("ponce"))),
Box::new(Operator(0, Operator::Equal(None, S("ponce")))),
)),
);
assert_eq!(condition, expected);
@ -592,11 +593,8 @@ mod tests {
// Set the faceted fields to be the channel and the timestamp.
let mut wtxn = index.write_txn().unwrap();
let mut builder = Settings::new(&mut wtxn, &index, 0);
builder.set_searchable_fields(vec!["channel".into(), "timestamp".into()]); // to keep the fields order
builder.set_faceted_fields(hashmap!{
"channel".into() => "string".into(),
"timestamp".into() => "number".into(),
});
builder.set_searchable_fields(vec![S("channel"), S("timestamp")]); // to keep the fields order
builder.set_faceted_fields(hashset!{ S("channel"), S("timestamp") });
builder.execute(|_, _| ()).unwrap();
wtxn.commit().unwrap();
@ -604,7 +602,7 @@ mod tests {
let rtxn = index.read_txn().unwrap();
let condition = FacetCondition::from_array(
&rtxn, &index,
vec![Either::Right("channel:gotaga"), Either::Left(vec!["timestamp:44", "channel:-ponce"])],
vec![Either::Right("channel = gotaga"), Either::Left(vec!["timestamp = 44", "channel != ponce"])],
).unwrap().unwrap();
let expected = FacetCondition::from_str(
&rtxn, &index,

View File

@ -118,8 +118,10 @@ mod tests {
assert!(index.docid_word_positions.is_empty(&rtxn).unwrap());
assert!(index.word_pair_proximity_docids.is_empty(&rtxn).unwrap());
assert!(index.word_prefix_pair_proximity_docids.is_empty(&rtxn).unwrap());
assert!(index.facet_field_id_value_docids.is_empty(&rtxn).unwrap());
assert!(index.field_id_docid_facet_values.is_empty(&rtxn).unwrap());
assert!(index.facet_id_f64_docids.is_empty(&rtxn).unwrap());
assert!(index.facet_id_string_docids.is_empty(&rtxn).unwrap());
assert!(index.field_id_docid_facet_f64s.is_empty(&rtxn).unwrap());
assert!(index.field_id_docid_facet_strings.is_empty(&rtxn).unwrap());
assert!(index.documents.is_empty(&rtxn).unwrap());
}
}

View File

@ -450,8 +450,6 @@ impl<'t, 'u, 'i, 'a> IndexDocuments<'t, 'u, 'i, 'a> {
.enumerate()
.map(|(i, documents)| {
let store = Store::new(
primary_key.clone(),
fields_ids_map.clone(),
searchable_fields.clone(),
faceted_fields.clone(),
linked_hash_map_size,
@ -553,7 +551,6 @@ impl<'t, 'u, 'i, 'a> IndexDocuments<'t, 'u, 'i, 'a> {
docid_word_positions_readers,
documents_readers,
words_pairs_proximities_docids_readers,
facet_field_numbers_docids_readers,
facet_field_strings_docids_readers,
field_id_docid_facet_numbers_readers,
field_id_docid_facet_strings_readers,
@ -565,7 +562,6 @@ impl<'t, 'u, 'i, 'a> IndexDocuments<'t, 'u, 'i, 'a> {
docid_word_positions_readers,
documents_readers,
words_pairs_proximities_docids_readers,
facet_field_numbers_docids_readers,
facet_field_strings_docids_readers,
field_id_docid_facet_numbers_readers,
field_id_docid_facet_strings_readers,
@ -599,7 +595,7 @@ impl<'t, 'u, 'i, 'a> IndexDocuments<'t, 'u, 'i, 'a> {
self.index.put_documents_ids(self.wtxn, &documents_ids)?;
let mut database_count = 0;
let total_databases = 8;
let total_databases = 10;
progress_callback(UpdateIndexingStep::MergeDataIntoFinalDatabase {
databases_seen: 0,
@ -636,6 +632,21 @@ impl<'t, 'u, 'i, 'a> IndexDocuments<'t, 'u, 'i, 'a> {
total_databases,
});
debug!("Writing the facet id string docids into LMDB on disk...");
merge_into_lmdb_database(
self.wtxn,
*self.index.facet_id_string_docids.as_polymorph(),
facet_field_strings_docids_readers,
facet_field_value_docids_merge,
write_method,
)?;
database_count += 1;
progress_callback(UpdateIndexingStep::MergeDataIntoFinalDatabase {
databases_seen: database_count,
total_databases,
});
debug!("Writing the field id docid facet numbers into LMDB on disk...");
merge_into_lmdb_database(
self.wtxn,

View File

@ -23,7 +23,7 @@ use crate::heed_codec::facet::{FacetValueStringCodec, FacetLevelValueF64Codec};
use crate::heed_codec::facet::{FieldDocIdFacetStringCodec, FieldDocIdFacetF64Codec};
use crate::heed_codec::{BoRoaringBitmapCodec, CboRoaringBitmapCodec};
use crate::update::UpdateIndexingStep;
use crate::{json_to_string, SmallVec32, Position, DocumentId, FieldId, FieldsIdsMap};
use crate::{json_to_string, SmallVec32, Position, DocumentId, FieldId};
use super::{MergeFn, create_writer, create_sorter, writer_into_reader};
use super::merge_function::{
@ -53,8 +53,6 @@ pub struct Readers {
pub struct Store<'s, A> {
// Indexing parameters
primary_key: String,
fields_ids_map: FieldsIdsMap,
searchable_fields: HashSet<FieldId>,
faceted_fields: HashSet<FieldId>,
// Caches
@ -87,8 +85,6 @@ pub struct Store<'s, A> {
impl<'s, A: AsRef<[u8]>> Store<'s, A> {
pub fn new(
primary_key: String,
fields_ids_map: FieldsIdsMap,
searchable_fields: HashSet<FieldId>,
faceted_fields: HashSet<FieldId>,
linked_hash_map_size: Option<usize>,
@ -184,8 +180,6 @@ impl<'s, A: AsRef<[u8]>> Store<'s, A> {
Ok(Store {
// Indexing parameters.
primary_key,
fields_ids_map,
searchable_fields,
faceted_fields,
// Caches

View File

@ -1,5 +1,4 @@
use std::collections::{BTreeSet, HashMap, HashSet};
use std::str::FromStr;
use anyhow::Context;
use chrono::Utc;
@ -443,9 +442,10 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
#[cfg(test)]
mod tests {
use heed::EnvOpenOptions;
use maplit::{btreeset, hashmap};
use heed::types::ByteSlice;
use maplit::{btreeset, hashmap, hashset};
use big_s::S;
use crate::facet::FacetType;
use crate::update::{IndexDocuments, UpdateFormat};
use super::*;
@ -620,37 +620,53 @@ mod tests {
// Set the faceted fields to be the age.
let mut wtxn = index.write_txn().unwrap();
let mut builder = Settings::new(&mut wtxn, &index, 0);
builder.set_faceted_fields(hashmap!{ "age".into() => "number".into() });
builder.set_faceted_fields(hashset!{ S("age") });
builder.execute(|_, _| ()).unwrap();
// Then index some documents.
let content = &b"name,age\nkevin,23\nkevina,21\nbenoit,34\n"[..];
let content = &br#"[
{ "name": "kevin", "age": 23 },
{ "name": "kevina", "age": 21 },
{ "name": "benoit", "age": 34 }
]"#[..];
let mut builder = IndexDocuments::new(&mut wtxn, &index, 1);
builder.update_format(UpdateFormat::Json);
builder.enable_autogenerate_docids();
builder.update_format(UpdateFormat::Csv);
builder.execute(content, |_, _| ()).unwrap();
wtxn.commit().unwrap();
// Check that the faceted fields are correctly set.
let rtxn = index.read_txn().unwrap();
let fields_ids = index.faceted_fields(&rtxn).unwrap();
assert_eq!(fields_ids, hashmap!{ "age".to_string() => FacetType::Number });
assert_eq!(fields_ids, hashset!{ S("age") });
// Only count the field_id 0 and level 0 facet values.
let count = index.facet_field_id_value_docids.prefix_iter(&rtxn, &[0, 0]).unwrap().count();
// TODO we must support typed CSVs for numbers to be understood.
let count = index.facet_id_f64_docids
.remap_key_type::<ByteSlice>()
.prefix_iter(&rtxn, &[0, 0]).unwrap().count();
assert_eq!(count, 3);
drop(rtxn);
// Index a few more documents with new and existing facet values.
let mut wtxn = index.write_txn().unwrap();
let content = &b"name,age\nkevin2,23\nkevina2,21\nbenoit2,35\n"[..];
let content = &br#"[
{ "name": "kevin2", "age": 23 },
{ "name": "kevina2", "age": 21 },
{ "name": "benoit", "age": 35 }
]"#[..];
let mut builder = IndexDocuments::new(&mut wtxn, &index, 2);
builder.update_format(UpdateFormat::Csv);
builder.enable_autogenerate_docids();
builder.update_format(UpdateFormat::Json);
builder.execute(content, |_, _| ()).unwrap();
wtxn.commit().unwrap();
let rtxn = index.read_txn().unwrap();
// Only count the field_id 0 and level 0 facet values.
let count = index.facet_field_id_value_docids.prefix_iter(&rtxn, &[0, 0]).unwrap().count();
// TODO we must support typed CSVs for numbers to be understood.
let count = index.facet_id_f64_docids
.remap_key_type::<ByteSlice>()
.prefix_iter(&rtxn, &[0, 0]).unwrap().count();
assert_eq!(count, 4);
}
@ -817,10 +833,7 @@ mod tests {
let mut wtxn = index.write_txn().unwrap();
let mut builder = Settings::new(&mut wtxn, &index, 0);
builder.set_displayed_fields(vec!["hello".to_string()]);
builder.set_faceted_fields(hashmap!{
"age".into() => "number".into(),
"toto".into() => "number".into(),
});
builder.set_faceted_fields(hashset!{ S("age"), S("toto") });
builder.set_criteria(vec!["asc(toto)".to_string()]);
builder.execute(|_, _| ()).unwrap();
wtxn.commit().unwrap();