Add snapshot tests for indexing of word_prefix_pair_proximity_docids

This commit is contained in:
Loïc Lecrenier 2022-08-03 16:49:03 +02:00
parent 3a734af159
commit 6066256689
5 changed files with 195 additions and 13 deletions

View File

@ -190,17 +190,6 @@ pub fn documents_batch_reader_from_objects(
DocumentsBatchReader::from_reader(std::io::Cursor::new(builder.into_inner().unwrap())).unwrap() DocumentsBatchReader::from_reader(std::io::Cursor::new(builder.into_inner().unwrap())).unwrap()
} }
#[cfg(test)]
pub fn batch_reader_from_documents(
documents: &[Object],
) -> DocumentsBatchReader<std::io::Cursor<Vec<u8>>> {
let mut builder = DocumentsBatchBuilder::new(Vec::new());
for object in documents {
builder.append_json_object(&object).unwrap();
}
DocumentsBatchReader::from_reader(std::io::Cursor::new(builder.into_inner().unwrap())).unwrap()
}
#[cfg(test)] #[cfg(test)]
mod test { mod test {
use std::io::Cursor; use std::io::Cursor;

View File

@ -347,7 +347,11 @@ fn write_string_entry(
mod tests { mod tests {
use std::num::NonZeroUsize; use std::num::NonZeroUsize;
use crate::{db_snap, documents::batch_reader_from_documents, index::tests::TempIndex}; use crate::{
db_snap,
documents::{batch_reader_from_documents, documents_batch_reader_from_objects},
index::tests::TempIndex,
};
#[test] #[test]
fn test_facets_number() { fn test_facets_number() {
@ -419,7 +423,7 @@ mod tests {
serde_json::json!({ "facet2": format!("s{i:X}") }).as_object().unwrap().clone(), serde_json::json!({ "facet2": format!("s{i:X}") }).as_object().unwrap().clone(),
); );
} }
let documents = batch_reader_from_documents(&documents); let documents = documents_batch_reader_from_objects(documents);
index.add_documents(documents).unwrap(); index.add_documents(documents).unwrap();

View File

@ -0,0 +1,46 @@
---
source: milli/src/update/word_prefix_pair_proximity_docids.rs
---
5 a 1 [101, ]
5 a 2 [101, ]
5 b 4 [101, ]
5 be 4 [101, ]
am a 3 [101, ]
amazing a 1 [100, ]
amazing a 2 [100, ]
amazing a 3 [100, ]
amazing b 2 [100, ]
amazing be 2 [100, ]
an a 1 [100, ]
an a 2 [100, ]
an b 3 [100, ]
an be 3 [100, ]
and a 2 [100, ]
and a 3 [100, ]
and a 4 [100, ]
and b 1 [100, ]
and be 1 [100, ]
at a 1 [100, ]
at a 2 [100, 101, ]
at a 3 [100, ]
at b 3 [101, ]
at b 4 [100, ]
at be 3 [101, ]
at be 4 [100, ]
beautiful a 2 [100, ]
beautiful a 3 [100, ]
beautiful a 4 [100, ]
bell a 2 [101, ]
bell a 4 [101, ]
house a 3 [100, ]
house a 4 [100, ]
house b 2 [100, ]
house be 2 [100, ]
rings a 1 [101, ]
rings a 3 [101, ]
rings b 2 [101, ]
rings be 2 [101, ]
the a 3 [101, ]
the b 1 [101, ]
the be 1 [101, ]

View File

@ -0,0 +1,56 @@
---
source: milli/src/update/word_prefix_pair_proximity_docids.rs
---
5 a 1 [101, ]
5 a 2 [101, ]
5 am 1 [101, ]
5 b 4 [101, ]
5 be 4 [101, ]
am a 3 [101, ]
amazing a 1 [100, ]
amazing a 2 [100, ]
amazing a 3 [100, ]
amazing b 2 [100, ]
amazing be 2 [100, ]
an a 1 [100, ]
an a 2 [100, 202, ]
an am 1 [100, ]
an b 3 [100, ]
an be 3 [100, ]
and a 2 [100, ]
and a 3 [100, ]
and a 4 [100, ]
and am 2 [100, ]
and b 1 [100, ]
and be 1 [100, ]
at a 1 [100, 202, ]
at a 2 [100, 101, ]
at a 3 [100, ]
at am 2 [100, 101, ]
at b 3 [101, ]
at b 4 [100, ]
at be 3 [101, ]
at be 4 [100, ]
beautiful a 2 [100, ]
beautiful a 3 [100, ]
beautiful a 4 [100, ]
beautiful am 3 [100, ]
bell a 2 [101, ]
bell a 4 [101, ]
bell am 4 [101, ]
extraordinary a 2 [202, ]
extraordinary a 3 [202, ]
house a 3 [100, 202, ]
house a 4 [100, 202, ]
house am 4 [100, ]
house b 2 [100, ]
house be 2 [100, ]
rings a 1 [101, ]
rings a 3 [101, ]
rings am 3 [101, ]
rings b 2 [101, ]
rings be 2 [101, ]
the a 3 [101, ]
the b 1 [101, ]
the be 1 [101, ]

View File

@ -244,3 +244,90 @@ fn insert_current_prefix_data_in_sorter<'a>(
Ok(()) Ok(())
} }
#[cfg(test)]
mod tests {
use std::io::Cursor;
use crate::{
db_snap,
documents::{DocumentsBatchBuilder, DocumentsBatchReader},
index::tests::TempIndex,
};
fn documents_with_enough_different_words_for_prefixes(prefixes: &[&str]) -> Vec<crate::Object> {
let mut documents = Vec::new();
for prefix in prefixes {
for i in 0..50 {
documents.push(
serde_json::json!({
"text": format!("{prefix}{i:x}"),
})
.as_object()
.unwrap()
.clone(),
)
}
}
documents
}
#[test]
fn test_update() {
let mut index = TempIndex::new();
index.index_documents_config.words_prefix_threshold = Some(50);
index.index_documents_config.autogenerate_docids = true;
index
.update_settings(|settings| {
settings.set_searchable_fields(vec!["text".to_owned()]);
})
.unwrap();
let batch_reader_from_documents = |documents| {
let mut builder = DocumentsBatchBuilder::new(Vec::new());
for object in documents {
builder.append_json_object(&object).unwrap();
}
DocumentsBatchReader::from_reader(Cursor::new(builder.into_inner().unwrap())).unwrap()
};
let mut documents = documents_with_enough_different_words_for_prefixes(&["a", "be"]);
// now we add some documents where the text should populate the word_prefix_pair_proximity_docids database
documents.push(
serde_json::json!({
"text": "At an amazing and beautiful house"
})
.as_object()
.unwrap()
.clone(),
);
documents.push(
serde_json::json!({
"text": "The bell rings at 5 am"
})
.as_object()
.unwrap()
.clone(),
);
let documents = batch_reader_from_documents(documents);
index.add_documents(documents).unwrap();
db_snap!(index, word_prefix_pair_proximity_docids, "initial");
let mut documents = documents_with_enough_different_words_for_prefixes(&["am", "an"]);
documents.push(
serde_json::json!({
"text": "At an extraordinary house"
})
.as_object()
.unwrap()
.clone(),
);
let documents = batch_reader_from_documents(documents);
index.add_documents(documents).unwrap();
db_snap!(index, word_prefix_pair_proximity_docids, "update");
}
}