Mirror of https://github.com/meilisearch/MeiliSearch, synced 2024-11-26 23:04:26 +01:00
Add snapshot tests for indexing of word_prefix_pair_proximity_docids
parent 3a734af159
commit 6066256689
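This commit adds snapshot tests around the building and incremental updating of the word_prefix_pair_proximity_docids database: a test module is added to milli/src/update/word_prefix_pair_proximity_docids.rs, two new .snap files record the expected database contents after an initial indexing pass and after an incremental update, and the facets tests are adjusted alongside the removal of the old batch_reader_from_documents test helper.

The .snap headers below (a `source:` line between `---` markers) are the snapshot format of the insta crate, which the db_snap! macro appears to build on. As a rough sketch of the pattern, not milli's actual db_snap! implementation, a snapshot assertion with insta (assumed as a dev-dependency) looks like:

```rust
// Sketch only (not milli's db_snap! macro): an insta-style snapshot test.
// The database dump is faked with a literal string shaped like the .snap
// files added by this commit ("word prefix proximity [docids]" per line).
#[test]
fn word_prefix_pair_proximity_docids_snapshot() {
    let dump = "\
5 a 1 [101, ]
5 a 2 [101, ]
5 b 4 [101, ]
";
    // Fails whenever `dump` stops matching the checked-in snapshot file.
    insta::assert_snapshot!(dump);
}
```

If cargo-insta is installed, changed snapshots of this kind can be inspected and accepted with `cargo insta review`.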
@@ -190,17 +190,6 @@ pub fn documents_batch_reader_from_objects(
     DocumentsBatchReader::from_reader(std::io::Cursor::new(builder.into_inner().unwrap())).unwrap()
 }
 
-#[cfg(test)]
-pub fn batch_reader_from_documents(
-    documents: &[Object],
-) -> DocumentsBatchReader<std::io::Cursor<Vec<u8>>> {
-    let mut builder = DocumentsBatchBuilder::new(Vec::new());
-    for object in documents {
-        builder.append_json_object(&object).unwrap();
-    }
-    DocumentsBatchReader::from_reader(std::io::Cursor::new(builder.into_inner().unwrap())).unwrap()
-}
-
 #[cfg(test)]
 mod test {
     use std::io::Cursor;
@@ -347,7 +347,11 @@ fn write_string_entry(
 mod tests {
     use std::num::NonZeroUsize;
 
-    use crate::{db_snap, documents::batch_reader_from_documents, index::tests::TempIndex};
+    use crate::{
+        db_snap,
+        documents::{batch_reader_from_documents, documents_batch_reader_from_objects},
+        index::tests::TempIndex,
+    };
 
     #[test]
     fn test_facets_number() {
@@ -419,7 +423,7 @@ mod tests {
                 serde_json::json!({ "facet2": format!("s{i:X}") }).as_object().unwrap().clone(),
             );
         }
-        let documents = batch_reader_from_documents(&documents);
+        let documents = documents_batch_reader_from_objects(documents);
 
         index.add_documents(documents).unwrap();
 
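The two facets.rs hunks above go with the helper removal in the first hunk: the single-line use crate::{...} import is split over several lines, and the facets test now builds its batch with documents_batch_reader_from_objects(documents) instead of batch_reader_from_documents(&documents). The next two hunks are the new snapshot files referenced by the "initial" and "update" db_snap! calls in the test added at the end of this commit.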
@@ -0,0 +1,46 @@
+---
+source: milli/src/update/word_prefix_pair_proximity_docids.rs
+---
+5 a 1 [101, ]
+5 a 2 [101, ]
+5 b 4 [101, ]
+5 be 4 [101, ]
+am a 3 [101, ]
+amazing a 1 [100, ]
+amazing a 2 [100, ]
+amazing a 3 [100, ]
+amazing b 2 [100, ]
+amazing be 2 [100, ]
+an a 1 [100, ]
+an a 2 [100, ]
+an b 3 [100, ]
+an be 3 [100, ]
+and a 2 [100, ]
+and a 3 [100, ]
+and a 4 [100, ]
+and b 1 [100, ]
+and be 1 [100, ]
+at a 1 [100, ]
+at a 2 [100, 101, ]
+at a 3 [100, ]
+at b 3 [101, ]
+at b 4 [100, ]
+at be 3 [101, ]
+at be 4 [100, ]
+beautiful a 2 [100, ]
+beautiful a 3 [100, ]
+beautiful a 4 [100, ]
+bell a 2 [101, ]
+bell a 4 [101, ]
+house a 3 [100, ]
+house a 4 [100, ]
+house b 2 [100, ]
+house be 2 [100, ]
+rings a 1 [101, ]
+rings a 3 [101, ]
+rings b 2 [101, ]
+rings be 2 [101, ]
+the a 3 [101, ]
+the b 1 [101, ]
+the be 1 [101, ]
+
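Each line of this snapshot is one key of the word_prefix_pair_proximity_docids database followed by the ids of the documents containing that pair: word, then prefix, then proximity, then the docid list. Read against the two hand-written test documents (which end up with ids 100, "at an amazing and beautiful house", and 101, "the bell rings at 5 am"), a line like `at a 2 [100, 101, ]` says that a word starting with "a" occurs at proximity 2 from "at" in both documents. A small, illustrative parser for this textual dump, not part of the commit:

```rust
// Parses one snapshot line such as "at a 2 [100, 101, ]" into
// (word, prefix, proximity, docids). The field meanings are read off the
// snapshot contents above; this helper is for illustration only.
fn parse_snapshot_line(line: &str) -> Option<(String, String, u8, Vec<u32>)> {
    let (key, ids) = line.split_once('[')?;
    let mut parts = key.split_whitespace();
    let word = parts.next()?.to_owned();
    let prefix = parts.next()?.to_owned();
    let proximity: u8 = parts.next()?.parse().ok()?;
    let docids = ids
        .trim_end_matches(']')
        .split(',')
        .filter_map(|s| s.trim().parse::<u32>().ok())
        .collect();
    Some((word, prefix, proximity, docids))
}
```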
@@ -0,0 +1,56 @@
+---
+source: milli/src/update/word_prefix_pair_proximity_docids.rs
+---
+5 a 1 [101, ]
+5 a 2 [101, ]
+5 am 1 [101, ]
+5 b 4 [101, ]
+5 be 4 [101, ]
+am a 3 [101, ]
+amazing a 1 [100, ]
+amazing a 2 [100, ]
+amazing a 3 [100, ]
+amazing b 2 [100, ]
+amazing be 2 [100, ]
+an a 1 [100, ]
+an a 2 [100, 202, ]
+an am 1 [100, ]
+an b 3 [100, ]
+an be 3 [100, ]
+and a 2 [100, ]
+and a 3 [100, ]
+and a 4 [100, ]
+and am 2 [100, ]
+and b 1 [100, ]
+and be 1 [100, ]
+at a 1 [100, 202, ]
+at a 2 [100, 101, ]
+at a 3 [100, ]
+at am 2 [100, 101, ]
+at b 3 [101, ]
+at b 4 [100, ]
+at be 3 [101, ]
+at be 4 [100, ]
+beautiful a 2 [100, ]
+beautiful a 3 [100, ]
+beautiful a 4 [100, ]
+beautiful am 3 [100, ]
+bell a 2 [101, ]
+bell a 4 [101, ]
+bell am 4 [101, ]
+extraordinary a 2 [202, ]
+extraordinary a 3 [202, ]
+house a 3 [100, 202, ]
+house a 4 [100, 202, ]
+house am 4 [100, ]
+house b 2 [100, ]
+house be 2 [100, ]
+rings a 1 [101, ]
+rings a 3 [101, ]
+rings am 3 [101, ]
+rings b 2 [101, ]
+rings be 2 [101, ]
+the a 3 [101, ]
+the b 1 [101, ]
+the be 1 [101, ]
+
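The "update" snapshot differs from the "initial" one in two ways: "am" now appears as a prefix (after the second batch of generated documents), and document 202 ("at an extraordinary house") shows up in the docid lists, for example in `extraordinary a 2 [202, ]` and `house a 4 [100, 202, ]`. The proximities in these entries are consistent with a simple rule inferred from the data rather than taken from milli's indexing code: a prefix word d positions after the first word gets proximity d, and one d positions before it gets proximity d + 1. A sketch that reproduces the document-202 entries under that assumption:

```rust
use std::collections::BTreeSet;

// Recomputes the (word, prefix, proximity) keys that document 202,
// "at an extraordinary house", contributes to the update snapshot above.
// The proximity rule used here is an approximation inferred from the
// snapshot data, not milli's actual indexing code.
fn prefix_pair_keys(words: &[&str], prefixes: &[&str], max: u32) -> BTreeSet<(String, String, u32)> {
    let mut keys = BTreeSet::new();
    for (i, word) in words.iter().enumerate() {
        for (j, other) in words.iter().enumerate() {
            if i == j {
                continue;
            }
            let distance = (i as i64 - j as i64).unsigned_abs() as u32;
            // a prefix word that follows keeps its distance, one that precedes pays + 1
            let proximity = if j > i { distance } else { distance + 1 };
            if proximity > max {
                continue;
            }
            for prefix in prefixes {
                if other.starts_with(prefix) {
                    keys.insert((word.to_string(), prefix.to_string(), proximity));
                }
            }
        }
    }
    keys
}

fn main() {
    // Lowercased tokens of document 202 and the prefixes visible in the snapshot.
    let doc_202 = ["at", "an", "extraordinary", "house"];
    for (word, prefix, proximity) in prefix_pair_keys(&doc_202, &["a", "am", "b", "be"], 4) {
        // Prints exactly the six entries whose docid lists contain 202:
        // an a 2, at a 1, extraordinary a 2, extraordinary a 3, house a 3, house a 4
        println!("{word} {prefix} {proximity} [202, ]");
    }
}
```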
@@ -244,3 +244,90 @@ fn insert_current_prefix_data_in_sorter<'a>(
 
     Ok(())
 }
+
+#[cfg(test)]
+mod tests {
+    use std::io::Cursor;
+
+    use crate::{
+        db_snap,
+        documents::{DocumentsBatchBuilder, DocumentsBatchReader},
+        index::tests::TempIndex,
+    };
+
+    fn documents_with_enough_different_words_for_prefixes(prefixes: &[&str]) -> Vec<crate::Object> {
+        let mut documents = Vec::new();
+        for prefix in prefixes {
+            for i in 0..50 {
+                documents.push(
+                    serde_json::json!({
+                        "text": format!("{prefix}{i:x}"),
+                    })
+                    .as_object()
+                    .unwrap()
+                    .clone(),
+                )
+            }
+        }
+        documents
+    }
+
+    #[test]
+    fn test_update() {
+        let mut index = TempIndex::new();
+        index.index_documents_config.words_prefix_threshold = Some(50);
+        index.index_documents_config.autogenerate_docids = true;
+
+        index
+            .update_settings(|settings| {
+                settings.set_searchable_fields(vec!["text".to_owned()]);
+            })
+            .unwrap();
+
+        let batch_reader_from_documents = |documents| {
+            let mut builder = DocumentsBatchBuilder::new(Vec::new());
+            for object in documents {
+                builder.append_json_object(&object).unwrap();
+            }
+            DocumentsBatchReader::from_reader(Cursor::new(builder.into_inner().unwrap())).unwrap()
+        };
+
+        let mut documents = documents_with_enough_different_words_for_prefixes(&["a", "be"]);
+        // now we add some documents where the text should populate the word_prefix_pair_proximity_docids database
+        documents.push(
+            serde_json::json!({
+                "text": "At an amazing and beautiful house"
+            })
+            .as_object()
+            .unwrap()
+            .clone(),
+        );
+        documents.push(
+            serde_json::json!({
+                "text": "The bell rings at 5 am"
+            })
+            .as_object()
+            .unwrap()
+            .clone(),
+        );
+
+        let documents = batch_reader_from_documents(documents);
+        index.add_documents(documents).unwrap();
+
+        db_snap!(index, word_prefix_pair_proximity_docids, "initial");
+
+        let mut documents = documents_with_enough_different_words_for_prefixes(&["am", "an"]);
+        documents.push(
+            serde_json::json!({
+                "text": "At an extraordinary house"
+            })
+            .as_object()
+            .unwrap()
+            .clone(),
+        );
+        let documents = batch_reader_from_documents(documents);
+        index.add_documents(documents).unwrap();
+
+        db_snap!(index, word_prefix_pair_proximity_docids, "update");
+    }
+}
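A note on the test's setup: words_prefix_threshold is set to Some(50) and documents_with_enough_different_words_for_prefixes generates exactly 50 distinct words per prefix, which appears to be what makes those prefixes reach the threshold and be considered by the prefix databases at all. The two db_snap! calls then capture the word_prefix_pair_proximity_docids contents right after the first indexing pass ("initial") and after the incremental addition of the second batch ("update"), which is what the two .snap files earlier in this diff contain.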