mirror of
https://github.com/meilisearch/MeiliSearch
synced 2025-01-11 14:04:31 +01:00
Add tests for checking that detected script and language associated with document(s) were stored during indexing
This commit is contained in:
parent
b216ddba63
commit
a27f329e3a
@ -4,6 +4,7 @@ use std::fs::File;
|
||||
use std::mem::size_of;
|
||||
use std::path::Path;
|
||||
|
||||
use charabia::{Language, Script};
|
||||
use heed::flags::Flags;
|
||||
use heed::types::*;
|
||||
use heed::{CompactionOption, Database, PolyDatabase, RoTxn, RwTxn};
|
||||
@ -1194,6 +1195,12 @@ impl Index {
|
||||
pub(crate) fn delete_pagination_max_total_hits(&self, txn: &mut RwTxn) -> heed::Result<bool> {
|
||||
self.main.delete::<_, Str>(txn, main_key::PAGINATION_MAX_TOTAL_HITS)
|
||||
}
|
||||
|
||||
/* script language docids */
|
||||
/// Retrieve all the documents ids that correspond with (Script, Language) key, `None` if it is any.
|
||||
pub fn script_language_documents_ids(&self, rtxn: &RoTxn, key: &(Script, Language)) -> heed::Result<Option<RoaringBitmap>> {
|
||||
self.script_language_docids.get(rtxn, key)
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
|
@ -1907,4 +1907,39 @@ mod tests {
|
||||
|
||||
index.add_documents(doc1).unwrap();
|
||||
}
|
||||
|
||||
#[cfg(feature = "default")]
#[test]
fn store_detected_script_and_language_per_document_during_indexing() {
    use charabia::{Language, Script};

    // Index six documents whose titles are written in different scripts; the last one mixes
    // Latin and Chinese text.
    let index = TempIndex::new();
    index
        .add_documents(documents!([
            { "id": 1, "title": "The quick (\"brown\") fox can't jump 32.3 feet, right? Brr, it's 29.3°F!" },
            { "id": 2, "title": "人人生而自由﹐在尊嚴和權利上一律平等。他們賦有理性和良心﹐並應以兄弟關係的精神互相對待。" },
            { "id": 3, "title": "הַשּׁוּעָל הַמָּהִיר (״הַחוּם״) לֹא יָכוֹל לִקְפֹּץ 9.94 מֶטְרִים, נָכוֹן? ברר, 1.5°C- בַּחוּץ!" },
            { "id": 4, "title": "関西国際空港限定トートバッグ すもももももももものうち" },
            { "id": 5, "title": "ภาษาไทยง่ายนิดเดียว" },
            { "id": 6, "title": "The quick 在尊嚴和權利上一律平等。" },
        ]))
        .unwrap();

    let rtxn = index.read_txn().unwrap();

    // Looks up the docids stored for one (script, language) pair; panics if the read fails
    // or if nothing was stored for that pair.
    let docids_for = |key: (Script, Language)| {
        index.script_language_documents_ids(&rtxn, &key).unwrap().unwrap()
    };

    // All lookups happen before any assertion, so a missing entry fails the test first.
    let thai_docs = docids_for((Script::Thai, Language::Other));
    let cj_jpn_docs = docids_for((Script::Cj, Language::Jpn));
    let cj_cmn_docs = docids_for((Script::Cj, Language::Cmn));

    // NOTE(review): the expected values look like internal document ids assigned from 0 in
    // insertion order (e.g. the Thai document has "id": 5 but is expected at 4) — confirm.
    let expected_thai_docids: RoaringBitmap = [4u32].iter().copied().collect();
    assert_eq!(thai_docs, expected_thai_docids);

    let expected_cj_jpn_docids: RoaringBitmap = [3u32].iter().copied().collect();
    assert_eq!(cj_jpn_docs, expected_cj_jpn_docids);

    let expected_cj_cmn_docids: RoaringBitmap = [1u32, 5].iter().copied().collect();
    assert_eq!(cj_cmn_docs, expected_cj_cmn_docids);
}
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user