mirror of
https://github.com/meilisearch/MeiliSearch
synced 2024-11-26 23:04:26 +01:00
fix: Reduce the size of the DocIndex type
This commit is contained in:
parent
aef7d7825f
commit
a45cc4b618
@ -126,7 +126,7 @@ fn crop_text(
|
||||
(m.char_index as usize) + (m.char_length as usize) <= start + (context * 2)
|
||||
})
|
||||
.map(|match_| {
|
||||
Match { char_index: match_.char_index - start as u32, ..match_ }
|
||||
Match { char_index: match_.char_index - start as u16, ..match_ }
|
||||
})
|
||||
.collect();
|
||||
|
||||
|
@ -56,7 +56,7 @@ where B: TokenizerBuilder
|
||||
|
||||
// FIXME must use u16::try_from instead of a truncating `as` cast
|
||||
let attribute = self.attribute.0;
|
||||
let word_index = word_index as u32;
|
||||
let word_index = word_index as u16;
|
||||
|
||||
// insert the exact representation
|
||||
let word_lower = word.to_lowercase();
|
||||
@ -69,7 +69,7 @@ where B: TokenizerBuilder
|
||||
let word_unidecoded = unidecode::unidecode(word).to_lowercase();
|
||||
let word_unidecoded = word_unidecoded.trim();
|
||||
if word_lower != word_unidecoded {
|
||||
let char_index = char_index as u32;
|
||||
let char_index = char_index as u16;
|
||||
let char_length = length;
|
||||
|
||||
let doc_index = DocIndex { document_id, attribute, word_index, char_index, char_length };
|
||||
@ -77,7 +77,7 @@ where B: TokenizerBuilder
|
||||
}
|
||||
}
|
||||
|
||||
let char_index = char_index as u32;
|
||||
let char_index = char_index as u16;
|
||||
let char_length = length;
|
||||
|
||||
let doc_index = DocIndex { document_id, attribute, word_index, char_index, char_length };
|
||||
|
14
src/lib.rs
14
src/lib.rs
@ -50,14 +50,14 @@ pub struct DocIndex {
|
||||
/// The attribute in the document where the word was found
|
||||
/// along with the index in it.
|
||||
pub attribute: u16,
|
||||
pub word_index: u32,
|
||||
pub word_index: u16,
|
||||
|
||||
/// The position in bytes where the word was found
|
||||
/// along with the length of it.
|
||||
///
|
||||
/// It informs on the original word area in the text indexed
|
||||
/// without needing to run the tokenizer again.
|
||||
pub char_index: u32,
|
||||
pub char_index: u16,
|
||||
pub char_length: u16,
|
||||
}
|
||||
|
||||
@ -84,7 +84,7 @@ pub struct Match {
|
||||
/// The attribute in the document where the word was found
|
||||
/// along with the index in it.
|
||||
pub attribute: u16,
|
||||
pub word_index: u32,
|
||||
pub word_index: u16,
|
||||
|
||||
/// Whether the word that match is an exact match or a prefix.
|
||||
pub is_exact: bool,
|
||||
@ -94,7 +94,7 @@ pub struct Match {
|
||||
///
|
||||
/// It informs on the original word area in the text indexed
|
||||
/// without needing to run the tokenizer again.
|
||||
pub char_index: u32,
|
||||
pub char_index: u16,
|
||||
pub char_length: u16,
|
||||
}
|
||||
|
||||
@ -116,9 +116,9 @@ impl Match {
|
||||
query_index: u32::max_value(),
|
||||
distance: u8::max_value(),
|
||||
attribute: u16::max_value(),
|
||||
word_index: u32::max_value(),
|
||||
word_index: u16::max_value(),
|
||||
is_exact: true,
|
||||
char_index: u32::max_value(),
|
||||
char_index: u16::max_value(),
|
||||
char_length: u16::max_value(),
|
||||
}
|
||||
}
|
||||
@ -131,6 +131,6 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
fn docindex_mem_size() {
|
||||
assert_eq!(mem::size_of::<DocIndex>(), 24);
|
||||
assert_eq!(mem::size_of::<DocIndex>(), 16);
|
||||
}
|
||||
}
|
||||
|
@ -6,7 +6,7 @@ use crate::rank::criterion::Criterion;
|
||||
use crate::rank::RawDocument;
|
||||
|
||||
#[inline]
|
||||
fn sum_matches_attribute_index(query_index: &[u32], word_index: &[u32]) -> usize {
|
||||
fn sum_matches_attribute_index(query_index: &[u32], word_index: &[u16]) -> usize {
|
||||
let mut sum_word_index = 0;
|
||||
let mut index = 0;
|
||||
|
||||
|
@ -5,14 +5,14 @@ use slice_group_by::GroupBy;
|
||||
use crate::rank::criterion::Criterion;
|
||||
use crate::rank::RawDocument;
|
||||
|
||||
const MAX_DISTANCE: u32 = 8;
|
||||
const MAX_DISTANCE: u16 = 8;
|
||||
|
||||
#[inline]
|
||||
fn clone_tuple<T: Clone, U: Clone>((a, b): (&T, &U)) -> (T, U) {
|
||||
(a.clone(), b.clone())
|
||||
}
|
||||
|
||||
fn index_proximity(lhs: u32, rhs: u32) -> u32 {
|
||||
fn index_proximity(lhs: u16, rhs: u16) -> u16 {
|
||||
if lhs < rhs {
|
||||
cmp::min(rhs - lhs, MAX_DISTANCE)
|
||||
} else {
|
||||
@ -20,13 +20,13 @@ fn index_proximity(lhs: u32, rhs: u32) -> u32 {
|
||||
}
|
||||
}
|
||||
|
||||
fn attribute_proximity((lattr, lwi): (u16, u32), (rattr, rwi): (u16, u32)) -> u32 {
|
||||
fn attribute_proximity((lattr, lwi): (u16, u16), (rattr, rwi): (u16, u16)) -> u16 {
|
||||
if lattr != rattr { return MAX_DISTANCE }
|
||||
index_proximity(lwi, rwi)
|
||||
}
|
||||
|
||||
fn min_proximity((lattr, lwi): (&[u16], &[u32]), (rattr, rwi): (&[u16], &[u32])) -> u32 {
|
||||
let mut min_prox = u32::max_value();
|
||||
fn min_proximity((lattr, lwi): (&[u16], &[u16]), (rattr, rwi): (&[u16], &[u16])) -> u16 {
|
||||
let mut min_prox = u16::max_value();
|
||||
|
||||
for a in lattr.iter().zip(lwi) {
|
||||
for b in rattr.iter().zip(rwi) {
|
||||
@ -43,8 +43,8 @@ fn matches_proximity(
|
||||
query_index: &[u32],
|
||||
distance: &[u8],
|
||||
attribute: &[u16],
|
||||
word_index: &[u32],
|
||||
) -> u32
|
||||
word_index: &[u16],
|
||||
) -> u16
|
||||
{
|
||||
let mut query_index_groups = query_index.linear_group();
|
||||
let mut proximity = 0;
|
||||
|
@ -79,7 +79,7 @@ impl RawDocument {
|
||||
unsafe { &self.matches.matches.attribute.get_unchecked(r.start..r.end) }
|
||||
}
|
||||
|
||||
pub fn word_index(&self) -> &[u32] {
|
||||
pub fn word_index(&self) -> &[u16] {
|
||||
let r = self.matches.range;
|
||||
// it is safe because construction/modifications
|
||||
// can only be done in this module
|
||||
@ -93,7 +93,7 @@ impl RawDocument {
|
||||
unsafe { &self.matches.matches.is_exact.get_unchecked(r.start..r.end) }
|
||||
}
|
||||
|
||||
pub fn char_index(&self) -> &[u32] {
|
||||
pub fn char_index(&self) -> &[u16] {
|
||||
let r = self.matches.range;
|
||||
// it is safe because construction/modifications
|
||||
// can only be done in this module
|
||||
@ -150,9 +150,9 @@ struct Matches {
|
||||
query_index: Vec<u32>,
|
||||
distance: Vec<u8>,
|
||||
attribute: Vec<u16>,
|
||||
word_index: Vec<u32>,
|
||||
word_index: Vec<u16>,
|
||||
is_exact: Vec<bool>,
|
||||
char_index: Vec<u32>,
|
||||
char_index: Vec<u16>,
|
||||
char_length: Vec<u16>,
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user