Merge pull request #125 from Kerollmops/limit-memory-usage

Limit memory usage
This commit is contained in:
Clément Renault 2019-03-05 16:17:56 +01:00 committed by GitHub
commit 915f2e70a3
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 25 additions and 23 deletions

View File

@ -126,7 +126,7 @@ fn crop_text(
(m.char_index as usize) + (m.char_length as usize) <= start + (context * 2)
})
.map(|match_| {
Match { char_index: match_.char_index - start as u32, ..match_ }
Match { char_index: match_.char_index - start as u16, ..match_ }
})
.collect();

View File

@ -266,6 +266,8 @@ impl DatabaseIndex {
fn commit_update(&self, update: Update) -> Result<Arc<DatabaseView<Arc<DB>>>, Box<Error>> {
let batch = update.build()?;
self.db.write(batch)?;
self.db.compact_range(None, None);
self.db.flush(true)?;
let snapshot = Snapshot::new(self.db.clone());
let view = Arc::new(DatabaseView::new(snapshot)?);

View File

@ -56,7 +56,7 @@ where B: TokenizerBuilder
// FIXME: must use a checked `try_from` conversion instead of a truncating `as` cast
let attribute = self.attribute.0;
let word_index = word_index as u32;
let word_index = word_index as u16;
// insert the exact representation
let word_lower = word.to_lowercase();
@ -69,7 +69,7 @@ where B: TokenizerBuilder
let word_unidecoded = unidecode::unidecode(word).to_lowercase();
let word_unidecoded = word_unidecoded.trim();
if word_lower != word_unidecoded {
let char_index = char_index as u32;
let char_index = char_index as u16;
let char_length = length;
let doc_index = DocIndex { document_id, attribute, word_index, char_index, char_length };
@ -77,7 +77,7 @@ where B: TokenizerBuilder
}
}
let char_index = char_index as u32;
let char_index = char_index as u16;
let char_length = length;
let doc_index = DocIndex { document_id, attribute, word_index, char_index, char_length };

View File

@ -50,14 +50,14 @@ pub struct DocIndex {
/// The attribute in the document where the word was found
/// along with the index in it.
pub attribute: u16,
pub word_index: u32,
pub word_index: u16,
/// The position in bytes where the word was found
/// along with the length of it.
///
/// It informs on the original word area in the text indexed
/// without needing to run the tokenizer again.
pub char_index: u32,
pub char_index: u16,
pub char_length: u16,
}
@ -84,7 +84,7 @@ pub struct Match {
/// The attribute in the document where the word was found
/// along with the index in it.
pub attribute: u16,
pub word_index: u32,
pub word_index: u16,
/// Whether the word that match is an exact match or a prefix.
pub is_exact: bool,
@ -94,7 +94,7 @@ pub struct Match {
///
/// It informs on the original word area in the text indexed
/// without needing to run the tokenizer again.
pub char_index: u32,
pub char_index: u16,
pub char_length: u16,
}
@ -116,9 +116,9 @@ impl Match {
query_index: u32::max_value(),
distance: u8::max_value(),
attribute: u16::max_value(),
word_index: u32::max_value(),
word_index: u16::max_value(),
is_exact: true,
char_index: u32::max_value(),
char_index: u16::max_value(),
char_length: u16::max_value(),
}
}
@ -131,6 +131,6 @@ mod tests {
#[test]
fn docindex_mem_size() {
assert_eq!(mem::size_of::<DocIndex>(), 24);
assert_eq!(mem::size_of::<DocIndex>(), 16);
}
}

View File

@ -6,7 +6,7 @@ use crate::rank::criterion::Criterion;
use crate::rank::RawDocument;
#[inline]
fn sum_matches_attribute_index(query_index: &[u32], word_index: &[u32]) -> usize {
fn sum_matches_attribute_index(query_index: &[u32], word_index: &[u16]) -> usize {
let mut sum_word_index = 0;
let mut index = 0;

View File

@ -5,14 +5,14 @@ use slice_group_by::GroupBy;
use crate::rank::criterion::Criterion;
use crate::rank::RawDocument;
const MAX_DISTANCE: u32 = 8;
const MAX_DISTANCE: u16 = 8;
/// Turns a pair of references into an owned pair by cloning each side.
///
/// The first element of the result is a clone of the first reference,
/// the second element a clone of the second reference.
#[inline]
fn clone_tuple<T: Clone, U: Clone>(pair: (&T, &U)) -> (T, U) {
    let (left, right) = pair;
    let owned_left = T::clone(left);
    let owned_right = U::clone(right);
    (owned_left, owned_right)
}
fn index_proximity(lhs: u32, rhs: u32) -> u32 {
fn index_proximity(lhs: u16, rhs: u16) -> u16 {
if lhs < rhs {
cmp::min(rhs - lhs, MAX_DISTANCE)
} else {
@ -20,13 +20,13 @@ fn index_proximity(lhs: u32, rhs: u32) -> u32 {
}
}
fn attribute_proximity((lattr, lwi): (u16, u32), (rattr, rwi): (u16, u32)) -> u32 {
fn attribute_proximity((lattr, lwi): (u16, u16), (rattr, rwi): (u16, u16)) -> u16 {
if lattr != rattr { return MAX_DISTANCE }
index_proximity(lwi, rwi)
}
fn min_proximity((lattr, lwi): (&[u16], &[u32]), (rattr, rwi): (&[u16], &[u32])) -> u32 {
let mut min_prox = u32::max_value();
fn min_proximity((lattr, lwi): (&[u16], &[u16]), (rattr, rwi): (&[u16], &[u16])) -> u16 {
let mut min_prox = u16::max_value();
for a in lattr.iter().zip(lwi) {
for b in rattr.iter().zip(rwi) {
@ -43,8 +43,8 @@ fn matches_proximity(
query_index: &[u32],
distance: &[u8],
attribute: &[u16],
word_index: &[u32],
) -> u32
word_index: &[u16],
) -> u16
{
let mut query_index_groups = query_index.linear_group();
let mut proximity = 0;

View File

@ -79,7 +79,7 @@ impl RawDocument {
unsafe { &self.matches.matches.attribute.get_unchecked(r.start..r.end) }
}
pub fn word_index(&self) -> &[u32] {
pub fn word_index(&self) -> &[u16] {
let r = self.matches.range;
// it is safe because construction/modifications
// can only be done in this module
@ -93,7 +93,7 @@ impl RawDocument {
unsafe { &self.matches.matches.is_exact.get_unchecked(r.start..r.end) }
}
pub fn char_index(&self) -> &[u32] {
pub fn char_index(&self) -> &[u16] {
let r = self.matches.range;
// it is safe because construction/modifications
// can only be done in this module
@ -150,9 +150,9 @@ struct Matches {
query_index: Vec<u32>,
distance: Vec<u8>,
attribute: Vec<u16>,
word_index: Vec<u32>,
word_index: Vec<u16>,
is_exact: Vec<bool>,
char_index: Vec<u32>,
char_index: Vec<u16>,
char_length: Vec<u16>,
}