MeiliSearch/src/rank/query_builder.rs

use std::{cmp, mem, vec, str, char};
use std::ops::{Deref, Range};
use std::error::Error;
use std::hash::Hash;
use std::rc::Rc;

use group_by::GroupByMut;
use hashbrown::HashMap;
use fst::Streamer;
use rocksdb::DB;

use crate::automaton::{self, DfaExt, AutomatonExt};
use crate::rank::distinct_map::{DistinctMap, BufferedDistinctMap};
use crate::rank::criterion::Criteria;
use crate::database::DatabaseView;
use crate::{Match, DocumentId};
use crate::rank::Document;

/// Builds one automaton per whitespace-separated word of `query`.
///
/// Each word is lowercased before being compiled. Every word is compiled with
/// `automaton::build_dfa`, except the final word of a query that does not end
/// with a whitespace character: that one is compiled with
/// `automaton::build_prefix_dfa`.
///
/// The returned vector keeps the words in query order and is empty when the
/// query contains no word.
fn split_whitespace_automatons(query: &str) -> Vec<DfaExt> {
    let ends_with_whitespace = query.ends_with(char::is_whitespace);
    let mut lowercased = query.split_whitespace().map(str::to_lowercase).peekable();
    let mut dfas = Vec::new();

    // `peek` is needed to know whether the current word is the last one,
    // hence the explicit `while let` instead of a `for` loop.
    while let Some(word) = lowercased.next() {
        let is_last_word = lowercased.peek().is_none();
        let dfa = if is_last_word && !ends_with_whitespace {
            automaton::build_prefix_dfa(&word)
        } else {
            automaton::build_dfa(&word)
        };
        dfas.push(dfa);
    }

    dfas
}

/// Plain function pointer with the signature of a document filter.
///
/// This is the filter type parameter of the builder returned by
/// `QueryBuilder::new`, where no filter has been set yet.
pub type FilterFunc<D> = fn(DocumentId, &DatabaseView<D>) -> bool;

/// Builder used to run a textual query against a `DatabaseView`.
///
/// `D` is the handle dereferencing to the RocksDB database and `FI` is the
/// type of the optional filter predicate.
pub struct QueryBuilder<'a, D, FI>
where D: Deref<Target=DB>
{
    // view of the database the queries are run against
    view: &'a DatabaseView<D>,
    // criteria used, in order, to sort and group the matching documents
    criteria: Criteria<D>,
    // predicate set through `with_filter`; `None` when built by `with_criteria`
    filter: Option<FI>,
}

impl<'a, D> QueryBuilder<'a, D, FilterFunc<D>>
where D: Deref<Target=DB>
{
    /// Creates a builder over `view` that uses `Criteria::default()` and has
    /// no filter set.
    ///
    /// This simply delegates to `with_criteria`.
    pub fn new(view: &'a DatabaseView<D>) -> Result<Self, Box<Error>> {
        let criteria = Criteria::default();
        Self::with_criteria(view, criteria)
    }
}

impl<'a, D, FI> QueryBuilder<'a, D, FI>
where D: Deref<Target=DB>,
{
    /// Creates a builder over `view` that ranks documents with `criteria`
    /// and has no filter set.
    ///
    /// The current implementation always returns `Ok`.
    pub fn with_criteria(view: &'a DatabaseView<D>, criteria: Criteria<D>) -> Result<Self, Box<Error>> {
        Ok(Self { view, criteria, filter: None })
    }

    /// Returns a builder that keeps the same view and criteria but stores
    /// `function` as its filter, replacing any previously stored one.
    pub fn with_filter<F>(self, function: F) -> QueryBuilder<'a, D, F>
    where F: Fn(DocumentId, &DatabaseView<D>) -> bool,
    {
        // the previous filter, if any, is dropped here
        let QueryBuilder { view, criteria, .. } = self;
        QueryBuilder { view, criteria, filter: Some(function) }
    }

    /// Wraps this builder into a `DistinctQueryBuilder`.
    ///
    /// `function` computes the optional distinct key of a document and
    /// `size` is the value later handed to `DistinctMap::new`.
    pub fn with_distinct<F, K>(self, function: F, size: usize) -> DistinctQueryBuilder<'a, D, FI, F>
    where F: Fn(DocumentId, &DatabaseView<D>) -> Option<K>,
          K: Hash + Eq,
    {
        DistinctQueryBuilder { inner: self, function, size }
    }

    /// Collects every document matched by at least one word of `query`.
    ///
    /// One automaton is built per query word, all of them are searched in the
    /// view's map and the resulting streams are merged with a union. Each
    /// indexed occurrence of a matching word becomes a `Match` attached to
    /// its document. The returned documents come straight out of a hash map,
    /// so they are in no particular order.
    fn query_all(&self, query: &str) -> Vec<Document> {
        let automatons = split_whitespace_automatons(query);

        let mut union_builder = fst::map::OpBuilder::new();
        for dfa in &automatons {
            union_builder.push(self.view.blob().as_map().search(dfa));
        }
        let mut stream = union_builder.union();

        let mut matches_by_document = HashMap::new();

        // `stream` is an fst `Streamer`, not an `Iterator`: it cannot be
        // driven by a `for` loop
        while let Some((word, indexed_values)) = stream.next() {
            for indexed_value in indexed_values {
                // `index` tells which automaton (i.e. which query word) matched
                let dfa = &automatons[indexed_value.index];
                let distance = dfa.eval(word).to_u8();
                let is_exact = distance == 0 && word.len() == dfa.query_len();

                // `value` is the position of this word's entries in the indexes
                let all_indexes = self.view.blob().as_indexes();
                for doc_index in &all_indexes[indexed_value.value as usize] {
                    let found = Match {
                        query_index: indexed_value.index as u32,
                        distance,
                        attribute: doc_index.attribute,
                        is_exact,
                        word_area: doc_index.word_area,
                    };
                    matches_by_document
                        .entry(doc_index.document_id)
                        .or_insert_with(Vec::new)
                        .push(found);
                }
            }
        }

        matches_by_document
            .into_iter()
            .map(|(document_id, document_matches)| Document::from_matches(document_id, document_matches))
            .collect()
    }
}

impl<'a, D, FI> QueryBuilder<'a, D, FI>
where D: Deref<Target=DB>,
      FI: Fn(DocumentId, &DatabaseView<D>) -> bool,
{
    /// Returns the documents matching `query` that fall inside `range` once
    /// ordered by the builder's criteria.
    ///
    /// When a filter has been set with `with_filter`, only the documents for
    /// which it returns `true` are kept; they are removed before ranking so
    /// that `range` applies to the filtered results.
    ///
    /// The result holds at most `range.len()` documents and is empty when
    /// `range.start` is past the number of matching documents.
    pub fn query(&self, query: &str, range: Range<usize>) -> Vec<Document> {
        let mut documents = self.query_all(query);

        // apply the user filter first: the offsets of `range` must be
        // computed on the documents the caller is allowed to see
        if let Some(filter) = &self.filter {
            documents.retain(|document| filter(document.id, self.view));
        }

        let mut groups = vec![documents.as_mut_slice()];
        let view = &self.view;

        'criteria: for criterion in self.criteria.as_ref() {
            let tmp_groups = mem::replace(&mut groups, Vec::new());
            let mut documents_seen = 0;

            for group in tmp_groups {
                // if this group does not overlap with the requested range,
                // push it without sorting and splitting it
                if documents_seen + group.len() < range.start {
                    documents_seen += group.len();
                    groups.push(group);
                    continue;
                }

                group.sort_unstable_by(|a, b| criterion.evaluate(a, b, view));

                // split the sorted group into runs of documents that this
                // criterion considers equal; the next criterion refines them
                for group in GroupByMut::new(group, |a, b| criterion.eq(a, b, view)) {
                    documents_seen += group.len();
                    groups.push(group);

                    // we have sorted enough documents if the last document sorted is after
                    // the end of the requested range, we can continue to the next criterion
                    if documents_seen >= range.end { continue 'criteria }
                }
            }
        }

        // `drain` removes the documents efficiently using `ptr::copy`
        // TODO it could be more efficient to have a custom iterator
        let offset = cmp::min(documents.len(), range.start);
        documents.drain(0..offset);
        documents.truncate(range.len());
        documents
    }
}

/// Query builder that additionally applies a distinct rule to the documents.
///
/// It is created by `QueryBuilder::with_distinct`. `FD` is the type of the
/// function computing the optional distinct key of a document.
pub struct DistinctQueryBuilder<'a, D, FI, FD>
where D: Deref<Target=DB>
{
    // the wrapped builder: provides the view, the criteria and the filter
    inner: QueryBuilder<'a, D, FI>,
    // computes the distinct key of a document, `None` meaning "no key"
    function: FD,
    // value given to `DistinctMap::new` when querying
    // NOTE(review): presumably the number of documents allowed per key - confirm
    size: usize,
}

impl<'a, D, FI, FD> DistinctQueryBuilder<'a, D, FI, FD>
where D: Deref<Target=DB>,
{
    /// Returns a builder with the same distinct function and size whose
    /// inner `QueryBuilder` stores `function` as its filter.
    pub fn with_filter<F>(self, function: F) -> DistinctQueryBuilder<'a, D, F, FD>
    where F: Fn(DocumentId, &DatabaseView<D>) -> bool,
    {
        let DistinctQueryBuilder { inner, function: distinct, size } = self;
        let inner = inner.with_filter(function);

        DistinctQueryBuilder { inner, function: distinct, size }
    }
}

impl<'a, D, FI, FD, K> DistinctQueryBuilder<'a, D, FI, FD>
where D: Deref<Target=DB>,
      FI: Fn(DocumentId, &DatabaseView<D>) -> bool,
      FD: Fn(DocumentId, &DatabaseView<D>) -> Option<K>,
      K: Hash + Eq,
{
    /// Returns the documents matching `query` that fall inside `range` once
    /// ordered by the criteria and reduced by the distinct rule.
    ///
    /// When a filter has been set with `with_filter`, only the documents for
    /// which it returns `true` are considered; they are removed before the
    /// ranking and the distinct computation. The bounds of `range` count the
    /// documents accepted by the distinct map, not the raw matching documents.
    ///
    /// An empty `range` yields an empty vector.
    ///
    /// # Panics
    ///
    /// Panics with "BUG: cached key not found" if a document reached by the
    /// final pass had no distinct key cached while applying the criteria.
    pub fn query(self, query: &str, range: Range<usize>) -> Vec<Document> {
        // nothing can be returned for an empty range; bail out early because
        // the final loop stops on `out_documents.len() == range.len()`, a
        // check done after pushing that can never succeed for a length of 0
        if range.start >= range.end {
            return Vec::new();
        }

        let mut documents = self.inner.query_all(query);

        // apply the user filter first: neither the range nor the distinct
        // map must account for documents the caller asked to exclude
        if let Some(filter) = &self.inner.filter {
            documents.retain(|document| filter(document.id, self.inner.view));
        }

        let mut groups = vec![documents.as_mut_slice()];
        let mut key_cache = HashMap::new();
        let view = &self.inner.view;

        // these two variables inform on the current distinct map and
        // on the raw offset of the start of the group where the
        // range.start bound is located according to the distinct function
        let mut distinct_map = DistinctMap::new(self.size);
        let mut distinct_raw_offset = 0;

        'criteria: for criterion in self.inner.criteria.as_ref() {
            let tmp_groups = mem::replace(&mut groups, Vec::new());
            let mut buf_distinct = BufferedDistinctMap::new(&mut distinct_map);
            let mut documents_seen = 0;

            for group in tmp_groups {
                // if this group does not overlap with the requested range,
                // push it without sorting and splitting it
                if documents_seen + group.len() < distinct_raw_offset {
                    documents_seen += group.len();
                    groups.push(group);
                    continue;
                }

                group.sort_unstable_by(|a, b| criterion.evaluate(a, b, view));

                for group in GroupByMut::new(group, |a, b| criterion.eq(a, b, view)) {
                    // we must compute the real distinguished len of this sub-group
                    for document in group.iter() {
                        // the distinct function is called at most once per
                        // document: its result is shared through an `Rc`
                        let entry = key_cache.entry(document.id);
                        let key = entry.or_insert_with(|| (self.function)(document.id, view).map(Rc::new));

                        match key.clone() {
                            Some(key) => buf_distinct.register(key),
                            None      => buf_distinct.register_without_key(),
                        };

                        // the requested range end is reached: stop computing distinct
                        if buf_distinct.len() >= range.end { break }
                    }

                    documents_seen += group.len();
                    groups.push(group);

                    // if this sub-group does not overlap with the requested range
                    // we must update the distinct map and its start index
                    if buf_distinct.len() < range.start {
                        buf_distinct.transfert_to_internal();
                        distinct_raw_offset = documents_seen;
                    }

                    // we have sorted enough documents if the last document sorted is after
                    // the end of the requested range, we can continue to the next criterion
                    if buf_distinct.len() >= range.end { continue 'criteria }
                }
            }
        }

        let mut out_documents = Vec::with_capacity(range.len());
        let mut seen = BufferedDistinctMap::new(&mut distinct_map);

        // final pass: replay the distinct rule from the first group that can
        // contain `range.start` and keep the accepted documents of the range
        for document in documents.into_iter().skip(distinct_raw_offset) {
            let key = key_cache.remove(&document.id).expect("BUG: cached key not found");

            let accepted = match key {
                Some(key) => seen.register(key),
                None      => seen.register_without_key(),
            };

            if accepted && seen.len() > range.start {
                out_documents.push(document);
                if out_documents.len() == range.len() { break }
            }
        }

        out_documents
    }
}
feat: Allow querying a range of documents 2018-12-16 14:21:06 +01:00			`use std::{cmp, mem, vec, str, char};`
			`use std::ops::{Deref, Range};`
feat: Introduce the QueryBuilder struct 2018-11-27 19:11:33 +01:00			`use std::error::Error;`
feat: Introduce a way to distinct documents 2018-10-17 13:35:34 +02:00			`use std::hash::Hash;`
feat: Allow querying a range of documents 2018-12-16 14:21:06 +01:00			`use std::rc::Rc;`
feat: Introduce the Criteria struct 2018-10-10 16:57:21 +02:00
feat: Replace the fnv hashmap by the hashbrown one 2018-12-02 13:11:02 +01:00			`use group_by::GroupByMut;`
			`use hashbrown::HashMap;`
			`use fst::Streamer;`
feat: Make the DatabaseView become Sync + Send 2018-12-07 17:59:03 +01:00			`use rocksdb::DB;`
feat: Introduce the Criteria struct 2018-10-10 16:57:21 +02:00
feat: Introduce the QueryBuilder struct 2018-11-27 19:11:33 +01:00			`use crate::automaton::{self, DfaExt, AutomatonExt};`
feat: Introduce the BufferedDistinctMap struct 2018-12-16 14:21:41 +01:00			`use crate::rank::distinct_map::{DistinctMap, BufferedDistinctMap};`
feat: Introduce the Criteria type 2018-12-10 20:14:16 +01:00			`use crate::rank::criterion::Criteria;`
feat: Simplify the steps to query the database 2018-12-07 14:41:06 +01:00			`use crate::database::DatabaseView;`
feat: Introduce a way to distinct documents 2018-10-17 13:35:34 +02:00			`use crate::{Match, DocumentId};`
feat: Introduce the QueryBuilder struct 2018-11-27 19:11:33 +01:00			`use crate::rank::Document;`
feat: Introduce the Criteria struct 2018-10-10 16:57:21 +02:00
feat: Introduce the QueryBuilder struct 2018-11-27 19:11:33 +01:00			`fn split_whitespace_automatons(query: &str) -> Vec<DfaExt> {`
feat: Consider the last query word be a prefix if the last word is not followed by a space. 2018-12-11 14:49:45 +01:00			`let has_end_whitespace = query.chars().last().map_or(false, char::is_whitespace);`
feat: Introduce the QueryBuilder struct 2018-11-27 19:11:33 +01:00			`let mut automatons = Vec::new();`
feat: Implement a better automaton builder This new implementation only allows the last word of a query string to be a prefix. 2018-12-10 12:16:24 +01:00			`let mut words = query.split_whitespace().map(str::to_lowercase).peekable();`
feat: Consider the last query word be a prefix if the last word is not followed by a space. 2018-12-11 14:49:45 +01:00
feat: Implement a better automaton builder This new implementation only allows the last word of a query string to be a prefix. 2018-12-10 12:16:24 +01:00			`while let Some(word) = words.next() {`
feat: Consider the last query word be a prefix if the last word is not followed by a space. 2018-12-11 14:49:45 +01:00			`let has_following_word = words.peek().is_some();`
			`let lev = if has_following_word \|\| has_end_whitespace {`
			`automaton::build_dfa(&word)`
			`} else {`
			`automaton::build_prefix_dfa(&word)`
feat: Implement a better automaton builder This new implementation only allows the last word of a query string to be a prefix. 2018-12-10 12:16:24 +01:00			`};`
feat: Introduce the QueryBuilder struct 2018-11-27 19:11:33 +01:00			`automatons.push(lev);`
			`}`
feat: Consider the last query word be a prefix if the last word is not followed by a space. 2018-12-11 14:49:45 +01:00
feat: Introduce the QueryBuilder struct 2018-11-27 19:11:33 +01:00			`automatons`
feat: Introduce a way to distinct documents 2018-10-17 13:35:34 +02:00			`}`

feat: Introduce filtering methods for Distinct/QueryBuilder 2018-12-29 20:16:29 +01:00			`pub type FilterFunc<D> = fn(DocumentId, &DatabaseView<D>) -> bool;`

			`pub struct QueryBuilder<'a, D, FI>`
feat: Make the DatabaseView become Sync + Send 2018-12-07 17:59:03 +01:00			`where D: Deref<Target=DB>`
			`{`
			`view: &'a DatabaseView<D>,`
feat: Introduce the Criteria type 2018-12-10 20:14:16 +01:00			`criteria: Criteria<D>,`
feat: Introduce filtering methods for Distinct/QueryBuilder 2018-12-29 20:16:29 +01:00			`filter: Option<FI>,`
feat: Introduce the Criteria struct 2018-10-10 16:57:21 +02:00			`}`

feat: Introduce filtering methods for Distinct/QueryBuilder 2018-12-29 20:16:29 +01:00			`impl<'a, D> QueryBuilder<'a, D, FilterFunc<D>>`
feat: Make the DatabaseView become Sync + Send 2018-12-07 17:59:03 +01:00			`where D: Deref<Target=DB>`
			`{`
			`pub fn new(view: &'a DatabaseView<D>) -> Result<Self, Box<Error>> {`
feat: Introduce the Criteria type 2018-12-10 20:14:16 +01:00			`QueryBuilder::with_criteria(view, Criteria::default())`
feat: Introduce the QueryBuilder struct 2018-11-27 19:11:33 +01:00			`}`
			`}`
feat: Introduce the Criteria struct 2018-10-10 16:57:21 +02:00
feat: Introduce filtering methods for Distinct/QueryBuilder 2018-12-29 20:16:29 +01:00			`impl<'a, D, FI> QueryBuilder<'a, D, FI>`
			`where D: Deref<Target=DB>,`
feat: Make the DatabaseView become Sync + Send 2018-12-07 17:59:03 +01:00			`{`
feat: Introduce the Criteria type 2018-12-10 20:14:16 +01:00			`pub fn with_criteria(view: &'a DatabaseView<D>, criteria: Criteria<D>) -> Result<Self, Box<Error>> {`
feat: Introduce filtering methods for Distinct/QueryBuilder 2018-12-29 20:16:29 +01:00			`Ok(QueryBuilder { view, criteria, filter: None })`
feat: Introduce the QueryBuilder struct 2018-11-27 19:11:33 +01:00			`}`

feat: Introduce filtering methods for Distinct/QueryBuilder 2018-12-29 20:16:29 +01:00			`pub fn with_filter<F>(self, function: F) -> QueryBuilder<'a, D, F>`
			`where F: Fn(DocumentId, &DatabaseView<D>) -> bool,`
			`{`
			`QueryBuilder {`
			`view: self.view,`
			`criteria: self.criteria,`
			`filter: Some(function)`
			`}`
feat: Introduce the QueryBuilder struct 2018-11-27 19:11:33 +01:00			`}`

feat: Introduce filtering methods for Distinct/QueryBuilder 2018-12-29 20:16:29 +01:00			`pub fn with_distinct<F, K>(self, function: F, size: usize) -> DistinctQueryBuilder<'a, D, FI, F>`
			`where F: Fn(DocumentId, &DatabaseView<D>) -> Option<K>,`
			`K: Hash + Eq,`
			`{`
feat: Introduce the QueryBuilder struct 2018-11-27 19:11:33 +01:00			`DistinctQueryBuilder {`
feat: Make the OpBuilder work only for PositiveBlob 2018-11-28 17:12:24 +01:00			`inner: self,`
feat: Introduce the QueryBuilder struct 2018-11-27 19:11:33 +01:00			`function: function,`
			`size: size`
feat: Introduce the Criterion trait 2018-10-11 14:04:41 +02:00			`}`
feat: Introduce the Criteria struct 2018-10-10 16:57:21 +02:00			`}`

feat: Introduce the QueryBuilder struct 2018-11-27 19:11:33 +01:00			`fn query_all(&self, query: &str) -> Vec<Document> {`
			`let automatons = split_whitespace_automatons(query);`
feat: Make the OpBuilder work only for PositiveBlob 2018-11-28 17:12:24 +01:00
			`let mut stream = {`
			`let mut op_builder = fst::map::OpBuilder::new();`
			`for automaton in &automatons {`
fix: Update the DatabaseView to retrieve the index at creation remove this computation from the QueryBuilder 2018-12-07 16:20:12 +01:00			`let stream = self.view.blob().as_map().search(automaton);`
feat: Make the OpBuilder work only for PositiveBlob 2018-11-28 17:12:24 +01:00			`op_builder.push(stream);`
			`}`
			`op_builder.union()`
			`};`

feat: Replace the fnv hashmap by the hashbrown one 2018-12-02 13:11:02 +01:00			`let mut matches = HashMap::new();`
feat: Introduce the Criterion trait 2018-10-11 14:04:41 +02:00
feat: Make the OpBuilder work only for PositiveBlob 2018-11-28 17:12:24 +01:00			`while let Some((input, indexed_values)) = stream.next() {`
feat: Introduce the Criterion trait 2018-10-11 14:04:41 +02:00			`for iv in indexed_values {`
feat: Introduce the QueryBuilder struct 2018-11-27 19:11:33 +01:00			`let automaton = &automatons[iv.index];`
feat: Make the OpBuilder work only for PositiveBlob 2018-11-28 17:12:24 +01:00			`let distance = automaton.eval(input).to_u8();`
			`let is_exact = distance == 0 && input.len() == automaton.query_len();`

fix: Update the DatabaseView to retrieve the index at creation remove this computation from the QueryBuilder 2018-12-07 16:20:12 +01:00			`let doc_indexes = self.view.blob().as_indexes();`
fix: Make the merge operator work 2018-12-01 18:37:21 +01:00			`let doc_indexes = &doc_indexes[iv.value as usize];`
feat: Introduce the Criterion trait 2018-10-11 14:04:41 +02:00
feat: Make the OpBuilder work only for PositiveBlob 2018-11-28 17:12:24 +01:00			`for doc_index in doc_indexes {`
feat: Introduce the Criterion trait 2018-10-11 14:04:41 +02:00			`let match_ = Match {`
			`query_index: iv.index as u32,`
			`distance: distance,`
feat: Introduce a way to distinct documents 2018-10-17 13:35:34 +02:00			`attribute: doc_index.attribute,`
feat: Introduce the Criterion trait 2018-10-11 14:04:41 +02:00			`is_exact: is_exact,`
feat: Introduce a WordArea struct Useful to highlight matching areas in the original text. 2018-12-23 16:46:49 +01:00			`word_area: doc_index.word_area,`
feat: Introduce the Criterion trait 2018-10-11 14:04:41 +02:00			`};`
feat: Introduce a way to distinct documents 2018-10-17 13:35:34 +02:00			`matches.entry(doc_index.document_id).or_insert_with(Vec::new).push(match_);`
feat: Introduce the Criterion trait 2018-10-11 14:04:41 +02:00			`}`
feat: Introduce the Criteria struct 2018-10-10 16:57:21 +02:00			`}`
			`}`

fix: Remove stop-words from the serve examples 2018-10-21 16:42:19 +02:00			`matches.into_iter().map(\|(id, matches)\| Document::from_matches(id, matches)).collect()`
feat: Introduce a way to distinct documents 2018-10-17 13:35:34 +02:00			`}`
			`}`

feat: Introduce filtering methods for Distinct/QueryBuilder 2018-12-29 20:16:29 +01:00			`impl<'a, D, FI> QueryBuilder<'a, D, FI>`
feat: Make the DatabaseView become Sync + Send 2018-12-07 17:59:03 +01:00			`where D: Deref<Target=DB>,`
feat: Introduce filtering methods for Distinct/QueryBuilder 2018-12-29 20:16:29 +01:00			`FI: Fn(DocumentId, &DatabaseView<D>) -> bool,`
feat: Introduce a way to distinct documents 2018-10-17 13:35:34 +02:00			`{`
feat: Allow querying a range of documents 2018-12-16 14:21:06 +01:00			`pub fn query(&self, query: &str, range: Range<usize>) -> Vec<Document> {`
feat: Introduce the QueryBuilder struct 2018-11-27 19:11:33 +01:00			`let mut documents = self.query_all(query);`
feat: Introduce a way to distinct documents 2018-10-17 13:35:34 +02:00			`let mut groups = vec![documents.as_mut_slice()];`
feat: Simplify the steps to query the database 2018-12-07 14:41:06 +01:00			`let view = &self.view;`
feat: Introduce a way to distinct documents 2018-10-17 13:35:34 +02:00
feat: Allow querying a range of documents 2018-12-16 14:21:06 +01:00			`'criteria: for criterion in self.criteria.as_ref() {`
feat: Introduce a way to distinct documents 2018-10-17 13:35:34 +02:00			`let tmp_groups = mem::replace(&mut groups, Vec::new());`
feat: Allow querying a range of documents 2018-12-16 14:21:06 +01:00			`let mut documents_seen = 0;`

			`for group in tmp_groups {`
			`// if this group does not overlap with the requested range,`
			`// push it without sorting and splitting it`
			`if documents_seen + group.len() < range.start {`
			`documents_seen += group.len();`
			`groups.push(group);`
			`continue;`
			`}`
feat: Introduce a way to distinct documents 2018-10-17 13:35:34 +02:00
feat: Simplify the steps to query the database 2018-12-07 14:41:06 +01:00			`group.sort_unstable_by(\|a, b\| criterion.evaluate(a, b, view));`
feat: Allow querying a range of documents 2018-12-16 14:21:06 +01:00
feat: Simplify the steps to query the database 2018-12-07 14:41:06 +01:00			`for group in GroupByMut::new(group, \|a, b\| criterion.eq(a, b, view)) {`
feat: Allow querying a range of documents 2018-12-16 14:21:06 +01:00			`documents_seen += group.len();`
feat: Introduce a way to distinct documents 2018-10-17 13:35:34 +02:00			`groups.push(group);`
feat: Allow querying a range of documents 2018-12-16 14:21:06 +01:00
			`// we have sort enough documents if the last document sorted is after`
			`// the end of the requested range, we can continue to the next criterion`
			`if documents_seen >= range.end { continue 'criteria }`
feat: Introduce a way to distinct documents 2018-10-17 13:35:34 +02:00			`}`
			`}`
			`}`

feat: Allow querying a range of documents 2018-12-16 14:21:06 +01:00			// `drain` removes the documents efficiently using `ptr::copy`
			`// TODO it could be more efficient to have a custom iterator`
			`let offset = cmp::min(documents.len(), range.start);`
			`documents.drain(0..offset);`
			`documents.truncate(range.len());`
fix: Improve the bucket sort algorithm 2018-12-07 11:53:17 +01:00			`documents`
feat: Introduce a way to distinct documents 2018-10-17 13:35:34 +02:00			`}`
feat: Introduce the QueryBuilder struct 2018-11-27 19:11:33 +01:00			`}`
feat: Introduce the Criterion trait 2018-10-11 14:04:41 +02:00
feat: Introduce filtering methods for Distinct/QueryBuilder 2018-12-29 20:16:29 +01:00			`pub struct DistinctQueryBuilder<'a, D, FI, FD>`
feat: Make the DatabaseView become Sync + Send 2018-12-07 17:59:03 +01:00			`where D: Deref<Target=DB>`
			`{`
feat: Introduce filtering methods for Distinct/QueryBuilder 2018-12-29 20:16:29 +01:00			`inner: QueryBuilder<'a, D, FI>,`
			`function: FD,`
feat: Introduce the QueryBuilder struct 2018-11-27 19:11:33 +01:00			`size: usize,`
			`}`
feat: Introduce a way to distinct documents 2018-10-17 13:35:34 +02:00
feat: Introduce filtering methods for Distinct/QueryBuilder 2018-12-29 20:16:29 +01:00			`impl<'a, D, FI, FD> DistinctQueryBuilder<'a, D, FI, FD>`
			`where D: Deref<Target=DB>,`
			`{`
			`pub fn with_filter<F>(self, function: F) -> DistinctQueryBuilder<'a, D, F, FD>`
			`where F: Fn(DocumentId, &DatabaseView<D>) -> bool,`
			`{`
			`DistinctQueryBuilder {`
			`inner: self.inner.with_filter(function),`
			`function: self.function,`
			`size: self.size`
			`}`
			`}`
			`}`

			`impl<'a, D, FI, FD, K> DistinctQueryBuilder<'a, D, FI, FD>`
feat: Make the DatabaseView become Sync + Send 2018-12-07 17:59:03 +01:00			`where D: Deref<Target=DB>,`
feat: Introduce filtering methods for Distinct/QueryBuilder 2018-12-29 20:16:29 +01:00			`FI: Fn(DocumentId, &DatabaseView<D>) -> bool,`
			`FD: Fn(DocumentId, &DatabaseView<D>) -> Option<K>,`
feat: Make the OpBuilder work only for PositiveBlob 2018-11-28 17:12:24 +01:00			`K: Hash + Eq,`
feat: Introduce the QueryBuilder struct 2018-11-27 19:11:33 +01:00			`{`
feat: Introduce filtering methods for Distinct/QueryBuilder 2018-12-29 20:16:29 +01:00			`pub fn query(self, query: &str, range: Range<usize>) -> Vec<Document> {`
feat: Make the OpBuilder work only for PositiveBlob 2018-11-28 17:12:24 +01:00			`let mut documents = self.inner.query_all(query);`
			`let mut groups = vec![documents.as_mut_slice()];`
feat: Allow query distinct a range of documents 2018-12-16 14:22:04 +01:00			`let mut key_cache = HashMap::new();`
feat: Simplify the steps to query the database 2018-12-07 14:41:06 +01:00			`let view = &self.inner.view;`
feat: Introduce a way to distinct documents 2018-10-17 13:35:34 +02:00
feat: Allow query distinct a range of documents 2018-12-16 14:22:04 +01:00			`// these two variables informs on the current distinct map and`
			`// on the raw offset of the start of the group where the`
			`// range.start bound is located according to the distinct function`
			`let mut distinct_map = DistinctMap::new(self.size);`
			`let mut distinct_raw_offset = 0;`

			`'criteria: for criterion in self.inner.criteria.as_ref() {`
feat: Make the OpBuilder work only for PositiveBlob 2018-11-28 17:12:24 +01:00			`let tmp_groups = mem::replace(&mut groups, Vec::new());`
feat: Allow query distinct a range of documents 2018-12-16 14:22:04 +01:00			`let mut buf_distinct = BufferedDistinctMap::new(&mut distinct_map);`
			`let mut documents_seen = 0;`

			`for group in tmp_groups {`
			`// if this group does not overlap with the requested range,`
			`// push it without sorting and splitting it`
			`if documents_seen + group.len() < distinct_raw_offset {`
			`documents_seen += group.len();`
			`groups.push(group);`
			`continue;`
			`}`
feat: Introduce a way to distinct documents 2018-10-17 13:35:34 +02:00
feat: Simplify the steps to query the database 2018-12-07 14:41:06 +01:00			`group.sort_unstable_by(\|a, b\| criterion.evaluate(a, b, view));`
feat: Allow query distinct a range of documents 2018-12-16 14:22:04 +01:00
feat: Simplify the steps to query the database 2018-12-07 14:41:06 +01:00			`for group in GroupByMut::new(group, \|a, b\| criterion.eq(a, b, view)) {`
feat: Allow query distinct a range of documents 2018-12-16 14:22:04 +01:00			`// we must compute the real distinguished len of this sub-group`
feat: Rename registers methods of DistinctMap 2018-12-13 11:54:09 +01:00			`for document in group.iter() {`
feat: Allow query distinct a range of documents 2018-12-16 14:22:04 +01:00			`let entry = key_cache.entry(document.id);`
			`let key = entry.or_insert_with(\|\| (self.function)(document.id, view).map(Rc::new));`

			`match key.clone() {`
			`Some(key) => buf_distinct.register(key),`
			`None => buf_distinct.register_without_key(),`
feat: Rename registers methods of DistinctMap 2018-12-13 11:54:09 +01:00			`};`
feat: Allow query distinct a range of documents 2018-12-16 14:22:04 +01:00
			`// the requested range end is reached: stop computing distinct`
			`if buf_distinct.len() >= range.end { break }`
feat: Rename registers methods of DistinctMap 2018-12-13 11:54:09 +01:00			`}`
feat: Allow query distinct a range of documents 2018-12-16 14:22:04 +01:00
			`documents_seen += group.len();`
feat: Make the OpBuilder work only for PositiveBlob 2018-11-28 17:12:24 +01:00			`groups.push(group);`
feat: Allow query distinct a range of documents 2018-12-16 14:22:04 +01:00
			`// if this sub-group does not overlap with the requested range`
			`// we must update the distinct map and its start index`
			`if buf_distinct.len() < range.start {`
			`buf_distinct.transfert_to_internal();`
			`distinct_raw_offset = documents_seen;`
			`}`

			`// we have sort enough documents if the last document sorted is after`
			`// the end of the requested range, we can continue to the next criterion`
			`if buf_distinct.len() >= range.end { continue 'criteria }`
feat: Make the OpBuilder work only for PositiveBlob 2018-11-28 17:12:24 +01:00			`}`
			`}`
fix: Allow documents to bypass attribute distinction In situations where the attribute is not present. 2018-10-18 15:08:04 +02:00			`}`

feat: Allow query distinct a range of documents 2018-12-16 14:22:04 +01:00			`let mut out_documents = Vec::with_capacity(range.len());`
			`let mut seen = BufferedDistinctMap::new(&mut distinct_map);`

			`for document in documents.into_iter().skip(distinct_raw_offset) {`
			`let key = key_cache.remove(&document.id).expect("BUG: cached key not found");`
feat: Introduce a way to distinct documents 2018-10-17 13:35:34 +02:00
feat: Allow query distinct a range of documents 2018-12-16 14:22:04 +01:00			`let accepted = match key {`
feat: Rename registers methods of DistinctMap 2018-12-13 11:54:09 +01:00			`Some(key) => seen.register(key),`
feat: Allow query distinct a range of documents 2018-12-16 14:22:04 +01:00			`None => seen.register_without_key(),`
feat: Make the OpBuilder work only for PositiveBlob 2018-11-28 17:12:24 +01:00			`};`
feat: Introduce a way to distinct documents 2018-10-17 13:35:34 +02:00
feat: Allow query distinct a range of documents 2018-12-16 14:22:04 +01:00			`if accepted && seen.len() > range.start {`
feat: Improve the query distinct performances 2018-12-13 11:54:47 +01:00			`out_documents.push(document);`
feat: Allow query distinct a range of documents 2018-12-16 14:22:04 +01:00			`if out_documents.len() == range.len() { break }`
feat: Make the OpBuilder work only for PositiveBlob 2018-11-28 17:12:24 +01:00			`}`
feat: Introduce a way to distinct documents 2018-10-17 13:35:34 +02:00			`}`

feat: Make the OpBuilder work only for PositiveBlob 2018-11-28 17:12:24 +01:00			`out_documents`
feat: Introduce the Criterion trait 2018-10-11 14:04:41 +02:00			`}`
feat: Introduce the Criteria struct 2018-10-10 16:57:21 +02:00			`}`