From b5df87a403f3337511d89778607a73295b6efd22 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Mon, 10 Dec 2018 20:14:16 +0100 Subject: [PATCH] feat: Introduce the Criteria type --- src/database/database_view.rs | 3 +- src/rank/criterion/mod.rs | 124 +++++++++------------------------- src/rank/query_builder.rs | 32 ++++----- 3 files changed, 48 insertions(+), 111 deletions(-) diff --git a/src/database/database_view.rs b/src/database/database_view.rs index 3fc79091f..f43d65439 100644 --- a/src/database/database_view.rs +++ b/src/database/database_view.rs @@ -11,7 +11,6 @@ use crate::database::{DocumentKey, DocumentKeyAttr}; use crate::database::{retrieve_data_schema, retrieve_data_index}; use crate::database::blob::positive::PositiveBlob; use crate::database::deserializer::Deserializer; -use crate::rank::criterion::Criterion; use crate::database::schema::Schema; use crate::rank::QueryBuilder; use crate::DocumentId; @@ -72,7 +71,7 @@ where D: Deref Ok(()) } - pub fn query_builder(&self) -> Result>>, Box> { + pub fn query_builder(&self) -> Result, Box> { QueryBuilder::new(self) } diff --git a/src/rank/criterion/mod.rs b/src/rank/criterion/mod.rs index 3d8598608..0252176c9 100644 --- a/src/rank/criterion/mod.rs +++ b/src/rank/criterion/mod.rs @@ -4,12 +4,12 @@ mod words_proximity; mod sum_of_words_attribute; mod sum_of_words_position; mod exact; +mod sort_by; +mod document_id; use std::cmp::Ordering; use std::ops::Deref; -use std::marker; -use serde::de::DeserializeOwned; use rocksdb::DB; use crate::database::DatabaseView; @@ -22,6 +22,8 @@ pub use self::{ sum_of_words_attribute::SumOfWordsAttribute, sum_of_words_position::SumOfWordsPosition, exact::Exact, + sort_by::SortBy, + document_id::DocumentId, }; pub trait Criterion @@ -60,84 +62,6 @@ where D: Deref } } -#[derive(Debug, Clone, Copy)] -pub struct DocumentId; - -impl Criterion for DocumentId -where D: Deref -{ - fn evaluate(&self, lhs: &Document, rhs: &Document, _: &DatabaseView) -> Ordering { - lhs.id.cmp(&rhs.id) - } -} - -/// An helper struct that permit to sort documents by -/// some of their stored attributes. -/// -/// # Note -/// -/// If a document cannot be deserialized it will be considered [`None`][]. -/// -/// Deserialized documents are compared like `Some(doc0).cmp(&Some(doc1))`, -/// so you must check the [`Ord`] of `Option` implementation. -/// -/// [`None`]: https://doc.rust-lang.org/std/option/enum.Option.html#variant.None -/// [`Ord`]: https://doc.rust-lang.org/std/option/enum.Option.html#impl-Ord -/// -/// # Example -/// -/// ``` -/// use serde_derive::Deserialize; -/// use meilidb::rank::criterion::*; -/// -/// #[derive(Deserialize, PartialOrd, Ord, PartialEq, Eq)] -/// struct TimeOnly { -/// time: String, -/// } -/// -/// let builder = CriteriaBuilder::with_capacity(7) -/// .add(SumOfTypos) -/// .add(NumberOfWords) -/// .add(WordsProximity) -/// .add(SumOfWordsAttribute) -/// .add(SumOfWordsPosition) -/// .add(Exact) -/// .add(SortBy::::new()) -/// .add(DocumentId); -/// -/// let criterion = builder.build(); -/// -/// ``` -#[derive(Default)] -pub struct SortBy { - _phantom: marker::PhantomData, -} - -impl SortBy { - pub fn new() -> Self { - SortBy { _phantom: marker::PhantomData } - } -} - -impl Criterion for SortBy -where D: Deref, - T: DeserializeOwned + Ord, -{ - fn evaluate(&self, lhs: &Document, rhs: &Document, view: &DatabaseView) -> Ordering { - let lhs = match view.retrieve_document::(lhs.id) { - Ok(doc) => Some(doc), - Err(e) => { eprintln!("{}", e); None }, - }; - - let rhs = match view.retrieve_document::(rhs.id) { - Ok(doc) => Some(doc), - Err(e) => { eprintln!("{}", e); None }, - }; - - lhs.cmp(&rhs) - } -} - pub struct CriteriaBuilder where D: Deref { @@ -172,21 +96,37 @@ where D: Deref self.inner.push(Box::new(criterion)); } - pub fn build(self) -> Vec>> { - self.inner + pub fn build(self) -> Criteria { + Criteria { inner: self.inner } } } -pub fn default() -> Vec>> +pub struct Criteria where D: Deref { - CriteriaBuilder::with_capacity(7) - .add(SumOfTypos) - .add(NumberOfWords) - .add(WordsProximity) - .add(SumOfWordsAttribute) - .add(SumOfWordsPosition) - .add(Exact) - .add(DocumentId) - .build() + inner: Vec>>, +} + +impl Default for Criteria +where D: Deref +{ + fn default() -> Self { + CriteriaBuilder::with_capacity(7) + .add(SumOfTypos) + .add(NumberOfWords) + .add(WordsProximity) + .add(SumOfWordsAttribute) + .add(SumOfWordsPosition) + .add(Exact) + .add(DocumentId) + .build() + } +} + +impl AsRef<[Box>]> for Criteria +where D: Deref +{ + fn as_ref(&self) -> &[Box>] { + &self.inner + } } diff --git a/src/rank/query_builder.rs b/src/rank/query_builder.rs index fec11819b..dbabe6203 100644 --- a/src/rank/query_builder.rs +++ b/src/rank/query_builder.rs @@ -9,8 +9,8 @@ use fst::Streamer; use rocksdb::DB; use crate::automaton::{self, DfaExt, AutomatonExt}; -use crate::rank::criterion::{self, Criterion}; use crate::rank::distinct_map::DistinctMap; +use crate::rank::criterion::Criteria; use crate::database::DatabaseView; use crate::{Match, DocumentId}; use crate::rank::Document; @@ -28,34 +28,34 @@ fn split_whitespace_automatons(query: &str) -> Vec { automatons } -pub struct QueryBuilder<'a, D, C> +pub struct QueryBuilder<'a, D> where D: Deref { view: &'a DatabaseView, - criteria: Vec, + criteria: Criteria, } -impl<'a, D> QueryBuilder<'a, D, Box>> +impl<'a, D> QueryBuilder<'a, D> where D: Deref { pub fn new(view: &'a DatabaseView) -> Result> { - QueryBuilder::with_criteria(view, criterion::default()) + QueryBuilder::with_criteria(view, Criteria::default()) } } -impl<'a, D, C> QueryBuilder<'a, D, C> +impl<'a, D> QueryBuilder<'a, D> where D: Deref { - pub fn with_criteria(view: &'a DatabaseView, criteria: Vec) -> Result> { + pub fn with_criteria(view: &'a DatabaseView, criteria: Criteria) -> Result> { Ok(QueryBuilder { view, criteria }) } - pub fn criteria(&mut self, criteria: Vec) -> &mut Self { + pub fn criteria(&mut self, criteria: Criteria) -> &mut Self { self.criteria = criteria; self } - pub fn with_distinct(self, function: F, size: usize) -> DistinctQueryBuilder<'a, D, F, C> { + pub fn with_distinct(self, function: F, size: usize) -> DistinctQueryBuilder<'a, D, F> { DistinctQueryBuilder { inner: self, function: function, @@ -103,16 +103,15 @@ where D: Deref } } -impl<'a, D, C> QueryBuilder<'a, D, C> +impl<'a, D> QueryBuilder<'a, D> where D: Deref, - C: Criterion { pub fn query(&self, query: &str, limit: usize) -> Vec { let mut documents = self.query_all(query); let mut groups = vec![documents.as_mut_slice()]; let view = &self.view; - for criterion in &self.criteria { + for criterion in self.criteria.as_ref() { let tmp_groups = mem::replace(&mut groups, Vec::new()); let mut computed = 0; @@ -131,26 +130,25 @@ where D: Deref, } } -pub struct DistinctQueryBuilder<'a, D, F, C> +pub struct DistinctQueryBuilder<'a, D, F> where D: Deref { - inner: QueryBuilder<'a, D, C>, + inner: QueryBuilder<'a, D>, function: F, size: usize, } -impl<'a, D, F, K, C> DistinctQueryBuilder<'a, D, F, C> +impl<'a, D, F, K> DistinctQueryBuilder<'a, D, F> where D: Deref, F: Fn(DocumentId, &DatabaseView) -> Option, K: Hash + Eq, - C: Criterion, { pub fn query(&self, query: &str, range: Range) -> Vec { let mut documents = self.inner.query_all(query); let mut groups = vec![documents.as_mut_slice()]; let view = &self.inner.view; - for criterion in &self.inner.criteria { + for criterion in self.inner.criteria.as_ref() { let tmp_groups = mem::replace(&mut groups, Vec::new()); for group in tmp_groups {