From a2f0f953375ac4029aa210d67b21ac448f6ea1e7 Mon Sep 17 00:00:00 2001 From: qdequele Date: Mon, 2 Mar 2020 14:34:29 +0100 Subject: [PATCH] use distinct on search --- meilisearch-core/src/query_builder.rs | 2 +- meilisearch-core/src/store/mod.rs | 15 +++++++++++++++ meilisearch-core/src/update/settings_update.rs | 3 +-- meilisearch-http/src/helpers/meilisearch.rs | 17 +++++++++++++++++ .../tests/settings_ranking_rules.rs | 12 ++++++------ meilisearch-schema/src/schema.rs | 4 ++++ 6 files changed, 44 insertions(+), 9 deletions(-) diff --git a/meilisearch-core/src/query_builder.rs b/meilisearch-core/src/query_builder.rs index 5bde93665..6e20d2463 100644 --- a/meilisearch-core/src/query_builder.rs +++ b/meilisearch-core/src/query_builder.rs @@ -75,7 +75,7 @@ impl<'c, 'f, 'd> QueryBuilder<'c, 'f, 'd> { self.timeout = Some(timeout) } - pub fn with_distinct(&mut self, function: F, size: usize) + pub fn with_distinct(&mut self, size: usize, function: F) where F: Fn(DocumentId) -> Option + 'd, { diff --git a/meilisearch-core/src/store/mod.rs b/meilisearch-core/src/store/mod.rs index 90c69cc5e..0b12f94dc 100644 --- a/meilisearch-core/src/store/mod.rs +++ b/meilisearch-core/src/store/mod.rs @@ -254,6 +254,21 @@ impl Index { } } + pub fn document_attribute_bytes<'txn>( + &self, + reader: &'txn heed::RoTxn, + document_id: DocumentId, + attribute: FieldId, + ) -> MResult> { + let bytes = self + .documents_fields + .document_attribute(reader, document_id, attribute)?; + match bytes { + Some(bytes) => Ok(Some(bytes)), + None => Ok(None), + } + } + pub fn customs_update(&self, writer: &mut heed::RwTxn, customs: Vec) -> ZResult { let _ = self.updates_notifier.send(UpdateEvent::NewUpdate); update::push_customs_update(writer, self.updates, self.updates_results, customs) diff --git a/meilisearch-core/src/update/settings_update.rs b/meilisearch-core/src/update/settings_update.rs index d0f10eb80..4fb6a8f47 100644 --- a/meilisearch-core/src/update/settings_update.rs +++ b/meilisearch-core/src/update/settings_update.rs @@ -50,9 +50,8 @@ pub fn apply_settings_update( must_reindex = true; }, UpdateState::Clear => { - let clear: Vec<&str> = Vec::new(); - schema.update_ranked(clear)?; index.main.delete_ranking_rules(writer)?; + schema.clear_ranked(); must_reindex = true; }, UpdateState::Nothing => (), diff --git a/meilisearch-http/src/helpers/meilisearch.rs b/meilisearch-http/src/helpers/meilisearch.rs index 6e15dcf0b..3d2b2f399 100644 --- a/meilisearch-http/src/helpers/meilisearch.rs +++ b/meilisearch-http/src/helpers/meilisearch.rs @@ -3,6 +3,7 @@ use std::collections::{HashMap, HashSet}; use std::convert::From; use std::error; use std::fmt; +use std::hash::{Hash, Hasher}; use std::time::{Duration, Instant}; use indexmap::IndexMap; @@ -13,6 +14,7 @@ use meilisearch_core::{Highlight, Index, MainT, RankedMap}; use meilisearch_schema::{FieldId, Schema}; use serde::{Deserialize, Serialize}; use serde_json::Value; +use siphasher::sip::SipHasher; #[derive(Debug)] pub enum Error { @@ -205,6 +207,21 @@ impl<'a> SearchBuilder<'a> { query_builder.with_fetch_timeout(self.timeout); + if let Some(field) = self.index.main.distinct_attribute(reader)? { + if let Some(field_id) = schema.id(&field) { + query_builder.with_distinct(1, move |id| { + match self.index.document_attribute_bytes(reader, id, field_id) { + Ok(Some(bytes)) => { + let mut s = SipHasher::new(); + bytes.hash(&mut s); + Some(s.finish()) + } + _ => None, + } + }); + } + } + let start = Instant::now(); let docs = query_builder.query(reader, &self.query, self.offset..(self.offset + self.limit)); diff --git a/meilisearch-http/tests/settings_ranking_rules.rs b/meilisearch-http/tests/settings_ranking_rules.rs index f221aacfb..786f72dd5 100644 --- a/meilisearch-http/tests/settings_ranking_rules.rs +++ b/meilisearch-http/tests/settings_ranking_rules.rs @@ -96,12 +96,12 @@ fn write_all_and_delete() { let res_value: Value = serde_json::from_slice(&buf).unwrap(); let json = json!([ - "typo", - "words", - "proximity", - "attribute", - "wordsPosition", - "exactness" + "typo", + "words", + "proximity", + "attribute", + "wordsPosition", + "exactness" ]); assert_json_eq!(json, res_value, ordered: false); diff --git a/meilisearch-schema/src/schema.rs b/meilisearch-schema/src/schema.rs index 7a4180cb5..4283f203d 100644 --- a/meilisearch-schema/src/schema.rs +++ b/meilisearch-schema/src/schema.rs @@ -125,6 +125,10 @@ impl Schema { Ok((id, pos.into())) } + pub fn clear_ranked(&mut self) { + self.ranked.clear(); + } + pub fn remove_ranked(&mut self, name: &str) { if let Some(id) = self.fields_map.id(name) { self.ranked.remove(&id);