use distinct on search

This commit is contained in:
qdequele 2020-03-02 14:34:29 +01:00
parent 72450c765d
commit a2f0f95337
No known key found for this signature in database
GPG Key ID: B3F0A000EBF11745
6 changed files with 44 additions and 9 deletions

View File

@ -75,7 +75,7 @@ impl<'c, 'f, 'd> QueryBuilder<'c, 'f, 'd> {
self.timeout = Some(timeout) self.timeout = Some(timeout)
} }
pub fn with_distinct<F, K>(&mut self, function: F, size: usize) pub fn with_distinct<F>(&mut self, size: usize, function: F)
where where
F: Fn(DocumentId) -> Option<u64> + 'd, F: Fn(DocumentId) -> Option<u64> + 'd,
{ {

View File

@ -254,6 +254,21 @@ impl Index {
} }
} }
/// Looks up one attribute of a document and returns its stored bytes.
///
/// Delegates to `documents_fields.document_attribute`, reading through the
/// given read transaction. The returned slice borrows for the `'txn`
/// lifetime of `reader`.
///
/// Returns `Ok(None)` when the lookup yields no value for this
/// `document_id` / `attribute` pair.
///
/// # Errors
///
/// Propagates any error reported by the underlying lookup.
pub fn document_attribute_bytes<'txn>(
    &self,
    reader: &'txn heed::RoTxn<MainT>,
    document_id: DocumentId,
    attribute: FieldId,
) -> MResult<Option<&'txn [u8]>> {
    // `?` converts the lookup's error into this function's error type; the
    // `Option` itself is handed back unchanged, so no re-wrapping is needed.
    let bytes = self
        .documents_fields
        .document_attribute(reader, document_id, attribute)?;
    Ok(bytes)
}
pub fn customs_update(&self, writer: &mut heed::RwTxn<UpdateT>, customs: Vec<u8>) -> ZResult<u64> { pub fn customs_update(&self, writer: &mut heed::RwTxn<UpdateT>, customs: Vec<u8>) -> ZResult<u64> {
let _ = self.updates_notifier.send(UpdateEvent::NewUpdate); let _ = self.updates_notifier.send(UpdateEvent::NewUpdate);
update::push_customs_update(writer, self.updates, self.updates_results, customs) update::push_customs_update(writer, self.updates, self.updates_results, customs)

View File

@ -50,9 +50,8 @@ pub fn apply_settings_update(
must_reindex = true; must_reindex = true;
}, },
UpdateState::Clear => { UpdateState::Clear => {
let clear: Vec<&str> = Vec::new();
schema.update_ranked(clear)?;
index.main.delete_ranking_rules(writer)?; index.main.delete_ranking_rules(writer)?;
schema.clear_ranked();
must_reindex = true; must_reindex = true;
}, },
UpdateState::Nothing => (), UpdateState::Nothing => (),

View File

@ -3,6 +3,7 @@ use std::collections::{HashMap, HashSet};
use std::convert::From; use std::convert::From;
use std::error; use std::error;
use std::fmt; use std::fmt;
use std::hash::{Hash, Hasher};
use std::time::{Duration, Instant}; use std::time::{Duration, Instant};
use indexmap::IndexMap; use indexmap::IndexMap;
@ -13,6 +14,7 @@ use meilisearch_core::{Highlight, Index, MainT, RankedMap};
use meilisearch_schema::{FieldId, Schema}; use meilisearch_schema::{FieldId, Schema};
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
use serde_json::Value; use serde_json::Value;
use siphasher::sip::SipHasher;
#[derive(Debug)] #[derive(Debug)]
pub enum Error { pub enum Error {
@ -205,6 +207,21 @@ impl<'a> SearchBuilder<'a> {
query_builder.with_fetch_timeout(self.timeout); query_builder.with_fetch_timeout(self.timeout);
if let Some(field) = self.index.main.distinct_attribute(reader)? {
if let Some(field_id) = schema.id(&field) {
query_builder.with_distinct(1, move |id| {
match self.index.document_attribute_bytes(reader, id, field_id) {
Ok(Some(bytes)) => {
let mut s = SipHasher::new();
bytes.hash(&mut s);
Some(s.finish())
}
_ => None,
}
});
}
}
let start = Instant::now(); let start = Instant::now();
let docs = let docs =
query_builder.query(reader, &self.query, self.offset..(self.offset + self.limit)); query_builder.query(reader, &self.query, self.offset..(self.offset + self.limit));

View File

@ -96,12 +96,12 @@ fn write_all_and_delete() {
let res_value: Value = serde_json::from_slice(&buf).unwrap(); let res_value: Value = serde_json::from_slice(&buf).unwrap();
let json = json!([ let json = json!([
"typo", "typo",
"words", "words",
"proximity", "proximity",
"attribute", "attribute",
"wordsPosition", "wordsPosition",
"exactness" "exactness"
]); ]);
assert_json_eq!(json, res_value, ordered: false); assert_json_eq!(json, res_value, ordered: false);

View File

@ -125,6 +125,10 @@ impl Schema {
Ok((id, pos.into())) Ok((id, pos.into()))
} }
/// Empties the schema's `ranked` collection so that no entry remains in it.
pub fn clear_ranked(&mut self) {
self.ranked.clear();
}
pub fn remove_ranked(&mut self, name: &str) { pub fn remove_ranked(&mut self, name: &str) {
if let Some(id) = self.fields_map.id(name) { if let Some(id) = self.fields_map.id(name) {
self.ranked.remove(&id); self.ranked.remove(&id);