Introduce a way to filter documents with a basic syntax

This commit is contained in:
Clément Renault 2019-10-09 14:20:37 +02:00
parent 663714bb6d
commit 683b6afbfb
No known key found for this signature in database
GPG Key ID: 92ADA4E935E71FA4
4 changed files with 50 additions and 10 deletions

View File

@ -53,6 +53,11 @@ struct SearchCommand {
#[structopt(short = "C", long, default_value = "35")] #[structopt(short = "C", long, default_value = "35")]
char_context: usize, char_context: usize,
/// A filter string that can be `!adult` or `adult` to
/// filter documents on this specified field
#[structopt(short, long)]
filter: Option<String>,
/// Fields that must be displayed. /// Fields that must be displayed.
displayed_fields: Vec<String>, displayed_fields: Vec<String>,
} }
@ -269,8 +274,29 @@ fn search_command(command: SearchCommand, database: Database) -> Result<(), Box<
Ok(query) => { Ok(query) => {
let start_total = Instant::now(); let start_total = Instant::now();
let documents = match command.filter {
Some(ref filter) => {
let filter = filter.as_str();
let (positive, filter) = if filter.chars().next() == Some('!') {
(false, &filter[1..])
} else {
(true, filter)
};
let attr = schema.attribute(&filter).expect("Could not find filtered attribute");
let builder = index.query_builder(); let builder = index.query_builder();
let documents = builder.query(&reader, &query, 0..command.number_results)?; let builder = builder.with_filter(|document_id| {
let string: String = index.document_attribute(&reader, document_id, attr).unwrap().unwrap();
(string == "true") == positive
});
builder.query(&reader, &query, 0..command.number_results)?
},
None => {
let builder = index.query_builder();
builder.query(&reader, &query, 0..command.number_results)?
}
};
let mut retrieve_duration = Duration::default(); let mut retrieve_duration = Duration::default();

View File

@ -52,7 +52,7 @@ pub struct Deserializer<'a, R> {
pub reader: &'a R, pub reader: &'a R,
pub documents_fields: DocumentsFields, pub documents_fields: DocumentsFields,
pub schema: &'a Schema, pub schema: &'a Schema,
pub fields: Option<&'a HashSet<SchemaAttr>>, pub attributes: Option<&'a HashSet<SchemaAttr>>,
} }
impl<'de, 'a, 'b, R: 'a> de::Deserializer<'de> for &'b mut Deserializer<'a, R> impl<'de, 'a, 'b, R: 'a> de::Deserializer<'de> for &'b mut Deserializer<'a, R>
@ -86,7 +86,7 @@ where R: rkv::Readable,
}; };
let is_displayed = self.schema.props(attr).is_displayed(); let is_displayed = self.schema.props(attr).is_displayed();
if is_displayed && self.fields.map_or(true, |f| f.contains(&attr)) { if is_displayed && self.attributes.map_or(true, |f| f.contains(&attr)) {
let attribute_name = self.schema.attribute_name(attr); let attribute_name = self.schema.attribute_name(attr);
Some((attribute_name, Value::new(value))) Some((attribute_name, Value::new(value)))
} else { } else {

View File

@ -74,7 +74,7 @@ impl DocumentsFields {
Ok(count) Ok(count)
} }
pub fn document_field<'a>( pub fn document_attribute<'a>(
&self, &self,
reader: &'a impl rkv::Readable, reader: &'a impl rkv::Readable,
document_id: DocumentId, document_id: DocumentId,

View File

@ -15,7 +15,7 @@ pub use self::updates::Updates;
pub use self::updates_results::UpdatesResults; pub use self::updates_results::UpdatesResults;
use std::collections::HashSet; use std::collections::HashSet;
use meilidb_schema::Schema; use meilidb_schema::{Schema, SchemaAttr};
use serde::de; use serde::de;
use crate::{update, query_builder::QueryBuilder, DocumentId, MResult, Error}; use crate::{update, query_builder::QueryBuilder, DocumentId, MResult, Error};
use crate::serde::Deserializer; use crate::serde::Deserializer;
@ -69,15 +69,15 @@ impl Index {
pub fn document<T: de::DeserializeOwned, R: rkv::Readable>( pub fn document<T: de::DeserializeOwned, R: rkv::Readable>(
&self, &self,
reader: &R, reader: &R,
fields: Option<&HashSet<&str>>, attributes: Option<&HashSet<&str>>,
document_id: DocumentId, document_id: DocumentId,
) -> MResult<Option<T>> ) -> MResult<Option<T>>
{ {
let schema = self.main.schema(reader)?; let schema = self.main.schema(reader)?;
let schema = schema.ok_or(Error::SchemaMissing)?; let schema = schema.ok_or(Error::SchemaMissing)?;
let fields = match fields { let attributes = match attributes {
Some(fields) => fields.into_iter().map(|name| schema.attribute(name)).collect(), Some(attributes) => attributes.into_iter().map(|name| schema.attribute(name)).collect(),
None => None, None => None,
}; };
@ -86,7 +86,7 @@ impl Index {
reader, reader,
documents_fields: self.documents_fields, documents_fields: self.documents_fields,
schema: &schema, schema: &schema,
fields: fields.as_ref(), attributes: attributes.as_ref(),
}; };
// TODO: currently we return an error if all document fields are missing, // TODO: currently we return an error if all document fields are missing,
@ -94,6 +94,20 @@ impl Index {
Ok(T::deserialize(&mut deserializer).map(Some)?) Ok(T::deserialize(&mut deserializer).map(Some)?)
} }
/// Fetches a single attribute of a document and decodes it into `T`.
///
/// The raw bytes are looked up through `documents_fields` and then decoded
/// with `rmp_serde`. `Ok(None)` is returned when the lookup yields no bytes
/// for the given `document_id` / `attribute` pair.
///
/// # Errors
///
/// Propagates any error raised by the lookup or by the decoding step.
pub fn document_attribute<T: de::DeserializeOwned, R: rkv::Readable>(
    &self,
    reader: &R,
    document_id: DocumentId,
    attribute: SchemaAttr,
) -> MResult<Option<T>>
{
    // Bail out early when nothing is stored for this document/attribute pair.
    let raw = match self.documents_fields.document_attribute(reader, document_id, attribute)? {
        Some(raw) => raw,
        None => return Ok(None),
    };

    // The target type is inferred from the function's return type.
    let value = rmp_serde::from_read_ref(raw)?;
    Ok(Some(value))
}
pub fn schema_update(&self, mut writer: rkv::Writer, schema: Schema) -> MResult<()> { pub fn schema_update(&self, mut writer: rkv::Writer, schema: Schema) -> MResult<()> {
update::push_schema_update(&mut writer, self.updates, self.updates_results, schema)?; update::push_schema_update(&mut writer, self.updates, self.updates_results, schema)?;
writer.commit()?; writer.commit()?;