From 8fd9dc231c05cf1ab277f0d3a3c0b36163a074e5 Mon Sep 17 00:00:00 2001 From: mpostma Date: Wed, 10 Feb 2021 17:08:37 +0100 Subject: [PATCH] implement retrieve all documents --- src/data/search.rs | 43 ++++++++++++++++++++++++++++++++++++++++-- src/routes/document.rs | 34 ++++++++++++++++++++++++++------- src/routes/index.rs | 1 - 3 files changed, 68 insertions(+), 10 deletions(-) diff --git a/src/data/search.rs b/src/data/search.rs index d0858d704..329efc3ea 100644 --- a/src/data/search.rs +++ b/src/data/search.rs @@ -1,8 +1,9 @@ use std::collections::HashSet; use std::mem; use std::time::Instant; +use std::ops::RangeBounds; -use anyhow::bail; +use anyhow::{bail, Context}; use meilisearch_tokenizer::{Analyzer, AnalyzerConfig}; use milli::{Index, obkv_to_json, FacetCondition}; use serde::{Deserialize, Serialize}; @@ -70,7 +71,7 @@ impl SearchQuery { let highlighter = Highlighter::new(&stop_words); for (_id, obkv) in index.documents(&rtxn, documents_ids).unwrap() { - let mut object = obkv_to_json(&displayed_fields, &fields_ids_map, obkv).unwrap(); + let mut object = obkv_to_json(&displayed_fields, &fields_ids_map, obkv)?; if let Some(ref attributes_to_highlight) = self.attributes_to_highlight { highlighter.highlight_record(&mut object, &found_words, attributes_to_highlight); } @@ -165,4 +166,42 @@ impl Data { None => bail!("index {:?} doesn't exists", index.as_ref()), } } + + pub fn retrieve_documents( + &self, + index: impl AsRef, + offset: usize, + count: usize, + attributes_to_retrieve: Option<&[&str]>, + ) -> anyhow::Result>> { + let index = self.index_controller + .index(&index)? + .with_context(|| format!("Index {:?} doesn't exist", index.as_ref()))?; + let txn = index.read_txn()?; + + let mut documents = Vec::new(); + + let fields_ids_map = index.fields_ids_map(&txn)?; + + let attributes_to_retrieve_ids = match attributes_to_retrieve { + Some(attrs) => attrs + .as_ref() + .iter() + .filter_map(|f| fields_ids_map.id(f)) + .collect::>(), + None => fields_ids_map.iter().map(|(id, _)| id).collect(), + }; + + let iter = index.documents.range(&txn, &(..))? + .skip(offset) + .take(count); + + for entry in iter { + let (_id, obkv) = entry?; + let object = obkv_to_json(&attributes_to_retrieve_ids, &fields_ids_map, obkv)?; + documents.push(object); + } + + Ok(documents) + } } diff --git a/src/routes/document.rs b/src/routes/document.rs index dcc669f85..1114ddcb3 100644 --- a/src/routes/document.rs +++ b/src/routes/document.rs @@ -12,6 +12,9 @@ use crate::error::ResponseError; use crate::helpers::Authentication; use crate::routes::IndexParam; +const DEFAULT_RETRIEVE_DOCUMENTS_OFFSET: usize = 0; +const DEFAULT_RETRIEVE_DOCUMENTS_LIMIT: usize = 20; + macro_rules! guard_content_type { ($fn_name:ident, $guard_value:literal) => { fn $fn_name(head: &actix_web::dev::RequestHead) -> bool { @@ -69,18 +72,35 @@ async fn delete_document( #[derive(Deserialize)] #[serde(rename_all = "camelCase", deny_unknown_fields)] struct BrowseQuery { - _offset: Option, - _limit: Option, - _attributes_to_retrieve: Option, + offset: Option, + limit: Option, + attributes_to_retrieve: Option, } #[get("/indexes/{index_uid}/documents", wrap = "Authentication::Public")] async fn get_all_documents( - _data: web::Data, - _path: web::Path, - _params: web::Query, + data: web::Data, + path: web::Path, + params: web::Query, ) -> Result { - todo!() + let attributes_to_retrieve = params + .attributes_to_retrieve + .as_ref() + .map(|attrs| attrs + .split(",") + .collect::>()); + + match data.retrieve_documents( + &path.index_uid, + params.offset.unwrap_or(DEFAULT_RETRIEVE_DOCUMENTS_OFFSET), + params.limit.unwrap_or(DEFAULT_RETRIEVE_DOCUMENTS_LIMIT), + attributes_to_retrieve.as_deref()) { + Ok(docs) => { + let json = serde_json::to_string(&docs).unwrap(); + Ok(HttpResponse::Ok().body(json)) + } + Err(_) => { todo!() } + } } #[derive(Deserialize)] diff --git a/src/routes/index.rs b/src/routes/index.rs index d682376e3..a77f26e1f 100644 --- a/src/routes/index.rs +++ b/src/routes/index.rs @@ -32,7 +32,6 @@ async fn list_indexes(data: web::Data) -> Result