mirror of
https://github.com/meilisearch/MeiliSearch
synced 2025-01-12 06:24:29 +01:00
implement retrieve documents
This commit is contained in:
parent
17b84691f2
commit
f3d65ec5e9
@ -12,59 +12,22 @@ impl Data {
|
||||
self.index_controller.search(index.as_ref().to_string(), search_query).await
|
||||
}
|
||||
|
||||
pub async fn retrieve_documents<S>(
|
||||
pub async fn retrieve_documents(
|
||||
&self,
|
||||
_index: String,
|
||||
_offset: usize,
|
||||
_limit: usize,
|
||||
_attributes_to_retrieve: Option<Vec<S>>,
|
||||
) -> anyhow::Result<Vec<Map<String, Value>>>
|
||||
where
|
||||
S: AsRef<str> + Send + Sync + 'static,
|
||||
{
|
||||
todo!()
|
||||
//let index_controller = self.index_controller.clone();
|
||||
//let documents: anyhow::Result<_> = tokio::task::spawn_blocking(move || {
|
||||
//let index = index_controller
|
||||
//.index(index.clone())?
|
||||
//.with_context(|| format!("Index {:?} doesn't exist", index))?;
|
||||
|
||||
//let txn = index.read_txn()?;
|
||||
|
||||
//let fields_ids_map = index.fields_ids_map(&txn)?;
|
||||
|
||||
//let attributes_to_retrieve_ids = match attributes_to_retrieve {
|
||||
//Some(attrs) => attrs
|
||||
//.iter()
|
||||
//.filter_map(|f| fields_ids_map.id(f.as_ref()))
|
||||
//.collect::<Vec<_>>(),
|
||||
//None => fields_ids_map.iter().map(|(id, _)| id).collect(),
|
||||
//};
|
||||
|
||||
//let iter = index.documents.range(&txn, &(..))?.skip(offset).take(limit);
|
||||
|
||||
//let mut documents = Vec::new();
|
||||
|
||||
//for entry in iter {
|
||||
//let (_id, obkv) = entry?;
|
||||
//let object = obkv_to_json(&attributes_to_retrieve_ids, &fields_ids_map, obkv)?;
|
||||
//documents.push(object);
|
||||
//}
|
||||
|
||||
//Ok(documents)
|
||||
//})
|
||||
//.await?;
|
||||
//documents
|
||||
index: String,
|
||||
offset: usize,
|
||||
limit: usize,
|
||||
attributes_to_retrieve: Option<Vec<String>>,
|
||||
) -> anyhow::Result<Vec<Map<String, Value>>> {
|
||||
self.index_controller.documents(index, offset, limit, attributes_to_retrieve).await
|
||||
}
|
||||
|
||||
pub async fn retrieve_document<S>(
|
||||
&self,
|
||||
_index: impl AsRef<str> + Sync + Send + 'static,
|
||||
_document_id: impl AsRef<str> + Sync + Send + 'static,
|
||||
_attributes_to_retrieve: Option<Vec<S>>,
|
||||
_attributes_to_retrieve: Option<Vec<String>>,
|
||||
) -> anyhow::Result<Map<String, Value>>
|
||||
where
|
||||
S: AsRef<str> + Sync + Send + 'static,
|
||||
{
|
||||
todo!()
|
||||
//let index_controller = self.index_controller.clone();
|
||||
|
@ -4,9 +4,14 @@ mod updates;
|
||||
use std::sync::Arc;
|
||||
use std::ops::Deref;
|
||||
|
||||
use serde_json::{Value, Map};
|
||||
use milli::obkv_to_json;
|
||||
|
||||
pub use search::{SearchQuery, SearchResult, DEFAULT_SEARCH_LIMIT};
|
||||
pub use updates::{Settings, Facets, UpdateResult};
|
||||
|
||||
pub type Document = Map<String, Value>;
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct Index(pub Arc<milli::Index>);
|
||||
|
||||
@ -45,4 +50,38 @@ impl Index {
|
||||
criteria: None,
|
||||
})
|
||||
}
|
||||
|
||||
pub fn retrieve_documents<S>(
|
||||
&self,
|
||||
offset: usize,
|
||||
limit: usize,
|
||||
attributes_to_retrieve: Option<Vec<S>>,
|
||||
) -> anyhow::Result<Vec<Map<String, Value>>>
|
||||
where
|
||||
S: AsRef<str> + Send + Sync + 'static,
|
||||
{
|
||||
let txn = self.read_txn()?;
|
||||
|
||||
let fields_ids_map = self.fields_ids_map(&txn)?;
|
||||
|
||||
let attributes_to_retrieve_ids = match attributes_to_retrieve {
|
||||
Some(attrs) => attrs
|
||||
.iter()
|
||||
.filter_map(|f| fields_ids_map.id(f.as_ref()))
|
||||
.collect::<Vec<_>>(),
|
||||
None => fields_ids_map.iter().map(|(id, _)| id).collect(),
|
||||
};
|
||||
|
||||
let iter = self.documents.range(&txn, &(..))?.skip(offset).take(limit);
|
||||
|
||||
let mut documents = Vec::new();
|
||||
|
||||
for entry in iter {
|
||||
let (_id, obkv) = entry?;
|
||||
let object = obkv_to_json(&attributes_to_retrieve_ids, &fields_ids_map, obkv)?;
|
||||
documents.push(object);
|
||||
}
|
||||
|
||||
Ok(documents)
|
||||
}
|
||||
}
|
||||
|
@ -1,6 +1,6 @@
|
||||
use std::collections::{HashMap, hash_map::Entry};
|
||||
use std::fs::{File, create_dir_all};
|
||||
use std::path::{PathBuf, Path};
|
||||
use std::collections::{hash_map::Entry, HashMap};
|
||||
use std::fs::{create_dir_all, File};
|
||||
use std::path::{Path, PathBuf};
|
||||
use std::sync::Arc;
|
||||
|
||||
use async_stream::stream;
|
||||
@ -8,25 +8,51 @@ use chrono::Utc;
|
||||
use futures::stream::StreamExt;
|
||||
use heed::EnvOpenOptions;
|
||||
use log::info;
|
||||
use serde_json::{Map, Value};
|
||||
use thiserror::Error;
|
||||
use tokio::sync::{mpsc, oneshot, RwLock};
|
||||
use uuid::Uuid;
|
||||
|
||||
use super::update_handler::UpdateHandler;
|
||||
use crate::index_controller::{IndexMetadata, UpdateMeta, updates::{Processed, Failed, Processing}};
|
||||
use crate::index::UpdateResult as UResult;
|
||||
use crate::index::{Document, Index, SearchQuery, SearchResult, Settings};
|
||||
use crate::index_controller::{
|
||||
updates::{Failed, Processed, Processing},
|
||||
IndexMetadata, UpdateMeta,
|
||||
};
|
||||
use crate::option::IndexerOpts;
|
||||
use crate::index::{Index, SearchQuery, SearchResult, Settings};
|
||||
|
||||
pub type Result<T> = std::result::Result<T, IndexError>;
|
||||
type AsyncMap<K, V> = Arc<RwLock<HashMap<K, V>>>;
|
||||
type UpdateResult = std::result::Result<Processed<UpdateMeta, UResult>, Failed<UpdateMeta, String>>;
|
||||
|
||||
enum IndexMsg {
|
||||
CreateIndex { uuid: Uuid, primary_key: Option<String>, ret: oneshot::Sender<Result<IndexMetadata>> },
|
||||
Update { meta: Processing<UpdateMeta>, data: std::fs::File, ret: oneshot::Sender<UpdateResult>},
|
||||
Search { uuid: Uuid, query: SearchQuery, ret: oneshot::Sender<anyhow::Result<SearchResult>> },
|
||||
Settings { uuid: Uuid, ret: oneshot::Sender<Result<Settings>> },
|
||||
CreateIndex {
|
||||
uuid: Uuid,
|
||||
primary_key: Option<String>,
|
||||
ret: oneshot::Sender<Result<IndexMetadata>>,
|
||||
},
|
||||
Update {
|
||||
meta: Processing<UpdateMeta>,
|
||||
data: std::fs::File,
|
||||
ret: oneshot::Sender<UpdateResult>,
|
||||
},
|
||||
Search {
|
||||
uuid: Uuid,
|
||||
query: SearchQuery,
|
||||
ret: oneshot::Sender<anyhow::Result<SearchResult>>,
|
||||
},
|
||||
Settings {
|
||||
uuid: Uuid,
|
||||
ret: oneshot::Sender<Result<Settings>>,
|
||||
},
|
||||
Documents {
|
||||
uuid: Uuid,
|
||||
attributes_to_retrieve: Option<Vec<String>>,
|
||||
offset: usize,
|
||||
limit: usize,
|
||||
ret: oneshot::Sender<Result<Vec<Map<String, Value>>>>,
|
||||
},
|
||||
}
|
||||
|
||||
struct IndexActor<S> {
|
||||
@ -56,11 +82,20 @@ impl<S: IndexStore + Sync + Send> IndexActor<S> {
|
||||
let update_handler = UpdateHandler::new(&options).unwrap();
|
||||
let update_handler = Arc::new(update_handler);
|
||||
let inbox = Some(inbox);
|
||||
Self { inbox, store, update_handler }
|
||||
Self {
|
||||
inbox,
|
||||
store,
|
||||
update_handler,
|
||||
}
|
||||
}
|
||||
|
||||
async fn run(mut self) {
|
||||
let mut inbox = self.inbox.take().expect("Index Actor must have a inbox at this point.");
|
||||
use IndexMsg::*;
|
||||
|
||||
let mut inbox = self
|
||||
.inbox
|
||||
.take()
|
||||
.expect("Index Actor must have a inbox at this point.");
|
||||
|
||||
let stream = stream! {
|
||||
loop {
|
||||
@ -73,31 +108,59 @@ impl<S: IndexStore + Sync + Send> IndexActor<S> {
|
||||
|
||||
let fut = stream.for_each_concurrent(Some(10), |msg| async {
|
||||
match msg {
|
||||
IndexMsg::CreateIndex { uuid, primary_key, ret } => self.handle_create_index(uuid, primary_key, ret).await,
|
||||
IndexMsg::Update { ret, meta, data } => self.handle_update(meta, data, ret).await,
|
||||
IndexMsg::Search { ret, query, uuid } => self.handle_search(uuid, query, ret).await,
|
||||
IndexMsg::Settings { ret, uuid } => self.handle_settings(uuid, ret).await,
|
||||
CreateIndex {
|
||||
uuid,
|
||||
primary_key,
|
||||
ret,
|
||||
} => self.handle_create_index(uuid, primary_key, ret).await,
|
||||
Update { ret, meta, data } => self.handle_update(meta, data, ret).await,
|
||||
Search { ret, query, uuid } => self.handle_search(uuid, query, ret).await,
|
||||
Settings { ret, uuid } => self.handle_settings(uuid, ret).await,
|
||||
Documents {
|
||||
ret,
|
||||
uuid,
|
||||
attributes_to_retrieve,
|
||||
offset,
|
||||
limit,
|
||||
} => {
|
||||
self.handle_fetch_documents(uuid, offset, limit, attributes_to_retrieve, ret)
|
||||
.await
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
fut.await;
|
||||
}
|
||||
|
||||
async fn handle_search(&self, uuid: Uuid, query: SearchQuery, ret: oneshot::Sender<anyhow::Result<SearchResult>>) {
|
||||
async fn handle_search(
|
||||
&self,
|
||||
uuid: Uuid,
|
||||
query: SearchQuery,
|
||||
ret: oneshot::Sender<anyhow::Result<SearchResult>>,
|
||||
) {
|
||||
let index = self.store.get(uuid).await.unwrap().unwrap();
|
||||
tokio::task::spawn_blocking(move || {
|
||||
let result = index.perform_search(query);
|
||||
ret.send(result)
|
||||
});
|
||||
|
||||
}
|
||||
|
||||
async fn handle_create_index(&self, uuid: Uuid, primary_key: Option<String>, ret: oneshot::Sender<Result<IndexMetadata>>) {
|
||||
async fn handle_create_index(
|
||||
&self,
|
||||
uuid: Uuid,
|
||||
primary_key: Option<String>,
|
||||
ret: oneshot::Sender<Result<IndexMetadata>>,
|
||||
) {
|
||||
let result = self.store.create_index(uuid, primary_key).await;
|
||||
let _ = ret.send(result);
|
||||
}
|
||||
|
||||
async fn handle_update(&self, meta: Processing<UpdateMeta>, data: File, ret: oneshot::Sender<UpdateResult>) {
|
||||
async fn handle_update(
|
||||
&self,
|
||||
meta: Processing<UpdateMeta>,
|
||||
data: File,
|
||||
ret: oneshot::Sender<UpdateResult>,
|
||||
) {
|
||||
info!("Processing update {}", meta.id());
|
||||
let uuid = meta.index_uuid().clone();
|
||||
let index = self.store.get_or_create(uuid).await.unwrap();
|
||||
@ -105,13 +168,30 @@ impl<S: IndexStore + Sync + Send> IndexActor<S> {
|
||||
tokio::task::spawn_blocking(move || {
|
||||
let result = update_handler.handle_update(meta, data, index);
|
||||
let _ = ret.send(result);
|
||||
}).await;
|
||||
})
|
||||
.await;
|
||||
}
|
||||
|
||||
async fn handle_settings(&self, uuid: Uuid, ret: oneshot::Sender<Result<Settings>>) {
|
||||
let index = self.store.get(uuid).await.unwrap().unwrap();
|
||||
tokio::task::spawn_blocking(move || {
|
||||
let result = index.settings()
|
||||
let result = index.settings().map_err(|e| IndexError::Error(e));
|
||||
let _ = ret.send(result);
|
||||
})
|
||||
.await;
|
||||
}
|
||||
|
||||
async fn handle_fetch_documents(
|
||||
&self,
|
||||
uuid: Uuid,
|
||||
offset: usize,
|
||||
limit: usize,
|
||||
attributes_to_retrieve: Option<Vec<String>>,
|
||||
ret: oneshot::Sender<Result<Vec<Document>>>,
|
||||
) {
|
||||
let index = self.store.get(uuid).await.unwrap().unwrap();
|
||||
tokio::task::spawn_blocking(move || {
|
||||
let result = index.retrieve_documents(offset, limit, attributes_to_retrieve)
|
||||
.map_err(|e| IndexError::Error(e));
|
||||
let _ = ret.send(result);
|
||||
}).await;
|
||||
@ -133,9 +213,17 @@ impl IndexActorHandle {
|
||||
Self { sender }
|
||||
}
|
||||
|
||||
pub async fn create_index(&self, uuid: Uuid, primary_key: Option<String>) -> Result<IndexMetadata> {
|
||||
pub async fn create_index(
|
||||
&self,
|
||||
uuid: Uuid,
|
||||
primary_key: Option<String>,
|
||||
) -> Result<IndexMetadata> {
|
||||
let (ret, receiver) = oneshot::channel();
|
||||
let msg = IndexMsg::CreateIndex { ret, uuid, primary_key };
|
||||
let msg = IndexMsg::CreateIndex {
|
||||
ret,
|
||||
uuid,
|
||||
primary_key,
|
||||
};
|
||||
let _ = self.sender.send(msg).await;
|
||||
receiver.await.expect("IndexActor has been killed")
|
||||
}
|
||||
@ -160,6 +248,25 @@ impl IndexActorHandle {
|
||||
let _ = self.sender.send(msg).await;
|
||||
Ok(receiver.await.expect("IndexActor has been killed")?)
|
||||
}
|
||||
|
||||
pub async fn documents(
|
||||
&self,
|
||||
uuid: Uuid,
|
||||
offset: usize,
|
||||
limit: usize,
|
||||
attributes_to_retrieve: Option<Vec<String>>,
|
||||
) -> Result<Vec<Document>> {
|
||||
let (ret, receiver) = oneshot::channel();
|
||||
let msg = IndexMsg::Documents {
|
||||
uuid,
|
||||
ret,
|
||||
offset,
|
||||
attributes_to_retrieve,
|
||||
limit,
|
||||
};
|
||||
let _ = self.sender.send(msg).await;
|
||||
Ok(receiver.await.expect("IndexActor has been killed")?)
|
||||
}
|
||||
}
|
||||
|
||||
struct MapIndexStore {
|
||||
@ -190,29 +297,31 @@ impl IndexStore for MapIndexStore {
|
||||
create_dir_all(&db_path).expect("can't create db");
|
||||
let mut options = EnvOpenOptions::new();
|
||||
options.map_size(4096 * 100_000);
|
||||
let index = milli::Index::new(options, &db_path)
|
||||
.map_err(|e| IndexError::Error(e))?;
|
||||
let index = milli::Index::new(options, &db_path).map_err(|e| IndexError::Error(e))?;
|
||||
let index = Index(Arc::new(index));
|
||||
Ok(index)
|
||||
}).await.expect("thread died");
|
||||
})
|
||||
.await
|
||||
.expect("thread died");
|
||||
|
||||
self.index_store.write().await.insert(meta.uuid.clone(), index?);
|
||||
self.index_store
|
||||
.write()
|
||||
.await
|
||||
.insert(meta.uuid.clone(), index?);
|
||||
|
||||
Ok(meta)
|
||||
}
|
||||
|
||||
async fn get_or_create(&self, uuid: Uuid) -> Result<Index> {
|
||||
match self.index_store.write().await.entry(uuid.clone()) {
|
||||
Entry::Vacant(entry) => {
|
||||
match self.meta_store.write().await.entry(uuid.clone()) {
|
||||
Entry::Vacant(entry) => match self.meta_store.write().await.entry(uuid.clone()) {
|
||||
Entry::Vacant(_) => {
|
||||
todo!()
|
||||
}
|
||||
Entry::Occupied(entry) => {
|
||||
todo!()
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
Entry::Occupied(entry) => Ok(entry.get().clone()),
|
||||
}
|
||||
}
|
||||
@ -228,6 +337,10 @@ impl MapIndexStore {
|
||||
root.push("indexes/");
|
||||
let meta_store = Arc::new(RwLock::new(HashMap::new()));
|
||||
let index_store = Arc::new(RwLock::new(HashMap::new()));
|
||||
Self { meta_store, index_store, root }
|
||||
Self {
|
||||
meta_store,
|
||||
index_store,
|
||||
root,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -11,7 +11,6 @@ use actix_web::web::Bytes;
|
||||
use actix_web::web::Payload;
|
||||
use anyhow::Context;
|
||||
use chrono::{DateTime, Utc};
|
||||
use crate::index::{SearchResult, SearchQuery};
|
||||
use futures::stream::StreamExt;
|
||||
use milli::update::{IndexDocumentsMethod, UpdateFormat};
|
||||
use serde::{Serialize, Deserialize};
|
||||
@ -19,6 +18,7 @@ use tokio::sync::{mpsc, oneshot};
|
||||
use uuid::Uuid;
|
||||
|
||||
pub use updates::{Processed, Processing, Failed};
|
||||
use crate::index::{SearchResult, SearchQuery, Document};
|
||||
use crate::index::{UpdateResult, Settings, Facets};
|
||||
|
||||
pub type UpdateStatus = updates::UpdateStatus<UpdateMeta, UpdateResult, String>;
|
||||
@ -157,6 +157,21 @@ impl IndexController {
|
||||
Ok(settings)
|
||||
}
|
||||
|
||||
pub async fn documents(
|
||||
&self,
|
||||
index: String,
|
||||
offset: usize,
|
||||
limit: usize,
|
||||
attributes_to_retrieve: Option<Vec<String>>,
|
||||
) -> anyhow::Result<Vec<Document>> {
|
||||
let uuid = self.uuid_resolver
|
||||
.resolve(index.clone())
|
||||
.await?
|
||||
.with_context(|| format!("Index {:?} doesn't exist", index))?;
|
||||
let documents = self.index_handle.documents(uuid, offset, limit, attributes_to_retrieve).await?;
|
||||
Ok(documents)
|
||||
}
|
||||
|
||||
fn update_index(&self, name: String, index_settings: IndexSettings) -> anyhow::Result<IndexMetadata> {
|
||||
todo!()
|
||||
}
|
||||
|
@ -55,17 +55,18 @@ async fn get_document(
|
||||
data: web::Data<Data>,
|
||||
path: web::Path<DocumentParam>,
|
||||
) -> Result<HttpResponse, ResponseError> {
|
||||
let index = path.index_uid.clone();
|
||||
let id = path.document_id.clone();
|
||||
match data.retrieve_document(index, id, None as Option<Vec<String>>).await {
|
||||
Ok(document) => {
|
||||
let json = serde_json::to_string(&document).unwrap();
|
||||
Ok(HttpResponse::Ok().body(json))
|
||||
}
|
||||
Err(e) => {
|
||||
Ok(HttpResponse::BadRequest().body(serde_json::json!({ "error": e.to_string() })))
|
||||
}
|
||||
}
|
||||
todo!()
|
||||
//let index = path.index_uid.clone();
|
||||
//let id = path.document_id.clone();
|
||||
//match data.retrieve_document(index, id, None as Option<Vec<String>>).await {
|
||||
//Ok(document) => {
|
||||
//let json = serde_json::to_string(&document).unwrap();
|
||||
//Ok(HttpResponse::Ok().body(json))
|
||||
//}
|
||||
//Err(e) => {
|
||||
//Ok(HttpResponse::BadRequest().body(serde_json::json!({ "error": e.to_string() })))
|
||||
//}
|
||||
//}
|
||||
}
|
||||
|
||||
#[delete(
|
||||
|
Loading…
x
Reference in New Issue
Block a user