mirror of
https://github.com/meilisearch/MeiliSearch
synced 2025-07-04 20:37:15 +02:00
jsonl support
This commit is contained in:
parent
5bac65f8b8
commit
1f537e1b60
10 changed files with 121 additions and 41 deletions
|
@ -11,7 +11,7 @@ use milli::update::Setting;
|
|||
use serde::{Deserialize, Deserializer, Serialize};
|
||||
use uuid::Uuid;
|
||||
|
||||
use crate::document_formats::read_jsonl;
|
||||
use crate::document_formats::read_ndjson;
|
||||
use crate::index::apply_settings_to_builder;
|
||||
use crate::index::update_handler::UpdateHandler;
|
||||
use crate::index_controller::index_resolver::uuid_store::HeedUuidStore;
|
||||
|
@ -124,7 +124,7 @@ fn load_index(
|
|||
|
||||
let mut tmp_doc_file = tempfile::tempfile()?;
|
||||
|
||||
read_jsonl(reader, &mut tmp_doc_file)?;
|
||||
read_ndjson(reader, &mut tmp_doc_file)?;
|
||||
|
||||
tmp_doc_file.seek(SeekFrom::Start(0))?;
|
||||
|
||||
|
@ -213,7 +213,7 @@ impl From<Settings> for index_controller::Settings<Unchecked> {
|
|||
}
|
||||
}
|
||||
|
||||
// /// Extract Settings from `settings.json` file present at provided `dir_path`
|
||||
/// Extract Settings from `settings.json` file present at provided `dir_path`
|
||||
fn import_settings(dir_path: impl AsRef<Path>) -> anyhow::Result<Settings> {
|
||||
let path = dir_path.as_ref().join("settings.json");
|
||||
let file = File::open(path)?;
|
||||
|
|
|
@ -72,6 +72,7 @@ pub struct IndexController {
|
|||
pub enum DocumentAdditionFormat {
|
||||
Json,
|
||||
Csv,
|
||||
Ndjson,
|
||||
}
|
||||
|
||||
#[derive(Serialize, Debug)]
|
||||
|
|
|
@ -10,7 +10,7 @@ use uuid::Uuid;
|
|||
|
||||
const UPDATE_FILES_PATH: &str = "updates/updates_files";
|
||||
|
||||
use crate::document_formats::read_jsonl;
|
||||
use crate::document_formats::read_ndjson;
|
||||
|
||||
pub struct UpdateFile {
|
||||
path: PathBuf,
|
||||
|
@ -86,7 +86,7 @@ impl UpdateFileStore {
|
|||
.ok_or_else(|| anyhow::anyhow!("invalid update file name"))?;
|
||||
let dst_path = dst_update_files_path.join(file_uuid);
|
||||
let dst_file = BufWriter::new(File::create(dst_path)?);
|
||||
read_jsonl(update_file, dst_file)?;
|
||||
read_ndjson(update_file, dst_file)?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
|
@ -98,9 +98,9 @@ impl UpdateFileStore {
|
|||
Ok(Self { path })
|
||||
}
|
||||
|
||||
/// Created a new temporary update file.
|
||||
/// Creates a new temporary update file.
|
||||
///
|
||||
/// A call to persist is needed to persist in the database.
|
||||
/// A call to `persist` is needed to persist the file in the database.
|
||||
pub fn new_update(&self) -> Result<(Uuid, UpdateFile)> {
|
||||
let file = NamedTempFile::new()?;
|
||||
let uuid = Uuid::new_v4();
|
||||
|
@ -110,14 +110,14 @@ impl UpdateFileStore {
|
|||
Ok((uuid, update_file))
|
||||
}
|
||||
|
||||
/// Returns a the file corresponding to the requested uuid.
|
||||
/// Returns the file corresponding to the requested uuid.
|
||||
pub fn get_update(&self, uuid: Uuid) -> Result<File> {
|
||||
let path = self.path.join(uuid.to_string());
|
||||
let file = File::open(path)?;
|
||||
Ok(file)
|
||||
}
|
||||
|
||||
/// Copies the content of the update file poited to by uuid to dst directory.
|
||||
/// Copies the content of the update file pointed to by `uuid` to the `dst` directory.
|
||||
pub fn snapshot(&self, uuid: Uuid, dst: impl AsRef<Path>) -> Result<()> {
|
||||
let src = self.path.join(uuid.to_string());
|
||||
let mut dst = dst.as_ref().join(UPDATE_FILES_PATH);
|
||||
|
@ -127,7 +127,7 @@ impl UpdateFileStore {
|
|||
Ok(())
|
||||
}
|
||||
|
||||
/// Peform a dump of the given update file uuid into the provided snapshot path.
|
||||
/// Performs a dump of the given update file uuid into the provided dump path.
|
||||
pub fn dump(&self, uuid: Uuid, dump_path: impl AsRef<Path>) -> Result<()> {
|
||||
let uuid_string = uuid.to_string();
|
||||
let update_file_path = self.path.join(&uuid_string);
|
||||
|
@ -140,7 +140,8 @@ impl UpdateFileStore {
|
|||
let mut document_reader = DocumentBatchReader::from_reader(update_file)?;
|
||||
|
||||
let mut document_buffer = Map::new();
|
||||
// TODO: we need to find a way to do this more efficiently. (create a custom serializer to
|
||||
// TODO: we need to find a way to do this more efficiently. (create a custom serializer
|
||||
// for jsonl for example...)
|
||||
while let Some((index, document)) = document_reader.next_document_with_index()? {
|
||||
for (field_id, content) in document.iter() {
|
||||
|
|
|
@ -17,8 +17,6 @@ pub enum UpdateLoopError {
|
|||
UnexistingUpdate(u64),
|
||||
#[error("Internal error: {0}")]
|
||||
Internal(Box<dyn Error + Send + Sync + 'static>),
|
||||
//#[error("{0}")]
|
||||
//IndexActor(#[from] IndexActorError),
|
||||
#[error(
|
||||
"update store was shut down due to a fatal error, please check your logs for more info."
|
||||
)]
|
||||
|
|
|
@ -21,7 +21,7 @@ use uuid::Uuid;
|
|||
use self::error::{Result, UpdateLoopError};
|
||||
pub use self::message::UpdateMsg;
|
||||
use self::store::{UpdateStore, UpdateStoreInfo};
|
||||
use crate::document_formats::{read_csv, read_json};
|
||||
use crate::document_formats::{read_csv, read_json, read_ndjson};
|
||||
use crate::index::{Index, Settings, Unchecked};
|
||||
use crate::index_controller::update_file_store::UpdateFileStore;
|
||||
use status::UpdateStatus;
|
||||
|
@ -40,7 +40,7 @@ pub fn create_update_handler(
|
|||
let (sender, receiver) = mpsc::channel(100);
|
||||
let actor = UpdateLoop::new(update_store_size, receiver, path, index_resolver)?;
|
||||
|
||||
tokio::task::spawn_local(actor.run());
|
||||
tokio::task::spawn(actor.run());
|
||||
|
||||
Ok(sender)
|
||||
}
|
||||
|
@ -197,6 +197,7 @@ impl UpdateLoop {
|
|||
match format {
|
||||
DocumentAdditionFormat::Json => read_json(reader, &mut *update_file)?,
|
||||
DocumentAdditionFormat::Csv => read_csv(reader, &mut *update_file)?,
|
||||
DocumentAdditionFormat::Ndjson => read_ndjson(reader, &mut *update_file)?,
|
||||
}
|
||||
|
||||
update_file.persist()?;
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue