fix empty index error

This commit is contained in:
Marin Postma 2021-05-31 10:58:51 +02:00
parent bc5a5e37ea
commit b3c8f0e1f6
No known key found for this signature in database
GPG Key ID: D5241F0C0C865F30
2 changed files with 38 additions and 30 deletions

View File

@ -1,15 +1,15 @@
use std::{fs::{create_dir_all, File}, path::Path, sync::Arc}; use std::{fs::{create_dir_all, File}, io::{BufRead, BufReader}, path::Path, sync::Arc};
use anyhow::bail;
use anyhow::Context; use anyhow::Context;
use heed::RoTxn; use heed::RoTxn;
use indexmap::IndexMap; use indexmap::IndexMap;
use milli::update::{IndexDocumentsMethod, UpdateFormat::JsonStream}; use milli::update::{IndexDocumentsMethod, UpdateFormat::JsonStream};
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
use anyhow::bail;
use crate::option::IndexerOpts; use crate::option::IndexerOpts;
use super::{Unchecked, Index, Settings, update_handler::UpdateHandler}; use super::{update_handler::UpdateHandler, Index, Settings, Unchecked};
#[derive(Serialize, Deserialize)] #[derive(Serialize, Deserialize)]
struct DumpMeta { struct DumpMeta {
@ -64,7 +64,10 @@ impl Index {
let settings = self.settings_txn(txn)?.into_unchecked(); let settings = self.settings_txn(txn)?.into_unchecked();
let primary_key = self.primary_key(txn)?.map(String::from); let primary_key = self.primary_key(txn)?.map(String::from);
let meta = DumpMeta { settings, primary_key }; let meta = DumpMeta {
settings,
primary_key,
};
serde_json::to_writer(&mut meta_file, &meta)?; serde_json::to_writer(&mut meta_file, &meta)?;
@ -86,7 +89,10 @@ impl Index {
let meta_path = src.as_ref().join(META_FILE_NAME); let meta_path = src.as_ref().join(META_FILE_NAME);
let mut meta_file = File::open(meta_path)?; let mut meta_file = File::open(meta_path)?;
let DumpMeta { settings, primary_key } = serde_json::from_reader(&mut meta_file)?; let DumpMeta {
settings,
primary_key,
} = serde_json::from_reader(&mut meta_file)?;
let settings = settings.check(); let settings = settings.check();
let index = Self::open(&dst_dir_path, size as usize)?; let index = Self::open(&dst_dir_path, size as usize)?;
let mut txn = index.write_txn()?; let mut txn = index.write_txn()?;
@ -96,15 +102,21 @@ impl Index {
index.update_settings_txn(&mut txn, &settings, handler.update_builder(0))?; index.update_settings_txn(&mut txn, &settings, handler.update_builder(0))?;
let document_file_path = src.as_ref().join(DATA_FILE_NAME); let document_file_path = src.as_ref().join(DATA_FILE_NAME);
let document_file = File::open(&document_file_path)?; let reader = File::open(&document_file_path)?;
index.update_documents_txn( let mut reader = BufReader::new(reader);
&mut txn, reader.fill_buf()?;
JsonStream, // If the document file is empty, we don't perform the document addition, to prevent
IndexDocumentsMethod::UpdateDocuments, // a primary key error from being thrown.
Some(document_file), if !reader.buffer().is_empty() {
handler.update_builder(0), index.update_documents_txn(
primary_key.as_deref(), &mut txn,
)?; JsonStream,
IndexDocumentsMethod::UpdateDocuments,
Some(reader),
handler.update_builder(0),
primary_key.as_deref(),
)?;
}
txn.commit()?; txn.commit()?;

View File

@ -1,10 +1,4 @@
use std::{ use std::{collections::{BTreeMap, BTreeSet}, fs::File, io::BufRead, marker::PhantomData, path::Path, sync::Arc};
collections::{BTreeMap, BTreeSet},
fs::File,
marker::PhantomData,
path::Path,
sync::Arc,
};
use heed::EnvOpenOptions; use heed::EnvOpenOptions;
use log::{error, info, warn}; use log::{error, info, warn};
@ -103,15 +97,17 @@ fn load_index(
let update_builder = UpdateBuilder::new(0); let update_builder = UpdateBuilder::new(0);
let file = File::open(&src.as_ref().join("documents.jsonl"))?; let file = File::open(&src.as_ref().join("documents.jsonl"))?;
let reader = std::io::BufReader::new(file); let mut reader = std::io::BufReader::new(file);
reader.fill_buf()?;
index.update_documents( if !reader.buffer().is_empty() {
UpdateFormat::JsonStream, index.update_documents(
IndexDocumentsMethod::ReplaceDocuments, UpdateFormat::JsonStream,
Some(reader), IndexDocumentsMethod::ReplaceDocuments,
update_builder, Some(reader),
primary_key, update_builder,
)?; primary_key,
)?;
}
// the last step: we extract the original milli::Index and close it // the last step: we extract the original milli::Index and close it
Arc::try_unwrap(index.0) Arc::try_unwrap(index.0)