fix empty index error

This commit is contained in:
Marin Postma 2021-05-31 10:58:51 +02:00
parent bc5a5e37ea
commit b3c8f0e1f6
No known key found for this signature in database
GPG Key ID: D5241F0C0C865F30
2 changed files with 38 additions and 30 deletions

View File

@ -1,15 +1,15 @@
use std::{fs::{create_dir_all, File}, path::Path, sync::Arc};
use std::{fs::{create_dir_all, File}, io::{BufRead, BufReader}, path::Path, sync::Arc};
use anyhow::bail;
use anyhow::Context;
use heed::RoTxn;
use indexmap::IndexMap;
use milli::update::{IndexDocumentsMethod, UpdateFormat::JsonStream};
use serde::{Deserialize, Serialize};
use anyhow::bail;
use crate::option::IndexerOpts;
use super::{Unchecked, Index, Settings, update_handler::UpdateHandler};
use super::{update_handler::UpdateHandler, Index, Settings, Unchecked};
#[derive(Serialize, Deserialize)]
struct DumpMeta {
@ -64,7 +64,10 @@ impl Index {
let settings = self.settings_txn(txn)?.into_unchecked();
let primary_key = self.primary_key(txn)?.map(String::from);
let meta = DumpMeta { settings, primary_key };
let meta = DumpMeta {
settings,
primary_key,
};
serde_json::to_writer(&mut meta_file, &meta)?;
@ -86,7 +89,10 @@ impl Index {
let meta_path = src.as_ref().join(META_FILE_NAME);
let mut meta_file = File::open(meta_path)?;
let DumpMeta { settings, primary_key } = serde_json::from_reader(&mut meta_file)?;
let DumpMeta {
settings,
primary_key,
} = serde_json::from_reader(&mut meta_file)?;
let settings = settings.check();
let index = Self::open(&dst_dir_path, size as usize)?;
let mut txn = index.write_txn()?;
@ -96,15 +102,21 @@ impl Index {
index.update_settings_txn(&mut txn, &settings, handler.update_builder(0))?;
let document_file_path = src.as_ref().join(DATA_FILE_NAME);
let document_file = File::open(&document_file_path)?;
index.update_documents_txn(
&mut txn,
JsonStream,
IndexDocumentsMethod::UpdateDocuments,
Some(document_file),
handler.update_builder(0),
primary_key.as_deref(),
)?;
let reader = File::open(&document_file_path)?;
let mut reader = BufReader::new(reader);
reader.fill_buf()?;
// If the document file is empty, we skip the document addition to prevent
// a primary key error from being thrown.
if !reader.buffer().is_empty() {
index.update_documents_txn(
&mut txn,
JsonStream,
IndexDocumentsMethod::UpdateDocuments,
Some(reader),
handler.update_builder(0),
primary_key.as_deref(),
)?;
}
txn.commit()?;

View File

@ -1,10 +1,4 @@
use std::{
collections::{BTreeMap, BTreeSet},
fs::File,
marker::PhantomData,
path::Path,
sync::Arc,
};
use std::{collections::{BTreeMap, BTreeSet}, fs::File, io::BufRead, marker::PhantomData, path::Path, sync::Arc};
use heed::EnvOpenOptions;
use log::{error, info, warn};
@ -103,15 +97,17 @@ fn load_index(
let update_builder = UpdateBuilder::new(0);
let file = File::open(&src.as_ref().join("documents.jsonl"))?;
let reader = std::io::BufReader::new(file);
index.update_documents(
UpdateFormat::JsonStream,
IndexDocumentsMethod::ReplaceDocuments,
Some(reader),
update_builder,
primary_key,
)?;
let mut reader = std::io::BufReader::new(file);
reader.fill_buf()?;
if !reader.buffer().is_empty() {
index.update_documents(
UpdateFormat::JsonStream,
IndexDocumentsMethod::ReplaceDocuments,
Some(reader),
update_builder,
primary_key,
)?;
}
// the last step: we extract the original milli::Index and close it
Arc::try_unwrap(index.0)