integrate the new Settings in the dumps

This commit is contained in:
tamo 2021-05-10 20:48:06 +02:00
parent d767990424
commit 7d748fa384
No known key found for this signature in database
GPG Key ID: 20CD8020AFA88D69
5 changed files with 16 additions and 13 deletions

View File

@ -8,7 +8,7 @@ use serde_json::{Map, Value};
use crate::helpers::EnvSizer; use crate::helpers::EnvSizer;
pub use search::{SearchQuery, SearchResult, DEFAULT_SEARCH_LIMIT}; pub use search::{SearchQuery, SearchResult, DEFAULT_SEARCH_LIMIT};
pub use updates::{Facets, Settings, Checked, Unchecked, UpdateResult}; pub use updates::{Facets, Settings, Checked, Unchecked};
use serde::{de::Deserializer, Deserialize}; use serde::{de::Deserializer, Deserialize};
mod search; mod search;

View File

@ -8,9 +8,10 @@ use log::info;
use milli::update::{IndexDocumentsMethod, UpdateBuilder, UpdateFormat}; use milli::update::{IndexDocumentsMethod, UpdateBuilder, UpdateFormat};
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
use super::{deserialize_some, Index};
use crate::index_controller::UpdateResult; use crate::index_controller::UpdateResult;
use super::{deserialize_some, Index};
#[derive(Clone, Default, Debug)] #[derive(Clone, Default, Debug)]
pub struct Checked; pub struct Checked;
@ -35,7 +36,11 @@ pub struct Settings<T> {
)] )]
pub searchable_attributes: Option<Option<Vec<String>>>, pub searchable_attributes: Option<Option<Vec<String>>>,
#[serde(default)] #[serde(
default,
deserialize_with = "deserialize_some",
skip_serializing_if = "Option::is_none"
)]
pub attributes_for_faceting: Option<Option<HashMap<String, String>>>, pub attributes_for_faceting: Option<Option<HashMap<String, String>>>,
#[serde( #[serde(

View File

@ -244,9 +244,6 @@ pub fn load_dump(
// finally we can move all the unprocessed update files into our new DB // finally we can move all the unprocessed update files into our new DB
let update_path = tmp_dir_path.join("update_files"); let update_path = tmp_dir_path.join("update_files");
let files: Vec<_> = std::fs::read_dir(&db_path.join("updates"))?
.map(|file| file.unwrap().path())
.collect();
let db_update_path = db_path.join("updates/update_files"); let db_update_path = db_path.join("updates/update_files");
eprintln!("path {:?} exists: {:?}", update_path, update_path.exists()); eprintln!("path {:?} exists: {:?}", update_path, update_path.exists());
eprintln!( eprintln!(

View File

@ -1,8 +1,8 @@
use std::collections::{BTreeMap, BTreeSet}; use std::{collections::{BTreeMap, BTreeSet}, marker::PhantomData};
use log::warn; use log::warn;
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
use crate::index_controller; use crate::{index::Unchecked, index_controller};
use crate::index::deserialize_some; use crate::index::deserialize_some;
use super::*; use super::*;
@ -27,7 +27,7 @@ struct Settings {
} }
/// we need to **always** be able to convert the old settings to the settings currently being used /// we need to **always** be able to convert the old settings to the settings currently being used
impl From<Settings> for index_controller::Settings { impl From<Settings> for index_controller::Settings<Unchecked> {
fn from(settings: Settings) -> Self { fn from(settings: Settings) -> Self {
if settings.synonyms.flatten().is_some() { if settings.synonyms.flatten().is_some() {
error!("`synonyms` are not yet implemented and thus will be ignored"); error!("`synonyms` are not yet implemented and thus will be ignored");
@ -63,6 +63,7 @@ impl From<Settings> for index_controller::Settings {
}).collect())), }).collect())),
// we need to convert the old `Vec<String>` into a `BTreeSet<String>` // we need to convert the old `Vec<String>` into a `BTreeSet<String>`
stop_words: settings.stop_words.map(|o| o.map(|vec| vec.into_iter().collect())), stop_words: settings.stop_words.map(|o| o.map(|vec| vec.into_iter().collect())),
_kind: PhantomData,
} }
} }
} }
@ -89,9 +90,9 @@ pub fn import_index(size: usize, dump_path: &Path, index_path: &Path, primary_ke
// extract `settings.json` file and import content // extract `settings.json` file and import content
let settings = import_settings(&dump_path)?; let settings = import_settings(&dump_path)?;
let settings: index_controller::Settings = settings.into(); let settings: index_controller::Settings<Unchecked> = settings.into();
let update_builder = UpdateBuilder::new(0); let update_builder = UpdateBuilder::new(0);
index.update_settings(&settings, update_builder)?; index.update_settings(&settings.check(), update_builder)?;
let update_builder = UpdateBuilder::new(1); let update_builder = UpdateBuilder::new(1);
let file = File::open(&dump_path.join("documents.jsonl"))?; let file = File::open(&dump_path.join("documents.jsonl"))?;

View File

@ -1,11 +1,11 @@
use heed::EnvOpenOptions; use heed::EnvOpenOptions;
use milli::{update::{IndexDocumentsMethod, UpdateBuilder, UpdateFormat}}; use milli::{update::{IndexDocumentsMethod, UpdateBuilder, UpdateFormat}};
use crate::index::Index; use crate::index::{Checked, Index};
use crate::index_controller::Settings; use crate::index_controller::Settings;
use std::{fs::File, path::Path, sync::Arc}; use std::{fs::File, path::Path, sync::Arc};
/// Extract the settings from the `settings.json` file present at the provided `dir_path` /// Extract the settings from the `settings.json` file present at the provided `dir_path`
fn import_settings(dir_path: &Path) -> anyhow::Result<Settings> { fn import_settings(dir_path: &Path) -> anyhow::Result<Settings<Checked>> {
let path = dir_path.join("settings.json"); let path = dir_path.join("settings.json");
let file = File::open(path)?; let file = File::open(path)?;
let reader = std::io::BufReader::new(file); let reader = std::io::BufReader::new(file);