mirror of
https://github.com/meilisearch/MeiliSearch
synced 2025-07-03 03:47:02 +02:00
Make the changes necessary to use milli 0.31.1
This commit is contained in:
parent
7feb15df28
commit
dad86fc3d6
11 changed files with 51 additions and 61 deletions
|
@ -27,7 +27,7 @@ const DATA_FILE_NAME: &str = "documents.jsonl";
|
|||
impl Index {
|
||||
pub fn dump(&self, path: impl AsRef<Path>) -> Result<()> {
|
||||
// Acquire a write txn to make sure any ongoing write is finished before we start.
|
||||
let txn = self.env.write_txn()?;
|
||||
let txn = self.write_txn()?;
|
||||
let path = path.as_ref().join(format!("indexes/{}", self.uuid));
|
||||
|
||||
create_dir_all(&path)?;
|
||||
|
|
|
@ -4,9 +4,10 @@ use std::marker::PhantomData;
|
|||
use std::ops::Deref;
|
||||
use std::path::Path;
|
||||
use std::sync::Arc;
|
||||
use walkdir::WalkDir;
|
||||
|
||||
use fst::IntoStreamer;
|
||||
use milli::heed::{EnvOpenOptions, RoTxn};
|
||||
use milli::heed::{CompactionOption, EnvOpenOptions, RoTxn};
|
||||
use milli::update::{IndexerConfig, Setting};
|
||||
use milli::{obkv_to_json, FieldDistribution, DEFAULT_VALUES_PER_FACET};
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
@ -14,8 +15,7 @@ use serde_json::{Map, Value};
|
|||
use time::OffsetDateTime;
|
||||
use uuid::Uuid;
|
||||
|
||||
use crate::index::search::DEFAULT_PAGINATION_LIMITED_TO;
|
||||
use crate::EnvSizer;
|
||||
use crate::index::search::DEFAULT_PAGINATION_MAX_TOTAL_HITS;
|
||||
|
||||
use super::error::IndexError;
|
||||
use super::error::Result;
|
||||
|
@ -202,9 +202,9 @@ impl Index {
|
|||
};
|
||||
|
||||
let pagination = PaginationSettings {
|
||||
limited_to: Setting::Set(
|
||||
self.pagination_limited_to(txn)?
|
||||
.unwrap_or(DEFAULT_PAGINATION_LIMITED_TO),
|
||||
max_total_hits: Setting::Set(
|
||||
self.pagination_max_total_hits(txn)?
|
||||
.unwrap_or(DEFAULT_PAGINATION_MAX_TOTAL_HITS),
|
||||
),
|
||||
};
|
||||
|
||||
|
@ -245,7 +245,7 @@ impl Index {
|
|||
let fields_ids_map = self.fields_ids_map(&txn)?;
|
||||
let all_fields: Vec<_> = fields_ids_map.iter().map(|(id, _)| id).collect();
|
||||
|
||||
let iter = self.documents.range(&txn, &(..))?.skip(offset).take(limit);
|
||||
let iter = self.all_documents(&txn)?.skip(offset).take(limit);
|
||||
|
||||
let mut documents = Vec::new();
|
||||
|
||||
|
@ -302,7 +302,12 @@ impl Index {
|
|||
}
|
||||
|
||||
pub fn size(&self) -> u64 {
|
||||
self.env.size()
|
||||
WalkDir::new(self.inner.path())
|
||||
.into_iter()
|
||||
.filter_map(|entry| entry.ok())
|
||||
.filter_map(|entry| entry.metadata().ok())
|
||||
.filter(|metadata| metadata.is_file())
|
||||
.fold(0, |acc, m| acc + m.len())
|
||||
}
|
||||
|
||||
pub fn snapshot(&self, path: impl AsRef<Path>) -> Result<()> {
|
||||
|
@ -310,9 +315,7 @@ impl Index {
|
|||
create_dir_all(&dst)?;
|
||||
dst.push("data.mdb");
|
||||
let _txn = self.write_txn()?;
|
||||
self.inner
|
||||
.env
|
||||
.copy_to_path(dst, milli::heed::CompactionOption::Enabled)?;
|
||||
self.inner.copy_to_path(dst, CompactionOption::Enabled)?;
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
|
|
@ -29,7 +29,7 @@ pub const DEFAULT_HIGHLIGHT_POST_TAG: fn() -> String = || "</em>".to_string();
|
|||
|
||||
/// The maximum number of results that the engine
|
||||
/// will be able to return in one search call.
|
||||
pub const DEFAULT_PAGINATION_LIMITED_TO: usize = 1000;
|
||||
pub const DEFAULT_PAGINATION_MAX_TOTAL_HITS: usize = 1000;
|
||||
|
||||
#[derive(Deserialize, Debug, Clone, PartialEq)]
|
||||
#[serde(rename_all = "camelCase", deny_unknown_fields)]
|
||||
|
@ -91,14 +91,14 @@ impl Index {
|
|||
search.query(query);
|
||||
}
|
||||
|
||||
let pagination_limited_to = self
|
||||
.pagination_limited_to(&rtxn)?
|
||||
.unwrap_or(DEFAULT_PAGINATION_LIMITED_TO);
|
||||
let max_total_hits = self
|
||||
.pagination_max_total_hits(&rtxn)?
|
||||
.unwrap_or(DEFAULT_PAGINATION_MAX_TOTAL_HITS);
|
||||
|
||||
// Make sure that a user can't get more documents than the hard limit,
|
||||
// we align that on the offset too.
|
||||
let offset = min(query.offset.unwrap_or(0), pagination_limited_to);
|
||||
let limit = min(query.limit, pagination_limited_to.saturating_sub(offset));
|
||||
let offset = min(query.offset.unwrap_or(0), max_total_hits);
|
||||
let limit = min(query.limit, max_total_hits.saturating_sub(offset));
|
||||
|
||||
search.offset(offset);
|
||||
search.limit(limit);
|
||||
|
|
|
@ -86,7 +86,7 @@ pub struct FacetingSettings {
|
|||
pub struct PaginationSettings {
|
||||
#[cfg_attr(test, proptest(strategy = "test::setting_strategy()"))]
|
||||
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
|
||||
pub limited_to: Setting<usize>,
|
||||
pub max_total_hits: Setting<usize>,
|
||||
}
|
||||
|
||||
/// Holds all the settings for an index. `T` can either be `Checked` if they represent settings
|
||||
|
@ -474,12 +474,12 @@ pub fn apply_settings_to_builder(
|
|||
}
|
||||
|
||||
match settings.pagination {
|
||||
Setting::Set(ref value) => match value.limited_to {
|
||||
Setting::Set(val) => builder.set_pagination_limited_to(val),
|
||||
Setting::Reset => builder.reset_pagination_limited_to(),
|
||||
Setting::Set(ref value) => match value.max_total_hits {
|
||||
Setting::Set(val) => builder.set_pagination_max_total_hits(val),
|
||||
Setting::Reset => builder.reset_pagination_max_total_hits(),
|
||||
Setting::NotSet => (),
|
||||
},
|
||||
Setting::Reset => builder.reset_pagination_limited_to(),
|
||||
Setting::Reset => builder.reset_pagination_max_total_hits(),
|
||||
Setting::NotSet => (),
|
||||
}
|
||||
}
|
||||
|
|
|
@ -3,6 +3,7 @@ use std::fs::{create_dir_all, File};
|
|||
use std::io::{BufRead, BufReader, Write};
|
||||
use std::path::{Path, PathBuf};
|
||||
use std::sync::Arc;
|
||||
use walkdir::WalkDir;
|
||||
|
||||
use milli::heed::types::{SerdeBincode, Str};
|
||||
use milli::heed::{CompactionOption, Database, Env};
|
||||
|
@ -11,7 +12,6 @@ use uuid::Uuid;
|
|||
|
||||
use super::error::{IndexResolverError, Result};
|
||||
use crate::tasks::task::TaskId;
|
||||
use crate::EnvSizer;
|
||||
|
||||
#[derive(Serialize, Deserialize)]
|
||||
pub struct DumpEntry {
|
||||
|
@ -131,7 +131,12 @@ impl HeedMetaStore {
|
|||
}
|
||||
|
||||
fn get_size(&self) -> Result<u64> {
|
||||
Ok(self.env.size())
|
||||
Ok(WalkDir::new(self.env.path())
|
||||
.into_iter()
|
||||
.filter_map(|entry| entry.ok())
|
||||
.filter_map(|entry| entry.metadata().ok())
|
||||
.filter(|metadata| metadata.is_file())
|
||||
.fold(0, |acc, m| acc + m.len()))
|
||||
}
|
||||
|
||||
pub fn dump(&self, path: PathBuf) -> Result<()> {
|
||||
|
|
|
@ -20,23 +20,6 @@ pub use milli::heed;
|
|||
mod compression;
|
||||
pub mod document_formats;
|
||||
|
||||
use walkdir::WalkDir;
|
||||
|
||||
pub trait EnvSizer {
|
||||
fn size(&self) -> u64;
|
||||
}
|
||||
|
||||
impl EnvSizer for milli::heed::Env {
|
||||
fn size(&self) -> u64 {
|
||||
WalkDir::new(self.path())
|
||||
.into_iter()
|
||||
.filter_map(|entry| entry.ok())
|
||||
.filter_map(|entry| entry.metadata().ok())
|
||||
.filter(|metadata| metadata.is_file())
|
||||
.fold(0, |acc, m| acc + m.len())
|
||||
}
|
||||
}
|
||||
|
||||
/// Check if a db is empty. It does not provide any information on the
|
||||
/// validity of the data in it.
|
||||
/// We consider a database non-empty when it is a non-empty directory.
|
||||
|
|
|
@ -7,6 +7,7 @@ use anyhow::bail;
|
|||
use fs_extra::dir::{self, CopyOptions};
|
||||
use log::{info, trace};
|
||||
use meilisearch_auth::open_auth_store_env;
|
||||
use milli::heed::CompactionOption;
|
||||
use tokio::sync::RwLock;
|
||||
use tokio::time::sleep;
|
||||
use walkdir::WalkDir;
|
||||
|
@ -181,9 +182,7 @@ impl SnapshotJob {
|
|||
let mut options = milli::heed::EnvOpenOptions::new();
|
||||
options.map_size(self.index_size);
|
||||
let index = milli::Index::new(options, entry.path())?;
|
||||
index
|
||||
.env
|
||||
.copy_to_path(dst, milli::heed::CompactionOption::Enabled)?;
|
||||
index.copy_to_path(dst, CompactionOption::Enabled)?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue