mirror of
https://github.com/meilisearch/MeiliSearch
synced 2025-07-04 20:37:15 +02:00
It works perfectly with some Rhai
This commit is contained in:
parent
5644af10ef
commit
02123a3326
3 changed files with 145 additions and 91 deletions
119
Cargo.lock
generated
119
Cargo.lock
generated
|
@ -262,6 +262,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
|||
checksum = "e89da841a80418a9b391ebaea17f5c112ffaaa96f621d2c285b5174da76b9011"
|
||||
dependencies = [
|
||||
"cfg-if",
|
||||
"const-random",
|
||||
"getrandom",
|
||||
"once_cell",
|
||||
"version_check",
|
||||
|
@ -1049,6 +1050,26 @@ dependencies = [
|
|||
"windows-sys 0.45.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "const-random"
|
||||
version = "0.1.18"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "87e00182fe74b066627d63b85fd550ac2998d4b0bd86bfed477a0ae4c7c71359"
|
||||
dependencies = [
|
||||
"const-random-macro",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "const-random-macro"
|
||||
version = "0.1.16"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f9d839f2a20b0aee515dc581a6172f2321f96cab76c1a38a4c584a194955390e"
|
||||
dependencies = [
|
||||
"getrandom",
|
||||
"once_cell",
|
||||
"tiny-keccak",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "constant_time_eq"
|
||||
version = "0.1.5"
|
||||
|
@ -1987,30 +2008,6 @@ dependencies = [
|
|||
"serde_json",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "gc-arena"
|
||||
version = "0.5.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "24d4a9d8c3c1ef4301b8afc383e53e102a13f9947da2181bf82828480dcc5165"
|
||||
dependencies = [
|
||||
"allocator-api2",
|
||||
"gc-arena-derive",
|
||||
"hashbrown",
|
||||
"sptr",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "gc-arena-derive"
|
||||
version = "0.5.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "8c952d28a64896b1c4ac382dcd7beeaeaabc13e8c7c7f800ea2938abd828ed30"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn 2.0.60",
|
||||
"synstructure",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "gemm"
|
||||
version = "0.17.1"
|
||||
|
@ -3535,11 +3532,11 @@ dependencies = [
|
|||
"obkv",
|
||||
"once_cell",
|
||||
"ordered-float",
|
||||
"piccolo",
|
||||
"puffin",
|
||||
"rand",
|
||||
"rand_pcg",
|
||||
"rayon",
|
||||
"rhai",
|
||||
"roaring",
|
||||
"rstar",
|
||||
"serde",
|
||||
|
@ -4039,21 +4036,6 @@ dependencies = [
|
|||
"siphasher 0.3.11",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "piccolo"
|
||||
version = "0.3.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "93dd1815b42446904bb2689d1c5d7680e8c68113d5b15a5a3297ba6c7a5f84af"
|
||||
dependencies = [
|
||||
"ahash",
|
||||
"allocator-api2",
|
||||
"anyhow",
|
||||
"gc-arena",
|
||||
"hashbrown",
|
||||
"rand",
|
||||
"thiserror",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "pin-project"
|
||||
version = "1.1.4"
|
||||
|
@ -4462,6 +4444,35 @@ version = "0.1.7"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "8c31b5c4033f8fdde8700e4657be2c497e7288f01515be52168c631e2e4d4086"
|
||||
|
||||
[[package]]
|
||||
name = "rhai"
|
||||
version = "1.18.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "7a7d88770120601ba1e548bb6bc2a05019e54ff01b51479e38e64ec3b59d4759"
|
||||
dependencies = [
|
||||
"ahash",
|
||||
"bitflags 2.5.0",
|
||||
"instant",
|
||||
"num-traits",
|
||||
"once_cell",
|
||||
"rhai_codegen",
|
||||
"serde",
|
||||
"smallvec",
|
||||
"smartstring",
|
||||
"thin-vec",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "rhai_codegen"
|
||||
version = "2.1.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "59aecf17969c04b9c0c5d21f6bc9da9fec9dd4980e64d1871443a476589d8c86"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn 2.0.60",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "ring"
|
||||
version = "0.17.8"
|
||||
|
@ -4875,6 +4886,9 @@ name = "smallvec"
|
|||
version = "1.12.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "2593d31f82ead8df961d8bd23a64c2ccf2eb5dd34b0a34bfb4dd54011c72009e"
|
||||
dependencies = [
|
||||
"serde",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "smartstring"
|
||||
|
@ -4883,6 +4897,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
|||
checksum = "3fb72c633efbaa2dd666986505016c32c3044395ceaf881518399d2f4127ee29"
|
||||
dependencies = [
|
||||
"autocfg",
|
||||
"serde",
|
||||
"static_assertions",
|
||||
"version_check",
|
||||
]
|
||||
|
@ -4939,12 +4954,6 @@ dependencies = [
|
|||
"unicode-segmentation",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "sptr"
|
||||
version = "0.3.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "3b9b39299b249ad65f3b7e96443bad61c02ca5cd3589f46cb6d610a0fd6c0d6a"
|
||||
|
||||
[[package]]
|
||||
name = "stable_deref_trait"
|
||||
version = "1.2.0"
|
||||
|
@ -5135,6 +5144,15 @@ dependencies = [
|
|||
"winapi-util",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "thin-vec"
|
||||
version = "0.2.13"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a38c90d48152c236a3ab59271da4f4ae63d678c5d7ad6b7714d7cb9760be5e4b"
|
||||
dependencies = [
|
||||
"serde",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "thiserror"
|
||||
version = "1.0.58"
|
||||
|
@ -5213,6 +5231,15 @@ dependencies = [
|
|||
"time-core",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tiny-keccak"
|
||||
version = "2.0.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "2c9d3793400a45f954c52e73d068316d76b6f4e36977e3fcebb13a2721e80237"
|
||||
dependencies = [
|
||||
"crunchy",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tinytemplate"
|
||||
version = "1.2.1"
|
||||
|
|
|
@ -87,7 +87,7 @@ rand = "0.8.5"
|
|||
tracing = "0.1.40"
|
||||
ureq = { version = "2.9.7", features = ["json"] }
|
||||
url = "2.5.0"
|
||||
piccolo = "0.3.1"
|
||||
rhai = { version = "1.18.0", features = ["serde", "no_module", "no_custom_syntax"] }
|
||||
|
||||
[dev-dependencies]
|
||||
mimalloc = { version = "0.1.39", default-features = false }
|
||||
|
|
|
@ -15,6 +15,7 @@ use grenad::{Merger, MergerBuilder};
|
|||
use heed::types::Str;
|
||||
use heed::Database;
|
||||
use rand::SeedableRng;
|
||||
use rhai::{Engine, Scope};
|
||||
use roaring::RoaringBitmap;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use slice_group_by::GroupBy;
|
||||
|
@ -31,7 +32,7 @@ pub use self::helpers::{
|
|||
};
|
||||
use self::helpers::{grenad_obkv_into_chunks, GrenadParameters};
|
||||
pub use self::transform::{Transform, TransformOutput};
|
||||
use crate::documents::{obkv_to_object, DocumentsBatchReader};
|
||||
use crate::documents::{obkv_to_object, DocumentsBatchBuilder, DocumentsBatchReader};
|
||||
use crate::error::{Error, InternalError, UserError};
|
||||
use crate::thread_pool_no_abort::ThreadPoolNoAbortBuilder;
|
||||
pub use crate::update::index_documents::helpers::CursorClonableMmap;
|
||||
|
@ -39,7 +40,7 @@ use crate::update::{
|
|||
IndexerConfig, UpdateIndexingStep, WordPrefixDocids, WordPrefixIntegerDocids, WordsPrefixesFst,
|
||||
};
|
||||
use crate::vector::EmbeddingConfigs;
|
||||
use crate::{fields_ids_map, CboRoaringBitmapCodec, Index, Result};
|
||||
use crate::{CboRoaringBitmapCodec, FieldsIdsMap, Index, Object, Result};
|
||||
|
||||
static MERGED_DATABASE_COUNT: usize = 7;
|
||||
static PREFIX_DATABASE_COUNT: usize = 4;
|
||||
|
@ -175,7 +176,7 @@ where
|
|||
|
||||
#[tracing::instrument(level = "trace", skip_all, target = "indexing::documents")]
|
||||
pub fn edit_documents(
|
||||
mut self,
|
||||
self,
|
||||
documents: &RoaringBitmap,
|
||||
code: &str,
|
||||
) -> Result<(Self, StdResult<u64, UserError>)> {
|
||||
|
@ -184,49 +185,75 @@ where
|
|||
return Ok((self, Ok(0)));
|
||||
}
|
||||
|
||||
let mut lua = piccolo::Lua::core();
|
||||
let executor = lua.enter(|ctx| ctx.stash(piccolo::Executor::new(ctx)));
|
||||
let fields_ids_map = self.index.fields_ids_map(self.wtxn)?;
|
||||
|
||||
for docid in documents {
|
||||
let document = match self.index.documents.get(self.wtxn, &docid)? {
|
||||
Some(document) => document,
|
||||
None => panic!("a document should always exists"),
|
||||
};
|
||||
|
||||
lua.try_enter(|ctx| {
|
||||
let closure = match piccolo::Closure::load(
|
||||
ctx,
|
||||
None,
|
||||
("return ".to_string() + code).as_bytes(),
|
||||
) {
|
||||
Ok(closure) => closure,
|
||||
Err(_) => piccolo::Closure::load(ctx, None, code.as_bytes())?,
|
||||
};
|
||||
let function = piccolo::Function::Closure(closure);
|
||||
|
||||
let table = piccolo::Table::new(&ctx);
|
||||
table.set(ctx, "internal-id", docid)?;
|
||||
table.set(ctx, "title", "hello")?;
|
||||
table.set(ctx, "description", "world")?;
|
||||
dbg!(&table);
|
||||
ctx.set_global("doc", table)?;
|
||||
|
||||
ctx.fetch(&executor).restart(ctx, function, ());
|
||||
Ok(())
|
||||
})
|
||||
.unwrap();
|
||||
|
||||
lua.execute::<()>(&executor).unwrap();
|
||||
lua.try_enter(|ctx| {
|
||||
let value = ctx.get_global("doc");
|
||||
dbg!(value);
|
||||
Ok(())
|
||||
})
|
||||
.unwrap();
|
||||
/// Transform every field of a raw obkv store into a JSON Object.
|
||||
pub fn all_obkv_to_rhaimap(
|
||||
obkv: obkv::KvReaderU16,
|
||||
fields_ids_map: &FieldsIdsMap,
|
||||
) -> Result<rhai::Map> {
|
||||
let all_keys = obkv.iter().map(|(k, _v)| k).collect::<Vec<_>>();
|
||||
all_keys
|
||||
.iter()
|
||||
.copied()
|
||||
.flat_map(|id| obkv.get(id).map(|value| (id, value)))
|
||||
.map(|(id, value)| {
|
||||
let name = fields_ids_map.name(id).ok_or(
|
||||
crate::error::FieldIdMapMissingEntry::FieldId {
|
||||
field_id: id,
|
||||
process: "allobkv_to_rhaimap",
|
||||
},
|
||||
)?;
|
||||
let value = serde_json::from_slice(value)
|
||||
.map_err(crate::error::InternalError::SerdeJson)?;
|
||||
Ok((name.into(), value))
|
||||
})
|
||||
.collect()
|
||||
}
|
||||
|
||||
Ok((self, Ok(documents.len())))
|
||||
fn rhaimap_to_object(map: rhai::Map) -> Object {
|
||||
let mut output = Object::new();
|
||||
for (key, value) in map {
|
||||
let value = serde_json::to_value(&value).unwrap();
|
||||
output.insert(key.into(), value);
|
||||
}
|
||||
output
|
||||
}
|
||||
|
||||
let engine = Engine::new();
|
||||
let ast = engine.compile(code).unwrap();
|
||||
let fields_ids_map = self.index.fields_ids_map(self.wtxn)?;
|
||||
let primary_key = self.index.primary_key(self.wtxn)?.unwrap();
|
||||
let primary_key_id = fields_ids_map.id(primary_key).unwrap();
|
||||
let mut documents_batch_builder = tempfile::tempfile().map(DocumentsBatchBuilder::new)?;
|
||||
|
||||
for docid in documents {
|
||||
let (document, document_id) = match self.index.documents.get(self.wtxn, &docid)? {
|
||||
Some(obkv) => {
|
||||
let document_id_bytes = obkv.get(primary_key_id).unwrap();
|
||||
let document_id: serde_json::Value =
|
||||
serde_json::from_slice(document_id_bytes).unwrap();
|
||||
let document = all_obkv_to_rhaimap(obkv, &fields_ids_map)?;
|
||||
(document, document_id)
|
||||
}
|
||||
None => panic!("documents must exist"),
|
||||
};
|
||||
|
||||
let mut scope = Scope::new();
|
||||
scope.push("doc", document);
|
||||
|
||||
let new_document = engine.eval_ast_with_scope::<rhai::Map>(&mut scope, &ast).unwrap();
|
||||
let new_document = rhaimap_to_object(new_document);
|
||||
|
||||
assert_eq!(
|
||||
document_id, new_document[primary_key],
|
||||
"you cannot change the document id when editing documents"
|
||||
);
|
||||
documents_batch_builder.append_json_object(&new_document)?;
|
||||
}
|
||||
|
||||
let file = documents_batch_builder.into_inner()?;
|
||||
let reader = DocumentsBatchReader::from_reader(file)?;
|
||||
|
||||
self.add_documents(reader)
|
||||
}
|
||||
|
||||
pub fn with_embedders(mut self, embedders: EmbeddingConfigs) -> Self {
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue