This commit is contained in:
ManyTheFish 2024-04-03 19:10:19 +02:00
parent 02c3d6b265
commit a489b406b4
5 changed files with 15 additions and 19 deletions

View File

@@ -113,7 +113,8 @@ async fn secrets_are_hidden_in_settings() {
"default": {
"source": "rest",
"url": "https://localhost:7777",
"apiKey": "My super secret value you will never guess"
"apiKey": "My super secret value you will never guess",
"dimensions": 4,
}
}
}))
@@ -184,6 +185,7 @@ async fn secrets_are_hidden_in_settings() {
"default": {
"source": "rest",
"apiKey": "My suXXXXXX...",
"dimensions": 4,
"documentTemplate": "{% for field in fields %} {{ field.name }}: {{ field.value }}\n{% endfor %}",
"url": "https://localhost:7777",
"query": null,

View File

@@ -55,7 +55,6 @@ pub fn extract_docid_word_positions<R: io::Read + io::Seek>(
let mut value_buffer = Vec::new();
// initialize tokenizer.
/// TODO: Fix ugly allocation
let old_stop_words = settings_diff.old.stop_words.as_ref();
let old_separators: Option<Vec<_>> = settings_diff
.old
@@ -72,7 +71,6 @@ pub fn extract_docid_word_positions<R: io::Read + io::Seek>(
);
let del_tokenizer = del_builder.build();
/// TODO: Fix ugly allocation
let new_stop_words = settings_diff.new.stop_words.as_ref();
let new_separators: Option<Vec<_>> = settings_diff
.new
@@ -267,7 +265,6 @@ fn lang_safe_tokens_from_document<'a>(
// then we don't rerun the extraction.
if !script_language.is_empty() {
// build a new temporary tokenizer including the allow list.
/// TODO: Fix ugly allocation
let stop_words = settings.stop_words.as_ref();
let separators: Option<Vec<_>> = settings
.allowed_separators

View File

@@ -198,7 +198,6 @@ fn run_extraction_task<FE, FS, M>(
M: Send,
{
let current_span = tracing::Span::current();
/// TODO: remove clone
let settings_diff = settings_diff.clone();
rayon::spawn(move || {
@@ -236,7 +235,6 @@ fn send_original_documents_data(
.build()?;
if settings_diff.reindex_vectors() || !settings_diff.settings_update_only() {
/// TODO: remove clone
let settings_diff = settings_diff.clone();
rayon::spawn(move || {
for (name, (embedder, prompt)) in settings_diff.new.embedding_configs.clone() {
@@ -250,17 +248,17 @@ fn send_original_documents_data(
match result {
Ok(ExtractedVectorPoints { manual_vectors, remove_vectors, prompts }) => {
let embeddings = match extract_embeddings(
prompts,
indexer,
embedder.clone(),
&request_threads,
) {
Ok(results) => Some(results),
Err(error) => {
let _ = lmdb_writer_sx_cloned.send(Err(error));
None
}
};
prompts,
indexer,
embedder.clone(),
&request_threads,
) {
Ok(results) => Some(results),
Err(error) => {
let _ = lmdb_writer_sx_cloned.send(Err(error));
None
}
};
if !(remove_vectors.is_empty()
&& manual_vectors.is_empty()

View File

@@ -252,7 +252,7 @@ where
let number_of_documents = self.index.number_of_documents(self.wtxn)?;
return Ok(DocumentAdditionResult { indexed_documents: 0, number_of_documents });
}
let mut output = self
let output = self
.transform
.take()
.expect("Invalid document addition state")

View File

@@ -400,7 +400,6 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> {
{
puffin::profile_function!();
let fields_ids_map = self.index.fields_ids_map(self.wtxn)?;
// if the settings are set before any document update, we don't need to do anything, and
// will set the primary key during the first document addition.
if self.index.number_of_documents(self.wtxn)? == 0 {