get rids of log in milli and add logs for the bucket sort

This commit is contained in:
Tamo 2024-02-06 10:49:23 +01:00 committed by Louis Dureuil
parent f158e96fe7
commit e773dfa9ba
No known key found for this signature in database
14 changed files with 24 additions and 18 deletions

1
Cargo.lock generated
View File

@ -3813,7 +3813,6 @@ dependencies = [
"json-depth-checker",
"levenshtein_automata",
"liquid",
"log",
"logging_timer",
"maplit",
"md5",

View File

@ -71,7 +71,6 @@ itertools = "0.11.0"
puffin = "0.16.0"
# logging
log = "0.4.20"
logging_timer = "1.1.0"
csv = "1.3.0"
candle-core = { git = "https://github.com/huggingface/candle.git", version = "0.3.1" }

View File

@ -6,9 +6,9 @@ use charabia::Normalize;
use fst::automaton::{Automaton, Str};
use fst::{IntoStreamer, Streamer};
use levenshtein_automata::{LevenshteinAutomatonBuilder as LevBuilder, DFA};
use log::error;
use once_cell::sync::Lazy;
use roaring::bitmap::RoaringBitmap;
use tracing::error;
pub use self::facet::{FacetDistribution, Filter, OrderBy, DEFAULT_VALUES_PER_FACET};
pub use self::new::matches::{FormatOptions, MatchBounds, MatcherBuilder, MatchingWords};

View File

@ -166,6 +166,9 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>(
continue;
}
let span = tracing::trace_span!(target: "search::bucket_sort", "next_bucket", id = ranking_rules[cur_ranking_rule_index].id());
let entered = span.enter();
let Some(next_bucket) = ranking_rules[cur_ranking_rule_index].next_bucket(
ctx,
logger,
@ -175,6 +178,7 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>(
back!();
continue;
};
drop(entered);
ranking_rule_scores.push(next_bucket.score);

View File

@ -85,8 +85,8 @@ use charabia::normalizer::{Normalize, NormalizerOption};
use grenad::{CompressionType, SortAlgorithm};
use heed::types::{Bytes, DecodeIgnore, SerdeJson};
use heed::BytesEncode;
use log::debug;
use time::OffsetDateTime;
use tracing::debug;
use self::incremental::FacetsUpdateIncremental;
use super::FacetsUpdateBulk;

View File

@ -78,7 +78,7 @@ pub fn enrich_documents_batch<R: Read + Seek>(
},
[] => return Ok(Err(UserError::NoPrimaryKeyCandidateFound)),
[(field_id, name)] => {
log::info!("Primary key was not specified in index. Inferred to '{name}'");
tracing::info!("Primary key was not specified in index. Inferred to '{name}'");
PrimaryKey::Flat { name, field_id: *field_id }
}
multiple => {

View File

@ -431,7 +431,7 @@ fn extract_facet_values(value: &Value, geo_field: bool) -> FilterableValues {
if let Ok(float) = original.parse() {
output_numbers.push(float);
} else {
log::warn!(
tracing::warn!(
"Internal error, could not parse a geofield that has been validated. Please open an issue."
)
}

View File

@ -186,12 +186,12 @@ pub fn extract_vector_points<R: io::Read + io::Seek>(
prompt.render(obkv, DelAdd::Deletion, field_id_map).unwrap_or_default();
let new_prompt = prompt.render(obkv, DelAdd::Addition, field_id_map)?;
if old_prompt != new_prompt {
log::trace!(
tracing::trace!(
"🚀 Changing prompt from\n{old_prompt}\n===to===\n{new_prompt}"
);
VectorStateDelta::NowGenerated(new_prompt)
} else {
log::trace!("⏭️ Prompt unmodified, skipping");
tracing::trace!("⏭️ Prompt unmodified, skipping");
VectorStateDelta::NoChange
}
} else {

View File

@ -14,8 +14,8 @@ use std::fs::File;
use std::io::BufReader;
use crossbeam_channel::Sender;
use log::debug;
use rayon::prelude::*;
use tracing::debug;
use self::extract_docid_word_positions::extract_docid_word_positions;
use self::extract_facet_number_docids::extract_facet_number_docids;

View File

@ -13,11 +13,11 @@ use std::result::Result as StdResult;
use crossbeam_channel::{Receiver, Sender};
use heed::types::Str;
use heed::Database;
use log::debug;
use rand::SeedableRng;
use roaring::RoaringBitmap;
use serde::{Deserialize, Serialize};
use slice_group_by::GroupBy;
use tracing::debug;
use typed_chunk::{write_typed_chunk_into_index, TypedChunk};
use self::enrich::enrich_documents_batch;

View File

@ -517,7 +517,7 @@ pub(crate) fn write_typed_chunk_into_index(
}
}
log::debug!("Finished vector chunk for {}", embedder_name);
tracing::debug!("Finished vector chunk for {}", embedder_name);
}
TypedChunk::ScriptLanguageDocids(sl_map) => {
let span = tracing::trace_span!(target: "indexing::write_db", "script_language_docids");

View File

@ -4,7 +4,7 @@ use std::str;
use grenad::CompressionType;
use heed::types::Bytes;
use heed::{BytesDecode, BytesEncode, Database};
use log::debug;
use tracing::debug;
use crate::error::SerializationError;
use crate::heed_codec::StrBEU16Codec;

View File

@ -73,7 +73,7 @@ impl Embedder {
let device = match candle_core::Device::cuda_if_available(0) {
Ok(device) => device,
Err(error) => {
log::warn!("could not initialize CUDA device for Hugging Face embedder, defaulting to CPU: {}", error);
tracing::warn!("could not initialize CUDA device for Hugging Face embedder, defaulting to CPU: {}", error);
candle_core::Device::Cpu
}
};

View File

@ -173,12 +173,16 @@ impl Embedder {
let retry_duration = match result {
Ok(embeddings) => return Ok(embeddings),
Err(retry) => {
log::warn!("Failed: {}", retry.error);
tracing::warn!("Failed: {}", retry.error);
tokenized |= retry.must_tokenize();
retry.into_duration(attempt)
}
}?;
log::warn!("Attempt #{}, retrying after {}ms.", attempt, retry_duration.as_millis());
tracing::warn!(
"Attempt #{}, retrying after {}ms.",
attempt,
retry_duration.as_millis()
);
tokio::time::sleep(retry_duration).await;
}
@ -244,7 +248,7 @@ impl Embedder {
.map_err(EmbedError::openai_unexpected)
.map_err(Retry::retry_later)?;
log::warn!("OpenAI: input was too long, retrying on tokenized version. For best performance, limit the size of your prompt.");
tracing::warn!("OpenAI: input was too long, retrying on tokenized version. For best performance, limit the size of your prompt.");
return Err(Retry::retry_tokenized(EmbedError::openai_too_many_tokens(
error_response.error,
@ -266,7 +270,7 @@ impl Embedder {
client: &reqwest::Client,
) -> Result<Vec<Embeddings<f32>>, Retry> {
for text in texts {
log::trace!("Received prompt: {}", text.as_ref())
tracing::trace!("Received prompt: {}", text.as_ref())
}
let request = OpenAiRequest {
model: self.options.embedding_model.name(),
@ -289,7 +293,7 @@ impl Embedder {
.map_err(EmbedError::openai_unexpected)
.map_err(Retry::retry_later)?;
log::trace!("response: {:?}", response.data);
tracing::trace!("response: {:?}", response.data);
Ok(response
.data