get rid of log in milli and add logs for the bucket sort

Author: Tamo, 2024-02-06 10:49:23 +01:00 (committed by Louis Dureuil)
Parent: f158e96fe7
Commit: e773dfa9ba
14 changed files with 24 additions and 18 deletions

Cargo.lock (generated)

@@ -3813,7 +3813,6 @@ dependencies = [
 "json-depth-checker",
 "levenshtein_automata",
 "liquid",
-"log",
 "logging_timer",
 "maplit",
 "md5",


@@ -71,7 +71,6 @@ itertools = "0.11.0"
 puffin = "0.16.0"
 
 # logging
-log = "0.4.20"
 logging_timer = "1.1.0"
 csv = "1.3.0"
 candle-core = { git = "https://github.com/huggingface/candle.git", version = "0.3.1" }


@@ -6,9 +6,9 @@ use charabia::Normalize;
 use fst::automaton::{Automaton, Str};
 use fst::{IntoStreamer, Streamer};
 use levenshtein_automata::{LevenshteinAutomatonBuilder as LevBuilder, DFA};
-use log::error;
 use once_cell::sync::Lazy;
 use roaring::bitmap::RoaringBitmap;
+use tracing::error;
 
 pub use self::facet::{FacetDistribution, Filter, OrderBy, DEFAULT_VALUES_PER_FACET};
 pub use self::new::matches::{FormatOptions, MatchBounds, MatcherBuilder, MatchingWords};
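
Editor's note: most of this migration is mechanical because tracing deliberately mirrors log's macro names (error!, warn!, info!, debug!, trace!) and their format-argument syntax, so call sites compile unchanged once the import is swapped. A minimal standalone sketch of the pattern (not code from this commit; seeing any output would additionally require installing a tracing subscriber):

// Before: use log::error;
use tracing::error;

fn check_index(path: &str) {
    if let Err(e) = std::fs::metadata(path) {
        // Identical call-site syntax to log::error!.
        error!("cannot stat index at {}: {}", path, e);
    }
}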


@@ -166,6 +166,9 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>(
             continue;
         }
+        let span = tracing::trace_span!(target: "search::bucket_sort", "next_bucket", id = ranking_rules[cur_ranking_rule_index].id());
+        let entered = span.enter();
         let Some(next_bucket) = ranking_rules[cur_ranking_rule_index].next_bucket(
             ctx,
             logger,
@@ -175,6 +178,7 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>(
             back!();
             continue;
         };
+        drop(entered);
 
         ranking_rule_scores.push(next_bucket.score);
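
Editor's note: the added span covers only the next_bucket call. span.enter() returns a RAII guard, the explicit drop(entered) closes the timed region before the rest of the loop body runs, and on the back!(); continue; path the guard is dropped implicitly when it goes out of scope. A minimal sketch of the same enter/drop pattern with hypothetical names (find_bucket is a stand-in, not a milli function; tracing_subscriber is assumed as the subscriber crate):

use tracing::trace_span;

// Stand-in for one ranking rule producing its next bucket of documents.
fn find_bucket() -> Vec<u32> {
    vec![1, 2, 3]
}

fn main() {
    tracing_subscriber::fmt().with_max_level(tracing::Level::TRACE).init();

    // Guard pattern from the diff: the span measures find_bucket() alone.
    let span = trace_span!(target: "search::bucket_sort", "next_bucket", id = "words");
    let entered = span.enter();
    let bucket = find_bucket(); // runs inside the span
    drop(entered); // close the span so later work is not attributed to it

    println!("bucket of {} documents", bucket.len());
}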


@@ -85,8 +85,8 @@ use charabia::normalizer::{Normalize, NormalizerOption};
 use grenad::{CompressionType, SortAlgorithm};
 use heed::types::{Bytes, DecodeIgnore, SerdeJson};
 use heed::BytesEncode;
-use log::debug;
 use time::OffsetDateTime;
+use tracing::debug;
 
 use self::incremental::FacetsUpdateIncremental;
 use super::FacetsUpdateBulk;


@@ -78,7 +78,7 @@ pub fn enrich_documents_batch<R: Read + Seek>(
         },
         [] => return Ok(Err(UserError::NoPrimaryKeyCandidateFound)),
         [(field_id, name)] => {
-            log::info!("Primary key was not specified in index. Inferred to '{name}'");
+            tracing::info!("Primary key was not specified in index. Inferred to '{name}'");
             PrimaryKey::Flat { name, field_id: *field_id }
         }
         multiple => {


@@ -431,7 +431,7 @@ fn extract_facet_values(value: &Value, geo_field: bool) -> FilterableValues {
                 if let Ok(float) = original.parse() {
                     output_numbers.push(float);
                 } else {
-                    log::warn!(
+                    tracing::warn!(
                         "Internal error, could not parse a geofield that has been validated. Please open an issue."
                     )
                 }


@@ -186,12 +186,12 @@ pub fn extract_vector_points<R: io::Read + io::Seek>(
                     prompt.render(obkv, DelAdd::Deletion, field_id_map).unwrap_or_default();
                 let new_prompt = prompt.render(obkv, DelAdd::Addition, field_id_map)?;
                 if old_prompt != new_prompt {
-                    log::trace!(
+                    tracing::trace!(
                         "🚀 Changing prompt from\n{old_prompt}\n===to===\n{new_prompt}"
                     );
                     VectorStateDelta::NowGenerated(new_prompt)
                 } else {
-                    log::trace!("⏭️ Prompt unmodified, skipping");
+                    tracing::trace!("⏭️ Prompt unmodified, skipping");
                     VectorStateDelta::NoChange
                 }
             } else {


@@ -14,8 +14,8 @@ use std::fs::File;
 use std::io::BufReader;
 
 use crossbeam_channel::Sender;
-use log::debug;
 use rayon::prelude::*;
+use tracing::debug;
 
 use self::extract_docid_word_positions::extract_docid_word_positions;
 use self::extract_facet_number_docids::extract_facet_number_docids;


@@ -13,11 +13,11 @@ use std::result::Result as StdResult;
 use crossbeam_channel::{Receiver, Sender};
 use heed::types::Str;
 use heed::Database;
-use log::debug;
 use rand::SeedableRng;
 use roaring::RoaringBitmap;
 use serde::{Deserialize, Serialize};
 use slice_group_by::GroupBy;
+use tracing::debug;
 use typed_chunk::{write_typed_chunk_into_index, TypedChunk};
 
 use self::enrich::enrich_documents_batch;


@@ -517,7 +517,7 @@ pub(crate) fn write_typed_chunk_into_index(
                 }
             }
 
-            log::debug!("Finished vector chunk for {}", embedder_name);
+            tracing::debug!("Finished vector chunk for {}", embedder_name);
         }
         TypedChunk::ScriptLanguageDocids(sl_map) => {
             let span = tracing::trace_span!(target: "indexing::write_db", "script_language_docids");


@@ -4,7 +4,7 @@ use std::str;
 use grenad::CompressionType;
 use heed::types::Bytes;
 use heed::{BytesDecode, BytesEncode, Database};
-use log::debug;
+use tracing::debug;
 
 use crate::error::SerializationError;
 use crate::heed_codec::StrBEU16Codec;


@@ -73,7 +73,7 @@ impl Embedder {
         let device = match candle_core::Device::cuda_if_available(0) {
             Ok(device) => device,
             Err(error) => {
-                log::warn!("could not initialize CUDA device for Hugging Face embedder, defaulting to CPU: {}", error);
+                tracing::warn!("could not initialize CUDA device for Hugging Face embedder, defaulting to CPU: {}", error);
                 candle_core::Device::Cpu
             }
         };


@@ -173,12 +173,16 @@ impl Embedder {
             let retry_duration = match result {
                 Ok(embeddings) => return Ok(embeddings),
                 Err(retry) => {
-                    log::warn!("Failed: {}", retry.error);
+                    tracing::warn!("Failed: {}", retry.error);
                     tokenized |= retry.must_tokenize();
                     retry.into_duration(attempt)
                 }
             }?;
-            log::warn!("Attempt #{}, retrying after {}ms.", attempt, retry_duration.as_millis());
+            tracing::warn!(
+                "Attempt #{}, retrying after {}ms.",
+                attempt,
+                retry_duration.as_millis()
+            );
             tokio::time::sleep(retry_duration).await;
         }
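
Editor's note: the reflowed tracing::warn! keeps log-style positional format arguments. As an aside (not something this commit does), tracing can also record these values as structured fields that subscribers can filter and query. A hedged sketch with a hypothetical helper:

use std::time::Duration;
use tracing::warn;

// Hypothetical helper: attempt and delay_ms become key/value fields
// on the event instead of being baked into the message string.
fn warn_retry(attempt: u32, retry_duration: Duration) {
    warn!(attempt, delay_ms = retry_duration.as_millis() as u64, "retrying request");
}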
@@ -244,7 +248,7 @@ impl Embedder {
                 .map_err(EmbedError::openai_unexpected)
                 .map_err(Retry::retry_later)?;
 
-            log::warn!("OpenAI: input was too long, retrying on tokenized version. For best performance, limit the size of your prompt.");
+            tracing::warn!("OpenAI: input was too long, retrying on tokenized version. For best performance, limit the size of your prompt.");
 
             return Err(Retry::retry_tokenized(EmbedError::openai_too_many_tokens(
                 error_response.error,
@@ -266,7 +270,7 @@ impl Embedder {
         client: &reqwest::Client,
     ) -> Result<Vec<Embeddings<f32>>, Retry> {
         for text in texts {
-            log::trace!("Received prompt: {}", text.as_ref())
+            tracing::trace!("Received prompt: {}", text.as_ref())
         }
         let request = OpenAiRequest {
             model: self.options.embedding_model.name(),
@@ -289,7 +293,7 @@ impl Embedder {
             .map_err(EmbedError::openai_unexpected)
             .map_err(Retry::retry_later)?;
 
-        log::trace!("response: {:?}", response.data);
+        tracing::trace!("response: {:?}", response.data);
 
         Ok(response
             .data