Replace logging_timer with tracing spans

Louis Dureuil 2024-03-05 11:05:20 +01:00
parent f4a6261dea
commit 25f64ce7df
5 changed files with 15 additions and 30 deletions
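In short, this change drops the `logging_timer::time` proc macro, which logged the wall-clock time of each annotated function through the `log` crate, in favor of `tracing` spans: `#[tracing::instrument]` on whole functions, plus hand-rolled `trace_span!` guards around the two tokenization phases. A minimal sketch of the new pattern, assuming the `tracing` and `tracing-subscriber` crates (the function body and subscriber setup are illustrative, not code from this commit):

```rust
use tracing::instrument;

// Same shape as the attributes added in this commit: a trace-level span is
// entered for the duration of each call. `skip_all` keeps the arguments out
// of the span's fields; `target` names the subsystem for filtering.
#[instrument(level = "trace", skip_all, target = "search::bucket_sort")]
fn bucket_sort(candidates: &[u32]) -> Vec<u32> {
    let mut sorted = candidates.to_vec();
    sorted.sort_unstable();
    sorted
}

fn main() {
    // Spans are no-ops until a subscriber is installed. With FmtSpan::CLOSE
    // the formatter logs each span as it closes, including how long it was
    // busy, which is roughly the information the old timer macro printed.
    tracing_subscriber::fmt()
        .with_max_level(tracing::Level::TRACE)
        .with_span_events(tracing_subscriber::fmt::format::FmtSpan::CLOSE)
        .init();
    bucket_sort(&[3, 1, 2]);
}
```

Unlike the timer macro, the spans also nest, so a subscriber (a flame-graph layer, a profiler exporter) can attribute time across the whole call tree rather than per function in isolation.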

Cargo.lock (generated)

@@ -3023,28 +3023,6 @@ version = "0.4.20"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "b5e6163cb8c49088c2c36f57875e58ccd8c87c7427f7fbd50ea6710b2f3f2e8f"
 
-[[package]]
-name = "logging_timer"
-version = "1.1.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "64e96f261d684b7089aa576bb74e823241dccd994b27d30fabf1dcb3af284fe9"
-dependencies = [
- "log",
- "logging_timer_proc_macros",
-]
-
-[[package]]
-name = "logging_timer_proc_macros"
-version = "1.1.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "10a9062912d7952c5588cc474795e0b9ee008e7e6781127945b85413d4b99d81"
-dependencies = [
- "log",
- "proc-macro2",
- "quote",
- "syn 1.0.109",
-]
-
 [[package]]
 name = "lz4_flex"
 version = "0.10.0"
@@ -3316,7 +3294,6 @@ dependencies = [
  "json-depth-checker",
  "levenshtein_automata",
  "liquid",
- "logging_timer",
  "maplit",
  "md5",
  "meili-snap",

milli/Cargo.toml

@@ -70,13 +70,13 @@ itertools = "0.11.0"
 # profiling
 puffin = "0.16.0"
 
-# logging
-logging_timer = "1.1.0"
 csv = "1.3.0"
 candle-core = { git = "https://github.com/huggingface/candle.git", version = "0.3.1" }
 candle-transformers = { git = "https://github.com/huggingface/candle.git", version = "0.3.1" }
 candle-nn = { git = "https://github.com/huggingface/candle.git", version = "0.3.1" }
-tokenizers = { git = "https://github.com/huggingface/tokenizers.git", tag = "v0.14.1", version = "0.14.1", default_features = false, features = ["onig"] }
+tokenizers = { git = "https://github.com/huggingface/tokenizers.git", tag = "v0.14.1", version = "0.14.1", default_features = false, features = [
+    "onig",
+] }
 hf-hub = { git = "https://github.com/dureuill/hf-hub.git", branch = "rust_tls", default_features = false, features = [
     "online",
 ] }

milli/src/search/new/bucket_sort.rs

@@ -15,7 +15,7 @@ pub struct BucketSortOutput {
 
 // TODO: would probably be good to regroup some of these inside of a struct?
 #[allow(clippy::too_many_arguments)]
-#[logging_timer::time]
+#[tracing::instrument(level = "trace", skip_all, target = "search::bucket_sort")]
 pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>(
     ctx: &mut SearchContext<'ctx>,
     mut ranking_rules: Vec<BoxRankingRule<'ctx, Q>>,
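The `target` strings added here and below (`search::bucket_sort`, `search`, `search::tokens`, `search::query`) are what make the spans filterable per subsystem. A sketch of how a consumer could enable just these spans, assuming `tracing-subscriber` with its `env-filter` feature (none of this is part of the diff):

```rust
use tracing_subscriber::{fmt, EnvFilter};

fn main() {
    // Everything defaults to `info`; only the search targets are raised to `trace`.
    let filter =
        EnvFilter::new("info,search=trace,search::bucket_sort=trace,search::tokens=trace");
    fmt().with_env_filter(filter).init();
}
```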

milli/src/search/new/mod.rs

@@ -191,7 +191,7 @@ fn resolve_maximally_reduced_query_graph(
     Ok(docids)
 }
 
-#[logging_timer::time]
+#[tracing::instrument(level = "trace", skip_all, target = "search")]
 fn resolve_universe(
     ctx: &mut SearchContext,
     initial_universe: &RoaringBitmap,
@@ -557,7 +557,7 @@ pub fn execute_vector_search(
 }
 
 #[allow(clippy::too_many_arguments)]
-#[logging_timer::time]
+#[tracing::instrument(level = "trace", skip_all, target = "search")]
 pub fn execute_search(
     ctx: &mut SearchContext,
     query: Option<&str>,
@@ -577,6 +577,9 @@ pub fn execute_search(
 
     let mut located_query_terms = None;
     let query_terms = if let Some(query) = query {
+        let span = tracing::trace_span!(target: "search::tokens", "tokenizer_builder");
+        let entered = span.enter();
+
         // We make sure that the analyzer is aware of the stop words
         // this ensures that the query builder is able to properly remove them.
         let mut tokbuilder = TokenizerBuilder::new();
@@ -605,7 +608,12 @@ pub fn execute_search(
         }
 
         let tokenizer = tokbuilder.build();
+        drop(entered);
+
+        let span = tracing::trace_span!(target: "search::tokens", "tokenize");
+        let entered = span.enter();
         let tokens = tokenizer.tokenize(query);
+        drop(entered);
 
         let query_terms = located_query_terms_from_tokens(ctx, tokens, words_limit)?;
         if query_terms.is_empty() {
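Inside `execute_search` the two tokenization phases each get a hand-rolled span rather than the attribute, because they are sub-steps of one function. The guard pattern used above, in a minimal self-contained form (`tokenize` and its body are illustrative assumptions, not Meilisearch code):

```rust
use tracing::trace_span;

fn tokenize(query: &str) -> Vec<String> {
    // `enter()` returns a guard; the span stays current until the guard drops.
    let span = trace_span!(target: "search::tokens", "tokenize");
    let entered = span.enter();
    let tokens = query.split_whitespace().map(str::to_owned).collect();
    // Dropping the guard explicitly closes the span here, so the following
    // phase (building query terms) is not attributed to tokenization.
    drop(entered);
    tokens
}
```

An equivalent is `span.in_scope(|| ...)`, which ties the guard to a closure; the commit uses explicit `drop(entered)` so consecutive phases can rebind the same `entered` name.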

milli/src/search/new/query_term/parse_query.rs

@@ -9,7 +9,7 @@ use crate::search::new::query_term::{Lazy, Phrase, QueryTerm};
 use crate::{Result, SearchContext, MAX_WORD_LENGTH};
 
 /// Convert the tokenised search query into a list of located query terms.
-#[logging_timer::time]
+#[tracing::instrument(level = "trace", skip_all, target = "search::query")]
 pub fn located_query_terms_from_tokens(
     ctx: &mut SearchContext,
     query: NormalizedTokenIter,