Mirror of https://github.com/meilisearch/MeiliSearch (synced 2024-11-23 05:14:27 +01:00)
Introduce a better highlighting system
This commit is contained in:
parent 00336c5154
commit 74fa9ee4df
@@ -1,4 +1,5 @@
 use std::borrow::Cow;
+use std::collections::HashMap;
 use std::collections::HashSet;
 use std::convert::TryFrom;
 use std::mem;
@@ -28,7 +29,8 @@ use crate::distinct_map::{BufferedDistinctMap, DistinctMap};
 use crate::raw_document::RawDocument;
 use crate::{database::MainT, reordered_attrs::ReorderedAttrs};
 use crate::{store, Document, DocumentId, MResult};
-use crate::query_tree::{create_query_tree, traverse_query_tree, QueryResult, PostingsKey};
+use crate::query_tree::{create_query_tree, traverse_query_tree};
+use crate::query_tree::{Operation, QueryResult, QueryKind, QueryId, PostingsKey};
 use crate::query_tree::Context as QTContext;
 use crate::store::Postings;
 
@@ -88,6 +90,17 @@ where
     println!("{:?}", operation);
     println!("{:?}", mapping);
 
+    fn recurs_operation<'o>(map: &mut HashMap<QueryId, &'o QueryKind>, operation: &'o Operation) {
+        match operation {
+            Operation::And(ops) => ops.iter().for_each(|op| recurs_operation(map, op)),
+            Operation::Or(ops) => ops.iter().for_each(|op| recurs_operation(map, op)),
+            Operation::Query(query) => { map.insert(query.id, &query.kind); },
+        }
+    }
+
+    let mut queries_kinds = HashMap::new();
+    recurs_operation(&mut queries_kinds, &operation);
+
     let QueryResult { docids, queries } = traverse_query_tree(reader, &context, &operation).unwrap();
     println!("found {} documents", docids.len());
     println!("number of postings {:?}", queries.len());
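
The new recurs_operation walks the boolean query tree once and records, for every leaf query, which kind of matching it uses, keyed by its QueryId. Below is a minimal standalone sketch of that traversal; the Operation, Query and QueryKind shapes are simplified stand-ins for the crate's query_tree types, not their real definitions.

use std::collections::HashMap;

// Simplified stand-ins for the crate's query_tree types (assumption: the real
// ones carry more data, e.g. prefix flags and phrase queries).
type QueryId = usize;

#[derive(Debug)]
enum QueryKind { Exact(String), Tolerant(String) }

#[derive(Debug)]
struct Query { id: QueryId, kind: QueryKind }

enum Operation { And(Vec<Operation>), Or(Vec<Operation>), Query(Query) }

// Flatten the operation tree into a QueryId -> QueryKind lookup table,
// the same shape the bucket sort later hands to Document::from_raw.
fn recurs_operation<'o>(map: &mut HashMap<QueryId, &'o QueryKind>, operation: &'o Operation) {
    match operation {
        Operation::And(ops) => ops.iter().for_each(|op| recurs_operation(map, op)),
        Operation::Or(ops) => ops.iter().for_each(|op| recurs_operation(map, op)),
        Operation::Query(query) => { map.insert(query.id, &query.kind); },
    }
}

fn main() {
    let tree = Operation::Or(vec![
        Operation::Query(Query { id: 0, kind: QueryKind::Exact("hello".into()) }),
        Operation::And(vec![
            Operation::Query(Query { id: 1, kind: QueryKind::Tolerant("wrld".into()) }),
        ]),
    ]);

    let mut queries_kinds = HashMap::new();
    recurs_operation(&mut queries_kinds, &tree);
    println!("{:?}", queries_kinds); // two entries: 0 -> Exact("hello"), 1 -> Tolerant("wrld")
}
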
@@ -99,7 +112,6 @@ where
     mk_arena!(arena);
 
     for (PostingsKey{ query, input, distance, is_exact }, matches) in queries {
-
         let postings_list_view = PostingsListView::original(Rc::from(input), Rc::new(matches));
         let pllen = postings_list_view.len() as f32;
 
@@ -126,7 +138,6 @@ where
         }
 
     } else {
-
         let mut offset = 0;
         for id in docids.as_slice() {
            let di = DocIndex { document_id: *id, ..DocIndex::default() };
@@ -234,7 +245,7 @@ where
     debug!("proximity evaluation called {} times", proximity_count.load(Ordering::Relaxed));
 
     let iter = raw_documents.into_iter().skip(range.start).take(range.len());
-    let iter = iter.map(|rd| Document::from_raw(rd, &arena, searchable_attrs.as_ref()));
+    let iter = iter.map(|rd| Document::from_raw(rd, &queries_kinds, &arena, searchable_attrs.as_ref()));
     let documents = iter.collect();
 
     debug!("bucket sort took {:.02?}", before_bucket_sort.elapsed());
@@ -31,9 +31,13 @@ pub use self::update::{EnqueuedUpdateResult, ProcessedUpdateResult, UpdateStatus
 pub use meilisearch_types::{DocIndex, DocumentId, Highlight};
 pub use query_words_mapper::QueryWordsMapper;
 
+use std::convert::TryFrom;
+use std::collections::HashMap;
 use compact_arena::SmallArena;
+
 use crate::bucket_sort::PostingsListView;
 use crate::levenshtein::prefix_damerau_levenshtein;
+use crate::query_tree::{QueryId, QueryKind};
 use crate::reordered_attrs::ReorderedAttrs;
 
 #[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)]
@@ -47,6 +51,7 @@ pub struct Document {
 
 fn highlights_from_raw_document<'a, 'tag, 'txn>(
     raw_document: &RawDocument<'a, 'tag>,
+    queries_kinds: &HashMap<QueryId, &QueryKind>,
     arena: &SmallArena<'tag, PostingsListView<'txn>>,
     searchable_attrs: Option<&ReorderedAttrs>,
 ) -> Vec<Highlight>
@@ -56,14 +61,20 @@ fn highlights_from_raw_document<'a, 'tag, 'txn>(
     for bm in raw_document.bare_matches.iter() {
         let postings_list = &arena[bm.postings_list];
         let input = postings_list.input();
-        // let query = &automatons[bm.query_index as usize].query;
+        let kind = &queries_kinds.get(&bm.query_index);
 
         for di in postings_list.iter() {
-            // let covered_area = if query.len() > input.len() {
-            //     input.len()
-            // } else {
-            //     prefix_damerau_levenshtein(query.as_bytes(), input).1
-            // };
+            let covered_area = match kind {
+                Some(QueryKind::Exact(query)) | Some(QueryKind::Tolerant(query)) => {
+                    let len = if query.len() > input.len() {
+                        input.len()
+                    } else {
+                        prefix_damerau_levenshtein(query.as_bytes(), input).1
+                    };
+                    u16::try_from(len).unwrap_or(u16::max_value())
+                },
+                _ => di.char_length,
+            };
 
             let attribute = searchable_attrs
                 .and_then(|sa| sa.reverse(di.attribute))
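
With the query kinds available, the highlight length for exact and tolerant queries becomes the part of the indexed word that the typed query actually covers, while every other case keeps the stored char_length. A minimal sketch of that covered-area rule follows; QueryKind is again a simplified stand-in, and prefix_covered_len is only a crude placeholder for crate::levenshtein::prefix_damerau_levenshtein, whose second tuple component the real code uses as the covered length.

use std::convert::TryFrom;

// Simplified stand-in for the crate's QueryKind.
enum QueryKind { Exact(String), Tolerant(String) }

// Crude placeholder for prefix_damerau_levenshtein: the real function also
// tolerates typos; here we just count the matching byte prefix so the
// example stays self-contained.
fn prefix_covered_len(query: &[u8], input: &[u8]) -> usize {
    query.iter().zip(input).take_while(|(q, i)| q == i).count()
}

// Same shape as the new covered_area computation in highlights_from_raw_document.
fn covered_area(kind: Option<&QueryKind>, input: &[u8], char_length: u16) -> u16 {
    match kind {
        Some(QueryKind::Exact(query)) | Some(QueryKind::Tolerant(query)) => {
            let len = if query.len() > input.len() {
                input.len()
            } else {
                prefix_covered_len(query.as_bytes(), input)
            };
            u16::try_from(len).unwrap_or(u16::max_value())
        }
        _ => char_length,
    }
}

fn main() {
    // Typing "hell" against the indexed word "hello": 4 characters are covered,
    // so only that prefix gets highlighted instead of the whole word.
    let kind = QueryKind::Tolerant("hell".to_string());
    assert_eq!(covered_area(Some(&kind), b"hello", 5), 4);
}
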
@@ -72,7 +83,7 @@ fn highlights_from_raw_document<'a, 'tag, 'txn>(
             let highlight = Highlight {
                 attribute: attribute,
                 char_index: di.char_index,
-                char_length: di.char_length,
+                char_length: covered_area,
             };
 
             highlights.push(highlight);
@@ -96,12 +107,14 @@ impl Document {
     #[cfg(not(test))]
     pub fn from_raw<'a, 'tag, 'txn>(
         raw_document: RawDocument<'a, 'tag>,
+        queries_kinds: &HashMap<QueryId, &QueryKind>,
         arena: &SmallArena<'tag, PostingsListView<'txn>>,
         searchable_attrs: Option<&ReorderedAttrs>,
     ) -> Document
     {
         let highlights = highlights_from_raw_document(
             &raw_document,
+            queries_kinds,
             arena,
             searchable_attrs,
         );
@@ -112,6 +125,7 @@ impl Document {
     #[cfg(test)]
     pub fn from_raw<'a, 'tag, 'txn>(
         raw_document: RawDocument<'a, 'tag>,
+        queries_kinds: &HashMap<QueryId, &QueryKind>,
         arena: &SmallArena<'tag, PostingsListView<'txn>>,
         searchable_attrs: Option<&ReorderedAttrs>,
     ) -> Document
@@ -120,6 +134,7 @@ impl Document {
 
         let highlights = highlights_from_raw_document(
             &raw_document,
+            queries_kinds,
             arena,
             searchable_attrs,
         );
@@ -285,7 +285,6 @@ pub struct PostingsKey<'o> {
     pub is_exact: bool,
 }
 
-pub type Distance = u8;
 pub type Postings<'o, 'txn> = HashMap<PostingsKey<'o>, Cow<'txn, Set<DocIndex>>>;
 pub type Cache<'o, 'txn> = HashMap<&'o Operation, Cow<'txn, Set<DocumentId>>>;
 