mirror of
https://github.com/meilisearch/MeiliSearch
synced 2024-11-22 21:04:27 +01:00
feat: Add distinction on start of title
This commit is contained in:
parent
86cb01b243
commit
e1936ae3f7
@ -6,7 +6,7 @@ use std::path::PathBuf;
|
|||||||
use elapsed::measure_time;
|
use elapsed::measure_time;
|
||||||
use rocksdb::{DB, DBOptions, IngestExternalFileOptions};
|
use rocksdb::{DB, DBOptions, IngestExternalFileOptions};
|
||||||
use raptor::rank::{criterion, Config, RankedStream, Document};
|
use raptor::rank::{criterion, Config, RankedStream, Document};
|
||||||
use raptor::{automaton, Metadata, CommonWords};
|
use raptor::{automaton, DocumentId, Metadata, CommonWords};
|
||||||
|
|
||||||
#[derive(Debug, StructOpt)]
|
#[derive(Debug, StructOpt)]
|
||||||
pub struct CommandConsole {
|
pub struct CommandConsole {
|
||||||
@ -62,13 +62,6 @@ impl ConsoleSearch {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// "Sony" "PlayStation 4 500GB"
|
|
||||||
fn starts_with_playstation(doc: &Document, database: &DB) -> Vec<u8> {
|
|
||||||
let title_key = format!("{}-title", doc.id);
|
|
||||||
let title = database.get(title_key.as_bytes()).unwrap().unwrap();
|
|
||||||
title.get(0..4).map(|s| s.to_vec()).unwrap_or(Vec::new())
|
|
||||||
}
|
|
||||||
|
|
||||||
fn search(metadata: &Metadata, database: &DB, common_words: &CommonWords, query: &str) {
|
fn search(metadata: &Metadata, database: &DB, common_words: &CommonWords, query: &str) {
|
||||||
let mut automatons = Vec::new();
|
let mut automatons = Vec::new();
|
||||||
for query in query.split_whitespace().filter(|q| !common_words.contains(*q)) {
|
for query in query.split_whitespace().filter(|q| !common_words.contains(*q)) {
|
||||||
@ -76,16 +69,31 @@ fn search(metadata: &Metadata, database: &DB, common_words: &CommonWords, query:
|
|||||||
automatons.push(lev);
|
automatons.push(lev);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
let distinct_by_title_first_four_chars = |id: &DocumentId| {
|
||||||
|
let title_key = format!("{}-title", id);
|
||||||
|
match database.get(title_key.as_bytes()) {
|
||||||
|
Ok(Some(value)) => {
|
||||||
|
value.to_utf8().map(|s| s.chars().take(4).collect::<String>())
|
||||||
|
},
|
||||||
|
Ok(None) => None,
|
||||||
|
Err(err) => {
|
||||||
|
eprintln!("{:?}", err);
|
||||||
|
None
|
||||||
|
}
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
// "Sony" "PlayStation 4 500GB"
|
||||||
let config = Config {
|
let config = Config {
|
||||||
metadata: metadata,
|
metadata: metadata,
|
||||||
automatons: automatons,
|
automatons: automatons,
|
||||||
criteria: criterion::default(),
|
criteria: criterion::default(),
|
||||||
distinct: ((), 1),
|
distinct: (distinct_by_title_first_four_chars, 1),
|
||||||
};
|
};
|
||||||
let stream = RankedStream::new(config);
|
let stream = RankedStream::new(config);
|
||||||
|
|
||||||
// let documents = stream.retrieve_distinct_documents(|doc| starts_with_playstation(doc, database), 0..20);
|
let documents = stream.retrieve_distinct_documents(0..20);
|
||||||
let documents = stream.retrieve_documents(0..20);
|
// let documents = stream.retrieve_documents(0..20);
|
||||||
|
|
||||||
for document in documents {
|
for document in documents {
|
||||||
let id_key = format!("{}-id", document.id);
|
let id_key = format!("{}-id", document.id);
|
||||||
|
Loading…
Reference in New Issue
Block a user