From e1936ae3f7999ba7a5137dcaff437a8afc0e7367 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Thu, 18 Oct 2018 17:08:51 +0200 Subject: [PATCH] feat: Add distinction on start of title --- examples/serve-console.rs | 30 +++++++++++++++++++----------- 1 file changed, 19 insertions(+), 11 deletions(-) diff --git a/examples/serve-console.rs b/examples/serve-console.rs index 97919a3ee..36e0b3cda 100644 --- a/examples/serve-console.rs +++ b/examples/serve-console.rs @@ -6,7 +6,7 @@ use std::path::PathBuf; use elapsed::measure_time; use rocksdb::{DB, DBOptions, IngestExternalFileOptions}; use raptor::rank::{criterion, Config, RankedStream, Document}; -use raptor::{automaton, Metadata, CommonWords}; +use raptor::{automaton, DocumentId, Metadata, CommonWords}; #[derive(Debug, StructOpt)] pub struct CommandConsole { @@ -62,13 +62,6 @@ impl ConsoleSearch { } } -// "Sony" "PlayStation 4 500GB" -fn starts_with_playstation(doc: &Document, database: &DB) -> Vec { - let title_key = format!("{}-title", doc.id); - let title = database.get(title_key.as_bytes()).unwrap().unwrap(); - title.get(0..4).map(|s| s.to_vec()).unwrap_or(Vec::new()) -} - fn search(metadata: &Metadata, database: &DB, common_words: &CommonWords, query: &str) { let mut automatons = Vec::new(); for query in query.split_whitespace().filter(|q| !common_words.contains(*q)) { @@ -76,16 +69,31 @@ fn search(metadata: &Metadata, database: &DB, common_words: &CommonWords, query: automatons.push(lev); } + let distinct_by_title_first_four_chars = |id: &DocumentId| { + let title_key = format!("{}-title", id); + match database.get(title_key.as_bytes()) { + Ok(Some(value)) => { + value.to_utf8().map(|s| s.chars().take(4).collect::()) + }, + Ok(None) => None, + Err(err) => { + eprintln!("{:?}", err); + None + } + } + }; + + // "Sony" "PlayStation 4 500GB" let config = Config { metadata: metadata, automatons: automatons, criteria: criterion::default(), - distinct: ((), 1), + distinct: (distinct_by_title_first_four_chars, 1), }; let stream = RankedStream::new(config); - // let documents = stream.retrieve_distinct_documents(|doc| starts_with_playstation(doc, database), 0..20); - let documents = stream.retrieve_documents(0..20); + let documents = stream.retrieve_distinct_documents(0..20); + // let documents = stream.retrieve_documents(0..20); for document in documents { let id_key = format!("{}-id", document.id);