mirror of
https://github.com/meilisearch/MeiliSearch
synced 2025-07-03 20:07:09 +02:00
feat: Introduce a way to distinct documents
This commit is contained in:
parent
3acac1458f
commit
37c709c9a9
8 changed files with 167 additions and 84 deletions
|
@ -117,14 +117,14 @@ impl CsvIndexer {
|
|||
}
|
||||
}
|
||||
|
||||
fn insert_document_words<'a, I, A, B>(builder: &mut MetadataBuilder<A, B>, doc_index: u64, attr: u8, words: I)
|
||||
fn insert_document_words<'a, I, A, B>(builder: &mut MetadataBuilder<A, B>, doc_id: u64, attr: u8, words: I)
|
||||
where A: io::Write,
|
||||
B: io::Write,
|
||||
I: IntoIterator<Item=(usize, &'a str)>,
|
||||
{
|
||||
for (index, word) in words {
|
||||
let doc_index = DocIndex {
|
||||
document: doc_index,
|
||||
document_id: doc_id,
|
||||
attribute: attr,
|
||||
attribute_index: index as u32,
|
||||
};
|
||||
|
|
|
@ -122,14 +122,14 @@ impl JsonLinesIndexer {
|
|||
}
|
||||
}
|
||||
|
||||
fn insert_document_words<'a, I, A, B>(builder: &mut MetadataBuilder<A, B>, doc_index: u64, attr: u8, words: I)
|
||||
fn insert_document_words<'a, I, A, B>(builder: &mut MetadataBuilder<A, B>, doc_id: u64, attr: u8, words: I)
|
||||
where A: io::Write,
|
||||
B: io::Write,
|
||||
I: IntoIterator<Item=(usize, &'a str)>,
|
||||
{
|
||||
for (index, word) in words {
|
||||
let doc_index = DocIndex {
|
||||
document: doc_index,
|
||||
document_id: doc_id,
|
||||
attribute: attr,
|
||||
attribute_index: index as u32,
|
||||
};
|
||||
|
|
|
@ -5,8 +5,8 @@ use std::path::PathBuf;
|
|||
|
||||
use elapsed::measure_time;
|
||||
use rocksdb::{DB, DBOptions, IngestExternalFileOptions};
|
||||
use raptor::rank::{criterion, Config, RankedStream, Document};
|
||||
use raptor::{automaton, Metadata, CommonWords};
|
||||
use raptor::rank::{criterion, RankedStreamBuilder};
|
||||
|
||||
#[derive(Debug, StructOpt)]
|
||||
pub struct CommandConsole {
|
||||
|
@ -62,6 +62,13 @@ impl ConsoleSearch {
|
|||
}
|
||||
}
|
||||
|
||||
// "Sony" "PlayStation 4 500GB"
|
||||
/// Looks up the value stored under the key `"<doc.id>-title"` in `database`
/// and returns a copy of its first four bytes.
///
/// Returns an empty vector when the stored value is shorter than four bytes.
///
/// # Panics
///
/// Panics if either `unwrap` on the lookup result fails (presumably a
/// database error or a missing key -- confirm against the rocksdb API in use).
fn starts_with_playstation(doc: &Document, database: &DB) -> Vec<u8> {
    let key = format!("{}-title", doc.id);
    let stored = database.get(key.as_bytes()).unwrap().unwrap();

    // Only the leading four bytes are kept; anything shorter maps to an empty vector.
    match stored.get(0..4) {
        Some(prefix) => prefix.to_vec(),
        None => Vec::new(),
    }
}
|
||||
|
||||
fn search(metadata: &Metadata, database: &DB, common_words: &CommonWords, query: &str) {
|
||||
let mut automatons = Vec::new();
|
||||
for query in query.split_whitespace().filter(|q| !common_words.contains(*q)) {
|
||||
|
@ -69,10 +76,15 @@ fn search(metadata: &Metadata, database: &DB, common_words: &CommonWords, query:
|
|||
automatons.push(lev);
|
||||
}
|
||||
|
||||
let mut builder = RankedStreamBuilder::new(metadata, automatons);
|
||||
builder.criteria(criterion::default());
|
||||
let config = Config {
|
||||
metadata: metadata,
|
||||
automatons: automatons,
|
||||
criteria: criterion::default(),
|
||||
distinct: ((), 1),
|
||||
};
|
||||
let stream = RankedStream::new(config);
|
||||
|
||||
let mut stream = builder.build();
|
||||
// let documents = stream.retrieve_distinct_documents(|doc| starts_with_playstation(doc, database), 0..20);
|
||||
let documents = stream.retrieve_documents(0..20);
|
||||
|
||||
for document in documents {
|
||||
|
|
|
@ -7,11 +7,10 @@ use std::path::PathBuf;
|
|||
use std::error::Error;
|
||||
use std::sync::Arc;
|
||||
|
||||
use raptor::rank::{criterion, RankedStreamBuilder};
|
||||
use raptor::rank::{criterion, Config, RankedStream};
|
||||
use raptor::{automaton, Metadata, CommonWords};
|
||||
use rocksdb::{DB, DBOptions, IngestExternalFileOptions};
|
||||
use warp::Filter;
|
||||
|
||||
use structopt::StructOpt;
|
||||
|
||||
#[derive(Debug, StructOpt)]
|
||||
|
@ -99,10 +98,14 @@ where M: AsRef<Metadata>,
|
|||
automatons.push(lev);
|
||||
}
|
||||
|
||||
let mut builder = RankedStreamBuilder::new(metadata.as_ref(), automatons);
|
||||
builder.criteria(criterion::default());
|
||||
let config = Config {
|
||||
metadata: metadata.as_ref(),
|
||||
automatons: automatons,
|
||||
criteria: criterion::default(),
|
||||
distinct: ((), 1),
|
||||
};
|
||||
let stream = RankedStream::new(config);
|
||||
|
||||
let mut stream = builder.build();
|
||||
let documents = stream.retrieve_documents(0..20);
|
||||
|
||||
let mut body = Vec::new();
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue