Mirror of https://github.com/meilisearch/MeiliSearch (synced 2024-11-26 14:54:27 +01:00)
Merge pull request #49 from Kerollmops/serialize-any-map
Serialize any map
This commit is contained in: commit 76ef2cceeb
@@ -9,7 +9,7 @@ bincode = "1.0"
 byteorder = "1.2"
 crossbeam = "0.6"
 fst = "0.3"
-hashbrown = "0.1"
+hashbrown = { version = "0.1", features = ["serde"] }
 lazy_static = "1.1"
 levenshtein_automata = { version = "0.1", features = ["fst_automaton"] }
 linked-hash-map = { version = "0.5", features = ["serde_impl"] }

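The only manifest change enables hashbrown's serde feature, which the map-based Document types below depend on. A minimal sketch (mine, not part of the commit) of what the feature provides, reusing the bincode dependency already declared above:

    use hashbrown::HashMap;

    // Without features = ["serde"], hashbrown::HashMap implements neither
    // Serialize nor Deserialize, so this round trip would not compile.
    fn roundtrip() -> Result<(), bincode::Error> {
        let mut doc = HashMap::new();
        doc.insert("title".to_string(), "Hello world".to_string());

        let bytes = bincode::serialize(&doc)?;
        let back: HashMap<String, String> = bincode::deserialize(&bytes)?;
        assert_eq!(doc, back);
        Ok(())
    }

    fn main() {
        roundtrip().unwrap();
    }
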
@@ -1,41 +1,35 @@
 use std::path::{Path, PathBuf};
 use std::error::Error;
+use std::borrow::Cow;
+use std::fs::File;
 
+use hashbrown::HashMap;
 use serde_derive::{Serialize, Deserialize};
 use structopt::StructOpt;
 
-use meilidb::database::schema::{Schema, SchemaBuilder, STORED, INDEXED};
-use meilidb::database::UpdateBuilder;
+use meilidb::database::{Database, Schema, UpdateBuilder};
 use meilidb::tokenizer::DefaultBuilder;
-use meilidb::database::Database;
 
 #[derive(Debug, StructOpt)]
 pub struct Opt {
-    /// The destination where the database must be created
+    /// The destination where the database must be created.
     #[structopt(parse(from_os_str))]
     pub database_path: PathBuf,
 
     /// The csv file to index.
     #[structopt(parse(from_os_str))]
     pub csv_data_path: PathBuf,
+
+    /// The path to the schema.
+    #[structopt(long = "schema", parse(from_os_str))]
+    pub schema_path: PathBuf,
 }
 
-#[derive(Debug, Serialize, Deserialize)]
-struct Document<'a> {
-    id: &'a str,
-    title: &'a str,
-    description: &'a str,
-    image: &'a str,
-}
-
-fn create_schema() -> Schema {
-    let mut schema = SchemaBuilder::with_identifier("id");
-    schema.new_attribute("id", STORED);
-    schema.new_attribute("title", STORED | INDEXED);
-    schema.new_attribute("description", STORED | INDEXED);
-    schema.new_attribute("image", STORED);
-    schema.build()
-}
+#[derive(Serialize, Deserialize)]
+struct Document<'a> (
+    #[serde(borrow)]
+    HashMap<Cow<'a, str>, Cow<'a, str>>
+);
 
 fn index(schema: Schema, database_path: &Path, csv_data_path: &Path) -> Result<Database, Box<Error>> {
     let database = Database::create(database_path, schema.clone())?;

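With the hard-coded struct replaced by a borrowed map, the indexer no longer cares which columns the CSV has: every (header, value) pair ends up in the Document. A hedged sketch of that deserialization path, assuming the example feeds records in through the csv crate (that dependency is not visible in the manifest hunk above); the function and sample data are mine:

    use std::borrow::Cow;
    use std::error::Error;

    use hashbrown::HashMap;
    use serde_derive::Deserialize;

    // Same shape as the new Document in the example: a transparent map of
    // borrowed header -> value pairs.
    #[derive(Deserialize)]
    struct Document<'a>(
        #[serde(borrow)]
        HashMap<Cow<'a, str>, Cow<'a, str>>,
    );

    fn count_fields_of_first_row(csv_text: &str) -> Result<Option<usize>, Box<dyn Error>> {
        let mut reader = csv::ReaderBuilder::new().from_reader(csv_text.as_bytes());
        let headers = reader.headers()?.clone();

        if let Some(record) = reader.records().next() {
            let record = record?;
            // Whatever the schema declares, every column is captured here.
            let doc: Document = record.deserialize(Some(&headers))?;
            return Ok(Some(doc.0.len()));
        }
        Ok(None)
    }

    fn main() -> Result<(), Box<dyn Error>> {
        let n = count_fields_of_first_row("id,title,description,image\n1,Hello,World,img.png\n")?;
        assert_eq!(n, Some(4));
        Ok(())
    }
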
@@ -71,7 +65,10 @@ fn index(schema: Schema, database_path: &Path, csv_data_path: &Path) -> Result<D
 fn main() -> Result<(), Box<Error>> {
     let opt = Opt::from_args();
 
-    let schema = create_schema();
+    let schema = {
+        let file = File::open(&opt.schema_path)?;
+        Schema::from_toml(file)?
+    };
 
     let (elapsed, result) = elapsed::measure_time(|| {
         index(schema, &opt.database_path, &opt.csv_data_path)

@@ -82,6 +79,5 @@ fn main() -> Result<(), Box<Error>> {
     }
 
     println!("database created in {} at: {:?}", elapsed, opt.database_path);
-
     Ok(())
 }

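With the schema now supplied at run time, the indexing example is driven entirely by the TOML file added below. Invocation would look roughly like this (a sketch: the example name and the database/CSV paths are placeholders; only the positional order and the --schema flag come from the Opt struct above):

    cargo run --release --example create-database -- test.mdb products.csv --schema examples/schema-example.toml
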
@@ -1,11 +1,14 @@
+use std::collections::btree_map::{BTreeMap, Entry};
+use std::iter::FromIterator;
 use std::io::{self, Write};
 use std::path::PathBuf;
 use std::error::Error;
 
+use hashbrown::{HashMap, HashSet};
 use termcolor::{Color, ColorChoice, ColorSpec, StandardStream, WriteColor};
-use serde_derive::{Serialize, Deserialize};
 use structopt::StructOpt;
 
+use meilidb::database::schema::SchemaAttr;
 use meilidb::database::Database;
 use meilidb::Match;

@@ -15,18 +18,15 @@ pub struct Opt {
     #[structopt(parse(from_os_str))]
     pub database_path: PathBuf,
 
+    /// Fields that must be displayed.
+    pub displayed_fields: Vec<String>,
+
     /// The number of returned results
     #[structopt(short = "n", long = "number-results", default_value = "10")]
     pub number_results: usize,
 }
 
-#[derive(Debug, Serialize, Deserialize)]
-struct Document {
-    id: String,
-    title: String,
-    description: String,
-    image: String,
-}
+type Document = HashMap<String, String>;
 
 fn display_highlights(text: &str, ranges: &[usize]) -> io::Result<()> {
     let mut stdout = StandardStream::stdout(ColorChoice::Always);

@@ -45,20 +45,30 @@ fn display_highlights(text: &str, ranges: &[usize]) -> io::Result<()> {
     Ok(())
 }
 
-fn create_highlight_areas(text: &str, matches: &[Match], attribute: u16) -> Vec<usize> {
-    let mut title_areas = Vec::new();
-    title_areas.push(0);
+fn create_highlight_areas(text: &str, matches: &[Match], attribute: SchemaAttr) -> Vec<usize> {
+    let mut byte_indexes = BTreeMap::new();
+
     for match_ in matches {
-        if match_.attribute.attribute() == attribute {
+        let match_attribute = match_.attribute.attribute();
+        if SchemaAttr::new(match_attribute) == attribute {
             let word_area = match_.word_area;
             let byte_index = word_area.byte_index() as usize;
             let length = word_area.length() as usize;
-            title_areas.push(byte_index);
-            title_areas.push(byte_index + length);
+            match byte_indexes.entry(byte_index) {
+                Entry::Vacant(entry) => { entry.insert(length); },
+                Entry::Occupied(mut entry) => if *entry.get() < length { entry.insert(length); },
+            }
         }
     }
 
+    let mut title_areas = Vec::new();
+    title_areas.push(0);
+    for (byte_index, length) in byte_indexes {
+        title_areas.push(byte_index);
+        title_areas.push(byte_index + length);
+    }
     title_areas.push(text.len());
+    title_areas.sort_unstable();
     title_areas
 }

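The rewritten create_highlight_areas first collapses matches by starting byte, keeping only the longest match for each position, and only then emits the sorted highlight boundaries. A self-contained sketch of that boundary logic, using plain (byte_index, length) pairs in place of meilidb's Match type:

    use std::collections::btree_map::{BTreeMap, Entry};

    fn highlight_boundaries(text_len: usize, matches: &[(usize, usize)]) -> Vec<usize> {
        // Keep only the longest match per starting byte.
        let mut byte_indexes = BTreeMap::new();
        for &(byte_index, length) in matches {
            match byte_indexes.entry(byte_index) {
                Entry::Vacant(entry) => { entry.insert(length); },
                Entry::Occupied(mut entry) => if *entry.get() < length { entry.insert(length); },
            }
        }

        // Emit sorted boundary offsets: start of text, start/end of each
        // retained match, end of text.
        let mut areas = vec![0];
        for (byte_index, length) in byte_indexes {
            areas.push(byte_index);
            areas.push(byte_index + length);
        }
        areas.push(text_len);
        areas.sort_unstable();
        areas
    }

    fn main() {
        // Two matches start at byte 5; only the longer one (5..10) is kept.
        let areas = highlight_boundaries(20, &[(5, 3), (5, 5), (12, 4)]);
        assert_eq!(areas, vec![0, 5, 10, 12, 16, 20]);
    }
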
@@ -80,6 +90,7 @@ fn main() -> Result<(), Box<Error>> {
         let query = buffer.trim_end_matches('\n');
 
         let view = database.view();
+        let schema = view.schema();
 
         let (elapsed, documents) = elapsed::measure_time(|| {
             let builder = view.query_builder().unwrap();

@@ -90,22 +101,39 @@ fn main() -> Result<(), Box<Error>> {
         for doc in documents {
             match view.document_by_id::<Document>(doc.id) {
                 Ok(document) => {
+                    for name in &opt.displayed_fields {
+                        let attr = match schema.attribute(name) {
+                            Some(attr) => attr,
+                            None => continue,
+                        };
+                        let text = match document.get(name) {
+                            Some(text) => text,
+                            None => continue,
+                        };
 
-                    print!("title: ");
-                    let title_areas = create_highlight_areas(&document.title, &doc.matches, 1);
-                    display_highlights(&document.title, &title_areas)?;
-                    println!();
-                    print!("description: ");
-                    let description_areas = create_highlight_areas(&document.description, &doc.matches, 2);
-                    display_highlights(&document.description, &description_areas)?;
-                    println!();
+                        print!("{}: ", name);
+                        let areas = create_highlight_areas(&text, &doc.matches, attr);
+                        display_highlights(&text, &areas)?;
+                        println!();
+                    }
                 },
                 Err(e) => eprintln!("{}", e),
             }
 
+            let mut matching_attributes = HashSet::new();
+            for _match in doc.matches {
+                let attr = SchemaAttr::new(_match.attribute.attribute());
+                let name = schema.attribute_name(attr);
+                matching_attributes.insert(name);
+            }
+
+            let matching_attributes = Vec::from_iter(matching_attributes);
+            println!("matching in: {:?}", matching_attributes);
+
+            println!();
         }
 
-        println!("Found {} results in {}", number_of_documents, elapsed);
+        println!("===== Found {} results in {} =====", number_of_documents, elapsed);
         buffer.clear();
     }

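Since displayed_fields is a plain positional Vec<String>, the fields to print are listed after the database path on the command line, along these lines (a sketch; the example name and paths are placeholders):

    cargo run --release --example query-database -- test.mdb title description --number-results 5
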
examples/schema-example.toml (new file, 19 lines added)
@@ -0,0 +1,19 @@
+# This schema has been generated ...
+# The order in which the attributes are declared is important,
+# it specify the attribute xxx...
+
+identifier = "id"
+
+[attributes.id]
+stored = true
+
+[attributes.title]
+stored = true
+indexed = true
+
+[attributes.description]
+stored = true
+indexed = true
+
+[attributes.image]
+stored = true

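For reference, this TOML expresses the same schema that the deleted create_schema() helper built programmatically (see the first example hunk above); the file's warning that declaration order matters presumably refers to how attributes are numbered. The removed helper, for comparison:

    use meilidb::database::schema::{Schema, SchemaBuilder, STORED, INDEXED};

    // The pre-commit, hard-coded equivalent of schema-example.toml.
    fn create_schema() -> Schema {
        let mut schema = SchemaBuilder::with_identifier("id");
        schema.new_attribute("id", STORED);
        schema.new_attribute("title", STORED | INDEXED);
        schema.new_attribute("description", STORED | INDEXED);
        schema.new_attribute("image", STORED);
        schema.build()
    }
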
@@ -63,10 +63,12 @@ impl Attribute {
         }
     }
 
+    #[inline]
     pub fn attribute(&self) -> u16 {
         (self.0 >> 22) as u16
     }
 
+    #[inline]
     pub fn word_index(&self) -> u32 {
         self.0 & 0b0000_0000_0011_1111_1111_1111_1111
     }

@@ -129,10 +131,12 @@ impl WordArea {
         }
     }
 
+    #[inline]
     pub fn byte_index(&self) -> u32 {
         self.0 >> 10
     }
 
+    #[inline]
     pub fn length(&self) -> u16 {
         (self.0 & 0b0000_0000_0000_0000_0011_1111_1111) as u16
     }

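The two #[inline] hints land on plain bit-field getters: Attribute keeps the attribute id in the top 10 bits of its u32 (hence the >> 22), and WordArea packs a byte index above a 10-bit length. A small sketch of the WordArea layout with illustrative pack/unpack helpers (mine, not the crate's constructors):

    // Byte index in the upper 22 bits, length in the low 10 bits of a u32,
    // matching the byte_index() and length() getters above.
    fn pack_word_area(byte_index: u32, length: u16) -> u32 {
        assert!(byte_index < (1 << 22));
        assert!(length < 1024);
        (byte_index << 10) | u32::from(length)
    }

    fn unpack_word_area(packed: u32) -> (u32, u16) {
        (packed >> 10, (packed & 0b11_1111_1111) as u16)
    }

    fn main() {
        let packed = pack_word_area(1_234, 7);
        assert_eq!(unpack_word_area(packed), (1_234, 7));
    }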