mirror of https://github.com/meilisearch/MeiliSearch (synced 2024-11-22 21:04:27 +01:00)
Extract the vectors from the documents

parent 34349faeae
commit 7ac2f1489d
@@ -47,6 +47,7 @@ pub(crate) fn data_from_obkv_documents(
     faceted_fields: HashSet<FieldId>,
     primary_key_id: FieldId,
     geo_fields_ids: Option<(FieldId, FieldId)>,
+    vector_field_id: Option<FieldId>,
     stop_words: Option<fst::Set<&[u8]>>,
     max_positions_per_attributes: Option<u32>,
     exact_attributes: HashSet<FieldId>,
@@ -71,6 +72,7 @@ pub(crate) fn data_from_obkv_documents(
         &faceted_fields,
         primary_key_id,
         geo_fields_ids,
+        vector_field_id,
         &stop_words,
         max_positions_per_attributes,
     )
@@ -281,6 +283,7 @@ fn send_and_extract_flattened_documents_data(
     faceted_fields: &HashSet<FieldId>,
     primary_key_id: FieldId,
     geo_fields_ids: Option<(FieldId, FieldId)>,
+    vector_field_id: Option<FieldId>,
     stop_words: &Option<fst::Set<&[u8]>>,
     max_positions_per_attributes: Option<u32>,
 ) -> Result<(
@@ -309,6 +312,20 @@ fn send_and_extract_flattened_documents_data(
         });
     }
 
+    if let Some(vector_field_id) = vector_field_id {
+        let documents_chunk_cloned = flattened_documents_chunk.clone();
+        let lmdb_writer_sx_cloned = lmdb_writer_sx.clone();
+        rayon::spawn(move || {
+            let result = extract_vector_points(documents_chunk_cloned, indexer, vector_field_id);
+            let _ = match result {
+                Ok(vector_points) => {
+                    lmdb_writer_sx_cloned.send(Ok(TypedChunk::VectorPoints(vector_points)))
+                }
+                Err(error) => lmdb_writer_sx_cloned.send(Err(error)),
+            };
+        });
+    }
+
     let (docid_word_positions_chunk, docid_fid_facet_values_chunks): (Result<_>, Result<_>) =
         rayon::join(
             || {
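`extract_vector_points` itself is introduced elsewhere in this commit and is not visible in these hunks; it scans each flattened documents chunk and emits the embedding found under each document's `_vector` field. A minimal, self-contained sketch of that per-document logic, assuming the field holds a JSON array of numbers (the `extract_vector` helper and the serde_json types are illustrative stand-ins, not milli's obkv/grenad machinery):

```rust
use serde_json::Value;

/// Hypothetical helper mirroring what extract_vector_points must do per
/// document: find the `_vector` field and read it as a list of floats.
fn extract_vector(document: &Value) -> Option<Vec<f64>> {
    document
        .get("_vector")?    // same hard-coded field name the commit looks up
        .as_array()?
        .iter()
        .map(Value::as_f64) // every entry must be a JSON number...
        .collect()          // ...otherwise the whole extraction yields None
}

fn main() {
    let doc = serde_json::json!({ "id": 42, "_vector": [0.1, 0.2, 0.3] });
    assert_eq!(extract_vector(&doc), Some(vec![0.1, 0.2, 0.3]));

    // a malformed vector is rejected rather than silently truncated
    let bad = serde_json::json!({ "id": 43, "_vector": [0.1, "oops"] });
    assert_eq!(extract_vector(&bad), None);
}
```

Collecting through `Option` makes a single non-numeric entry reject the whole vector instead of truncating it, which is the safer default for an embedding field.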
@@ -304,6 +304,8 @@ where
             }
             None => None,
         };
+        // get the fid of the `_vector` field.
+        let vector_field_id = self.index.fields_ids_map(self.wtxn)?.id("_vector");
 
         let stop_words = self.index.stop_words(self.wtxn)?;
         let exact_attributes = self.index.exact_attributes_ids(self.wtxn)?;
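`fields_ids_map.id("_vector")` returns `None` when no document has ever declared a `_vector` field, which is what lets the whole extraction step be skipped. A toy illustration of that name-to-id lookup, with a plain HashMap standing in for milli's FieldsIdsMap:

```rust
use std::collections::HashMap;

type FieldId = u16; // milli represents field ids as small integers

fn main() {
    // stand-in for the index's fields-ids map, built up as documents arrive
    let mut fields_ids_map: HashMap<String, FieldId> = HashMap::new();
    fields_ids_map.insert("id".into(), 0);
    fields_ids_map.insert("_vector".into(), 1);

    // mirrors `fields_ids_map.id("_vector")`: None if the field was never seen
    let vector_field_id: Option<FieldId> = fields_ids_map.get("_vector").copied();
    assert_eq!(vector_field_id, Some(1));
}
```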
@@ -340,6 +342,7 @@ where
                 faceted_fields,
                 primary_key_id,
                 geo_fields_ids,
+                vector_field_id,
                 stop_words,
                 max_positions_per_attributes,
                 exact_attributes,
@@ -38,6 +38,7 @@ pub(crate) enum TypedChunk {
     FieldIdFacetIsNullDocids(grenad::Reader<File>),
     FieldIdFacetIsEmptyDocids(grenad::Reader<File>),
     GeoPoints(grenad::Reader<File>),
+    VectorPoints(grenad::Reader<File>),
     ScriptLanguageDocids(HashMap<(Script, Language), RoaringBitmap>),
 }
 
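Each TypedChunk variant is one extractor's finished output, sent over a channel to the single writer that owns the LMDB transaction. A stripped-down sketch of that fan-in pattern, using std::sync::mpsc and a toy two-variant enum in place of the crate's channel and the real TypedChunk:

```rust
use std::sync::mpsc;
use std::thread;

// toy stand-in for TypedChunk: each variant carries one extractor's output
enum Chunk {
    GeoPoints(Vec<u8>),
    VectorPoints(Vec<u8>),
}

fn main() {
    let (tx, rx) = mpsc::channel::<Chunk>();

    // extractors run in parallel and send their finished chunks
    let tx2 = tx.clone();
    thread::spawn(move || tx2.send(Chunk::VectorPoints(vec![1, 2, 3])).unwrap());
    thread::spawn(move || tx.send(Chunk::GeoPoints(vec![4, 5])).unwrap());

    // the writer side is the only one touching the index: it matches on the
    // variant, as write_typed_chunk_into_index does below
    for chunk in rx {
        match chunk {
            Chunk::GeoPoints(bytes) => println!("write {} geo bytes", bytes.len()),
            Chunk::VectorPoints(bytes) => println!("write {} vector bytes", bytes.len()),
        }
    }
}
```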
@@ -221,6 +222,29 @@ pub(crate) fn write_typed_chunk_into_index(
             index.put_geo_rtree(wtxn, &rtree)?;
             index.put_geo_faceted_documents_ids(wtxn, &geo_faceted_docids)?;
         }
+        TypedChunk::VectorPoints(vector_points) => {
+            // let mut rtree = index.geo_rtree(wtxn)?.unwrap_or_default();
+            // let mut geo_faceted_docids = index.geo_faceted_documents_ids(wtxn)?;
+
+            // let mut cursor = geo_points.into_cursor()?;
+            // while let Some((key, value)) = cursor.move_on_next()? {
+            //     // convert the key back to a u32 (4 bytes)
+            //     let docid = key.try_into().map(DocumentId::from_be_bytes).unwrap();
+
+            //     // convert the latitude and longitude back to a f64 (8 bytes)
+            //     let (lat, tail) = helpers::try_split_array_at::<u8, 8>(value).unwrap();
+            //     let (lng, _) = helpers::try_split_array_at::<u8, 8>(tail).unwrap();
+            //     let point = [f64::from_ne_bytes(lat), f64::from_ne_bytes(lng)];
+            //     let xyz_point = lat_lng_to_xyz(&point);
+
+            //     rtree.insert(GeoPoint::new(xyz_point, (docid, point)));
+            //     geo_faceted_docids.insert(docid);
+            // }
+            // index.put_geo_rtree(wtxn, &rtree)?;
+            // index.put_geo_faceted_documents_ids(wtxn, &geo_faceted_docids)?;
+
+            todo!("index vector points")
+        }
         TypedChunk::ScriptLanguageDocids(hash_pair) => {
             let mut buffer = Vec::new();
             for (key, value) in hash_pair {
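The geo branch is copied in as commented-out scaffolding and the actual write is left as `todo!("index vector points")`. A hedged sketch of the decode loop a finished branch would need, modeled on that template: the 4-byte big-endian docid key matches the geo code, while reading the value as consecutive 8-byte floats is an assumption, since this commit never pins down the value layout.

```rust
// Standalone sketch of decoding one (key, value) entry from the vector-points
// chunk, following the byte layout the geo branch uses. Where the decoded
// vectors would actually be stored is still `todo!` in this commit.
type DocumentId = u32;

fn decode_vector_entry(key: &[u8], value: &[u8]) -> Option<(DocumentId, Vec<f64>)> {
    // the key is the document id as 4 big-endian bytes, like the geo branch
    let docid = DocumentId::from_be_bytes(key.try_into().ok()?);
    // assume the value is the embedding as consecutive 8-byte floats
    if value.len() % 8 != 0 {
        return None;
    }
    let vector = value
        .chunks_exact(8)
        .map(|b| f64::from_ne_bytes(b.try_into().unwrap()))
        .collect();
    Some((docid, vector))
}

fn main() {
    let mut value = Vec::new();
    for f in [0.5f64, -1.0] {
        value.extend_from_slice(&f.to_ne_bytes());
    }
    let entry = decode_vector_entry(&42u32.to_be_bytes(), &value);
    assert_eq!(entry, Some((42, vec![0.5, -1.0])));
}
```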