Display the _semanticSimilarity even if the _vectors field is not displayed

This commit is contained in:
Kerollmops 2023-06-20 15:54:28 +02:00 committed by Clément Renault
parent 737aec1705
commit 7aa1275337
No known key found for this signature in database
GPG key ID: 92ADA4E935E71FA4
3 changed files with 53 additions and 20 deletions

View file

@ -286,6 +286,23 @@ pub fn normalize_facet(original: &str) -> String {
CompatibilityDecompositionNormalizer.normalize_str(original.trim()).to_lowercase()
}
/// Represents either a single vector or an array of multiple vectors.
///
/// The wrapper is `#[serde(transparent)]`, so it is (de)serialized exactly
/// like its only field, and that field goes through `either::serde_untagged`,
/// meaning no variant tag is written: the payload is either a flat array of
/// numbers or an array of arrays of numbers.
///
/// Use [`VectorOrArrayOfVectors::into_array_of_vectors`] to obtain a uniform
/// `Vec<Vec<f32>>` regardless of which shape was provided.
// NOTE(review): with untagged deserialization an empty array presumably
// matches the single-vector (`Left`) variant first — confirm this is intended.
#[derive(serde::Serialize, serde::Deserialize, Debug)]
#[serde(transparent)]
pub struct VectorOrArrayOfVectors {
// `Left` holds one vector, `Right` holds a list of vectors.
#[serde(with = "either::serde_untagged")]
inner: either::Either<Vec<f32>, Vec<Vec<f32>>>,
}
impl VectorOrArrayOfVectors {
pub fn into_array_of_vectors(self) -> Vec<Vec<f32>> {
match self.inner {
either::Either::Left(vector) => vec![vector],
either::Either::Right(vectors) => vectors,
}
}
}
/// Normalize a vector by dividing the dimensions by the length of it.
pub fn normalize_vector(mut vector: Vec<f32>) -> Vec<f32> {
let squared: f32 = vector.iter().map(|x| x * x).sum();

View file

@ -3,11 +3,10 @@ use std::fs::File;
use std::io;
use bytemuck::cast_slice;
use either::Either;
use serde_json::from_slice;
use super::helpers::{create_writer, writer_into_reader, GrenadParameters};
use crate::{FieldId, InternalError, Result};
use crate::{FieldId, InternalError, Result, VectorOrArrayOfVectors};
/// Extracts the embedding vector contained in each document under the `_vectors` field.
///
@ -31,9 +30,11 @@ pub fn extract_vector_points<R: io::Read + io::Seek>(
// first we retrieve the _vectors field
if let Some(vectors) = obkv.get(vectors_fid) {
// extract the vectors
let vectors: Either<Vec<Vec<f32>>, Vec<f32>> =
from_slice(vectors).map_err(InternalError::SerdeJson).unwrap();
let vectors = vectors.map_right(|v| vec![v]).into_inner();
// TODO return a user error before unwrapping
let vectors = from_slice(vectors)
.map_err(InternalError::SerdeJson)
.map(VectorOrArrayOfVectors::into_array_of_vectors)
.unwrap();
for (i, vector) in vectors.into_iter().enumerate() {
match u16::try_from(i) {