mirror of
https://github.com/meilisearch/MeiliSearch
synced 2024-11-30 00:34:26 +01:00
Remove the useless euclidean distance implementation
This commit is contained in:
parent
29d8268c94
commit
ebad1f396f
@ -7,10 +7,10 @@ pub struct DotProduct;
|
|||||||
impl Metric<Vec<f32>> for DotProduct {
|
impl Metric<Vec<f32>> for DotProduct {
|
||||||
type Unit = u32;
|
type Unit = u32;
|
||||||
|
|
||||||
// TODO explain me this function, I don't understand why f32.to_bits is ordered.
|
|
||||||
// I tried to do this and it wasn't OK <https://stackoverflow.com/a/43305015/1941280>
|
|
||||||
//
|
|
||||||
// Following <https://docs.rs/space/0.17.0/space/trait.Metric.html>.
|
// Following <https://docs.rs/space/0.17.0/space/trait.Metric.html>.
|
||||||
|
//
|
||||||
|
// Here is a playground that validates the ordering of the bit representation of floats in range 0.0..=1.0:
|
||||||
|
// <https://play.rust-lang.org/?version=stable&mode=debug&edition=2021&gist=6c59e31a3cc5036b32edf51e8937b56e>
|
||||||
fn distance(&self, a: &Vec<f32>, b: &Vec<f32>) -> Self::Unit {
|
fn distance(&self, a: &Vec<f32>, b: &Vec<f32>) -> Self::Unit {
|
||||||
let dist = 1.0 - dot_product_similarity(a, b);
|
let dist = 1.0 - dot_product_similarity(a, b);
|
||||||
debug_assert!(!dist.is_nan());
|
debug_assert!(!dist.is_nan());
|
||||||
@ -23,22 +23,3 @@ impl Metric<Vec<f32>> for DotProduct {
|
|||||||
pub fn dot_product_similarity(a: &[f32], b: &[f32]) -> f32 {
|
pub fn dot_product_similarity(a: &[f32], b: &[f32]) -> f32 {
|
||||||
a.iter().zip(b).map(|(a, b)| a * b).sum()
|
a.iter().zip(b).map(|(a, b)| a * b).sum()
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug, Default, Clone, Copy, Serialize, Deserialize)]
|
|
||||||
pub struct Euclidean;
|
|
||||||
|
|
||||||
impl Metric<Vec<f32>> for Euclidean {
|
|
||||||
type Unit = u32;
|
|
||||||
|
|
||||||
fn distance(&self, a: &Vec<f32>, b: &Vec<f32>) -> Self::Unit {
|
|
||||||
let dist = euclidean_squared_distance(a, b).sqrt();
|
|
||||||
debug_assert!(!dist.is_nan());
|
|
||||||
dist.to_bits()
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Return the squared euclidean distance between both vectors that will be
|
|
||||||
/// between 0.0 and +inf. The smaller the nearer the vectors are.
|
|
||||||
pub fn euclidean_squared_distance(a: &[f32], b: &[f32]) -> f32 {
|
|
||||||
a.iter().zip(b).map(|(a, b)| (a - b).powi(2)).sum()
|
|
||||||
}
|
|
||||||
|
@ -32,7 +32,7 @@ use std::convert::{TryFrom, TryInto};
|
|||||||
use std::hash::BuildHasherDefault;
|
use std::hash::BuildHasherDefault;
|
||||||
|
|
||||||
use charabia::normalizer::{CharNormalizer, CompatibilityDecompositionNormalizer};
|
use charabia::normalizer::{CharNormalizer, CompatibilityDecompositionNormalizer};
|
||||||
pub use distance::{dot_product_similarity, euclidean_squared_distance};
|
pub use distance::dot_product_similarity;
|
||||||
pub use filter_parser::{Condition, FilterCondition, Span, Token};
|
pub use filter_parser::{Condition, FilterCondition, Span, Token};
|
||||||
use fxhash::{FxHasher32, FxHasher64};
|
use fxhash::{FxHasher32, FxHasher64};
|
||||||
pub use grenad::CompressionType;
|
pub use grenad::CompressionType;
|
||||||
@ -304,7 +304,7 @@ impl VectorOrArrayOfVectors {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Normalize a vector by dividing the dimensions by the lenght of it.
|
/// Normalize a vector by dividing the dimensions by the length of it.
|
||||||
pub fn normalize_vector(mut vector: Vec<f32>) -> Vec<f32> {
|
pub fn normalize_vector(mut vector: Vec<f32>) -> Vec<f32> {
|
||||||
let squared: f32 = vector.iter().map(|x| x * x).sum();
|
let squared: f32 = vector.iter().map(|x| x * x).sum();
|
||||||
let length = squared.sqrt();
|
let length = squared.sqrt();
|
||||||
|
Loading…
Reference in New Issue
Block a user