mirror of
https://github.com/meilisearch/MeiliSearch
synced 2025-01-26 21:27:28 +01:00
Reduce the DocumentId size from 64 to 32bits
This commit is contained in:
parent
3bca31856d
commit
788e2202c9
@ -191,6 +191,6 @@ mod tests {
|
|||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn docindex_mem_size() {
|
fn docindex_mem_size() {
|
||||||
assert_eq!(mem::size_of::<DocIndex>(), 16);
|
assert_eq!(mem::size_of::<DocIndex>(), 12);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -228,7 +228,7 @@ mod tests {
|
|||||||
builder.into_inner().and_then(Set::from_bytes).unwrap()
|
builder.into_inner().and_then(Set::from_bytes).unwrap()
|
||||||
}
|
}
|
||||||
|
|
||||||
const fn doc_index(document_id: u64, word_index: u16) -> DocIndex {
|
const fn doc_index(document_id: u32, word_index: u16) -> DocIndex {
|
||||||
DocIndex {
|
DocIndex {
|
||||||
document_id: DocumentId(document_id),
|
document_id: DocumentId(document_id),
|
||||||
attribute: 0,
|
attribute: 0,
|
||||||
@ -238,7 +238,7 @@ mod tests {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
const fn doc_char_index(document_id: u64, word_index: u16, char_index: u16) -> DocIndex {
|
const fn doc_char_index(document_id: u32, word_index: u16, char_index: u16) -> DocIndex {
|
||||||
DocIndex {
|
DocIndex {
|
||||||
document_id: DocumentId(document_id),
|
document_id: DocumentId(document_id),
|
||||||
attribute: 0,
|
attribute: 0,
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
use super::BEU64;
|
use super::BEU32;
|
||||||
use crate::database::MainT;
|
use crate::database::MainT;
|
||||||
use crate::DocumentId;
|
use crate::DocumentId;
|
||||||
use heed::types::{ByteSlice, OwnedType};
|
use heed::types::{ByteSlice, OwnedType};
|
||||||
@ -7,7 +7,7 @@ use std::sync::Arc;
|
|||||||
|
|
||||||
#[derive(Copy, Clone)]
|
#[derive(Copy, Clone)]
|
||||||
pub struct DocsWords {
|
pub struct DocsWords {
|
||||||
pub(crate) docs_words: heed::Database<OwnedType<BEU64>, ByteSlice>,
|
pub(crate) docs_words: heed::Database<OwnedType<BEU32>, ByteSlice>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl DocsWords {
|
impl DocsWords {
|
||||||
@ -17,13 +17,13 @@ impl DocsWords {
|
|||||||
document_id: DocumentId,
|
document_id: DocumentId,
|
||||||
words: &fst::Set,
|
words: &fst::Set,
|
||||||
) -> ZResult<()> {
|
) -> ZResult<()> {
|
||||||
let document_id = BEU64::new(document_id.0);
|
let document_id = BEU32::new(document_id.0);
|
||||||
let bytes = words.as_fst().as_bytes();
|
let bytes = words.as_fst().as_bytes();
|
||||||
self.docs_words.put(writer, &document_id, bytes)
|
self.docs_words.put(writer, &document_id, bytes)
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn del_doc_words(self, writer: &mut heed::RwTxn<MainT>, document_id: DocumentId) -> ZResult<bool> {
|
pub fn del_doc_words(self, writer: &mut heed::RwTxn<MainT>, document_id: DocumentId) -> ZResult<bool> {
|
||||||
let document_id = BEU64::new(document_id.0);
|
let document_id = BEU32::new(document_id.0);
|
||||||
self.docs_words.delete(writer, &document_id)
|
self.docs_words.delete(writer, &document_id)
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -36,7 +36,7 @@ impl DocsWords {
|
|||||||
reader: &heed::RoTxn<MainT>,
|
reader: &heed::RoTxn<MainT>,
|
||||||
document_id: DocumentId,
|
document_id: DocumentId,
|
||||||
) -> ZResult<Option<fst::Set>> {
|
) -> ZResult<Option<fst::Set>> {
|
||||||
let document_id = BEU64::new(document_id.0);
|
let document_id = BEU32::new(document_id.0);
|
||||||
match self.docs_words.get(reader, &document_id)? {
|
match self.docs_words.get(reader, &document_id)? {
|
||||||
Some(bytes) => {
|
Some(bytes) => {
|
||||||
let len = bytes.len();
|
let len = bytes.len();
|
||||||
|
@ -26,16 +26,16 @@ impl<'a> BytesDecode<'a> for DocumentsIds {
|
|||||||
|
|
||||||
pub struct DiscoverIds<'a> {
|
pub struct DiscoverIds<'a> {
|
||||||
ids_iter: std::slice::Iter<'a, DocumentId>,
|
ids_iter: std::slice::Iter<'a, DocumentId>,
|
||||||
left_id: Option<u64>,
|
left_id: Option<u32>,
|
||||||
right_id: Option<u64>,
|
right_id: Option<u32>,
|
||||||
available_range: std::ops::Range<u64>,
|
available_range: std::ops::Range<u32>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl DiscoverIds<'_> {
|
impl DiscoverIds<'_> {
|
||||||
pub fn new(ids: &Set<DocumentId>) -> DiscoverIds {
|
pub fn new(ids: &Set<DocumentId>) -> DiscoverIds {
|
||||||
let mut ids_iter = ids.iter();
|
let mut ids_iter = ids.iter();
|
||||||
let right_id = ids_iter.next().map(|id| id.0);
|
let right_id = ids_iter.next().map(|id| id.0);
|
||||||
let available_range = 0..right_id.unwrap_or(u64::max_value());
|
let available_range = 0..right_id.unwrap_or(u32::max_value());
|
||||||
DiscoverIds { ids_iter, left_id: None, right_id, available_range }
|
DiscoverIds { ids_iter, left_id: None, right_id, available_range }
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -49,7 +49,7 @@ impl Iterator for DiscoverIds<'_> {
|
|||||||
// The available range gives us a new id, we return it.
|
// The available range gives us a new id, we return it.
|
||||||
Some(id) => return Some(DocumentId(id)),
|
Some(id) => return Some(DocumentId(id)),
|
||||||
// The available range is exhausted, we need to find the next one.
|
// The available range is exhausted, we need to find the next one.
|
||||||
None if self.available_range.end == u64::max_value() => return None,
|
None if self.available_range.end == u32::max_value() => return None,
|
||||||
None => loop {
|
None => loop {
|
||||||
self.left_id = self.right_id.take();
|
self.left_id = self.right_id.take();
|
||||||
self.right_id = self.ids_iter.next().map(|id| id.0);
|
self.right_id = self.ids_iter.next().map(|id| id.0);
|
||||||
@ -61,9 +61,9 @@ impl Iterator for DiscoverIds<'_> {
|
|||||||
break;
|
break;
|
||||||
},
|
},
|
||||||
// The last used id has been reached, we can use all ids
|
// The last used id has been reached, we can use all ids
|
||||||
// until u64 MAX
|
// until u32 MAX
|
||||||
(Some(l), None) => {
|
(Some(l), None) => {
|
||||||
self.available_range = l.saturating_add(1)..u64::max_value();
|
self.available_range = l.saturating_add(1)..u32::max_value();
|
||||||
break;
|
break;
|
||||||
},
|
},
|
||||||
_ => (),
|
_ => (),
|
||||||
|
@ -153,7 +153,7 @@ impl Main {
|
|||||||
|
|
||||||
pub fn user_to_internal_id(self, reader: &heed::RoTxn<MainT>, userid: &str) -> ZResult<Option<DocumentId>> {
|
pub fn user_to_internal_id(self, reader: &heed::RoTxn<MainT>, userid: &str) -> ZResult<Option<DocumentId>> {
|
||||||
let user_ids = self.user_ids(reader)?;
|
let user_ids = self.user_ids(reader)?;
|
||||||
Ok(user_ids.get(userid).map(DocumentId))
|
Ok(user_ids.get(userid).map(|id| DocumentId(id as u32)))
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn put_words_fst(self, writer: &mut heed::RwTxn<MainT>, fst: &fst::Set) -> ZResult<()> {
|
pub fn put_words_fst(self, writer: &mut heed::RwTxn<MainT>, fst: &fst::Set) -> ZResult<()> {
|
||||||
|
@ -45,20 +45,21 @@ use crate::serde::Deserializer;
|
|||||||
use crate::settings::SettingsUpdate;
|
use crate::settings::SettingsUpdate;
|
||||||
use crate::{query_builder::QueryBuilder, update, DocIndex, DocumentId, Error, MResult};
|
use crate::{query_builder::QueryBuilder, update, DocIndex, DocumentId, Error, MResult};
|
||||||
|
|
||||||
|
type BEU32 = zerocopy::U32<byteorder::BigEndian>;
|
||||||
type BEU64 = zerocopy::U64<byteorder::BigEndian>;
|
type BEU64 = zerocopy::U64<byteorder::BigEndian>;
|
||||||
pub type BEU16 = zerocopy::U16<byteorder::BigEndian>;
|
pub type BEU16 = zerocopy::U16<byteorder::BigEndian>;
|
||||||
|
|
||||||
#[derive(Debug, Copy, Clone, AsBytes, FromBytes)]
|
#[derive(Debug, Copy, Clone, AsBytes, FromBytes)]
|
||||||
#[repr(C)]
|
#[repr(C)]
|
||||||
pub struct DocumentFieldIndexedKey {
|
pub struct DocumentFieldIndexedKey {
|
||||||
docid: BEU64,
|
docid: BEU32,
|
||||||
indexed_pos: BEU16,
|
indexed_pos: BEU16,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl DocumentFieldIndexedKey {
|
impl DocumentFieldIndexedKey {
|
||||||
fn new(docid: DocumentId, indexed_pos: IndexedPos) -> DocumentFieldIndexedKey {
|
fn new(docid: DocumentId, indexed_pos: IndexedPos) -> DocumentFieldIndexedKey {
|
||||||
DocumentFieldIndexedKey {
|
DocumentFieldIndexedKey {
|
||||||
docid: BEU64::new(docid.0),
|
docid: BEU32::new(docid.0),
|
||||||
indexed_pos: BEU16::new(indexed_pos.0),
|
indexed_pos: BEU16::new(indexed_pos.0),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -67,14 +68,14 @@ impl DocumentFieldIndexedKey {
|
|||||||
#[derive(Debug, Copy, Clone, AsBytes, FromBytes)]
|
#[derive(Debug, Copy, Clone, AsBytes, FromBytes)]
|
||||||
#[repr(C)]
|
#[repr(C)]
|
||||||
pub struct DocumentFieldStoredKey {
|
pub struct DocumentFieldStoredKey {
|
||||||
docid: BEU64,
|
docid: BEU32,
|
||||||
field_id: BEU16,
|
field_id: BEU16,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl DocumentFieldStoredKey {
|
impl DocumentFieldStoredKey {
|
||||||
fn new(docid: DocumentId, field_id: FieldId) -> DocumentFieldStoredKey {
|
fn new(docid: DocumentId, field_id: FieldId) -> DocumentFieldStoredKey {
|
||||||
DocumentFieldStoredKey {
|
DocumentFieldStoredKey {
|
||||||
docid: BEU64::new(docid.0),
|
docid: BEU32::new(docid.0),
|
||||||
field_id: BEU16::new(field_id.0),
|
field_id: BEU16::new(field_id.0),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -98,7 +99,7 @@ impl<'a> BytesEncode<'a> for PostingsCodec {
|
|||||||
|
|
||||||
let mut buffer = Vec::with_capacity(u64_size + docids_size + matches_size);
|
let mut buffer = Vec::with_capacity(u64_size + docids_size + matches_size);
|
||||||
|
|
||||||
let docids_len = item.docids.len();
|
let docids_len = item.docids.len() as u64;
|
||||||
buffer.extend_from_slice(&docids_len.to_be_bytes());
|
buffer.extend_from_slice(&docids_len.to_be_bytes());
|
||||||
buffer.extend_from_slice(item.docids.as_bytes());
|
buffer.extend_from_slice(item.docids.as_bytes());
|
||||||
buffer.extend_from_slice(item.matches.as_bytes());
|
buffer.extend_from_slice(item.matches.as_bytes());
|
||||||
|
@ -4,7 +4,7 @@ use heed::types::{OwnedType, CowSlice};
|
|||||||
use heed::Result as ZResult;
|
use heed::Result as ZResult;
|
||||||
use zerocopy::{AsBytes, FromBytes};
|
use zerocopy::{AsBytes, FromBytes};
|
||||||
|
|
||||||
use super::BEU64;
|
use super::{BEU64, BEU32};
|
||||||
use crate::{DocumentId, Highlight};
|
use crate::{DocumentId, Highlight};
|
||||||
use crate::database::MainT;
|
use crate::database::MainT;
|
||||||
|
|
||||||
@ -13,15 +13,15 @@ use crate::database::MainT;
|
|||||||
pub struct PrefixKey {
|
pub struct PrefixKey {
|
||||||
prefix: [u8; 4],
|
prefix: [u8; 4],
|
||||||
index: BEU64,
|
index: BEU64,
|
||||||
docid: BEU64,
|
docid: BEU32,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl PrefixKey {
|
impl PrefixKey {
|
||||||
pub fn new(prefix: [u8; 4], index: u64, docid: u64) -> PrefixKey {
|
pub fn new(prefix: [u8; 4], index: u64, docid: u32) -> PrefixKey {
|
||||||
PrefixKey {
|
PrefixKey {
|
||||||
prefix,
|
prefix,
|
||||||
index: BEU64::new(index),
|
index: BEU64::new(index),
|
||||||
docid: BEU64::new(docid),
|
docid: BEU32::new(docid),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -54,7 +54,7 @@ impl PrefixDocumentsCache {
|
|||||||
prefix: [u8; 4],
|
prefix: [u8; 4],
|
||||||
) -> ZResult<PrefixDocumentsIter<'txn>> {
|
) -> ZResult<PrefixDocumentsIter<'txn>> {
|
||||||
let start = PrefixKey::new(prefix, 0, 0);
|
let start = PrefixKey::new(prefix, 0, 0);
|
||||||
let end = PrefixKey::new(prefix, u64::max_value(), u64::max_value());
|
let end = PrefixKey::new(prefix, u64::max_value(), u32::max_value());
|
||||||
let iter = self.prefix_documents_cache.range(reader, &(start..=end))?;
|
let iter = self.prefix_documents_cache.range(reader, &(start..=end))?;
|
||||||
Ok(PrefixDocumentsIter { iter })
|
Ok(PrefixDocumentsIter { iter })
|
||||||
}
|
}
|
||||||
|
@ -242,7 +242,7 @@ pub fn apply_addition<'a, 'b>(
|
|||||||
|
|
||||||
index.main.put_schema(writer, &schema)?;
|
index.main.put_schema(writer, &schema)?;
|
||||||
|
|
||||||
let new_user_ids = fst::Map::from_iter(new_user_ids)?;
|
let new_user_ids = fst::Map::from_iter(new_user_ids.iter().map(|(u, i)| (u, *i as u64)))?;
|
||||||
let new_internal_ids = sdset::SetBuf::from_dirty(new_internal_ids);
|
let new_internal_ids = sdset::SetBuf::from_dirty(new_internal_ids);
|
||||||
index.main.merge_user_ids(writer, &new_user_ids)?;
|
index.main.merge_user_ids(writer, &new_user_ids)?;
|
||||||
index.main.merge_internal_ids(writer, &new_internal_ids)?;
|
index.main.merge_internal_ids(writer, &new_internal_ids)?;
|
||||||
|
@ -80,7 +80,7 @@ pub fn apply_documents_deletion(
|
|||||||
let user_ids = index.main.user_ids(writer)?;
|
let user_ids = index.main.user_ids(writer)?;
|
||||||
for userid in new_user_ids.as_slice() {
|
for userid in new_user_ids.as_slice() {
|
||||||
if let Some(id) = user_ids.get(userid) {
|
if let Some(id) = user_ids.get(userid) {
|
||||||
internal_ids.push(DocumentId(id));
|
internal_ids.push(DocumentId(id as u32));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -105,7 +105,7 @@ pub fn discover_document_id(
|
|||||||
{
|
{
|
||||||
if userid.chars().all(|x| x.is_ascii_alphanumeric() || x == '-' || x == '_') {
|
if userid.chars().all(|x| x.is_ascii_alphanumeric() || x == '-' || x == '_') {
|
||||||
match user_ids.get(userid) {
|
match user_ids.get(userid) {
|
||||||
Some(internal_id) => Ok(DocumentId(internal_id)),
|
Some(id) => Ok(DocumentId(id as u32)),
|
||||||
None => {
|
None => {
|
||||||
let internal_id = available_ids.next().expect("no more ids available");
|
let internal_id = available_ids.next().expect("no more ids available");
|
||||||
Ok(internal_id)
|
Ok(internal_id)
|
||||||
|
@ -22,7 +22,7 @@ pub enum ResponseError {
|
|||||||
NotFound(String),
|
NotFound(String),
|
||||||
OpenIndex(String),
|
OpenIndex(String),
|
||||||
FilterParsing(String),
|
FilterParsing(String),
|
||||||
RetrieveDocument(u64, String),
|
RetrieveDocument(u32, String),
|
||||||
SearchDocuments(String),
|
SearchDocuments(String),
|
||||||
PayloadTooLarge,
|
PayloadTooLarge,
|
||||||
UnsupportedMediaType,
|
UnsupportedMediaType,
|
||||||
@ -116,7 +116,7 @@ impl ResponseError {
|
|||||||
ResponseError::Maintenance
|
ResponseError::Maintenance
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn retrieve_document(doc_id: u64, err: impl fmt::Display) -> ResponseError {
|
pub fn retrieve_document(doc_id: u32, err: impl fmt::Display) -> ResponseError {
|
||||||
ResponseError::RetrieveDocument(doc_id, err.to_string())
|
ResponseError::RetrieveDocument(doc_id, err.to_string())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -12,7 +12,7 @@ use serde::{Deserialize, Serialize};
|
|||||||
#[cfg_attr(feature = "zerocopy", derive(AsBytes, FromBytes))]
|
#[cfg_attr(feature = "zerocopy", derive(AsBytes, FromBytes))]
|
||||||
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
|
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
|
||||||
#[repr(C)]
|
#[repr(C)]
|
||||||
pub struct DocumentId(pub u64);
|
pub struct DocumentId(pub u32);
|
||||||
|
|
||||||
/// This structure represent the position of a word
|
/// This structure represent the position of a word
|
||||||
/// in a document and its attributes.
|
/// in a document and its attributes.
|
||||||
|
Loading…
x
Reference in New Issue
Block a user