diff --git a/CLAUDE.md b/CLAUDE.md index 1580fb5..3d62d2b 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -9,18 +9,19 @@ Rust bindings for libmdbx (MDBX database). Crate name: `signet-libmdbx`. ## Crate Mandates -- You MUST NOT expose raw pointers to MDBX types outside of unsafe modules. +- You MUST NOT expose raw pointers to MDBX types via getters. - You MUST maintain zero-copy semantics for read operations in all new interfaces. - You MUST read and respect `SAFETY` comments throughout the codebase. - You MUST NOT introduce new dependencies without approval. +- All FFI calls MUST be made in the `ops` module. +- All access of the ops module must be done within a `with_txn_ptr` closure. ## MDBX Synchronization Model When making changes to this codebase you MUST remember and conform to the MDBX synchronization model for transactions and cursors. Access to raw pointers MUST -be mediated via the `TxAccess` trait. The table below summarizes the -transaction types and their access models. +be mediated via the `TxAccess` trait. ## Key Types diff --git a/Cargo.toml b/Cargo.toml index 2a03e71..6a4286b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "signet-libmdbx" description = "Idiomatic and safe MDBX wrapper" -version = "0.7.0" +version = "0.8.0" edition = "2024" rust-version = "1.92" license = "MIT OR Apache-2.0" diff --git a/benches/iter.rs b/benches/iter.rs index 0d1203c..a6ef52e 100644 --- a/benches/iter.rs +++ b/benches/iter.rs @@ -3,7 +3,7 @@ mod utils; use crate::utils::{create_ro_sync, create_ro_unsync}; use criterion::{Criterion, criterion_group, criterion_main}; -use signet_libmdbx::{DatabaseFlags, Environment, WriteFlags}; +use signet_libmdbx::{DatabaseFlags, DupItem, Environment, WriteFlags}; use std::hint::black_box; use tempfile::{TempDir, tempdir}; @@ -42,8 +42,11 @@ fn bench_iter_dupfixed(c: &mut Criterion) { b.iter(|| { let mut cursor = txn.cursor(db).unwrap(); let mut count = 0u32; - for result in cursor.iter_dupfixed_start::<[u8; 3], VALUE_SIZE>().unwrap() { - let (_key, value) = result.unwrap(); + for result in cursor.iter_dupfixed_start::<[u8; 3], [u8; VALUE_SIZE]>().unwrap() { + let item = result.unwrap(); + let value = match item { + DupItem::NewKey(_, v) | DupItem::SameKey(v) => v, + }; black_box(value); count += 1; } @@ -82,8 +85,11 @@ fn bench_iter_dupfixed_sync(c: &mut Criterion) { b.iter(|| { let mut cursor = txn.cursor(db).unwrap(); let mut count = 0u32; - for result in cursor.iter_dupfixed_start::<[u8; 3], VALUE_SIZE>().unwrap() { - let (_key, value) = result.unwrap(); + for result in cursor.iter_dupfixed_start::<[u8; 3], [u8; VALUE_SIZE]>().unwrap() { + let item = result.unwrap(); + let value = match item { + DupItem::NewKey(_, v) | DupItem::SameKey(v) => v, + }; black_box(value); count += 1; } diff --git a/src/lib.rs b/src/lib.rs index 9e43782..6500322 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -86,6 +86,26 @@ //! [`Tx::open_db`]: crate::tx::Tx::open_db //! [`Tx::create_db`]: crate::tx::Tx::create_db //! +//! # Cursor Iterators +//! +//! Cursors provide several iterator types for traversing databases. The +//! iterator to use depends on your database flags and access pattern. +//! +//! | Iterator | Cursor Methods | Yields | Description | +//! |----------|----------------|--------|-------------| +//! | [`Iter`] | `iter_start`, `iter_from` | `(Key, Value)` | Forward iteration over all key-value pairs. | +//! | [`IterDup`] | `iter_dup_start`, `iter_dup_from` | [`DupItem`] | Flat iteration over DUPSORT tables. Yields `NewKey` for first value of each key, `SameKey` for subsequent. | +//! | [`IterDupOfKey`] | `iter_dup_of` | `Value` | Single-key iteration over DUPSORT duplicate values. | +//! | [`IterDupFixed`] | `iter_dupfixed_start`, `iter_dupfixed_from` | [`DupItem`] | Flat iteration over DUPFIXED tables using page-based access. | +//! | [`IterDupFixedOfKey`] | `iter_dupfixed_of` | `Value` | Single-key iteration over DUPFIXED values. Exact `size_hint()`. | +//! +//! [`Iter`]: crate::tx::iter::Iter +//! [`IterDup`]: crate::tx::iter::IterDup +//! [`IterDupOfKey`]: crate::tx::iter::IterDupOfKey +//! [`IterDupFixed`]: crate::tx::iter::IterDupFixed +//! [`IterDupFixedOfKey`]: crate::tx::iter::IterDupFixedOfKey +//! [`DupItem`]: crate::tx::iter::DupItem +//! //! # Custom Zero-copy Deserialization with [`TableObject`] //! //! Implement [`TableObject`] to decode custom types directly from the @@ -161,6 +181,7 @@ pub use sys::{Environment, EnvironmentBuilder, Geometry, Info, Stat}; pub mod tx; pub use tx::aliases::{TxSync, TxUnsync}; +pub use tx::iter::DupItem; pub use tx::{CommitLatency, Cursor, Database, Ro, RoSync, Rw, RwSync, TransactionKind}; #[cfg(test)] diff --git a/src/tx/aliases.rs b/src/tx/aliases.rs index a374bbe..e4f1840 100644 --- a/src/tx/aliases.rs +++ b/src/tx/aliases.rs @@ -6,7 +6,7 @@ use crate::{ PtrSync, PtrUnsync, cursor::Cursor, r#impl::Tx, - iter::{Iter, IterDupFixed, IterDupFixedOfKey}, + iter::{Iter, IterDup, IterDupFixed, IterDupFixedOfKey, IterDupOfKey}, }, }; use std::{borrow::Cow, sync::Arc}; @@ -65,17 +65,41 @@ pub type RwCursorUnsync<'tx> = Cursor<'tx, Rw>; pub type IterKeyVals<'tx, 'cur, K, Key = Cow<'tx, [u8]>, Value = Cow<'tx, [u8]>> = Iter<'tx, 'cur, K, Key, Value, { ffi::MDBX_NEXT }>; -/// An iterator over the key/value pairs in an MDBX `DUPSORT` with duplicate -/// keys, yielding the first value for each key. -/// -/// See the [`Iter`] documentation for more details. -pub type IterDupKeys<'tx, 'cur, K, Key = Cow<'tx, [u8]>, Value = Cow<'tx, [u8]>> = - Iter<'tx, 'cur, K, Key, Value, { ffi::MDBX_NEXT_NODUP }>; +// --- DUPSORT iterator aliases --- -/// An iterator over the key/value pairs in an MDBX `DUPSORT`, yielding each -/// duplicate value for a specific key. -pub type IterDupVals<'tx, 'cur, K, Key = Cow<'tx, [u8]>, Value = Cow<'tx, [u8]>> = - Iter<'tx, 'cur, K, Key, Value, { ffi::MDBX_NEXT_DUP }>; +/// A flat DUPSORT iterator for a synchronized read-only transaction. +pub type RoDupIterSync<'tx, 'cur, Key = Cow<'tx, [u8]>, Value = Cow<'tx, [u8]>> = + IterDup<'tx, 'cur, RoSync, Key, Value>; + +/// A flat DUPSORT iterator for a synchronized read-write transaction. +pub type RwDupIterSync<'tx, 'cur, Key = Cow<'tx, [u8]>, Value = Cow<'tx, [u8]>> = + IterDup<'tx, 'cur, RwSync, Key, Value>; + +/// A flat DUPSORT iterator for an unsynchronized read-only transaction. +pub type RoDupIterUnsync<'tx, 'cur, Key = Cow<'tx, [u8]>, Value = Cow<'tx, [u8]>> = + IterDup<'tx, 'cur, Ro, Key, Value>; + +/// A flat DUPSORT iterator for an unsynchronized read-write transaction. +pub type RwDupIterUnsync<'tx, 'cur, Key = Cow<'tx, [u8]>, Value = Cow<'tx, [u8]>> = + IterDup<'tx, 'cur, Rw, Key, Value>; + +/// A single-key DUPSORT iterator for a synchronized read-only transaction. +pub type RoDupIterOfKeySync<'tx, 'cur, Value = Cow<'tx, [u8]>> = + IterDupOfKey<'tx, 'cur, RoSync, Value>; + +/// A single-key DUPSORT iterator for a synchronized read-write transaction. +pub type RwDupIterOfKeySync<'tx, 'cur, Value = Cow<'tx, [u8]>> = + IterDupOfKey<'tx, 'cur, RwSync, Value>; + +/// A single-key DUPSORT iterator for an unsynchronized read-only transaction. +pub type RoDupIterOfKeyUnsync<'tx, 'cur, Value = Cow<'tx, [u8]>> = + IterDupOfKey<'tx, 'cur, Ro, Value>; + +/// A single-key DUPSORT iterator for an unsynchronized read-write transaction. +pub type RwDupIterOfKeyUnsync<'tx, 'cur, Value = Cow<'tx, [u8]>> = + IterDupOfKey<'tx, 'cur, Rw, Value>; + +// --- Transaction-level iterator aliases --- /// A key-value iterator for a synchronized read-only transaction. pub type RoIterSync<'tx, 'cur, Key = Cow<'tx, [u8]>, Value = Cow<'tx, [u8]>> = @@ -94,33 +118,33 @@ pub type RwIterUnsync<'tx, 'cur, Key = Cow<'tx, [u8]>, Value = Cow<'tx, [u8]>> = IterKeyVals<'tx, 'cur, Rw, Key, Value>; /// A flattening DUPFIXED iterator for a synchronized read-only transaction. -pub type RoDupFixedIterSync<'tx, 'cur, Key = Cow<'tx, [u8]>, const VALUE_SIZE: usize = 0> = - IterDupFixed<'tx, 'cur, RoSync, Key, VALUE_SIZE>; +pub type RoDupFixedIterSync<'tx, 'cur, Key = Cow<'tx, [u8]>, Value = Cow<'tx, [u8]>> = + IterDupFixed<'tx, 'cur, RoSync, Key, Value>; /// A flattening DUPFIXED iterator for a synchronized read-write transaction. -pub type RwDupFixedIterSync<'tx, 'cur, Key = Cow<'tx, [u8]>, const VALUE_SIZE: usize = 0> = - IterDupFixed<'tx, 'cur, RwSync, Key, VALUE_SIZE>; +pub type RwDupFixedIterSync<'tx, 'cur, Key = Cow<'tx, [u8]>, Value = Cow<'tx, [u8]>> = + IterDupFixed<'tx, 'cur, RwSync, Key, Value>; /// A flattening DUPFIXED iterator for an unsynchronized read-only transaction. -pub type RoDupFixedIterUnsync<'tx, 'cur, Key = Cow<'tx, [u8]>, const VALUE_SIZE: usize = 0> = - IterDupFixed<'tx, 'cur, Ro, Key, VALUE_SIZE>; +pub type RoDupFixedIterUnsync<'tx, 'cur, Key = Cow<'tx, [u8]>, Value = Cow<'tx, [u8]>> = + IterDupFixed<'tx, 'cur, Ro, Key, Value>; /// A flattening DUPFIXED iterator for an unsynchronized read-write transaction. -pub type RwDupFixedIterUnsync<'tx, 'cur, Key = Cow<'tx, [u8]>, const VALUE_SIZE: usize = 0> = - IterDupFixed<'tx, 'cur, Rw, Key, VALUE_SIZE>; +pub type RwDupFixedIterUnsync<'tx, 'cur, Key = Cow<'tx, [u8]>, Value = Cow<'tx, [u8]>> = + IterDupFixed<'tx, 'cur, Rw, Key, Value>; /// A single-key DUPFIXED iterator for a synchronized read-only transaction. -pub type RoDupFixedIterOfKeySync<'tx, 'cur, const VALUE_SIZE: usize = 0> = - IterDupFixedOfKey<'tx, 'cur, RoSync, VALUE_SIZE>; +pub type RoDupFixedIterOfKeySync<'tx, 'cur, Value = Cow<'tx, [u8]>> = + IterDupFixedOfKey<'tx, 'cur, RoSync, Value>; /// A single-key DUPFIXED iterator for a synchronized read-write transaction. -pub type RwDupFixedIterOfKeySync<'tx, 'cur, const VALUE_SIZE: usize = 0> = - IterDupFixedOfKey<'tx, 'cur, RwSync, VALUE_SIZE>; +pub type RwDupFixedIterOfKeySync<'tx, 'cur, Value = Cow<'tx, [u8]>> = + IterDupFixedOfKey<'tx, 'cur, RwSync, Value>; /// A single-key DUPFIXED iterator for an unsynchronized read-only transaction. -pub type RoDupFixedIterOfKeyUnsync<'tx, 'cur, const VALUE_SIZE: usize = 0> = - IterDupFixedOfKey<'tx, 'cur, Ro, VALUE_SIZE>; +pub type RoDupFixedIterOfKeyUnsync<'tx, 'cur, Value = Cow<'tx, [u8]>> = + IterDupFixedOfKey<'tx, 'cur, Ro, Value>; /// A single-key DUPFIXED iterator for an unsynchronized read-write transaction. -pub type RwDupFixedIterOfKeyUnsync<'tx, 'cur, const VALUE_SIZE: usize = 0> = - IterDupFixedOfKey<'tx, 'cur, Rw, VALUE_SIZE>; +pub type RwDupFixedIterOfKeyUnsync<'tx, 'cur, Value = Cow<'tx, [u8]>> = + IterDupFixedOfKey<'tx, 'cur, Rw, Value>; diff --git a/src/tx/cursor.rs b/src/tx/cursor.rs index 1fa315e..f50633b 100644 --- a/src/tx/cursor.rs +++ b/src/tx/cursor.rs @@ -1,11 +1,12 @@ use crate::{ - Database, ReadError, ReadResult, TableObject, TransactionKind, codec_try_optional, + Database, ObjectLength, ReadError, ReadResult, TableObject, TableObjectOwned, TransactionKind, + codec_try_optional, error::{MdbxResult, mdbx_result}, flags::*, tx::{ TxPtrAccess, - aliases::{IterDupVals, IterKeyVals}, - iter::{Iter, IterDup, IterDupFixed, IterDupFixedOfKey}, + aliases::IterKeyVals, + iter::{Iter, IterDup, IterDupFixed, IterDupFixedOfKey, IterDupOfKey}, kind::WriteMarker, }, }; @@ -50,18 +51,6 @@ where Ok(Self { access, cursor, db, _kind: PhantomData }) } - /// Creates a cursor from a raw MDBX cursor pointer. - /// - /// This function must only be used when you are certain that the provided - /// cursor pointer is valid and associated with the given access type. - pub(crate) const fn new_raw( - access: &'tx K::Access, - cursor: *mut ffi::MDBX_cursor, - db: Database, - ) -> Self { - Self { access, cursor, db, _kind: PhantomData } - } - /// Helper function for `Clone`. This should only be invoked within /// a `with_txn_ptr` call to ensure safety. fn new_at_position(other: &Self) -> MdbxResult { @@ -110,6 +99,17 @@ where == ffi::MDBX_RESULT_TRUE } + /// Returns the count of duplicate values for the current key. + /// + /// For databases without `DUP_SORT`, this always returns 1. + /// The cursor must be positioned at a valid key. + pub fn dup_count(&self) -> MdbxResult { + self.access.with_txn_ptr(|_| { + // SAFETY: cursor is valid within with_txn_ptr block + unsafe { crate::tx::ops::cursor_dup_count(self.cursor) } + }) + } + /// Retrieves a key/data pair from the cursor. Depending on the cursor op, /// the current key may be returned. fn get( @@ -448,11 +448,11 @@ where if self.is_eof() { // Reposition to first item match self.first::() { - Ok(Some(first)) => return IterKeyVals::from_ref_with(self, first), - Ok(None) | Err(_) => return IterKeyVals::end_from_ref(self), + Ok(Some(first)) => return IterKeyVals::new_with(self, first), + Ok(None) | Err(_) => return IterKeyVals::new_end(self), } } - IterKeyVals::from_ref(self) + IterKeyVals::new(self) } /// Returns an iterator over database items as slices. @@ -464,7 +464,7 @@ where where 'tx: 'cur, { - IterKeyVals::from_ref(self) + IterKeyVals::new(self) } /// Iterate over database items starting from the beginning of the database. @@ -481,10 +481,10 @@ where Value: TableObject<'tx>, { let Some(first) = self.first()? else { - return Ok(Iter::end_from_ref(self)); + return Ok(Iter::new_end(self)); }; - Ok(Iter::from_ref_with(self, first)) + Ok(Iter::new_with(self, first)) } /// Iterate over database items starting from the given key. @@ -502,10 +502,10 @@ where Value: TableObject<'tx>, { let Some(first) = self.set_range::(key)? else { - return Ok(Iter::end_from_ref(self)); + return Ok(Iter::new_end(self)); }; - Ok(Iter::from_ref_with(self, first)) + Ok(Iter::new_with(self, first)) } /// Iterate over duplicate database items. @@ -527,11 +527,11 @@ where { if self.is_eof() { match self.first::() { - Ok(Some(first)) => return IterDup::from_ref_with(self, first), - Ok(None) | Err(_) => return IterDup::end_from_ref(self), + Ok(Some(first)) => return IterDup::new_with(self, first), + Ok(None) | Err(_) => return IterDup::new_end(self), } } - IterDup::::from_ref(self) + IterDup::::new(self) } /// Iterate over duplicate database items starting from the beginning of the @@ -545,10 +545,10 @@ where Value: TableObject<'tx>, { let Some(first) = self.first()? else { - return Ok(IterDup::end_from_ref(self)); + return Ok(IterDup::new_end(self)); }; - Ok(IterDup::from_ref_with(self, first)) + Ok(IterDup::new_with(self, first)) } /// Iterate over duplicate items in the database starting from the given @@ -563,28 +563,30 @@ where Value: TableObject<'tx>, { let Some(first) = self.set_range(key)? else { - return Ok(IterDup::::end_from_ref(self)); + return Ok(IterDup::::new_end(self)); }; - Ok(IterDup::from_ref_with(self, first)) + Ok(IterDup::new_with(self, first)) } /// Iterate over the duplicates of the item in the database with the given /// key. - pub fn iter_dup_of<'cur, Key, Value>( + /// + /// This iterator yields just the values for the specified key. When all + /// values are exhausted, iteration stops. + pub fn iter_dup_of<'cur, Value>( &'cur mut self, key: &[u8], - ) -> ReadResult> + ) -> ReadResult> where 'tx: 'cur, - Key: TableObject<'tx> + PartialEq, Value: TableObject<'tx>, { - let Some(first) = self.set_key(key.as_ref())? else { - return Ok(IterDupVals::end_from_ref(self)); + let Some(value) = self.set::(key)? else { + return Ok(IterDupOfKey::new_end(self)); }; - Ok(IterDupVals::from_ref_with(self, first)) + Ok(IterDupOfKey::new_with(self, value)) } /// [`DatabaseFlags::DUP_FIXED`]-only: Iterate over all fixed-size duplicate @@ -593,62 +595,60 @@ where /// This iterator efficiently fetches pages of fixed-size values and yields /// them individually, providing a flattened view of the DUPFIXED table. /// - /// The `VALUE_SIZE` const generic must match the fixed size of values in - /// the database. + /// The value size is determined at runtime from the first value in the + /// database. The `Value` type parameter must implement [`TableObjectOwned`] + /// for decoding values. /// /// Returns [`crate::MdbxError::RequiresDupFixed`] if the database does not have the /// [`DatabaseFlags::DUP_FIXED`] flag set. /// - /// # Correctness - /// - /// The `VALUE_SIZE` const generic must exactly match the fixed value size - /// in the database. See [`IterDupFixed`] for details on mismatch behavior. - /// /// # Example /// /// ```no_run - /// # use signet_libmdbx::{Environment, DatabaseFlags, WriteFlags}; + /// # use signet_libmdbx::{Environment, DatabaseFlags, WriteFlags, DupItem}; /// # use std::path::Path; /// # let env = Environment::builder().open(Path::new("/tmp/ex")).unwrap(); /// let txn = env.begin_ro_sync().unwrap(); /// let db = txn.open_db(None).unwrap(); /// let mut cursor = txn.cursor(db).unwrap(); /// - /// // Iterate over 8-byte values - /// for result in cursor.iter_dupfixed_start::, 8>().unwrap() { - /// let (key, value) = result.unwrap(); - /// println!("{:?} => {:?}", key, value); + /// // Iterate over fixed-size values decoded as [u8; 8] + /// for result in cursor.iter_dupfixed_start::, [u8; 8]>().unwrap() { + /// let value = result.unwrap().into_value(); + /// println!("{:?}", value); /// } /// ``` - pub fn iter_dupfixed_start<'cur, Key, const VALUE_SIZE: usize>( + pub fn iter_dupfixed_start<'cur, Key, Value>( &'cur mut self, - ) -> ReadResult> + ) -> ReadResult> where 'tx: 'cur, Key: TableObject<'tx> + Clone, + Value: TableObjectOwned, { #[cfg(debug_assertions)] - { - assertions::debug_assert_dup_fixed(self.db_flags()); - assert!(VALUE_SIZE > 0, "VALUE_SIZE must be non-zero"); - } + assertions::debug_assert_dup_fixed(self.db_flags()); - // Position at first key - let Some((_key, _value)) = self.first::()? else { - return Ok(IterDupFixed::end_from_ref(self)); + // Position at first key and get value size via ObjectLength + let Some((_key, ObjectLength(value_size))) = self.first::()? else { + return Ok(IterDupFixed::new_end(self)); }; + if value_size == 0 { + return Ok(IterDupFixed::new_end(self)); + } + // Get first page of values for current key let Some(page) = self.get_multiple::>()? else { - return Ok(IterDupFixed::end_from_ref(self)); + return Ok(IterDupFixed::new_end(self)); }; // Re-fetch the key since get_multiple doesn't return it let Some((key, _)) = self.get_current::()? else { - return Ok(IterDupFixed::end_from_ref(self)); + return Ok(IterDupFixed::new_end(self)); }; - Ok(IterDupFixed::from_ref_with(self, key, page)) + Ok(IterDupFixed::new_with(self, key, page, value_size)) } /// [`DatabaseFlags::DUP_FIXED`]-only: Iterate over all fixed-size duplicate @@ -657,55 +657,55 @@ where /// This iterator efficiently fetches pages of fixed-size values and yields /// them individually, providing a flattened view of the DUPFIXED table. /// - /// The `VALUE_SIZE` const generic must match the fixed size of values in - /// the database. - /// - /// # Correctness - /// - /// The `VALUE_SIZE` const generic must exactly match the fixed value size - /// in the database. See [`IterDupFixed`] for details on mismatch behavior. + /// The value size is determined at runtime from the first value in the + /// database. The `Value` type parameter must implement [`TableObjectOwned`] + /// for decoding values. /// /// # Example /// /// ```no_run - /// # use signet_libmdbx::{Environment, DatabaseFlags, WriteFlags}; + /// # use signet_libmdbx::{Environment, DatabaseFlags, WriteFlags, DupItem}; /// # use std::path::Path; /// # let env = Environment::builder().open(Path::new("/tmp/ex")).unwrap(); /// let txn = env.begin_ro_sync().unwrap(); /// let db = txn.open_db(None).unwrap(); /// let mut cursor = txn.cursor(db).unwrap(); /// - /// // Iterate over 8-byte values starting from key "start" - /// for result in cursor.iter_dupfixed_from::, 8>(b"start").unwrap() { - /// let (key, value) = result.unwrap(); - /// println!("{:?} => {:?}", key, value); + /// // Iterate over fixed-size values starting from key "start" + /// for result in cursor.iter_dupfixed_from::, [u8; 8]>(b"start").unwrap() { + /// let value = result.unwrap().into_value(); + /// println!("{:?}", value); /// } /// ``` - pub fn iter_dupfixed_from<'cur, Key, const VALUE_SIZE: usize>( + pub fn iter_dupfixed_from<'cur, Key, Value>( &'cur mut self, key: &[u8], - ) -> ReadResult> + ) -> ReadResult> where 'tx: 'cur, Key: TableObject<'tx> + Clone, + Value: TableObjectOwned, { #[cfg(debug_assertions)] - { - assertions::debug_assert_dup_fixed(self.db_flags()); - assert!(VALUE_SIZE > 0, "VALUE_SIZE must be non-zero"); - } + assertions::debug_assert_dup_fixed(self.db_flags()); - // Position at first key >= the requested key - let Some((found_key, _)) = self.set_range::(key)? else { - return Ok(IterDupFixed::end_from_ref(self)); + // Position at first key >= the requested key and get value size + let Some((found_key, ObjectLength(value_size))) = + self.set_range::(key)? + else { + return Ok(IterDupFixed::new_end(self)); }; + if value_size == 0 { + return Ok(IterDupFixed::new_end(self)); + } + // Get first page for this key let Some(page) = self.get_multiple::>()? else { - return Ok(IterDupFixed::end_from_ref(self)); + return Ok(IterDupFixed::new_end(self)); }; - Ok(IterDupFixed::from_ref_with(self, found_key, page)) + Ok(IterDupFixed::new_with(self, found_key, page, value_size)) } /// [`DatabaseFlags::DUP_FIXED`]-only: Iterate over all fixed-size duplicate @@ -716,14 +716,9 @@ where /// the specified key. When all values for that key are exhausted, /// iteration stops. /// - /// The `VALUE_SIZE` const generic must match the fixed size of values in - /// the database. - /// - /// # Correctness - /// - /// The `VALUE_SIZE` const generic must exactly match the fixed value size - /// in the database. See [`IterDupFixedOfKey`] for details on mismatch - /// behavior. + /// The value size is determined at runtime from the first value in the + /// database. The `Value` type parameter must implement [`TableObjectOwned`] + /// for decoding values. /// /// # Example /// @@ -735,35 +730,43 @@ where /// let db = txn.open_db(None).unwrap(); /// let mut cursor = txn.cursor(db).unwrap(); /// - /// // Iterate over 8-byte values for a specific key - /// for result in cursor.iter_dupfixed_of::<8>(b"my_key").unwrap() { + /// // Iterate over fixed-size values for a specific key + /// for result in cursor.iter_dupfixed_of::<[u8; 8]>(b"my_key").unwrap() { /// let value: [u8; 8] = result.unwrap(); /// println!("value: {:?}", value); /// } /// ``` /// /// [`IterDupFixedOfKey`]: crate::tx::iter::IterDupFixedOfKey - pub fn iter_dupfixed_of<'cur, const VALUE_SIZE: usize>( + pub fn iter_dupfixed_of<'cur, Value>( &'cur mut self, key: &[u8], - ) -> ReadResult> + ) -> ReadResult> where 'tx: 'cur, + Value: TableObjectOwned, { #[cfg(debug_assertions)] { assertions::debug_assert_dup_fixed(self.db_flags()); assertions::debug_assert_integer_key(self.db_flags(), key); - assert!(VALUE_SIZE > 0, "VALUE_SIZE must be non-zero"); } - let Some((_key, page)) = - self.seek_and_get_multiple::<(), std::borrow::Cow<'tx, [u8]>>(key)? - else { - return Ok(IterDupFixedOfKey::end_from_ref(self)); + // Position at key and get value size from the first value + let Some(ObjectLength(value_size)) = self.set::(key)? else { + return Ok(IterDupFixedOfKey::new_end(self)); + }; + + if value_size == 0 { + return Ok(IterDupFixedOfKey::new_end(self)); + } + + // Get first page of values (cursor is already positioned at the key) + let Some(page) = self.get_multiple::>()? else { + return Ok(IterDupFixedOfKey::new_end(self)); }; - Ok(IterDupFixedOfKey::from_ref_with(self, page)) + Ok(IterDupFixedOfKey::new_with(self, page, value_size)) } } diff --git a/src/tx/iter/base.rs b/src/tx/iter/base.rs index baa3c1d..c78429e 100644 --- a/src/tx/iter/base.rs +++ b/src/tx/iter/base.rs @@ -3,7 +3,7 @@ use crate::{ Cursor, MdbxError, ReadResult, TableObject, TableObjectOwned, TransactionKind, tx::TxPtrAccess, }; -use std::{borrow::Cow, marker::PhantomData, ptr}; +use std::{marker::PhantomData, ptr}; /// An iterator over the key/value pairs in an MDBX database. /// @@ -21,11 +21,11 @@ pub struct Iter< 'tx, 'cur, K: TransactionKind, - Key = Cow<'tx, [u8]>, - Value = Cow<'tx, [u8]>, + Key = std::borrow::Cow<'tx, [u8]>, + Value = std::borrow::Cow<'tx, [u8]>, const OP: u32 = { ffi::MDBX_NEXT }, > { - pub(crate) cursor: Cow<'cur, Cursor<'tx, K>>, + pub(crate) cursor: &'cur mut Cursor<'tx, K>, /// Pre-fetched value from cursor positioning, yielded before calling FFI. pending: Option<(Key, Value)>, /// When true, the iterator is exhausted and will always return `None`. @@ -48,47 +48,22 @@ impl<'tx: 'cur, 'cur, K, Key, Value, const OP: u32> Iter<'tx, 'cur, K, Key, Valu where K: TransactionKind, { - /// Create a new iterator from the given cursor, starting at the given - /// position. - pub(crate) fn new(cursor: Cow<'cur, Cursor<'tx, K>>) -> Self { + /// Create a new iterator from a mutable reference to the given cursor. + pub(crate) fn new(cursor: &'cur mut Cursor<'tx, K>) -> Self { Iter { cursor, pending: None, exhausted: false, _marker: PhantomData } } - /// Create a new iterator from a mutable reference to the given cursor, - pub(crate) fn from_ref(cursor: &'cur mut Cursor<'tx, K>) -> Self { - Self::new(Cow::Borrowed(cursor)) - } - /// Create a new iterator that is already exhausted. /// /// Iteration will immediately return `None`. - pub(crate) fn new_end(cursor: Cow<'cur, Cursor<'tx, K>>) -> Self { + pub(crate) fn new_end(cursor: &'cur mut Cursor<'tx, K>) -> Self { Iter { cursor, pending: None, exhausted: true, _marker: PhantomData } } - /// Create a new, exhausted iterator from a mutable reference to the given - /// cursor. This is usually used as a placeholder when no items are to be - /// yielded. - pub(crate) fn end_from_ref(cursor: &'cur mut Cursor<'tx, K>) -> Self { - Self::new_end(Cow::Borrowed(cursor)) - } - - /// Create a new iterator from the given cursor, first yielding the - /// provided key/value pair. - pub(crate) fn new_with(cursor: Cow<'cur, Cursor<'tx, K>>, first: (Key, Value)) -> Self { - Iter { cursor, pending: Some(first), exhausted: false, _marker: PhantomData } - } - /// Create a new iterator from a mutable reference to the given cursor, /// first yielding the provided key/value pair. - pub(crate) fn from_ref_with(cursor: &'cur mut Cursor<'tx, K>, first: (Key, Value)) -> Self { - Self::new_with(Cow::Borrowed(cursor), first) - } - - /// Create a new iterator from an owned cursor, first yielding the - /// provided key/value pair. - pub(crate) fn from_owned_with(cursor: Cursor<'tx, K>, first: (Key, Value)) -> Self { - Self::new_with(Cow::Owned(cursor), first) + pub(crate) fn new_with(cursor: &'cur mut Cursor<'tx, K>, first: (Key, Value)) -> Self { + Iter { cursor, pending: Some(first), exhausted: false, _marker: PhantomData } } } diff --git a/src/tx/iter/dup.rs b/src/tx/iter/dup.rs index 97ed489..f361473 100644 --- a/src/tx/iter/dup.rs +++ b/src/tx/iter/dup.rs @@ -1,30 +1,88 @@ -//! Iterator for DUPSORT databases with nested iteration. +//! Flat iterator for DUPSORT databases. +use super::DupItem; use crate::{ - Cursor, MdbxError, ReadResult, TableObject, TableObjectOwned, TransactionKind, - error::mdbx_result, - tx::{ - TxPtrAccess, - aliases::{IterDupKeys, IterDupVals}, - iter::Iter, - }, + Cursor, MdbxError, ReadResult, TableObject, TableObjectOwned, TransactionKind, tx::TxPtrAccess, }; -use std::{borrow::Cow, ptr}; +use std::{marker::PhantomData, ptr}; -/// An iterator over the key/value pairs in an MDBX database with duplicate -/// keys. -pub struct IterDup<'tx, 'cur, K: TransactionKind, Key = Cow<'tx, [u8]>, Value = Cow<'tx, [u8]>> { - inner: IterDupKeys<'tx, 'cur, K, Key, Value>, +/// A flat iterator over DUPSORT databases yielding [`DupItem`] variants. +/// +/// This iterator yields every key-value pair in the database, including all +/// duplicate values. To avoid unnecessary key cloning, it yields +/// [`DupItem::NewKey`] for the first value of each key, and +/// [`DupItem::SameKey`] for subsequent values of the same key. +/// +/// # Type Parameters +/// +/// - `'tx`: The transaction lifetime +/// - `'cur`: The cursor lifetime +/// - `K`: The transaction kind marker +/// - `Key`: The key type (must implement [`TableObject`]) +/// - `Value`: The value type (must implement [`TableObject`]) +/// +/// # Example +/// +/// ```no_run +/// # use signet_libmdbx::{Environment, DatabaseFlags, WriteFlags, DupItem}; +/// # use std::path::Path; +/// # let env = Environment::builder().open(Path::new("/tmp/dup_example")).unwrap(); +/// let txn = env.begin_rw_sync().unwrap(); +/// let db = txn.create_db(None, DatabaseFlags::DUP_SORT).unwrap(); +/// +/// // Insert duplicate values +/// txn.put(db, b"a", b"1", WriteFlags::empty()).unwrap(); +/// txn.put(db, b"a", b"2", WriteFlags::empty()).unwrap(); +/// txn.put(db, b"b", b"1", WriteFlags::empty()).unwrap(); +/// txn.commit().unwrap(); +/// +/// // Iterate over all key-value pairs +/// let txn = env.begin_ro_sync().unwrap(); +/// let db = txn.open_db(None).unwrap(); +/// let mut cursor = txn.cursor(db).unwrap(); +/// +/// let mut current_key: Option> = None; +/// for result in cursor.iter_dup_start::, Vec>().unwrap() { +/// match result.unwrap() { +/// DupItem::NewKey(key, value) => { +/// println!("New key {:?} => {:?}", key, value); +/// current_key = Some(key); +/// } +/// DupItem::SameKey(value) => { +/// println!(" Same key {:?} => {:?}", current_key.as_ref().unwrap(), value); +/// } +/// } +/// } +/// ``` +pub struct IterDup< + 'tx, + 'cur, + K: TransactionKind, + Key = std::borrow::Cow<'tx, [u8]>, + Value = std::borrow::Cow<'tx, [u8]>, +> { + cursor: &'cur mut Cursor<'tx, K>, + /// Pre-fetched value from cursor positioning, yielded before calling FFI. + pending: Option<(Key, Value)>, + /// Values remaining for current key (0 = next is new key). + remaining: usize, + /// Whether we've yielded our first item yet. The first item is always NewKey. + first_yielded: bool, + /// When true, the iterator is exhausted and will always return `None`. + exhausted: bool, + _marker: PhantomData (Key, Value)>, } -impl<'tx, 'cur, K, Key, Value> core::fmt::Debug for IterDup<'tx, 'cur, K, Key, Value> +impl core::fmt::Debug for IterDup<'_, '_, K, Key, Value> where K: TransactionKind, - Key: core::fmt::Debug, - Value: core::fmt::Debug, { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - f.debug_struct("IterDup").finish() + f.debug_struct("IterDup") + .field("remaining", &self.remaining) + .field("first_yielded", &self.first_yielded) + .field("exhausted", &self.exhausted) + .finish() } } @@ -32,113 +90,191 @@ impl<'tx: 'cur, 'cur, K, Key, Value> IterDup<'tx, 'cur, K, Key, Value> where K: TransactionKind, { - /// Create a new iterator from the given cursor, starting at the given - /// position. - pub(crate) fn new(cursor: Cow<'cur, Cursor<'tx, K>>) -> Self { - IterDup { inner: IterDupKeys::new(cursor) } - } - - /// Create a new iterator from a mutable reference to the given cursor, - pub(crate) fn from_ref(cursor: &'cur mut Cursor<'tx, K>) -> Self { - Self::new(Cow::Borrowed(cursor)) - } - - /// Create a new iterator from an owned cursor. - pub fn from_owned(cursor: Cursor<'tx, K>) -> Self { - Self::new(Cow::Owned(cursor)) - } - - /// Create a new iterator from the given cursor, the inner iterator will - /// first yield the provided key/value pair. - pub(crate) fn new_with(cursor: Cow<'cur, Cursor<'tx, K>>, first: (Key, Value)) -> Self { - IterDup { inner: Iter::new_with(cursor, first) } + /// Create a new iterator from a mutable reference to the given cursor. + /// + /// The cursor must be positioned at a valid key. The iterator will start + /// from the NEXT item after the current cursor position. + pub(crate) fn new(cursor: &'cur mut Cursor<'tx, K>) -> Self { + // When continuing from an existing position, we need to get the current + // key's dup count to know how many values remain for this key. + // We subtract 1 because MDBX_NEXT will move to the next value. + // If dup_count fails (cursor not positioned), we'll treat the first + // result as a new key. + let remaining = cursor.dup_count().ok().and_then(|c| c.checked_sub(1)).unwrap_or(0); + IterDup { + cursor, + pending: None, + remaining, + first_yielded: false, + exhausted: false, + _marker: PhantomData, + } } /// Create a new iterator from a mutable reference to the given cursor, - /// first yielding the provided key/value pair. - pub fn from_ref_with(cursor: &'cur mut Cursor<'tx, K>, first: (Key, Value)) -> Self { - Self::new_with(Cow::Borrowed(cursor), first) - } - - /// Create a new iterator from the given cursor, with no items to yield. - pub fn new_end(cursor: Cow<'cur, Cursor<'tx, K>>) -> Self { - IterDup { inner: Iter::new_end(cursor) } - } - - /// Create a new iterator from a mutable reference to the given cursor, with - /// no items to yield. - pub fn end_from_ref(cursor: &'cur mut Cursor<'tx, K>) -> Self { - Self::new_end(Cow::Borrowed(cursor)) + /// first yielding the provided key/value pair as a new key. + pub(crate) fn new_with(cursor: &'cur mut Cursor<'tx, K>, first: (Key, Value)) -> Self { + // Get the count of duplicates for the current key. + // The pending item will be the first, so remaining = count. + let remaining = cursor.dup_count().unwrap_or(1); + IterDup { + cursor, + pending: Some(first), + remaining, + first_yielded: false, + exhausted: false, + _marker: PhantomData, + } } - /// Create a new iterator from an owned cursor, with no items to yield. - pub fn end_from_owned(cursor: Cursor<'tx, K>) -> Self { - Self::new_end(Cow::Owned(cursor)) + /// Create a new iterator that is already exhausted. + /// + /// Iteration will immediately return `None`. + pub(crate) fn new_end(cursor: &'cur mut Cursor<'tx, K>) -> Self { + IterDup { + cursor, + pending: None, + remaining: 0, + first_yielded: true, + exhausted: true, + _marker: PhantomData, + } } } -impl<'tx, 'cur, K, Key, Value> IterDup<'tx, 'cur, K, Key, Value> +impl<'tx: 'cur, 'cur, K, Key, Value> IterDup<'tx, 'cur, K, Key, Value> where K: TransactionKind, Key: TableObject<'tx>, Value: TableObject<'tx>, { - /// Borrow the next key/value pair from the iterator. - pub fn borrow_next(&mut self) -> ReadResult>> { - // We want to use Cursor::new_at_position to create a new cursor, - // but the kv pair may be borrowed from the inner cursor, so we need to - // store the references first. This is just to avoid borrow checker - // issues in the unsafe block. - let cursor_ptr = self.inner.cursor.as_ref().cursor(); - - // SAFETY: the access lives as long as self.inner.cursor, and the cursor op - // we perform does not invalidate the data borrowed from the inner - // cursor in borrow_next. - let access = self.inner.cursor.access(); - - // The next will be the FIRST KV pair for the NEXT key in the DUPSORT - match self.inner.borrow_next()? { - Some((key, value)) => { - // SAFETY: the access is valid as per above. The FFI calls here do - // not invalidate any data borrowed from the inner cursor. - // - // This is an inlined version of Cursor::new_at_position. - let db = self.inner.cursor.as_ref().db(); - let dup_cursor = access.with_txn_ptr(move |_| unsafe { - let new_cursor = ffi::mdbx_cursor_create(ptr::null_mut()); - let res = ffi::mdbx_cursor_copy(cursor_ptr, new_cursor); - mdbx_result(res)?; - Ok::<_, MdbxError>(Cursor::new_raw(access, new_cursor, db)) - })?; - - Ok(Some(IterDupVals::from_owned_with(dup_cursor, (key, value)))) + /// Execute MDBX_NEXT and decode the result. + fn execute_next(&self) -> ReadResult> { + let mut key = ffi::MDBX_val { iov_len: 0, iov_base: ptr::null_mut() }; + let mut data = ffi::MDBX_val { iov_len: 0, iov_base: ptr::null_mut() }; + + self.cursor.access().with_txn_ptr(|txn| { + let res = unsafe { + ffi::mdbx_cursor_get(self.cursor.cursor(), &mut key, &mut data, ffi::MDBX_NEXT) + }; + + match res { + ffi::MDBX_SUCCESS => { + // SAFETY: decode_val checks for dirty writes and copies if needed. + // The lifetime 'tx guarantees the Cow cannot outlive the transaction. + unsafe { + let key = TableObject::decode_val::(txn, key)?; + let value = TableObject::decode_val::(txn, data)?; + Ok(Some((key, value))) + } + } + ffi::MDBX_NOTFOUND | ffi::MDBX_ENODATA | ffi::MDBX_RESULT_TRUE => Ok(None), + other => Err(MdbxError::from_err_code(other).into()), } - None => Ok(None), + }) + } + + /// Borrow the next item from the iterator. + /// + /// Returns `Ok(Some(DupItem))` if an item was found, + /// `Ok(None)` if no more items are available, or `Err` on DB + /// access error. + pub fn borrow_next(&mut self) -> ReadResult>> { + if self.exhausted { + return Ok(None); } + + // Yield pending first item (always NewKey, already counted in remaining) + if let Some((key, value)) = self.pending.take() { + self.first_yielded = true; + self.remaining = self.remaining.saturating_sub(1); + return Ok(Some(DupItem::NewKey(key, value))); + } + + let Some((key, value)) = self.execute_next()? else { + self.exhausted = true; + return Ok(None); + }; + + // First item is always NewKey (caller hasn't seen any key yet) + if !self.first_yielded { + self.first_yielded = true; + self.remaining = self.remaining.saturating_sub(1); + return Ok(Some(DupItem::NewKey(key, value))); + } + + if self.remaining == 0 { + // This is a new key - get the count of duplicates + self.remaining = self.cursor.dup_count().unwrap_or(1).saturating_sub(1); + return Ok(Some(DupItem::NewKey(key, value))); + } + + self.remaining -= 1; + Ok(Some(DupItem::SameKey(value))) } } -impl<'tx: 'cur, 'cur, K, Key, Value> IterDup<'tx, 'cur, K, Key, Value> +impl IterDup<'_, '_, K, Key, Value> where K: TransactionKind, Key: TableObjectOwned, Value: TableObjectOwned, { - /// Own the next key/value pair from the iterator. - pub fn owned_next(&mut self) -> ReadResult>> { - self.borrow_next() + /// Own the next item from the iterator. + pub fn owned_next(&mut self) -> ReadResult>> { + if self.exhausted { + return Ok(None); + } + + // Yield pending first item (always NewKey, already counted in remaining) + if let Some((key, value)) = self.pending.take() { + self.first_yielded = true; + self.remaining = self.remaining.saturating_sub(1); + return Ok(Some(DupItem::NewKey(key, value))); + } + + let Some((key, value)) = self.execute_next()? else { + self.exhausted = true; + return Ok(None); + }; + + // First item is always NewKey (caller hasn't seen any key yet) + if !self.first_yielded { + self.first_yielded = true; + self.remaining = self.remaining.saturating_sub(1); + return Ok(Some(DupItem::NewKey(key, value))); + } + + if self.remaining == 0 { + // This is a new key - get the count of duplicates + self.remaining = self.cursor.dup_count().unwrap_or(1).saturating_sub(1); + return Ok(Some(DupItem::NewKey(key, value))); + } + + self.remaining -= 1; + Ok(Some(DupItem::SameKey(value))) } } -impl<'tx: 'cur, 'cur, K, Key, Value> Iterator for IterDup<'tx, 'cur, K, Key, Value> +impl Iterator for IterDup<'_, '_, K, Key, Value> where K: TransactionKind, Key: TableObjectOwned, Value: TableObjectOwned, { - type Item = ReadResult>; + type Item = ReadResult>; fn next(&mut self) -> Option { self.owned_next().transpose() } + + fn size_hint(&self) -> (usize, Option) { + if self.exhausted { + return (0, Some(0)); + } + // remaining = values left for current key (excluding pending) + // pending = pre-fetched item ready to yield + let pending = usize::from(self.pending.is_some()); + (self.remaining + pending, None) + } } diff --git a/src/tx/iter/dup_key.rs b/src/tx/iter/dup_key.rs new file mode 100644 index 0000000..734f815 --- /dev/null +++ b/src/tx/iter/dup_key.rs @@ -0,0 +1,162 @@ +//! Single-key iterator for DUPSORT databases. + +use crate::{ + Cursor, MdbxError, ReadResult, TableObject, TableObjectOwned, TransactionKind, tx::TxPtrAccess, +}; +use std::{marker::PhantomData, ptr}; + +/// A single-key iterator for DUPSORT databases, yielding just values. +/// +/// Unlike [`IterDup`](super::IterDup) which iterates across all keys yielding +/// `(Key, Value)` pairs, this iterator only yields values for a single key. +/// When all values for that key are exhausted, iteration stops. +/// +/// # Type Parameters +/// +/// - `'tx`: The transaction lifetime +/// - `'cur`: The cursor lifetime +/// - `K`: The transaction kind marker +/// - `Value`: The value type (must implement [`TableObject`]) +/// +/// # Example +/// +/// ```no_run +/// # use signet_libmdbx::{Environment, DatabaseFlags, WriteFlags}; +/// # use std::path::Path; +/// # let env = Environment::builder().open(Path::new("/tmp/dup_key_example")).unwrap(); +/// let txn = env.begin_rw_sync().unwrap(); +/// let db = txn.create_db(None, DatabaseFlags::DUP_SORT).unwrap(); +/// +/// // Insert duplicate values for a key +/// txn.put(db, b"key", b"val1", WriteFlags::empty()).unwrap(); +/// txn.put(db, b"key", b"val2", WriteFlags::empty()).unwrap(); +/// txn.put(db, b"key", b"val3", WriteFlags::empty()).unwrap(); +/// txn.commit().unwrap(); +/// +/// // Iterate over values for a specific key +/// let txn = env.begin_ro_sync().unwrap(); +/// let db = txn.open_db(None).unwrap(); +/// let mut cursor = txn.cursor(db).unwrap(); +/// +/// for result in cursor.iter_dup_of::>(b"key").unwrap() { +/// let value = result.unwrap(); +/// println!("value: {:?}", value); +/// } +/// ``` +pub struct IterDupOfKey<'tx, 'cur, K: TransactionKind, Value = std::borrow::Cow<'tx, [u8]>> { + cursor: &'cur mut Cursor<'tx, K>, + /// Pre-fetched value from cursor positioning, yielded before calling FFI. + pending: Option, + /// When true, the iterator is exhausted and will always return `None`. + exhausted: bool, + _marker: PhantomData Value>, +} + +impl core::fmt::Debug for IterDupOfKey<'_, '_, K, Value> +where + K: TransactionKind, +{ + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("IterDupOfKey").field("exhausted", &self.exhausted).finish() + } +} + +impl<'tx: 'cur, 'cur, K, Value> IterDupOfKey<'tx, 'cur, K, Value> +where + K: TransactionKind, +{ + /// Create a new iterator that is already exhausted. + /// + /// Iteration will immediately return `None`. + pub(crate) fn new_end(cursor: &'cur mut Cursor<'tx, K>) -> Self { + IterDupOfKey { cursor, pending: None, exhausted: true, _marker: PhantomData } + } + + /// Create a new iterator with the provided first value. + pub(crate) fn new_with(cursor: &'cur mut Cursor<'tx, K>, first: Value) -> Self { + IterDupOfKey { cursor, pending: Some(first), exhausted: false, _marker: PhantomData } + } +} + +impl<'tx: 'cur, 'cur, K, Value> IterDupOfKey<'tx, 'cur, K, Value> +where + K: TransactionKind, + Value: TableObject<'tx>, +{ + /// Execute MDBX_NEXT_DUP and decode the value. + fn execute_next_dup(&self) -> ReadResult> { + let mut key = ffi::MDBX_val { iov_len: 0, iov_base: ptr::null_mut() }; + let mut data = ffi::MDBX_val { iov_len: 0, iov_base: ptr::null_mut() }; + + self.cursor.access().with_txn_ptr(|txn| { + let res = unsafe { + ffi::mdbx_cursor_get(self.cursor.cursor(), &mut key, &mut data, ffi::MDBX_NEXT_DUP) + }; + + match res { + ffi::MDBX_SUCCESS => { + // SAFETY: decode_val checks for dirty writes and copies if needed. + // The lifetime 'tx guarantees the Cow cannot outlive the transaction. + unsafe { + let value = TableObject::decode_val::(txn, data)?; + Ok(Some(value)) + } + } + ffi::MDBX_NOTFOUND | ffi::MDBX_ENODATA | ffi::MDBX_RESULT_TRUE => Ok(None), + other => Err(MdbxError::from_err_code(other).into()), + } + }) + } + + /// Borrow the next value from the iterator. + /// + /// Returns `Ok(Some(value))` if a value was found, + /// `Ok(None)` if no more values are available for this key, or `Err` on DB + /// access error. + pub fn borrow_next(&mut self) -> ReadResult> { + if self.exhausted { + return Ok(None); + } + if let Some(v) = self.pending.take() { + return Ok(Some(v)); + } + let result = self.execute_next_dup()?; + if result.is_none() { + self.exhausted = true; + } + Ok(result) + } +} + +impl IterDupOfKey<'_, '_, K, Value> +where + K: TransactionKind, + Value: TableObjectOwned, +{ + /// Own the next value from the iterator. + pub fn owned_next(&mut self) -> ReadResult> { + if self.exhausted { + return Ok(None); + } + if let Some(v) = self.pending.take() { + return Ok(Some(v)); + } + let result = self.execute_next_dup()?; + if result.is_none() { + self.exhausted = true; + } + Ok(result) + } +} + +impl Iterator for IterDupOfKey<'_, '_, K, Value> +where + K: TransactionKind, + Value: TableObjectOwned, +{ + type Item = ReadResult; + + fn next(&mut self) -> Option { + self.owned_next().transpose() + } +} diff --git a/src/tx/iter/dupfixed.rs b/src/tx/iter/dupfixed.rs index 0890ace..ef678a3 100644 --- a/src/tx/iter/dupfixed.rs +++ b/src/tx/iter/dupfixed.rs @@ -1,6 +1,7 @@ //! Flattening iterator for DUPFIXED tables. -use crate::{Cursor, ReadResult, TableObject, TransactionKind}; +use super::DupItem; +use crate::{Cursor, ReadResult, TableObject, TableObjectOwned, TransactionKind}; use std::{borrow::Cow, marker::PhantomData}; /// A flattening iterator over DUPFIXED tables. @@ -10,27 +11,23 @@ use std::{borrow::Cow, marker::PhantomData}; /// store duplicate values with a fixed size, allowing MDBX to pack multiple /// values per page. /// +/// To avoid unnecessary key cloning, this iterator yields [`DupItem::NewKey`] +/// for the first value of each key, and [`DupItem::SameKey`] for subsequent +/// values of the same key. +/// /// # Type Parameters /// /// - `'tx`: The transaction lifetime /// - `'cur`: The cursor lifetime /// - `K`: The transaction kind marker /// - `Key`: The key type (must implement [`TableObject`]) -/// - `VALUE_SIZE`: The fixed size of each value in bytes +/// - `Value`: The value type (must implement [`TableObjectOwned`]) /// /// # Correctness /// -/// The `VALUE_SIZE` const generic **must** match the fixed value size stored -/// in the database. MDBX does not validate this at runtime. If mismatched: -/// -/// - **Too large**: Values are skipped; the iterator yields fewer items than -/// exist, potentially with misaligned data. -/// - **Too small**: The iterator yields more items than exist, each containing -/// partial or corrupted data from adjacent values. -/// - **Zero**: Causes an infinite loop (caught by debug assertion). -/// -/// The correct value size is determined by the size of values written to the -/// DUPFIXED database. All values under a given key must have the same size. +/// The value size is determined at construction time from the first value +/// in the database. All values in a DUPFIXED database must have the same +/// size. /// /// # Zero-Copy Operation /// @@ -42,7 +39,7 @@ use std::{borrow::Cow, marker::PhantomData}; /// # Example /// /// ```no_run -/// # use signet_libmdbx::{Environment, DatabaseFlags, WriteFlags}; +/// # use signet_libmdbx::{Environment, DatabaseFlags, WriteFlags, DupItem}; /// # use std::path::Path; /// # let env = Environment::builder().open(Path::new("/tmp/dupfixed_example")).unwrap(); /// // Create a DUPFIXED database @@ -60,105 +57,118 @@ use std::{borrow::Cow, marker::PhantomData}; /// let db = txn.open_db(Some("my_cool_db")).unwrap(); /// let mut cursor = txn.cursor(db).unwrap(); /// -/// for result in cursor.iter_dupfixed_start::, 4>().unwrap() { -/// let (key, value) = result.unwrap(); -/// let num = u32::from_le_bytes(value); -/// println!("{:?} => {}", key, num); +/// let mut current_key: Option> = None; +/// for result in cursor.iter_dupfixed_start::, [u8; 4]>().unwrap() { +/// match result.unwrap() { +/// DupItem::NewKey(key, value) => { +/// let num = u32::from_le_bytes(value); +/// println!("New key {:?} => {}", key, num); +/// current_key = Some(key); +/// } +/// DupItem::SameKey(value) => { +/// let num = u32::from_le_bytes(value); +/// println!(" Same key => {}", num); +/// } +/// } /// } /// ``` -pub struct IterDupFixed< - 'tx, - 'cur, - K: TransactionKind, - Key = Cow<'tx, [u8]>, - const VALUE_SIZE: usize = 0, -> { - cursor: Cow<'cur, Cursor<'tx, K>>, +pub struct IterDupFixed<'tx, 'cur, K: TransactionKind, Key = Cow<'tx, [u8]>, Value = Cow<'tx, [u8]>> +{ + cursor: &'cur mut Cursor<'tx, K>, /// The current key being iterated. current_key: Option, /// The current page of values. current_page: Cow<'tx, [u8]>, /// Current offset into the page, incremented as values are yielded. page_offset: usize, + /// The fixed value size, determined at construction. + value_size: usize, + /// Values remaining for current key (0 = next is new key). + remaining: usize, /// When true, the iterator is exhausted and will always return `None`. exhausted: bool, - _marker: PhantomData Key>, + _marker: PhantomData (Key, Value)>, } -impl core::fmt::Debug for IterDupFixed<'_, '_, K, Key, VALUE_SIZE> +impl core::fmt::Debug for IterDupFixed<'_, '_, K, Key, Value> where K: TransactionKind, Key: core::fmt::Debug, { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - let remaining = self.current_page.len().saturating_sub(self.page_offset) / VALUE_SIZE; + let remaining_in_page = if self.value_size > 0 { + self.current_page.len().saturating_sub(self.page_offset) / self.value_size + } else { + 0 + }; f.debug_struct("IterDupFixed") .field("exhausted", &self.exhausted) - .field("remaining_in_page", &remaining) + .field("value_size", &self.value_size) + .field("remaining_in_page", &remaining_in_page) + .field("remaining_for_key", &self.remaining) .finish() } } -impl<'tx: 'cur, 'cur, K, Key, const VALUE_SIZE: usize> IterDupFixed<'tx, 'cur, K, Key, VALUE_SIZE> +impl<'tx: 'cur, 'cur, K, Key, Value> IterDupFixed<'tx, 'cur, K, Key, Value> where K: TransactionKind, { + /// Returns the fixed value size (determined at construction). + pub const fn value_size(&self) -> usize { + self.value_size + } + /// Create a new, exhausted iterator. /// /// Iteration will immediately return `None`. - pub(crate) fn new_end(cursor: Cow<'cur, Cursor<'tx, K>>) -> Self { + pub(crate) fn new_end(cursor: &'cur mut Cursor<'tx, K>) -> Self { IterDupFixed { cursor, current_key: None, current_page: Cow::Borrowed(&[]), page_offset: 0, + value_size: 0, + remaining: 0, exhausted: true, _marker: PhantomData, } } - /// Create a new, exhausted iterator from a mutable reference to the cursor. - pub(crate) fn end_from_ref(cursor: &'cur mut Cursor<'tx, K>) -> Self { - Self::new_end(Cow::Borrowed(cursor)) - } - - /// Create a new iterator with the given initial key and page. + /// Create a new iterator with the given initial key, page, and value size. pub(crate) fn new_with( - cursor: Cow<'cur, Cursor<'tx, K>>, + cursor: &'cur mut Cursor<'tx, K>, key: Key, page: Cow<'tx, [u8]>, + value_size: usize, ) -> Self { + debug_assert!(value_size > 0, "DUPFIXED value size must be greater than zero"); + // Get the count of duplicates for the current key. + let remaining = cursor.dup_count().unwrap_or(1); IterDupFixed { cursor, current_key: Some(key), current_page: page, page_offset: 0, + value_size, + remaining, exhausted: false, _marker: PhantomData, } } - - /// Create a new iterator from a mutable reference with initial key and page. - pub(crate) fn from_ref_with( - cursor: &'cur mut Cursor<'tx, K>, - key: Key, - page: Cow<'tx, [u8]>, - ) -> Self { - Self::new_with(Cow::Borrowed(cursor), key, page) - } } -impl<'tx: 'cur, 'cur, K, Key, const VALUE_SIZE: usize> IterDupFixed<'tx, 'cur, K, Key, VALUE_SIZE> +impl<'tx: 'cur, 'cur, K, Key, Value> IterDupFixed<'tx, 'cur, K, Key, Value> where K: TransactionKind, - Key: TableObject<'tx> + Clone, + Key: TableObject<'tx>, { /// Consume the next value from the current page. /// - /// Returns `Some(Cow<'tx, [u8]>)` containing exactly `VALUE_SIZE` bytes, + /// Returns `Some(Cow<'tx, [u8]>)` containing exactly `value_size` bytes, /// or `None` if the page is exhausted. fn consume_value(&mut self) -> Option> { - let end = self.page_offset.checked_add(VALUE_SIZE)?; + let end = self.page_offset.checked_add(self.value_size)?; if end > self.current_page.len() { return None; } @@ -180,10 +190,8 @@ where /// /// Returns `Ok(true)` if a new page was fetched, `Ok(false)` if exhausted. fn fetch_next_page(&mut self) -> ReadResult { - let cursor = self.cursor.to_mut(); - // Try to get next page for current key - if let Some((key, page)) = cursor.next_multiple::>()? { + if let Some((key, page)) = self.cursor.next_multiple::>()? { self.current_key = Some(key); self.current_page = page; self.page_offset = 0; @@ -191,84 +199,108 @@ where } // No more pages for current key, move to next key - if cursor.next_nodup::()?.is_none() { + if self.cursor.next_nodup::()?.is_none() { self.exhausted = true; return Ok(false); } // Get first page for new key - let Some(page) = cursor.get_multiple::>()? else { + let Some(page) = self.cursor.get_multiple::>()? else { self.exhausted = true; return Ok(false); }; // Re-fetch the key since get_multiple doesn't return it - let Some((key, _)) = cursor.get_current::()? else { + let Some((key, _)) = self.cursor.get_current::()? else { self.exhausted = true; return Ok(false); }; + // New key - get dup count + self.remaining = self.cursor.dup_count().unwrap_or(1); + self.current_key = Some(key); self.current_page = page; self.page_offset = 0; Ok(true) } - /// Borrow the next key/value pair from the iterator. + /// Borrow the next item from the iterator. /// - /// Returns `Ok(Some((key, value)))` where: - /// - `key` is cloned from the current key - /// - `value` is a `Cow<'tx, [u8]>` of exactly `VALUE_SIZE` bytes + /// Returns `Ok(Some(DupItem))` where the value is a `Cow<'tx, [u8]>` of + /// exactly `value_size` bytes. /// /// Returns `Ok(None)` when the iterator is exhausted. - pub fn borrow_next(&mut self) -> ReadResult)>> { + pub fn borrow_next(&mut self) -> ReadResult>>> { if self.exhausted { return Ok(None); } // Try to consume from current page - if let Some(value) = self.consume_value() { - // Key is cloned for each value - cheap for Cow<[u8]>, may allocate - // for decoded types - let key = self.current_key.clone().expect("key should be set when page is non-empty"); - return Ok(Some((key, value))); + let value = match self.consume_value() { + Some(v) => v, + None => { + // Current page exhausted, fetch next page + if !self.fetch_next_page()? { + return Ok(None); + } + self.consume_value().expect("freshly fetched page should have values") + } + }; + + if self.remaining == 0 { + // This is a new key (we got here via fetch_next_page which set remaining) + self.remaining = self.remaining.saturating_sub(1); + let key = self.current_key.take().expect("key should be set after fetch"); + return Ok(Some(DupItem::NewKey(key, value))); } - // Current page exhausted, fetch next page - if !self.fetch_next_page()? { - return Ok(None); + // Check if this is the first value for the current key + // (remaining was just set and key is present) + if self.current_key.is_some() { + self.remaining -= 1; + let key = self.current_key.take().expect("key should be set"); + return Ok(Some(DupItem::NewKey(key, value))); } - // Consume first value from new page - let value = self.consume_value().expect("freshly fetched page should have values"); - let key = self.current_key.clone().expect("key should be set after fetch"); - Ok(Some((key, value))) + self.remaining = self.remaining.saturating_sub(1); + Ok(Some(DupItem::SameKey(value))) } - /// Get the next key/value pair as owned data. + /// Get the next item as owned data. /// - /// Returns `Ok(Some((key, [u8; VALUE_SIZE])))` where the value is copied - /// into a fixed-size array. - pub fn owned_next(&mut self) -> ReadResult> { - self.borrow_next().map(|opt| { - opt.map(|(key, value)| { - let mut arr = [0u8; VALUE_SIZE]; - arr.copy_from_slice(&value); - (key, arr) + /// Returns `Ok(Some(DupItem))` where the value is decoded using + /// [`TableObjectOwned::decode`]. + pub fn owned_next(&mut self) -> ReadResult>> + where + Value: TableObjectOwned, + { + self.borrow_next()? + .map(|item| match item { + DupItem::NewKey(k, cow) => Value::decode(&cow).map(|v| DupItem::NewKey(k, v)), + DupItem::SameKey(cow) => Value::decode(&cow).map(DupItem::SameKey), }) - }) + .transpose() } } -impl<'tx: 'cur, 'cur, K, Key, const VALUE_SIZE: usize> Iterator - for IterDupFixed<'tx, 'cur, K, Key, VALUE_SIZE> +impl<'tx: 'cur, 'cur, K, Key, Value> Iterator for IterDupFixed<'tx, 'cur, K, Key, Value> where K: TransactionKind, - Key: TableObject<'tx> + Clone, + Key: TableObject<'tx>, + Value: TableObjectOwned, { - type Item = ReadResult<(Key, [u8; VALUE_SIZE])>; + type Item = ReadResult>; fn next(&mut self) -> Option { self.owned_next().transpose() } + + fn size_hint(&self) -> (usize, Option) { + if self.exhausted || self.value_size == 0 { + return (0, Some(0)); + } + // remaining tracks values left for current key + (self.remaining, None) + } } diff --git a/src/tx/iter/dupfixed_key.rs b/src/tx/iter/dupfixed_key.rs index 480bacc..cf48e5b 100644 --- a/src/tx/iter/dupfixed_key.rs +++ b/src/tx/iter/dupfixed_key.rs @@ -1,6 +1,6 @@ //! Single-key flattening iterator for DUPFIXED tables. -use crate::{Cursor, ReadResult, TransactionKind}; +use crate::{Cursor, ReadResult, TableObjectOwned, TransactionKind}; use std::{borrow::Cow, marker::PhantomData}; /// A single-key flattening iterator over DUPFIXED tables. @@ -14,21 +14,13 @@ use std::{borrow::Cow, marker::PhantomData}; /// - `'tx`: The transaction lifetime /// - `'cur`: The cursor lifetime /// - `K`: The transaction kind marker -/// - `VALUE_SIZE`: The fixed size of each value in bytes +/// - `Value`: The value type (must implement [`TableObjectOwned`]) /// /// # Correctness /// -/// The `VALUE_SIZE` const generic **must** match the fixed value size stored -/// in the database. MDBX does not validate this at runtime. If mismatched: -/// -/// - **Too large**: Values are skipped; the iterator yields fewer items than -/// exist, potentially with misaligned data. -/// - **Too small**: The iterator yields more items than exist, each containing -/// partial or corrupted data from adjacent values. -/// - **Zero**: Causes an infinite loop (caught by debug assertion). -/// -/// The correct value size is determined by the size of values written to the -/// DUPFIXED database. All values under a given key must have the same size. +/// The value size is determined at construction time from the first value +/// in the database. All values in a DUPFIXED database must have the same +/// size. /// /// # Zero-Copy Operation /// @@ -36,79 +28,97 @@ use std::{borrow::Cow, marker::PhantomData}; /// - In read-only transactions, values are borrowed directly from memory-mapped pages /// - In read-write transactions with clean pages, values are also borrowed /// - Only dirty pages (modified but not committed) require copying -pub struct IterDupFixedOfKey<'tx, 'cur, K: TransactionKind, const VALUE_SIZE: usize = 0> { - cursor: Cow<'cur, Cursor<'tx, K>>, +pub struct IterDupFixedOfKey<'tx, 'cur, K: TransactionKind, Value = Vec> { + cursor: &'cur mut Cursor<'tx, K>, /// The current page of values. current_page: Cow<'tx, [u8]>, /// Current offset into the page, incremented as values are yielded. page_offset: usize, + /// The fixed value size, determined at construction. + value_size: usize, + /// Values remaining for the current key. + remaining: usize, /// When true, the iterator is exhausted and will always return `None`. exhausted: bool, - _marker: PhantomData ()>, + _marker: PhantomData Value>, } -impl core::fmt::Debug for IterDupFixedOfKey<'_, '_, K, VALUE_SIZE> +impl core::fmt::Debug for IterDupFixedOfKey<'_, '_, K, Value> where K: TransactionKind, { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - let remaining = self.current_page.len().saturating_sub(self.page_offset) / VALUE_SIZE; + let remaining_in_page = if self.value_size > 0 { + self.current_page.len().saturating_sub(self.page_offset) / self.value_size + } else { + 0 + }; f.debug_struct("IterDupFixedOfKey") .field("exhausted", &self.exhausted) - .field("remaining_in_page", &remaining) + .field("value_size", &self.value_size) + .field("remaining_in_page", &remaining_in_page) + .field("remaining_for_key", &self.remaining) .finish() } } -impl<'tx: 'cur, 'cur, K, const VALUE_SIZE: usize> IterDupFixedOfKey<'tx, 'cur, K, VALUE_SIZE> +impl<'tx: 'cur, 'cur, K, Value> IterDupFixedOfKey<'tx, 'cur, K, Value> where K: TransactionKind, { + /// Returns the fixed value size (determined at construction). + pub const fn value_size(&self) -> usize { + self.value_size + } + /// Create a new, exhausted iterator. /// /// Iteration will immediately return `None`. - pub(crate) fn new_end(cursor: Cow<'cur, Cursor<'tx, K>>) -> Self { + pub(crate) fn new_end(cursor: &'cur mut Cursor<'tx, K>) -> Self { IterDupFixedOfKey { cursor, current_page: Cow::Borrowed(&[]), page_offset: 0, + value_size: 0, + remaining: 0, exhausted: true, _marker: PhantomData, } } - /// Create a new, exhausted iterator from a mutable reference to the cursor. - pub(crate) fn end_from_ref(cursor: &'cur mut Cursor<'tx, K>) -> Self { - Self::new_end(Cow::Borrowed(cursor)) - } - - /// Create a new iterator with the given initial page. - pub(crate) fn new_with(cursor: Cow<'cur, Cursor<'tx, K>>, page: Cow<'tx, [u8]>) -> Self { + /// Create a new iterator with the given initial page and value size. + pub(crate) fn new_with( + cursor: &'cur mut Cursor<'tx, K>, + page: Cow<'tx, [u8]>, + value_size: usize, + ) -> Self { + let remaining = cursor.dup_count().unwrap_or(1); IterDupFixedOfKey { cursor, current_page: page, page_offset: 0, + value_size, + remaining, exhausted: false, _marker: PhantomData, } } - - /// Create a new iterator from a mutable reference with initial page. - pub(crate) fn from_ref_with(cursor: &'cur mut Cursor<'tx, K>, page: Cow<'tx, [u8]>) -> Self { - Self::new_with(Cow::Borrowed(cursor), page) - } } -impl<'tx: 'cur, 'cur, K, const VALUE_SIZE: usize> IterDupFixedOfKey<'tx, 'cur, K, VALUE_SIZE> +impl<'tx: 'cur, 'cur, K, Value> IterDupFixedOfKey<'tx, 'cur, K, Value> where K: TransactionKind, { /// Consume the next value from the current page. /// - /// Returns `Some(Cow<'tx, [u8]>)` containing exactly `VALUE_SIZE` bytes, + /// Returns `Some(Cow<'tx, [u8]>)` containing exactly `value_size` bytes, /// or `None` if the page is exhausted. fn consume_value(&mut self) -> Option> { - let end = self.page_offset.checked_add(VALUE_SIZE)?; + if self.value_size == 0 { + return None; + } + + let end = self.page_offset.checked_add(self.value_size)?; if end > self.current_page.len() { return None; } @@ -131,10 +141,8 @@ where /// /// Returns `Ok(true)` if a new page was fetched, `Ok(false)` if exhausted. fn fetch_next_page(&mut self) -> ReadResult { - let cursor = self.cursor.to_mut(); - // Try to get next page for current key - if let Some((_key, page)) = cursor.next_multiple::<(), Cow<'tx, [u8]>>()? { + if let Some((_key, page)) = self.cursor.next_multiple::<(), Cow<'tx, [u8]>>()? { self.current_page = page; self.page_offset = 0; return Ok(true); @@ -148,7 +156,7 @@ where /// Borrow the next value from the iterator. /// /// Returns `Ok(Some(value))` where `value` is a `Cow<'tx, [u8]>` of exactly - /// `VALUE_SIZE` bytes. + /// `value_size` bytes. /// /// Returns `Ok(None)` when the iterator is exhausted. pub fn borrow_next(&mut self) -> ReadResult>> { @@ -158,6 +166,7 @@ where // Try to consume from current page if let Some(value) = self.consume_value() { + self.remaining = self.remaining.saturating_sub(1); return Ok(Some(value)); } @@ -168,32 +177,38 @@ where // Consume first value from new page let value = self.consume_value().expect("freshly fetched page should have values"); + self.remaining = self.remaining.saturating_sub(1); Ok(Some(value)) } /// Get the next value as owned data. /// - /// Returns `Ok(Some([u8; VALUE_SIZE]))` where the value is copied - /// into a fixed-size array. - pub fn owned_next(&mut self) -> ReadResult> { - self.borrow_next().map(|opt| { - opt.map(|value| { - let mut arr = [0u8; VALUE_SIZE]; - arr.copy_from_slice(&value); - arr - }) - }) + /// Returns `Ok(Some(Value))` where the value is decoded using + /// [`TableObjectOwned::decode`]. + pub fn owned_next(&mut self) -> ReadResult> + where + Value: TableObjectOwned, + { + self.borrow_next()?.map(|cow| Value::decode(&cow)).transpose() } } -impl<'tx: 'cur, 'cur, K, const VALUE_SIZE: usize> Iterator - for IterDupFixedOfKey<'tx, 'cur, K, VALUE_SIZE> +impl<'tx: 'cur, 'cur, K, Value> Iterator for IterDupFixedOfKey<'tx, 'cur, K, Value> where K: TransactionKind, + Value: TableObjectOwned, { - type Item = ReadResult<[u8; VALUE_SIZE]>; + type Item = ReadResult; fn next(&mut self) -> Option { self.owned_next().transpose() } + + fn size_hint(&self) -> (usize, Option) { + if self.exhausted || self.value_size == 0 { + return (0, Some(0)); + } + // remaining tracks values left for current key + (self.remaining, Some(self.remaining)) + } } diff --git a/src/tx/iter/mod.rs b/src/tx/iter/mod.rs index 0cdf617..57b3859 100644 --- a/src/tx/iter/mod.rs +++ b/src/tx/iter/mod.rs @@ -5,11 +5,13 @@ //! //! # Iterator Types //! -//! - [`Iter`]: Base iterator with configurable cursor operation -//! - [`IterKeyVals`]: Iterates over all key-value pairs (`MDBX_NEXT`) -//! - [`IterDupKeys`]: For `DUPSORT` databases, yields first value per key -//! - [`IterDupVals`]: For `DUPSORT` databases, yields all values for one key -//! - [`IterDup`]: Nested iteration over `DUPSORT` databases +//! | Iterator | Yields | Use Case | +//! |----------|--------|----------| +//! | [`Iter`] | `(Key, Value)` | Base iterator, configurable cursor op | +//! | [`IterDup`] | `(Key, Value)` | Flat iteration over DUPSORT tables | +//! | [`IterDupOfKey`] | `Value` | Single-key DUPSORT iteration | +//! | [`IterDupFixed`] | `(Key, Value)` | Flat iteration over DUPFIXED tables | +//! | [`IterDupFixedOfKey`] | `Value` | Single-key DUPFIXED iteration | //! //! # Borrowing vs Owning //! @@ -54,10 +56,6 @@ //! println!("{:?} => {:?}", key, value); //! } //! ``` -//! -//! [`IterKeyVals`]: crate::tx::aliases::IterKeyVals -//! [`IterDupVals`]: crate::tx::aliases::IterDupVals -//! [`IterDupKeys`]: crate::tx::aliases::IterDupKeys mod base; pub use base::Iter; @@ -65,8 +63,53 @@ pub use base::Iter; mod dup; pub use dup::IterDup; +mod dup_key; +pub use dup_key::IterDupOfKey; + mod dupfixed; pub use dupfixed::IterDupFixed; mod dupfixed_key; pub use dupfixed_key::IterDupFixedOfKey; + +/// An item from a duplicate-key iterator. +/// +/// This enum avoids cloning the key for every value when iterating +/// over databases with duplicate keys. The key is only provided when +/// it changes. +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum DupItem { + /// First value for a new key. + NewKey(K, V), + /// Additional value for the current key. + SameKey(V), +} + +impl DupItem { + /// Returns the value, consuming self. + pub fn into_value(self) -> V { + match self { + Self::NewKey(_, v) | Self::SameKey(v) => v, + } + } + + /// Returns a reference to the value. + pub const fn value(&self) -> &V { + match self { + Self::NewKey(_, v) | Self::SameKey(v) => v, + } + } + + /// Returns the key if this is a new key entry. + pub const fn key(&self) -> Option<&K> { + match self { + Self::NewKey(k, _) => Some(k), + Self::SameKey(_) => None, + } + } + + /// Returns true if this item represents a new key. + pub const fn is_new_key(&self) -> bool { + matches!(self, Self::NewKey(..)) + } +} diff --git a/src/tx/ops.rs b/src/tx/ops.rs index 1d47935..8e583ce 100644 --- a/src/tx/ops.rs +++ b/src/tx/ops.rs @@ -402,6 +402,25 @@ pub(crate) unsafe fn debug_assert_append( crate::tx::assertions::debug_assert_append(pagesize, flags, key, data, last_key.as_deref()); } +/// Get the count of duplicates for the current key. +/// +/// Returns the number of duplicate values for the key at the current cursor +/// position. For databases without `DUP_SORT`, this always returns 1. +/// +/// # Safety +/// +/// - `cursor` must be a valid, non-null cursor pointer. +/// - Must be called within a `with_txn_ptr` block. +#[inline(always)] +pub(crate) unsafe fn cursor_dup_count(cursor: *mut ffi::MDBX_cursor) -> MdbxResult { + let mut count: usize = 0; + // SAFETY: Caller guarantees cursor is valid. + match unsafe { ffi::mdbx_cursor_count(cursor, &mut count) } { + ffi::MDBX_SUCCESS => Ok(count), + err_code => Err(crate::MdbxError::from_err_code(err_code)), + } +} + /// All-in-one append_dup assertion: opens cursor, gets last dup, asserts, closes cursor. /// /// # Safety diff --git a/tests/cursor.rs b/tests/cursor.rs index 2a8215e..e2aa075 100644 --- a/tests/cursor.rs +++ b/tests/cursor.rs @@ -2,7 +2,7 @@ mod common; use common::{TestRoTxn, TestRwTxn, V1Factory, V2Factory}; use signet_libmdbx::{ - Cursor, DatabaseFlags, Environment, MdbxError, MdbxResult, ObjectLength, ReadResult, + Cursor, DatabaseFlags, DupItem, Environment, MdbxError, MdbxResult, ObjectLength, ReadResult, TransactionKind, WriteFlags, }; use std::{borrow::Cow, hint::black_box}; @@ -11,6 +11,29 @@ use tempfile::tempdir; /// Convenience type Result = ReadResult; +/// Helper to collect DupItem iterators into (key, value) pairs. +/// +/// This reconstructs the key for SameKey items from the most recent NewKey. +fn collect_dup_items( + iter: impl Iterator>>, +) -> ReadResult> { + let mut current_key: Option = None; + iter.map(|r| { + let item = r?; + match item { + DupItem::NewKey(k, v) => { + current_key = Some(k.clone()); + Ok((k, v)) + } + DupItem::SameKey(v) => { + let k = current_key.clone().expect("SameKey without prior NewKey"); + Ok((k, v)) + } + } + }) + .collect() +} + // ============================================================================= // Dual-variant tests (run for both V1 and V2) // ============================================================================= @@ -272,10 +295,10 @@ fn test_iter_empty_dup_database_impl( assert!(cursor.iter_start::<(), ()>().unwrap().next().is_none()); assert!(cursor.iter_from::<(), ()>(b"foo").unwrap().next().is_none()); assert!(cursor.iter_from::<(), ()>(b"foo").unwrap().next().is_none()); - assert!(cursor.iter_dup::<(), ()>().flatten().flatten().next().is_none()); - assert!(cursor.iter_dup_start::<(), ()>().unwrap().flatten().flatten().next().is_none()); - assert!(cursor.iter_dup_from::<(), ()>(b"foo").unwrap().flatten().flatten().next().is_none()); - assert!(cursor.iter_dup_of::<(), ()>(b"foo").unwrap().next().is_none()); + assert!(cursor.iter_dup::<(), ()>().next().is_none()); + assert!(cursor.iter_dup_start::<(), ()>().unwrap().next().is_none()); + assert!(cursor.iter_dup_from::<(), ()>(b"foo").unwrap().next().is_none()); + assert!(cursor.iter_dup_of::<()>(b"foo").unwrap().next().is_none()); } #[test] @@ -332,69 +355,45 @@ fn test_iter_dup_impl( let txn = begin_ro(&env).unwrap(); let db = txn.open_db(None).unwrap(); let mut cursor = txn.cursor(db).unwrap(); - assert_eq!(items, cursor.iter_dup().flatten().flatten().collect::>>().unwrap()); + assert_eq!(items, collect_dup_items(cursor.iter_dup()).unwrap()); + // After set(b"b"), cursor is at ("b", "1"). iter_dup() starts from NEXT value. + // With flat iteration, this means ("b", "2"), ("b", "3"), then all of "c" and "e". cursor.set::<()>(b"b").unwrap(); assert_eq!( - items.iter().copied().skip(6).collect::>(), - cursor.iter_dup().flatten().flatten().collect::>>().unwrap() + items.iter().copied().skip(4).collect::>(), + collect_dup_items(cursor.iter_dup()).unwrap() ); - assert_eq!( - items, - cursor.iter_dup_start().unwrap().flatten().flatten().collect::>>().unwrap() - ); + assert_eq!(items, collect_dup_items(cursor.iter_dup_start().unwrap()).unwrap()); assert_eq!( items.iter().copied().skip(3).collect::>(), - cursor - .iter_dup_from(b"b") - .unwrap() - .flatten() - .flatten() - .collect::>>() - .unwrap() + collect_dup_items(cursor.iter_dup_from(b"b").unwrap()).unwrap() ); assert_eq!( items.iter().copied().skip(3).collect::>(), - cursor - .iter_dup_from(b"ab") - .unwrap() - .flatten() - .flatten() - .collect::>>() - .unwrap() + collect_dup_items(cursor.iter_dup_from(b"ab").unwrap()).unwrap() ); assert_eq!( items.iter().copied().skip(9).collect::>(), - cursor - .iter_dup_from(b"d") - .unwrap() - .flatten() - .flatten() - .collect::>>() - .unwrap() + collect_dup_items(cursor.iter_dup_from(b"d").unwrap()).unwrap() ); assert_eq!( Vec::<([u8; 1], [u8; 1])>::new(), - cursor - .iter_dup_from(b"f") - .unwrap() - .flatten() - .flatten() - .collect::>>() - .unwrap() + collect_dup_items(cursor.iter_dup_from(b"f").unwrap()).unwrap() ); + // iter_dup_of yields just values, not (key, value) tuples assert_eq!( - items.iter().copied().skip(3).take(3).collect::>(), - cursor.iter_dup_of(b"b").unwrap().collect::>>().unwrap() + items.iter().copied().skip(3).take(3).map(|(_, v)| v).collect::>(), + cursor.iter_dup_of::<[u8; 1]>(b"b").unwrap().collect::>>().unwrap() ); - assert_eq!(0, cursor.iter_dup_of::<(), ()>(b"foo").unwrap().count()); + assert_eq!(0, cursor.iter_dup_of::<()>(b"foo").unwrap().count()); } #[test] @@ -424,7 +423,7 @@ fn test_iter_del_get_impl( assert_eq!( txn.cursor(db) .unwrap() - .iter_dup_of::<(), ()>(b"a") + .iter_dup_of::<()>(b"a") .unwrap() .collect::>>() .unwrap() @@ -446,14 +445,12 @@ fn test_iter_del_get_impl( let txn = begin_rw(&env).unwrap(); let db = txn.open_db(None).unwrap(); let mut cursor = txn.cursor(db).unwrap(); - assert_eq!( - items, - cursor.iter_dup_start().unwrap().flatten().flatten().collect::>>().unwrap() - ); + assert_eq!(items, collect_dup_items(cursor.iter_dup_start().unwrap()).unwrap()); + // iter_dup_of yields just values, not (key, value) tuples assert_eq!( - items.iter().copied().take(1).collect::>(), - cursor.iter_dup_of(b"a").unwrap().collect::>>().unwrap() + items.iter().copied().take(1).map(|(_, v)| v).collect::>(), + cursor.iter_dup_of::<[u8; 1]>(b"a").unwrap().collect::>>().unwrap() ); assert_eq!(cursor.set(b"a").unwrap(), Some(*b"1")); @@ -461,12 +458,7 @@ fn test_iter_del_get_impl( cursor.del().unwrap(); assert_eq!( - cursor - .iter_dup_of::<[u8; 1], [u8; 1]>(b"a") - .unwrap() - .collect::>>() - .unwrap() - .len(), + cursor.iter_dup_of::<[u8; 1]>(b"a").unwrap().collect::>>().unwrap().len(), 0 ); } @@ -893,7 +885,7 @@ fn test_iter_dupfixed_basic_impl( let mut cursor = txn.cursor(db).unwrap(); let results: Vec<(Vec, [u8; 4])> = - cursor.iter_dupfixed_start::, 4>().unwrap().map(|r| r.unwrap()).collect(); + collect_dup_items(cursor.iter_dupfixed_start::, [u8; 4]>().unwrap()).unwrap(); assert_eq!(results.len(), 5); assert_eq!(results[0], (b"key1".to_vec(), 1u32.to_le_bytes())); @@ -940,7 +932,7 @@ fn test_iter_dupfixed_from_impl( let mut cursor = txn.cursor(db).unwrap(); let results: Vec<(Vec, [u8; 4])> = - cursor.iter_dupfixed_from::, 4>(b"bbb").unwrap().map(|r| r.unwrap()).collect(); + collect_dup_items(cursor.iter_dupfixed_from::, [u8; 4]>(b"bbb").unwrap()).unwrap(); assert_eq!(results.len(), 3); assert_eq!(results[0], (b"bbb".to_vec(), 2u32.to_le_bytes())); @@ -978,7 +970,7 @@ fn test_iter_dupfixed_empty_impl( let mut cursor = txn.cursor(db).unwrap(); let results: Vec<(Vec, [u8; 4])> = - cursor.iter_dupfixed_start::, 4>().unwrap().map(|r| r.unwrap()).collect(); + collect_dup_items(cursor.iter_dupfixed_start::, [u8; 4]>().unwrap()).unwrap(); assert!(results.is_empty()); } @@ -1028,7 +1020,7 @@ fn test_iter_dupfixed_many_values_impl( let mut cursor = txn.cursor(db).unwrap(); let results: Vec<(Vec, [u8; 4])> = - cursor.iter_dupfixed_start::, 4>().unwrap().map(|r| r.unwrap()).collect(); + collect_dup_items(cursor.iter_dupfixed_start::, [u8; 4]>().unwrap()).unwrap(); // Verify count assert_eq!(results.len(), 1000); @@ -1083,7 +1075,7 @@ fn test_iter_dupfixed_from_nonexistent_key_impl( // Start from "bbb" which doesn't exist - should find "ccc" let results: Vec<(Vec, [u8; 4])> = - cursor.iter_dupfixed_from::, 4>(b"bbb").unwrap().map(|r| r.unwrap()).collect(); + collect_dup_items(cursor.iter_dupfixed_from::, [u8; 4]>(b"bbb").unwrap()).unwrap(); assert_eq!(results.len(), 1); assert_eq!(results[0], (b"ccc".to_vec(), 2u32.to_le_bytes())); @@ -1120,7 +1112,7 @@ fn test_iter_dupfixed_from_past_end_impl( // Start from "zzz" which is past all keys let results: Vec<(Vec, [u8; 4])> = - cursor.iter_dupfixed_from::, 4>(b"zzz").unwrap().map(|r| r.unwrap()).collect(); + collect_dup_items(cursor.iter_dupfixed_from::, [u8; 4]>(b"zzz").unwrap()).unwrap(); assert!(results.is_empty()); } @@ -1173,7 +1165,7 @@ fn test_iter_dupfixed_of_impl( let mut cursor = txn.cursor(db).unwrap(); let results: Vec<[u8; 4]> = - cursor.iter_dupfixed_of::<4>(b"key2").unwrap().map(|r| r.unwrap()).collect(); + cursor.iter_dupfixed_of::<[u8; 4]>(b"key2").unwrap().map(|r| r.unwrap()).collect(); // Should only contain key2's values assert_eq!(results.len(), 2); @@ -1214,7 +1206,7 @@ fn test_iter_dupfixed_of_nonexistent_key_impl( // Seek nonexistent key "bbb" - should return empty iterator let results: Vec<[u8; 4]> = - cursor.iter_dupfixed_of::<4>(b"bbb").unwrap().map(|r| r.unwrap()).collect(); + cursor.iter_dupfixed_of::<[u8; 4]>(b"bbb").unwrap().map(|r| r.unwrap()).collect(); assert!(results.is_empty()); } @@ -1264,7 +1256,7 @@ fn test_iter_dupfixed_of_many_values_impl( let mut cursor = txn.cursor(db).unwrap(); let results: Vec<[u8; 4]> = - cursor.iter_dupfixed_of::<4>(b"target").unwrap().map(|r| r.unwrap()).collect(); + cursor.iter_dupfixed_of::<[u8; 4]>(b"target").unwrap().map(|r| r.unwrap()).collect(); // Verify count - should be exactly 1000 assert_eq!(results.len(), 1000); @@ -1535,6 +1527,6 @@ mod append_debug_tests { let txn = env.begin_ro_sync().unwrap(); let db = txn.open_db(None).unwrap(); let mut cursor = txn.cursor(db).unwrap(); - let _ = cursor.iter_dupfixed_start::, 4>(); + let _ = cursor.iter_dupfixed_start::, [u8; 4]>(); } } diff --git a/tests/proptest_inputs.rs b/tests/proptest_inputs.rs index c3029d3..0644b93 100644 --- a/tests/proptest_inputs.rs +++ b/tests/proptest_inputs.rs @@ -480,8 +480,8 @@ proptest! { let mut cursor = txn.cursor(db).unwrap(); - // iter_dup_of should not panic - let result = cursor.iter_dup_of::, Vec>(&key); + // iter_dup_of should not panic (yields just values, not (key, value)) + let result = cursor.iter_dup_of::>(&key); prop_assert!(result.is_ok()); // Consuming the iterator should not panic @@ -503,14 +503,12 @@ proptest! { let mut cursor = txn.cursor(db).unwrap(); - // iter_dup_from should not panic + // iter_dup_from should not panic (now yields flat (key, value) pairs) let result = cursor.iter_dup_from::, Vec>(&key); prop_assert!(result.is_ok()); - // Consuming nested iterators should not panic - for inner in result.unwrap().flatten() { - let _ = inner.count(); - } + // Consuming iterator should not panic (no nested iteration anymore) + let _ = result.unwrap().count(); } /// Test iter_dup_of with arbitrary key does not panic (V2). @@ -527,7 +525,8 @@ proptest! { let mut cursor = txn.cursor(db).unwrap(); - let result = cursor.iter_dup_of::, Vec>(&key); + // iter_dup_of yields just values, not (key, value) + let result = cursor.iter_dup_of::>(&key); prop_assert!(result.is_ok()); let _ = result.unwrap().count(); @@ -548,12 +547,12 @@ proptest! { let mut cursor = txn.cursor(db).unwrap(); + // iter_dup_from now yields flat (key, value) pairs let result = cursor.iter_dup_from::, Vec>(&key); prop_assert!(result.is_ok()); - for inner in result.unwrap().flatten() { - let _ = inner.count(); - } + // No nested iteration anymore - just count the items + let _ = result.unwrap().count(); } } @@ -878,14 +877,10 @@ proptest! { // Skip if nothing was inserted prop_assume!(!inserted.is_empty()); - // Retrieve all values via iter_dup_of + // Retrieve all values via iter_dup_of (yields just values, not (key, value)) let mut cursor = txn.cursor(db).unwrap(); - let retrieved: Vec> = cursor - .iter_dup_of::, Vec>(&key) - .unwrap() - .filter_map(Result::ok) - .map(|(_, v)| v) - .collect(); + let retrieved: Vec> = + cursor.iter_dup_of::>(&key).unwrap().filter_map(Result::ok).collect(); // All inserted values should be retrieved (order is sorted by MDBX) inserted.sort(); @@ -924,13 +919,10 @@ proptest! { prop_assume!(!inserted.is_empty()); + // iter_dup_of yields just values, not (key, value) let mut cursor = txn.cursor(db).unwrap(); - let retrieved: Vec> = cursor - .iter_dup_of::, Vec>(&key) - .unwrap() - .filter_map(Result::ok) - .map(|(_, v)| v) - .collect(); + let retrieved: Vec> = + cursor.iter_dup_of::>(&key).unwrap().filter_map(Result::ok).collect(); inserted.sort(); let mut retrieved_sorted = retrieved.clone(); diff --git a/tests/transaction.rs b/tests/transaction.rs index 14e2a7c..435d256 100644 --- a/tests/transaction.rs +++ b/tests/transaction.rs @@ -79,8 +79,9 @@ fn test_put_get_del_multi_impl( let db = txn.open_db(None).unwrap(); { let mut cur = txn.cursor(db).unwrap(); - let iter = cur.iter_dup_of::<(), [u8; 4]>(b"key1").unwrap(); - let vals = iter.map(|x| x.unwrap()).map(|(_, x)| x).collect::>(); + // iter_dup_of now yields just values, not (key, value) tuples + let iter = cur.iter_dup_of::<[u8; 4]>(b"key1").unwrap(); + let vals = iter.map(|x| x.unwrap()).collect::>(); assert_eq!(vals, vec![*b"val1", *b"val2", *b"val3"]); } txn.commit().unwrap(); @@ -95,11 +96,12 @@ fn test_put_get_del_multi_impl( let db = txn.open_db(None).unwrap(); { let mut cur = txn.cursor(db).unwrap(); - let iter = cur.iter_dup_of::<(), [u8; 4]>(b"key1").unwrap(); - let vals = iter.map(|x| x.unwrap()).map(|(_, x)| x).collect::>(); + // iter_dup_of now yields just values, not (key, value) tuples + let iter = cur.iter_dup_of::<[u8; 4]>(b"key1").unwrap(); + let vals = iter.map(|x| x.unwrap()).collect::>(); assert_eq!(vals, vec![*b"val1", *b"val3"]); - let iter = cur.iter_dup_of::<[u8; 4], [u8; 4]>(b"key2").unwrap(); + let iter = cur.iter_dup_of::<[u8; 4]>(b"key2").unwrap(); assert_eq!(0, iter.count()); } txn.commit().unwrap();