From 1d028f87a411144e226c33fb319f89303ce66670 Mon Sep 17 00:00:00 2001 From: Joe Isaacs Date: Mon, 12 Jan 2026 14:54:02 +0000 Subject: [PATCH 1/7] wip[array]: pushdown struct validity on write Signed-off-by: Joe Isaacs --- vortex-array/src/arrays/struct_/array.rs | 91 +++++++++++++++++++ vortex-array/src/arrays/struct_/mod.rs | 1 + vortex-array/src/arrays/struct_/vtable/mod.rs | 29 ++++-- vortex-array/src/compute/zip.rs | 2 +- vortex-layout/src/layouts/compact.rs | 12 ++- vortex-layout/src/layouts/dict/mod.rs | 1 - vortex-layout/src/layouts/struct_/mod.rs | 42 ++++++++- vortex-layout/src/layouts/table.rs | 8 +- 8 files changed, 162 insertions(+), 24 deletions(-) diff --git a/vortex-array/src/arrays/struct_/array.rs b/vortex-array/src/arrays/struct_/array.rs index 4cbeb38bbbb..75329d004b8 100644 --- a/vortex-array/src/arrays/struct_/array.rs +++ b/vortex-array/src/arrays/struct_/array.rs @@ -3,6 +3,7 @@ use std::fmt::Debug; use std::iter::once; +use std::ops::Not; use std::sync::Arc; use vortex_dtype::DType; @@ -18,10 +19,20 @@ use vortex_error::vortex_err; use crate::Array; use crate::ArrayRef; use crate::IntoArray; +use crate::compute::mask; use crate::stats::ArrayStats; use crate::validity::Validity; use crate::vtable::ValidityHelper; +/// Metadata for StructArray serialization. +#[derive(Clone, prost::Message)] +pub struct StructMetadata { + /// true = child validity is a superset of struct validity (validity was pushed down) + /// false = default, no guarantee about relationship + #[prost(bool, tag = "1")] + pub(super) validity_pushed_down: bool, +} + /// A struct array that stores multiple named fields as columns, similar to a database row. /// /// This mirrors the Apache Arrow Struct array encoding and provides a columnar representation @@ -147,6 +158,9 @@ pub struct StructArray { pub(super) fields: Arc<[ArrayRef]>, pub(super) validity: Validity, pub(super) stats_set: ArrayStats, + /// true = child validity is a superset of struct validity (validity was pushed down) + /// false = default, no guarantee about relationship + pub(super) validity_pushed_down: bool, } pub struct StructArrayParts { @@ -286,6 +300,7 @@ impl StructArray { fields, validity, stats_set: Default::default(), + validity_pushed_down: false, } } @@ -473,4 +488,80 @@ impl StructArray { Self::try_new_with_dtype(children, new_fields, self.len, self.validity.clone()) } + + /// Returns whether validity has been pushed down into children. + /// + /// When true, child validity is a superset of struct validity (children include + /// the struct's nulls baked in). This is an optimization that allows readers to + /// skip combining struct+child validity when extracting fields. + pub fn has_validity_pushed_down(&self) -> bool { + self.validity_pushed_down + } + + /// Set the validity_pushed_down flag. + /// + /// For non-nullable structs, this is a no-op (flag stays false) since there's + /// no validity to push down. + /// + /// For nullable structs, setting this to true indicates that child validity + /// is a superset of struct validity (children include struct's nulls). + pub fn with_validity_pushed_down(mut self, validity_pushed_down: bool) -> Self { + // For non-nullable structs, the flag is meaningless - keep it false + if !self.dtype.is_nullable() { + return self; + } + self.validity_pushed_down = validity_pushed_down; + self + } + + /// Push struct validity down into each child field. + /// + /// For nullable structs with non-trivial validity, this applies the validity + /// mask to each **nullable** child field, making child validity a superset + /// of parent validity. Non-nullable children are left unchanged to preserve + /// their dtype. + /// + /// The struct validity is **preserved** (DType never changes). The + /// `validity_pushed_down` flag indicates that nullable children already include + /// the parent's nulls, so readers can skip combining validities for those fields. + /// + /// For non-nullable structs or trivial validity, this is essentially a no-op. + pub fn compact(&self) -> VortexResult { + // For non-nullable structs, nothing to push down + if !self.dtype.is_nullable() { + return Ok(self.clone()); + } + + // If validity is trivial (AllValid), nothing to push down + // but mark as pushed down since children trivially include parent validity + if self.validity.all_valid(self.len)? { + return Ok(self.clone().with_validity_pushed_down(true)); + } + + // Get the validity mask - mask() expects true = set to null, so we invert the validity + let validity_mask = self.validity_mask()?.not(); + + // Apply mask only to nullable children - non-nullable children cannot have their + // dtype changed, so we leave them alone + let new_fields: Vec = self + .fields() + .iter() + .map(|field| { + if field.dtype().is_nullable() { + mask(field.as_ref(), &validity_mask) + } else { + Ok(field.clone()) + } + }) + .collect::>()?; + + // Create new struct with same validity but updated children + Ok(StructArray::try_new( + self.names().clone(), + new_fields, + self.len(), + self.validity.clone(), // Keep original validity + )? + .with_validity_pushed_down(true)) + } } diff --git a/vortex-array/src/arrays/struct_/mod.rs b/vortex-array/src/arrays/struct_/mod.rs index 85123d8053a..f7ff8c07b1e 100644 --- a/vortex-array/src/arrays/struct_/mod.rs +++ b/vortex-array/src/arrays/struct_/mod.rs @@ -4,6 +4,7 @@ mod array; pub use array::StructArray; pub use array::StructArrayParts; +pub use array::StructMetadata; mod compute; mod vtable; diff --git a/vortex-array/src/arrays/struct_/vtable/mod.rs b/vortex-array/src/arrays/struct_/vtable/mod.rs index abc95c2d518..d61a3b323b5 100644 --- a/vortex-array/src/arrays/struct_/vtable/mod.rs +++ b/vortex-array/src/arrays/struct_/vtable/mod.rs @@ -13,10 +13,13 @@ use vortex_error::vortex_ensure; use crate::ArrayRef; use crate::Canonical; -use crate::EmptyMetadata; +use crate::DeserializeMetadata; use crate::ExecutionCtx; use crate::IntoArray; +use crate::ProstMetadata; +use crate::SerializeMetadata; use crate::arrays::struct_::StructArray; +use crate::arrays::struct_::StructMetadata; use crate::arrays::struct_::vtable::rules::PARENT_RULES; use crate::buffer::BufferHandle; use crate::serde::ArrayChildren; @@ -40,7 +43,7 @@ vtable!(Struct); impl VTable for StructVTable { type Array = StructArray; - type Metadata = EmptyMetadata; + type Metadata = ProstMetadata; type ArrayVTable = Self; type OperationsVTable = Self; @@ -52,22 +55,25 @@ impl VTable for StructVTable { Self::ID } - fn metadata(_array: &StructArray) -> VortexResult { - Ok(EmptyMetadata) + fn metadata(array: &StructArray) -> VortexResult { + Ok(ProstMetadata(StructMetadata { + validity_pushed_down: array.validity_pushed_down, + })) } - fn serialize(_metadata: Self::Metadata) -> VortexResult>> { - Ok(Some(vec![])) + fn serialize(metadata: Self::Metadata) -> VortexResult>> { + Ok(Some(metadata.serialize())) } - fn deserialize(_buffer: &[u8]) -> VortexResult { - Ok(EmptyMetadata) + fn deserialize(buffer: &[u8]) -> VortexResult { + let metadata = ::deserialize(buffer)?; + Ok(ProstMetadata(metadata)) } fn build( dtype: &DType, len: usize, - _metadata: &Self::Metadata, + metadata: &Self::Metadata, _buffers: &[BufferHandle], children: &dyn ArrayChildren, ) -> VortexResult { @@ -99,7 +105,10 @@ impl VTable for StructVTable { }) .try_collect()?; - StructArray::try_new_with_dtype(children, struct_dtype.clone(), len, validity) + Ok( + StructArray::try_new_with_dtype(children, struct_dtype.clone(), len, validity)? + .with_validity_pushed_down(metadata.validity_pushed_down), + ) } fn with_children(array: &mut Self::Array, children: Vec) -> VortexResult<()> { diff --git a/vortex-array/src/compute/zip.rs b/vortex-array/src/compute/zip.rs index 49ad2b19a1d..e6f27e7539c 100644 --- a/vortex-array/src/compute/zip.rs +++ b/vortex-array/src/compute/zip.rs @@ -347,7 +347,7 @@ mod tests { let wrapped_result = zip(&wrapped1, &wrapped2, &mask).unwrap(); insta::assert_snapshot!(wrapped_result.display_tree(), @r" root: vortex.struct({nested=utf8?}, len=100) nbytes=1.66 kB (100.00%) - metadata: EmptyMetadata + metadata: StructMetadata { validity_pushed_down: false } nested: vortex.varbinview(utf8?, len=100) nbytes=1.66 kB (100.00%) [all_valid] metadata: EmptyMetadata buffer (align=1): 29 B (1.75%) diff --git a/vortex-layout/src/layouts/compact.rs b/vortex-layout/src/layouts/compact.rs index 6f55174f391..20785172594 100644 --- a/vortex-layout/src/layouts/compact.rs +++ b/vortex-layout/src/layouts/compact.rs @@ -123,19 +123,23 @@ impl CompactCompressor { .into_array() } Canonical::Struct(struct_array) => { + // Push validity into children before compressing + let compacted = struct_array.compact()?; + // recurse - let fields = struct_array + let fields = compacted .unmasked_fields() .iter() .map(|field| self.compress(field)) .collect::>>()?; StructArray::try_new( - struct_array.names().clone(), + compacted.names().clone(), fields, - struct_array.len(), - struct_array.validity().clone(), + compacted.len(), + compacted.validity().clone(), )? + .with_validity_pushed_down(compacted.has_validity_pushed_down()) .into_array() } Canonical::List(listview) => { diff --git a/vortex-layout/src/layouts/dict/mod.rs b/vortex-layout/src/layouts/dict/mod.rs index 4a29f04db80..13f28d51249 100644 --- a/vortex-layout/src/layouts/dict/mod.rs +++ b/vortex-layout/src/layouts/dict/mod.rs @@ -1,4 +1,3 @@ -// SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright the Vortex contributors mod reader; diff --git a/vortex-layout/src/layouts/struct_/mod.rs b/vortex-layout/src/layouts/struct_/mod.rs index bb2329ed73c..812db5629f1 100644 --- a/vortex-layout/src/layouts/struct_/mod.rs +++ b/vortex-layout/src/layouts/struct_/mod.rs @@ -9,7 +9,7 @@ use std::sync::Arc; use reader::StructReader; use vortex_array::ArrayContext; use vortex_array::DeserializeMetadata; -use vortex_array::EmptyMetadata; +use vortex_array::ProstMetadata; use vortex_dtype::DType; use vortex_dtype::Field; use vortex_dtype::FieldMask; @@ -35,12 +35,21 @@ use crate::segments::SegmentId; use crate::segments::SegmentSource; use crate::vtable; +/// Metadata for StructLayout serialization. +#[derive(Clone, prost::Message)] +pub struct StructLayoutMetadata { + /// true = child validity is a superset of struct validity (validity was pushed down) + /// false = default, no guarantee about relationship + #[prost(bool, tag = "1")] + pub(crate) validity_pushed_down: bool, +} + vtable!(Struct); impl VTable for StructVTable { type Layout = StructLayout; type Encoding = StructLayoutEncoding; - type Metadata = EmptyMetadata; + type Metadata = ProstMetadata; fn id(_encoding: &Self::Encoding) -> LayoutId { LayoutId::new_ref("vortex.struct") @@ -58,8 +67,10 @@ impl VTable for StructVTable { &layout.dtype } - fn metadata(_layout: &Self::Layout) -> Self::Metadata { - EmptyMetadata + fn metadata(layout: &Self::Layout) -> Self::Metadata { + ProstMetadata(StructLayoutMetadata { + validity_pushed_down: layout.validity_pushed_down, + }) } fn segment_ids(_layout: &Self::Layout) -> Vec { @@ -145,7 +156,7 @@ impl VTable for StructVTable { _encoding: &Self::Encoding, dtype: &DType, row_count: u64, - _metadata: &::Output, + metadata: &::Output, _segment_ids: Vec, children: &dyn LayoutChildren, _ctx: &ArrayContext, @@ -166,6 +177,7 @@ impl VTable for StructVTable { row_count, dtype: dtype.clone(), children: children.to_arc(), + validity_pushed_down: metadata.validity_pushed_down, }) } @@ -196,6 +208,9 @@ pub struct StructLayout { row_count: u64, dtype: DType, children: Arc, + /// true = child validity is a superset of struct validity (validity was pushed down) + /// false = default, no guarantee about relationship + validity_pushed_down: bool, } impl StructLayout { @@ -204,6 +219,7 @@ impl StructLayout { row_count, dtype, children: OwnedLayoutChildren::layout_children(children), + validity_pushed_down: false, } } @@ -223,6 +239,22 @@ impl StructLayout { &self.children } + /// Returns whether validity has been pushed down into children. + pub fn has_validity_pushed_down(&self) -> bool { + self.validity_pushed_down + } + + /// Set the validity_pushed_down flag. + /// + /// For non-nullable structs, this is a no-op (flag stays false). + pub fn with_validity_pushed_down(mut self, validity_pushed_down: bool) -> Self { + if !self.dtype.is_nullable() { + return self; + } + self.validity_pushed_down = validity_pushed_down; + self + } + pub fn matching_fields(&self, field_mask: &[FieldMask], mut per_child: F) -> VortexResult<()> where F: FnMut(FieldMask, usize) -> VortexResult<()>, diff --git a/vortex-layout/src/layouts/table.rs b/vortex-layout/src/layouts/table.rs index 13ad7f8f51d..4a7f758b828 100644 --- a/vortex-layout/src/layouts/table.rs +++ b/vortex-layout/src/layouts/table.rs @@ -246,17 +246,19 @@ impl LayoutStrategy for TableStrategy { let columns_vec_stream = stream.map(move |chunk| { let (sequence_id, chunk) = chunk?; let mut sequence_pointer = sequence_id.descend(); - let struct_chunk = chunk.to_struct(); + // Push struct validity into children before decomposing + let compacted = chunk.to_struct().compact()?; let mut columns: Vec<(SequenceId, ArrayRef)> = Vec::new(); + // Validity column is still written (DType unchanged, struct still nullable) if is_nullable { columns.push(( sequence_pointer.advance(), - chunk.validity_mask()?.into_array(), + compacted.validity_mask()?.into_array(), )); } columns.extend( - struct_chunk + compacted .unmasked_fields() .iter() .map(|field| (sequence_pointer.advance(), field.to_array())), From 003256a88523d3bda0cdfa8ac0354ff5f1a629f9 Mon Sep 17 00:00:00 2001 From: Joe Isaacs Date: Fri, 23 Jan 2026 13:46:36 +0000 Subject: [PATCH 2/7] wip Signed-off-by: Joe Isaacs --- vortex-array/src/arrays/struct_/array.rs | 130 ++++++++++++- vortex-array/src/arrays/struct_/tests.rs | 191 ++++++++++++++++++++ vortex-array/src/expr/exprs/get_item.rs | 9 +- vortex-layout/src/layouts/dict/mod.rs | 1 + vortex-layout/src/layouts/struct_/reader.rs | 6 +- 5 files changed, 316 insertions(+), 21 deletions(-) diff --git a/vortex-array/src/arrays/struct_/array.rs b/vortex-array/src/arrays/struct_/array.rs index 75329d004b8..7269174a88d 100644 --- a/vortex-array/src/arrays/struct_/array.rs +++ b/vortex-array/src/arrays/struct_/array.rs @@ -19,6 +19,7 @@ use vortex_error::vortex_err; use crate::Array; use crate::ArrayRef; use crate::IntoArray; +use crate::builtins::ArrayBuiltins; use crate::compute::mask; use crate::stats::ArrayStats; use crate::validity::Validity; @@ -27,8 +28,10 @@ use crate::vtable::ValidityHelper; /// Metadata for StructArray serialization. #[derive(Clone, prost::Message)] pub struct StructMetadata { - /// true = child validity is a superset of struct validity (validity was pushed down) - /// false = default, no guarantee about relationship + /// If true, child validity is a superset of struct validity (validity was pushed down). + /// For nullable children, their validity already includes struct nulls. For non-nullable + /// children, we apply struct validity on field read. If false (default), no guarantee + /// about relationship - must intersect validities on read. #[prost(bool, tag = "1")] pub(super) validity_pushed_down: bool, } @@ -171,12 +174,74 @@ pub struct StructArrayParts { } impl StructArray { - /// Return the struct fields without the validity of the struct applied + /// Note this field may not have the validity of the parent struct applied. + /// Should use `masked_fields` instead, unless you know what you are doing. pub fn unmasked_fields(&self) -> &Arc<[ArrayRef]> { &self.fields } - /// Return the struct field without the validity of the struct applied + pub fn masked_fields(&self) -> VortexResult> { + if !self.dtype.is_nullable() { + // fields need not be masked + return Ok(self.fields.to_vec()); + } + + if self.has_validity_pushed_down() { + self.fields + .iter() + .cloned() + .map(|f| { + if f.dtype().is_nullable() { + Ok(f.into_array()) + } else { + let validity = self.validity().to_array(self.len); + f.mask(validity) + } + }) + .collect::>>() + } else { + // Apply struct validity to all fields + let struct_validity = self.validity().to_array(self.len); + self.fields + .iter() + .map(move |f| f.clone().mask(struct_validity.clone())) + .collect::>>() + } + } + + pub fn field_by_name(&self, name: impl AsRef) -> VortexResult { + let name = name.as_ref(); + self.field_by_name_opt(name)?.ok_or_else(|| { + vortex_err!( + "Field {name} not found in struct array with names {:?}", + self.names() + ) + }) + } + + pub fn field_by_name_opt(&self, name: impl AsRef) -> VortexResult> { + let name = name.as_ref(); + self.struct_fields() + .find(name) + .map(|idx| { + let field = self.fields[idx].clone(); + // Non-nullable struct: return field as-is (no struct validity to apply) + if !self.dtype.is_nullable() { + return Ok(field); + } + // Non-nullable field: always apply struct validity (even with validity_pushed_down, + // since we can't push validity to non-nullable fields) + if !field.dtype().is_nullable() { + return field.mask(self.validity().to_array(self.len)); + } + // Nullable field: return as-is (validity is either in the field or was pushed down) + Ok(field) + }) + .transpose() + } + + /// Note this field may not have the validity of the parent struct applied. + /// Should use `field_by_name` instead. pub fn unmasked_field_by_name(&self, name: impl AsRef) -> VortexResult<&ArrayRef> { let name = name.as_ref(); self.unmasked_field_by_name_opt(name).ok_or_else(|| { @@ -187,7 +252,8 @@ impl StructArray { }) } - /// Return the struct field without the validity of the struct applied + /// Note this field may not have the validity of the parent struct applied. + /// Should use `field_by_name_opt` instead. pub fn unmasked_field_by_name_opt(&self, name: impl AsRef) -> Option<&ArrayRef> { let name = name.as_ref(); self.struct_fields().find(name).map(|idx| &self.fields[idx]) @@ -495,13 +561,54 @@ impl StructArray { /// the struct's nulls baked in). This is an optimization that allows readers to /// skip combining struct+child validity when extracting fields. pub fn has_validity_pushed_down(&self) -> bool { + #[cfg(debug_assertions)] + if self.validity_pushed_down { + self.check_validity_pushed_down() + .vortex_expect("validity_pushed_down invariant violated"); + } self.validity_pushed_down } + /// Checks that the validity_pushed_down invariant holds. + /// + /// When `validity_pushed_down` is true, for every nullable child field, + /// the child's validity must be a superset of the struct's validity. + /// That is, wherever the struct is invalid (null), the child must also be invalid. + fn check_validity_pushed_down(&self) -> VortexResult<()> { + if !self.validity_pushed_down { + return Ok(()); + } + + let struct_validity = self.validity_mask()?; + + for (idx, field) in self.fields.iter().enumerate() { + // Only check nullable children - non-nullable children cannot have validity pushed down + if !field.dtype().is_nullable() { + continue; + } + + let child_validity = field.validity_mask()?; + + // Check invariant: struct_invalid => child_invalid + // Equivalently: (!struct_validity) & child_validity should be all-false + // If struct is invalid (false) but child is valid (true), that's a violation + let violation = &(!&struct_validity) & &child_validity; + if !violation.all_false() { + vortex_bail!( + "validity_pushed_down invariant violated for field {}: \ + struct has nulls at positions where child is valid", + idx + ); + } + } + + Ok(()) + } + /// Set the validity_pushed_down flag. /// - /// For non-nullable structs, this is a no-op (flag stays false) since there's - /// no validity to push down. + /// For non-nullable structs, this is a no-op (flag stays false) since there's no validity to + /// push down /// /// For nullable structs, setting this to true indicates that child validity /// is a superset of struct validity (children include struct's nulls). @@ -511,6 +618,13 @@ impl StructArray { return self; } self.validity_pushed_down = validity_pushed_down; + + #[cfg(debug_assertions)] + if validity_pushed_down { + self.check_validity_pushed_down() + .vortex_expect("validity_pushed_down invariant violated"); + } + self } @@ -544,7 +658,7 @@ impl StructArray { // Apply mask only to nullable children - non-nullable children cannot have their // dtype changed, so we leave them alone let new_fields: Vec = self - .fields() + .unmasked_fields() .iter() .map(|field| { if field.dtype().is_nullable() { diff --git a/vortex-array/src/arrays/struct_/tests.rs b/vortex-array/src/arrays/struct_/tests.rs index 71cba8261f0..bfd35841a25 100644 --- a/vortex-array/src/arrays/struct_/tests.rs +++ b/vortex-array/src/arrays/struct_/tests.rs @@ -8,6 +8,7 @@ use vortex_dtype::FieldNames; use vortex_dtype::Nullability; use vortex_dtype::PType; use vortex_error::VortexResult; +use vortex_mask::Mask; use crate::Array; use crate::IntoArray; @@ -150,3 +151,193 @@ fn test_uncompressed_size_in_bytes() -> VortexResult<()> { assert_eq!(uncompressed_size, Some(4000)); Ok(()) } + +#[test] +fn test_masked_fields_without_validity_pushed_down() -> VortexResult<()> { + // Create a nullable struct with struct-level nulls at positions 1 and 3 + // Fields are non-nullable, so struct validity should be applied to all fields + let field_a = PrimitiveArray::new(buffer![10i32, 20, 30, 40], Validity::NonNullable); + let field_b = PrimitiveArray::new(buffer![100i32, 200, 300, 400], Validity::NonNullable); + + let struct_validity = Validity::Array( + BoolArray::from_iter([true, false, true, false]).into_array(), // positions 1, 3 are null + ); + let expected_mask = Mask::from_iter([true, false, true, false]); + + let struct_array = StructArray::try_new( + FieldNames::from(["a", "b"]), + vec![field_a.into_array(), field_b.into_array()], + 4, + struct_validity, + )?; + + // Verify validity is NOT pushed down by default + assert!(!struct_array.has_validity_pushed_down()); + + // === Test masked_fields === + let masked = struct_array.masked_fields()?; + assert_eq!(masked.len(), 2); + + // Both fields should have struct validity applied + assert!(masked[0].dtype().is_nullable()); + assert_eq!(masked[0].validity_mask()?, expected_mask); + assert!(masked[1].dtype().is_nullable()); + assert_eq!(masked[1].validity_mask()?, expected_mask); + + // === Test field_by_name === + let field_a_by_name = struct_array.field_by_name("a")?; + assert!(field_a_by_name.dtype().is_nullable()); + assert_eq!(field_a_by_name.validity_mask()?, expected_mask); + + let field_b_by_name = struct_array.field_by_name("b")?; + assert!(field_b_by_name.dtype().is_nullable()); + assert_eq!(field_b_by_name.validity_mask()?, expected_mask); + + Ok(()) +} + +#[test] +fn test_masked_fields_with_validity_pushed_down() -> VortexResult<()> { + // Create nullable fields where validity has already been pushed down + // (child nulls include struct nulls at positions 1 and 3) + let field_a = PrimitiveArray::new( + buffer![10i32, 20, 30, 40], + Validity::Array(BoolArray::from_iter([true, false, true, false]).into_array()), + ); + let field_b = PrimitiveArray::new( + buffer![100i32, 200, 300, 400], + Validity::Array(BoolArray::from_iter([true, false, true, false]).into_array()), + ); + + let struct_validity = + Validity::Array(BoolArray::from_iter([true, false, true, false]).into_array()); + let expected_mask = Mask::from_iter([true, false, true, false]); + + let struct_array = StructArray::try_new( + FieldNames::from(["a", "b"]), + vec![field_a.into_array(), field_b.into_array()], + 4, + struct_validity, + )? + .with_validity_pushed_down(true); + + // Verify validity IS pushed down + assert!(struct_array.has_validity_pushed_down()); + + // === Test masked_fields === + let masked = struct_array.masked_fields()?; + assert_eq!(masked.len(), 2); + + // Nullable fields returned as-is (validity already pushed) + assert!(masked[0].dtype().is_nullable()); + assert_eq!(masked[0].validity_mask()?, expected_mask); + assert!(masked[1].dtype().is_nullable()); + assert_eq!(masked[1].validity_mask()?, expected_mask); + + // === Test field_by_name === + let field_a_by_name = struct_array.field_by_name("a")?; + assert!(field_a_by_name.dtype().is_nullable()); + assert_eq!(field_a_by_name.validity_mask()?, expected_mask); + + let field_b_by_name = struct_array.field_by_name("b")?; + assert!(field_b_by_name.dtype().is_nullable()); + assert_eq!(field_b_by_name.validity_mask()?, expected_mask); + + Ok(()) +} + +#[test] +fn test_masked_fields_mixed_nullability_with_pushed_down() -> VortexResult<()> { + // Create a struct with mixed nullable and non-nullable fields + // Nullable field has validity pushed down, non-nullable field needs masking + let nullable_field = PrimitiveArray::new( + buffer![10i32, 20, 30, 40], + Validity::Array(BoolArray::from_iter([true, false, true, false]).into_array()), + ); + let non_nullable_field = + PrimitiveArray::new(buffer![100i32, 200, 300, 400], Validity::NonNullable); + + let struct_validity = + Validity::Array(BoolArray::from_iter([true, false, true, false]).into_array()); + let expected_mask = Mask::from_iter([true, false, true, false]); + + let struct_array = StructArray::try_new( + FieldNames::from(["nullable", "non_nullable"]), + vec![nullable_field.into_array(), non_nullable_field.into_array()], + 4, + struct_validity, + )? + .with_validity_pushed_down(true); + + assert!(struct_array.has_validity_pushed_down()); + + // === Test masked_fields === + let masked = struct_array.masked_fields()?; + + // Nullable field: returned as-is (validity already pushed down) + assert!(masked[0].dtype().is_nullable()); + assert_eq!(masked[0].validity_mask()?, expected_mask); + + // Non-nullable field: struct validity applied (becomes nullable) + assert!(masked[1].dtype().is_nullable()); + assert_eq!(masked[1].validity_mask()?, expected_mask); + + // === Test field_by_name === + let nullable_by_name = struct_array.field_by_name("nullable")?; + assert!(nullable_by_name.dtype().is_nullable()); + assert_eq!(nullable_by_name.validity_mask()?, expected_mask); + + let non_nullable_by_name = struct_array.field_by_name("non_nullable")?; + assert!(non_nullable_by_name.dtype().is_nullable()); + assert_eq!(non_nullable_by_name.validity_mask()?, expected_mask); + + Ok(()) +} + +#[test] +fn test_masked_fields_non_nullable_struct() -> VortexResult<()> { + // Non-nullable struct: fields should be returned unchanged + let field_a = PrimitiveArray::new(buffer![10i32, 20, 30, 40], Validity::NonNullable); + let field_b = PrimitiveArray::new( + buffer![100i32, 200, 300, 400], + Validity::Array(BoolArray::from_iter([true, true, false, true]).into_array()), + ); + let expected_mask_all_valid = Mask::new_true(4); + let expected_mask_b = Mask::from_iter([true, true, false, true]); + + let struct_array = StructArray::try_new( + FieldNames::from(["a", "b"]), + vec![field_a.into_array(), field_b.into_array()], + 4, + Validity::NonNullable, + )?; + + // Non-nullable struct cannot have validity pushed down + assert!(!struct_array.has_validity_pushed_down()); + + // with_validity_pushed_down should be a no-op for non-nullable structs + let struct_array = struct_array.with_validity_pushed_down(true); + assert!(!struct_array.has_validity_pushed_down()); + + // === Test masked_fields === + let masked = struct_array.masked_fields()?; + + // Non-nullable field: stays non-nullable (no struct validity to apply) + assert!(!masked[0].dtype().is_nullable()); + assert_eq!(masked[0].validity_mask()?, expected_mask_all_valid); + + // Nullable field: keeps its own validity + assert!(masked[1].dtype().is_nullable()); + assert_eq!(masked[1].validity_mask()?, expected_mask_b); + + // === Test field_by_name === + let field_a_by_name = struct_array.field_by_name("a")?; + assert!(!field_a_by_name.dtype().is_nullable()); + assert_eq!(field_a_by_name.validity_mask()?, expected_mask_all_valid); + + let field_b_by_name = struct_array.field_by_name("b")?; + assert!(field_b_by_name.dtype().is_nullable()); + assert_eq!(field_b_by_name.validity_mask()?, expected_mask_b); + + Ok(()) +} diff --git a/vortex-array/src/expr/exprs/get_item.rs b/vortex-array/src/expr/exprs/get_item.rs index e72b8036a0c..3a606b13f4e 100644 --- a/vortex-array/src/expr/exprs/get_item.rs +++ b/vortex-array/src/expr/exprs/get_item.rs @@ -16,7 +16,6 @@ use vortex_proto::expr as pb; use crate::arrays::StructArray; use crate::builtins::ExprBuiltins; -use crate::compute::mask; use crate::expr::Arity; use crate::expr::ChildName; use crate::expr::EmptyOptions; @@ -111,13 +110,7 @@ impl VTable for GetItem { .pop() .vortex_expect("missing input for GetItem expression") .execute::(args.ctx)?; - let field = input.unmasked_field_by_name(field_name).cloned()?; - - match input.dtype().nullability() { - Nullability::NonNullable => Ok(field), - Nullability::Nullable => mask(&field, &input.validity_mask()?.not()), - }? - .execute(args.ctx) + input.field_by_name(field_name)?.execute(args.ctx) } fn reduce( diff --git a/vortex-layout/src/layouts/dict/mod.rs b/vortex-layout/src/layouts/dict/mod.rs index 13f28d51249..4a29f04db80 100644 --- a/vortex-layout/src/layouts/dict/mod.rs +++ b/vortex-layout/src/layouts/dict/mod.rs @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright the Vortex contributors mod reader; diff --git a/vortex-layout/src/layouts/struct_/reader.rs b/vortex-layout/src/layouts/struct_/reader.rs index 1e256272e6b..8bfe8ea14ed 100644 --- a/vortex-layout/src/layouts/struct_/reader.rs +++ b/vortex-layout/src/layouts/struct_/reader.rs @@ -166,11 +166,7 @@ impl StructReader { let mut partitioned = partition( expr.clone(), self.dtype(), - annotate_scope_access( - self.dtype() - .as_struct_fields_opt() - .vortex_expect("We know it's a struct DType"), - ), + annotate_scope_access(self.dtype().as_struct_fields()), ) .vortex_expect("We should not fail to partition expression over struct fields"); From 12b6c687b3d0607cac6edddbd2a4496a270d8388 Mon Sep 17 00:00:00 2001 From: Joe Isaacs Date: Fri, 23 Jan 2026 14:28:52 +0000 Subject: [PATCH 3/7] fix tests Signed-off-by: Joe Isaacs --- vortex-array/src/arrays/struct_/array.rs | 2 +- vortex-array/src/arrays/struct_/tests.rs | 239 +++++++---------------- vortex-array/src/expr/exprs/get_item.rs | 1 - vortex-layout/src/layouts/struct_/mod.rs | 42 +--- 4 files changed, 76 insertions(+), 208 deletions(-) diff --git a/vortex-array/src/arrays/struct_/array.rs b/vortex-array/src/arrays/struct_/array.rs index 7269174a88d..707fe0b1603 100644 --- a/vortex-array/src/arrays/struct_/array.rs +++ b/vortex-array/src/arrays/struct_/array.rs @@ -32,7 +32,7 @@ pub struct StructMetadata { /// For nullable children, their validity already includes struct nulls. For non-nullable /// children, we apply struct validity on field read. If false (default), no guarantee /// about relationship - must intersect validities on read. - #[prost(bool, tag = "1")] + #[prost(bool, tag = "1", default = false)] pub(super) validity_pushed_down: bool, } diff --git a/vortex-array/src/arrays/struct_/tests.rs b/vortex-array/src/arrays/struct_/tests.rs index bfd35841a25..88a6791bce5 100644 --- a/vortex-array/src/arrays/struct_/tests.rs +++ b/vortex-array/src/arrays/struct_/tests.rs @@ -1,6 +1,9 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright the Vortex contributors +use std::ops::Not; + +use rstest::rstest; use vortex_buffer::buffer; use vortex_dtype::DType; use vortex_dtype::FieldName; @@ -8,7 +11,6 @@ use vortex_dtype::FieldNames; use vortex_dtype::Nullability; use vortex_dtype::PType; use vortex_error::VortexResult; -use vortex_mask::Mask; use crate::Array; use crate::IntoArray; @@ -152,192 +154,91 @@ fn test_uncompressed_size_in_bytes() -> VortexResult<()> { Ok(()) } -#[test] -fn test_masked_fields_without_validity_pushed_down() -> VortexResult<()> { - // Create a nullable struct with struct-level nulls at positions 1 and 3 - // Fields are non-nullable, so struct validity should be applied to all fields - let field_a = PrimitiveArray::new(buffer![10i32, 20, 30, 40], Validity::NonNullable); - let field_b = PrimitiveArray::new(buffer![100i32, 200, 300, 400], Validity::NonNullable); - - let struct_validity = Validity::Array( - BoolArray::from_iter([true, false, true, false]).into_array(), // positions 1, 3 are null - ); - let expected_mask = Mask::from_iter([true, false, true, false]); - - let struct_array = StructArray::try_new( - FieldNames::from(["a", "b"]), - vec![field_a.into_array(), field_b.into_array()], - 4, - struct_validity, - )?; - - // Verify validity is NOT pushed down by default - assert!(!struct_array.has_validity_pushed_down()); - - // === Test masked_fields === - let masked = struct_array.masked_fields()?; - assert_eq!(masked.len(), 2); - - // Both fields should have struct validity applied - assert!(masked[0].dtype().is_nullable()); - assert_eq!(masked[0].validity_mask()?, expected_mask); - assert!(masked[1].dtype().is_nullable()); - assert_eq!(masked[1].validity_mask()?, expected_mask); - - // === Test field_by_name === - let field_a_by_name = struct_array.field_by_name("a")?; - assert!(field_a_by_name.dtype().is_nullable()); - assert_eq!(field_a_by_name.validity_mask()?, expected_mask); - - let field_b_by_name = struct_array.field_by_name("b")?; - assert!(field_b_by_name.dtype().is_nullable()); - assert_eq!(field_b_by_name.validity_mask()?, expected_mask); - - Ok(()) +// Field validity must include struct null positions for pushed_down invariant. +// STRUCT: nulls at positions 1, 3 +// FIELD: nulls at positions 1, 2, 3 (superset of struct nulls) +const STRUCT_VALIDITY: [bool; 4] = [true, false, true, false]; +const FIELD_VALIDITY: [bool; 4] = [true, false, false, false]; + +fn field_validity(nullable: bool) -> Validity { + if nullable { + Validity::Array(BoolArray::from_iter(FIELD_VALIDITY).into_array()) + } else { + Validity::NonNullable + } } -#[test] -fn test_masked_fields_with_validity_pushed_down() -> VortexResult<()> { - // Create nullable fields where validity has already been pushed down - // (child nulls include struct nulls at positions 1 and 3) - let field_a = PrimitiveArray::new( - buffer![10i32, 20, 30, 40], - Validity::Array(BoolArray::from_iter([true, false, true, false]).into_array()), - ); - let field_b = PrimitiveArray::new( - buffer![100i32, 200, 300, 400], - Validity::Array(BoolArray::from_iter([true, false, true, false]).into_array()), - ); - - let struct_validity = - Validity::Array(BoolArray::from_iter([true, false, true, false]).into_array()); - let expected_mask = Mask::from_iter([true, false, true, false]); +fn struct_validity(nullable: bool) -> Validity { + if nullable { + Validity::Array(BoolArray::from_iter(STRUCT_VALIDITY).into_array()) + } else { + Validity::NonNullable + } +} - let struct_array = StructArray::try_new( +#[rstest] +#[case::both_non_nullable_struct_non_nullable(false, false, false)] +#[case::both_non_nullable_struct_nullable(false, false, true)] +#[case::field_a_nullable_struct_non_nullable(true, false, false)] +#[case::field_a_nullable_struct_nullable(true, false, true)] +#[case::field_b_nullable_struct_non_nullable(false, true, false)] +#[case::field_b_nullable_struct_nullable(false, true, true)] +#[case::both_nullable_struct_non_nullable(true, true, false)] +#[case::both_nullable_struct_nullable(true, true, true)] +fn test_masked_fields( + #[case] field_a_nullable: bool, + #[case] field_b_nullable: bool, + #[case] struct_nullable: bool, + #[values(false, true)] should_compact: bool, +) -> VortexResult<()> { + let field_a_val = field_validity(field_a_nullable); + let field_b_val = field_validity(field_b_nullable); + let struct_val = struct_validity(struct_nullable); + let field_a = PrimitiveArray::new(buffer![10i32, 20, 30, 40], field_a_val.clone()); + let field_b = PrimitiveArray::new(buffer![100i32, 200, 300, 400], field_b_val.clone()); + + let mut struct_array = StructArray::try_new( FieldNames::from(["a", "b"]), vec![field_a.into_array(), field_b.into_array()], 4, - struct_validity, - )? - .with_validity_pushed_down(true); - - // Verify validity IS pushed down - assert!(struct_array.has_validity_pushed_down()); - - // === Test masked_fields === - let masked = struct_array.masked_fields()?; - assert_eq!(masked.len(), 2); - - // Nullable fields returned as-is (validity already pushed) - assert!(masked[0].dtype().is_nullable()); - assert_eq!(masked[0].validity_mask()?, expected_mask); - assert!(masked[1].dtype().is_nullable()); - assert_eq!(masked[1].validity_mask()?, expected_mask); - - // === Test field_by_name === - let field_a_by_name = struct_array.field_by_name("a")?; - assert!(field_a_by_name.dtype().is_nullable()); - assert_eq!(field_a_by_name.validity_mask()?, expected_mask); - - let field_b_by_name = struct_array.field_by_name("b")?; - assert!(field_b_by_name.dtype().is_nullable()); - assert_eq!(field_b_by_name.validity_mask()?, expected_mask); - - Ok(()) -} - -#[test] -fn test_masked_fields_mixed_nullability_with_pushed_down() -> VortexResult<()> { - // Create a struct with mixed nullable and non-nullable fields - // Nullable field has validity pushed down, non-nullable field needs masking - let nullable_field = PrimitiveArray::new( - buffer![10i32, 20, 30, 40], - Validity::Array(BoolArray::from_iter([true, false, true, false]).into_array()), - ); - let non_nullable_field = - PrimitiveArray::new(buffer![100i32, 200, 300, 400], Validity::NonNullable); - - let struct_validity = - Validity::Array(BoolArray::from_iter([true, false, true, false]).into_array()); - let expected_mask = Mask::from_iter([true, false, true, false]); - - let struct_array = StructArray::try_new( - FieldNames::from(["nullable", "non_nullable"]), - vec![nullable_field.into_array(), non_nullable_field.into_array()], - 4, - struct_validity, - )? - .with_validity_pushed_down(true); - - assert!(struct_array.has_validity_pushed_down()); - - // === Test masked_fields === - let masked = struct_array.masked_fields()?; - - // Nullable field: returned as-is (validity already pushed down) - assert!(masked[0].dtype().is_nullable()); - assert_eq!(masked[0].validity_mask()?, expected_mask); - - // Non-nullable field: struct validity applied (becomes nullable) - assert!(masked[1].dtype().is_nullable()); - assert_eq!(masked[1].validity_mask()?, expected_mask); - - // === Test field_by_name === - let nullable_by_name = struct_array.field_by_name("nullable")?; - assert!(nullable_by_name.dtype().is_nullable()); - assert_eq!(nullable_by_name.validity_mask()?, expected_mask); + struct_val.clone(), + )?; - let non_nullable_by_name = struct_array.field_by_name("non_nullable")?; - assert!(non_nullable_by_name.dtype().is_nullable()); - assert_eq!(non_nullable_by_name.validity_mask()?, expected_mask); + let before_dtype = struct_array.dtype().clone(); - Ok(()) -} + if should_compact { + struct_array = struct_array.compact()?; + } -#[test] -fn test_masked_fields_non_nullable_struct() -> VortexResult<()> { - // Non-nullable struct: fields should be returned unchanged - let field_a = PrimitiveArray::new(buffer![10i32, 20, 30, 40], Validity::NonNullable); - let field_b = PrimitiveArray::new( - buffer![100i32, 200, 300, 400], - Validity::Array(BoolArray::from_iter([true, true, false, true]).into_array()), - ); - let expected_mask_all_valid = Mask::new_true(4); - let expected_mask_b = Mask::from_iter([true, true, false, true]); + assert_eq!(struct_array.dtype(), &before_dtype); + if struct_val.nullability().is_nullable() { + assert_eq!(struct_array.has_validity_pushed_down(), should_compact); + } - let struct_array = StructArray::try_new( - FieldNames::from(["a", "b"]), - vec![field_a.into_array(), field_b.into_array()], - 4, - Validity::NonNullable, - )?; + assert_eq!(struct_array.validity()?, struct_val); - // Non-nullable struct cannot have validity pushed down - assert!(!struct_array.has_validity_pushed_down()); + let combined_a = (field_a_val.nullability().is_nullable() + || struct_val.nullability().is_nullable()) + .then(|| field_a_val.mask(&struct_val.to_mask(struct_array.len()).not())) + .unwrap_or(Validity::NonNullable); - // with_validity_pushed_down should be a no-op for non-nullable structs - let struct_array = struct_array.with_validity_pushed_down(true); - assert!(!struct_array.has_validity_pushed_down()); + let combined_b = (field_b_val.nullability().is_nullable() + || struct_val.nullability().is_nullable()) + .then(|| field_b_val.mask(&struct_val.to_mask(struct_array.len()).not())) + .unwrap_or(Validity::NonNullable); - // === Test masked_fields === + // Test masked_fields let masked = struct_array.masked_fields()?; + assert_eq!(masked.len(), 2); + assert_eq!(masked[0].validity()?, combined_a); + assert_eq!(masked[1].validity()?, combined_b); - // Non-nullable field: stays non-nullable (no struct validity to apply) - assert!(!masked[0].dtype().is_nullable()); - assert_eq!(masked[0].validity_mask()?, expected_mask_all_valid); - - // Nullable field: keeps its own validity - assert!(masked[1].dtype().is_nullable()); - assert_eq!(masked[1].validity_mask()?, expected_mask_b); - - // === Test field_by_name === + // Test field_by_name let field_a_by_name = struct_array.field_by_name("a")?; - assert!(!field_a_by_name.dtype().is_nullable()); - assert_eq!(field_a_by_name.validity_mask()?, expected_mask_all_valid); + assert_eq!(field_a_by_name.validity()?, combined_a); let field_b_by_name = struct_array.field_by_name("b")?; - assert!(field_b_by_name.dtype().is_nullable()); - assert_eq!(field_b_by_name.validity_mask()?, expected_mask_b); + assert_eq!(field_b_by_name.validity()?, combined_b); Ok(()) } diff --git a/vortex-array/src/expr/exprs/get_item.rs b/vortex-array/src/expr/exprs/get_item.rs index 3a606b13f4e..c3ced54a70e 100644 --- a/vortex-array/src/expr/exprs/get_item.rs +++ b/vortex-array/src/expr/exprs/get_item.rs @@ -2,7 +2,6 @@ // SPDX-FileCopyrightText: Copyright the Vortex contributors use std::fmt::Formatter; -use std::ops::Not; use prost::Message; use vortex_dtype::DType; diff --git a/vortex-layout/src/layouts/struct_/mod.rs b/vortex-layout/src/layouts/struct_/mod.rs index 812db5629f1..bb2329ed73c 100644 --- a/vortex-layout/src/layouts/struct_/mod.rs +++ b/vortex-layout/src/layouts/struct_/mod.rs @@ -9,7 +9,7 @@ use std::sync::Arc; use reader::StructReader; use vortex_array::ArrayContext; use vortex_array::DeserializeMetadata; -use vortex_array::ProstMetadata; +use vortex_array::EmptyMetadata; use vortex_dtype::DType; use vortex_dtype::Field; use vortex_dtype::FieldMask; @@ -35,21 +35,12 @@ use crate::segments::SegmentId; use crate::segments::SegmentSource; use crate::vtable; -/// Metadata for StructLayout serialization. -#[derive(Clone, prost::Message)] -pub struct StructLayoutMetadata { - /// true = child validity is a superset of struct validity (validity was pushed down) - /// false = default, no guarantee about relationship - #[prost(bool, tag = "1")] - pub(crate) validity_pushed_down: bool, -} - vtable!(Struct); impl VTable for StructVTable { type Layout = StructLayout; type Encoding = StructLayoutEncoding; - type Metadata = ProstMetadata; + type Metadata = EmptyMetadata; fn id(_encoding: &Self::Encoding) -> LayoutId { LayoutId::new_ref("vortex.struct") @@ -67,10 +58,8 @@ impl VTable for StructVTable { &layout.dtype } - fn metadata(layout: &Self::Layout) -> Self::Metadata { - ProstMetadata(StructLayoutMetadata { - validity_pushed_down: layout.validity_pushed_down, - }) + fn metadata(_layout: &Self::Layout) -> Self::Metadata { + EmptyMetadata } fn segment_ids(_layout: &Self::Layout) -> Vec { @@ -156,7 +145,7 @@ impl VTable for StructVTable { _encoding: &Self::Encoding, dtype: &DType, row_count: u64, - metadata: &::Output, + _metadata: &::Output, _segment_ids: Vec, children: &dyn LayoutChildren, _ctx: &ArrayContext, @@ -177,7 +166,6 @@ impl VTable for StructVTable { row_count, dtype: dtype.clone(), children: children.to_arc(), - validity_pushed_down: metadata.validity_pushed_down, }) } @@ -208,9 +196,6 @@ pub struct StructLayout { row_count: u64, dtype: DType, children: Arc, - /// true = child validity is a superset of struct validity (validity was pushed down) - /// false = default, no guarantee about relationship - validity_pushed_down: bool, } impl StructLayout { @@ -219,7 +204,6 @@ impl StructLayout { row_count, dtype, children: OwnedLayoutChildren::layout_children(children), - validity_pushed_down: false, } } @@ -239,22 +223,6 @@ impl StructLayout { &self.children } - /// Returns whether validity has been pushed down into children. - pub fn has_validity_pushed_down(&self) -> bool { - self.validity_pushed_down - } - - /// Set the validity_pushed_down flag. - /// - /// For non-nullable structs, this is a no-op (flag stays false). - pub fn with_validity_pushed_down(mut self, validity_pushed_down: bool) -> Self { - if !self.dtype.is_nullable() { - return self; - } - self.validity_pushed_down = validity_pushed_down; - self - } - pub fn matching_fields(&self, field_mask: &[FieldMask], mut per_child: F) -> VortexResult<()> where F: FnMut(FieldMask, usize) -> VortexResult<()>, From c55829362ef6bb70a605b89704147a4c853f3dd5 Mon Sep 17 00:00:00 2001 From: Joe Isaacs Date: Fri, 23 Jan 2026 15:56:52 +0000 Subject: [PATCH 4/7] fix Signed-off-by: Joe Isaacs --- vortex-array/src/arrays/struct_/array.rs | 32 ++++++++++++------- vortex-array/src/arrays/struct_/vtable/mod.rs | 10 ++++-- vortex-layout/src/layouts/compact.rs | 18 +++++++---- 3 files changed, 38 insertions(+), 22 deletions(-) diff --git a/vortex-array/src/arrays/struct_/array.rs b/vortex-array/src/arrays/struct_/array.rs index 707fe0b1603..e3ba57cf997 100644 --- a/vortex-array/src/arrays/struct_/array.rs +++ b/vortex-array/src/arrays/struct_/array.rs @@ -209,6 +209,8 @@ impl StructArray { } } + /// Return the struct field with name `name` with the struct validity already applied. + /// If the struct has no field with that `name` an error is returned. pub fn field_by_name(&self, name: impl AsRef) -> VortexResult { let name = name.as_ref(); self.field_by_name_opt(name)?.ok_or_else(|| { @@ -219,6 +221,8 @@ impl StructArray { }) } + /// Return the struct field with name `name` with the struct validity already applied. + /// If the struct has no field with that `name` Ok(None) is returned. pub fn field_by_name_opt(&self, name: impl AsRef) -> VortexResult> { let name = name.as_ref(); self.struct_fields() @@ -563,7 +567,7 @@ impl StructArray { pub fn has_validity_pushed_down(&self) -> bool { #[cfg(debug_assertions)] if self.validity_pushed_down { - self.check_validity_pushed_down() + self.validate_validity_pushed_down() .vortex_expect("validity_pushed_down invariant violated"); } self.validity_pushed_down @@ -574,7 +578,7 @@ impl StructArray { /// When `validity_pushed_down` is true, for every nullable child field, /// the child's validity must be a superset of the struct's validity. /// That is, wherever the struct is invalid (null), the child must also be invalid. - fn check_validity_pushed_down(&self) -> VortexResult<()> { + fn validate_validity_pushed_down(&self) -> VortexResult<()> { if !self.validity_pushed_down { return Ok(()); } @@ -612,7 +616,7 @@ impl StructArray { /// /// For nullable structs, setting this to true indicates that child validity /// is a superset of struct validity (children include struct's nulls). - pub fn with_validity_pushed_down(mut self, validity_pushed_down: bool) -> Self { + pub unsafe fn with_validity_pushed_down(mut self, validity_pushed_down: bool) -> Self { // For non-nullable structs, the flag is meaningless - keep it false if !self.dtype.is_nullable() { return self; @@ -621,7 +625,7 @@ impl StructArray { #[cfg(debug_assertions)] if validity_pushed_down { - self.check_validity_pushed_down() + self.validate_validity_pushed_down() .vortex_expect("validity_pushed_down invariant violated"); } @@ -649,7 +653,8 @@ impl StructArray { // If validity is trivial (AllValid), nothing to push down // but mark as pushed down since children trivially include parent validity if self.validity.all_valid(self.len)? { - return Ok(self.clone().with_validity_pushed_down(true)); + // # Safety no validity to push down + return Ok(unsafe { self.clone().with_validity_pushed_down(true) }); } // Get the validity mask - mask() expects true = set to null, so we invert the validity @@ -670,12 +675,15 @@ impl StructArray { .collect::>()?; // Create new struct with same validity but updated children - Ok(StructArray::try_new( - self.names().clone(), - new_fields, - self.len(), - self.validity.clone(), // Keep original validity - )? - .with_validity_pushed_down(true)) + // # Safety mask of struct validity applied to each child. + Ok(unsafe { + StructArray::try_new( + self.names().clone(), + new_fields, + self.len(), + self.validity.clone(), // Keep original validity + )? + .with_validity_pushed_down(true) + }) } } diff --git a/vortex-array/src/arrays/struct_/vtable/mod.rs b/vortex-array/src/arrays/struct_/vtable/mod.rs index d61a3b323b5..4b9928776ba 100644 --- a/vortex-array/src/arrays/struct_/vtable/mod.rs +++ b/vortex-array/src/arrays/struct_/vtable/mod.rs @@ -105,10 +105,14 @@ impl VTable for StructVTable { }) .try_collect()?; - Ok( + // # Safety + // + // The file was written with fields pushed down, if the file was corrupted this + // will result in invalid results, but no unsoundness. + Ok(unsafe { StructArray::try_new_with_dtype(children, struct_dtype.clone(), len, validity)? - .with_validity_pushed_down(metadata.validity_pushed_down), - ) + .with_validity_pushed_down(metadata.validity_pushed_down) + }) } fn with_children(array: &mut Self::Array, children: Vec) -> VortexResult<()> { diff --git a/vortex-layout/src/layouts/compact.rs b/vortex-layout/src/layouts/compact.rs index 20785172594..9f6becbb26c 100644 --- a/vortex-layout/src/layouts/compact.rs +++ b/vortex-layout/src/layouts/compact.rs @@ -133,13 +133,17 @@ impl CompactCompressor { .map(|field| self.compress(field)) .collect::>>()?; - StructArray::try_new( - compacted.names().clone(), - fields, - compacted.len(), - compacted.validity().clone(), - )? - .with_validity_pushed_down(compacted.has_validity_pushed_down()) + // # Safety + // compressing the fields must not affect the validity those fields. + unsafe { + StructArray::try_new( + compacted.names().clone(), + fields, + compacted.len(), + compacted.validity().clone(), + )? + .with_validity_pushed_down(compacted.has_validity_pushed_down()) + } .into_array() } Canonical::List(listview) => { From 648ab8b8fd5f6d725aacc4d18205f07d0819fb13 Mon Sep 17 00:00:00 2001 From: Joe Isaacs Date: Fri, 23 Jan 2026 18:39:34 +0000 Subject: [PATCH 5/7] clean Signed-off-by: Joe Isaacs --- vortex-array/src/arrays/struct_/array.rs | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/vortex-array/src/arrays/struct_/array.rs b/vortex-array/src/arrays/struct_/array.rs index e3ba57cf997..798d9a8507b 100644 --- a/vortex-array/src/arrays/struct_/array.rs +++ b/vortex-array/src/arrays/struct_/array.rs @@ -616,6 +616,11 @@ impl StructArray { /// /// For nullable structs, setting this to true indicates that child validity /// is a superset of struct validity (children include struct's nulls). + /// + /// # Safety + /// + /// If set all non-nullable field must have their nullability be a superset of the struct + /// validity pub unsafe fn with_validity_pushed_down(mut self, validity_pushed_down: bool) -> Self { // For non-nullable structs, the flag is meaningless - keep it false if !self.dtype.is_nullable() { From 390f5926c8f4ea6d6667fc2fa5a10c0d3a576634 Mon Sep 17 00:00:00 2001 From: Joe Isaacs Date: Fri, 23 Jan 2026 18:42:10 +0000 Subject: [PATCH 6/7] clean Signed-off-by: Joe Isaacs --- vortex-array/src/arrays/struct_/tests.rs | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/vortex-array/src/arrays/struct_/tests.rs b/vortex-array/src/arrays/struct_/tests.rs index 88a6791bce5..8e78f581d7b 100644 --- a/vortex-array/src/arrays/struct_/tests.rs +++ b/vortex-array/src/arrays/struct_/tests.rs @@ -217,15 +217,19 @@ fn test_masked_fields( assert_eq!(struct_array.validity()?, struct_val); - let combined_a = (field_a_val.nullability().is_nullable() - || struct_val.nullability().is_nullable()) - .then(|| field_a_val.mask(&struct_val.to_mask(struct_array.len()).not())) - .unwrap_or(Validity::NonNullable); - - let combined_b = (field_b_val.nullability().is_nullable() - || struct_val.nullability().is_nullable()) - .then(|| field_b_val.mask(&struct_val.to_mask(struct_array.len()).not())) - .unwrap_or(Validity::NonNullable); + let combined_a = + if field_a_val.nullability().is_nullable() || struct_val.nullability().is_nullable() { + field_a_val.mask(&struct_val.to_mask(struct_array.len()).not()) + } else { + Validity::NonNullable + }; + + let combined_b = + if field_b_val.nullability().is_nullable() || struct_val.nullability().is_nullable() { + field_b_val.mask(&struct_val.to_mask(struct_array.len()).not()) + } else { + Validity::NonNullable + }; // Test masked_fields let masked = struct_array.masked_fields()?; From a43054c576eddab1a671c55626fdba3cd677921d Mon Sep 17 00:00:00 2001 From: Joe Isaacs Date: Fri, 23 Jan 2026 19:03:00 +0000 Subject: [PATCH 7/7] fix Signed-off-by: Joe Isaacs --- vortex-array/src/arrays/struct_/array.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/vortex-array/src/arrays/struct_/array.rs b/vortex-array/src/arrays/struct_/array.rs index 798d9a8507b..2e2476e7d30 100644 --- a/vortex-array/src/arrays/struct_/array.rs +++ b/vortex-array/src/arrays/struct_/array.rs @@ -578,6 +578,7 @@ impl StructArray { /// When `validity_pushed_down` is true, for every nullable child field, /// the child's validity must be a superset of the struct's validity. /// That is, wherever the struct is invalid (null), the child must also be invalid. + #[cfg(debug_assertions)] fn validate_validity_pushed_down(&self) -> VortexResult<()> { if !self.validity_pushed_down { return Ok(());