Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 2 additions & 0 deletions vortex-array/src/expr/exprs/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ pub(crate) mod operators;
pub(crate) mod pack;
pub(crate) mod root;
pub(crate) mod select;
pub(crate) mod stats;

pub use between::*;
pub use binary::*;
Expand All @@ -34,3 +35,4 @@ pub use operators::*;
pub use pack::*;
pub use root::*;
pub use select::*;
pub use stats::*;
84 changes: 84 additions & 0 deletions vortex-array/src/expr/exprs/stats.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: Copyright the Vortex contributors

use vortex_dtype::DType;
use vortex_error::VortexResult;
use vortex_error::vortex_err;
use vortex_vector::Datum;
use vortex_vector::Scalar;

use crate::Array;
use crate::ArrayRef;
use crate::IntoArray;
use crate::arrays::ConstantArray;
use crate::expr::Arity;
use crate::expr::ChildName;
use crate::expr::ExecutionArgs;
use crate::expr::ExprId;
use crate::expr::Expression;
use crate::expr::SimplifyCtx;
use crate::expr::VTable;
use crate::expr::VTableExt;
use crate::expr::stats::Stat;

/// Creates a new expression representing the statistic `stat` of its input.
///
/// The returned expression wraps `child` as its single input and carries `stat` as its
/// options, so the same constructor serves every [`Stat`] variant rather than one
/// specific statistic.
pub fn statistic(stat: Stat, child: Expression) -> Expression {
    Statistic.new_expr(stat, vec![child])
}

/// Expression vtable for statistic expressions.
///
/// The expression's options are the [`Stat`] being requested and it takes exactly one
/// child. Use [`statistic`] to construct an instance.
pub struct Statistic;

impl VTable for Statistic {
    /// The statistic being requested is the only option carried by the expression.
    type Options = Stat;

    /// Identifier under which this expression is registered.
    fn id(&self) -> ExprId {
        ExprId::from("statistic")
    }

    /// A statistic expression always has exactly one child.
    fn arity(&self, _options: &Self::Options) -> Arity {
        Arity::Exact(1)
    }

    /// The child is named `input` regardless of the index passed in.
    fn child_name(&self, _options: &Self::Options, _child_idx: usize) -> ChildName {
        ChildName::from("input")
    }

    /// Returns the nullable form of the dtype that `stat` has for the input dtype.
    ///
    /// # Errors
    ///
    /// Fails if `stat` has no dtype for the input dtype.
    fn return_dtype(&self, stat: &Stat, arg_dtypes: &[DType]) -> VortexResult<DType> {
        let input_dtype = &arg_dtypes[0];
        match stat.dtype(input_dtype) {
            // Every statistic is typed as nullable so that a null result is representable
            // when no reduction rule handles the statistic expression.
            Some(stat_dtype) => Ok(stat_dtype.as_nullable()),
            None => Err(vortex_err!(
                "statistic {:?} not supported for dtype {:?}",
                stat,
                input_dtype
            )),
        }
    }

    /// Evaluates to a constant array of nulls, typed with the expression's return dtype and
    /// with the same length as `scope`.
    fn evaluate(
        &self,
        _stat: &Stat,
        expr: &Expression,
        scope: &ArrayRef,
    ) -> VortexResult<ArrayRef> {
        let null_dtype = expr.return_dtype(scope.dtype())?;
        let null_value = vortex_scalar::Scalar::null(null_dtype);
        let nulls = ConstantArray::new(null_value, scope.len());
        Ok(nulls.into_array())
    }

    /// Executes to a single null scalar of the already-resolved return dtype.
    fn execute(&self, _stat: &Stat, args: ExecutionArgs) -> VortexResult<Datum> {
        let null_value = Scalar::null(&args.return_dtype);
        Ok(Datum::Scalar(null_value))
    }

    /// No simplification is performed yet; always returns `Ok(None)`.
    fn simplify(
        &self,
        _options: &Self::Options,
        _expr: &Expression,
        _ctx: &dyn SimplifyCtx,
    ) -> VortexResult<Option<Expression>> {
        // FIXME(ngates): we really want to implement a reduction rule for all arrays? But it's
        //  an array. And it's a reduction rule. How do we do this without reduce_parent on
        //  everything..?
        Ok(None)
    }
}
2 changes: 1 addition & 1 deletion vortex-array/src/expr/stats/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -216,7 +216,7 @@ impl Stat {
})
}

pub fn name(&self) -> &str {
pub const fn name(&self) -> &'static str {
match self {
Self::IsConstant => "is_constant",
Self::IsSorted => "is_sorted",
Expand Down
23 changes: 22 additions & 1 deletion vortex-array/src/expr/vtable.rs
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,20 @@ pub trait VTable: 'static + Sized + Send + Sync {
options: &Self::Options,
expr: &Expression,
f: &mut Formatter<'_>,
) -> fmt::Result;
) -> fmt::Result {
write!(f, "{}(", expr.id())?;
for (i, child) in expr.children().iter().enumerate() {
if i > 0 {
write!(f, ", ")?;
}
child.fmt_sql(f)?;
}
let options = format!("{}", options);
if !options.is_empty() {
write!(f, ", options={}", options)?;
}
write!(f, ")")
}

/// Compute the return [`DType`] of the expression if evaluated over the given input types.
fn return_dtype(&self, options: &Self::Options, arg_dtypes: &[DType]) -> VortexResult<DType>;
Expand Down Expand Up @@ -144,6 +157,14 @@ pub trait VTable: 'static + Sized + Send + Sync {
Ok(None)
}

/// Falsify the expression: return a new expression that evaluates to true whenever the
/// original expression is guaranteed to be false according to stats.
///
/// The default implementation ignores both arguments and returns `None`.
fn falsify(&self, options: &Self::Options, expr: &Expression) -> Option<Expression> {
    // Explicitly discard the arguments so the default body does not trip unused warnings.
    let _ = (options, expr);
    None
}

/// See [`Expression::stat_falsification`].
fn stat_falsification(
&self,
Expand Down
9 changes: 9 additions & 0 deletions vortex-file/src/file.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ use vortex_layout::segments::SegmentSource;
use vortex_metrics::VortexMetrics;
use vortex_scan::ScanBuilder;
use vortex_scan::SplitBy;
use vortex_scan::v2::scan::ScanBuilder2;
use vortex_session::VortexSession;
use vortex_utils::aliases::hash_map::HashMap;

Expand Down Expand Up @@ -103,6 +104,14 @@ impl VortexFile {
)
}

/// Creates a [`ScanBuilder2`] over this file.
///
/// A reader is built from the footer's layout via `new_reader2`, using this file's
/// segment source and session, and handed to the builder together with a clone of the
/// session.
///
/// # Errors
///
/// Propagates any error returned while constructing the reader.
pub fn scan2(&self) -> VortexResult<ScanBuilder2> {
    let layout = self.footer.layout();
    let reader = layout.new_reader2(&self.segment_source, &self.session)?;
    Ok(ScanBuilder2::new(reader, self.session.clone()))
}

#[cfg(gpu_unstable)]
pub fn gpu_scan(
&self,
Expand Down
Loading
Loading