-
Notifications
You must be signed in to change notification settings - Fork 80
feat: implement update stastics #511
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||||
|---|---|---|---|---|---|---|
|
|
@@ -45,6 +45,7 @@ | |||||
| #include "iceberg/schema.h" | ||||||
| #include "iceberg/snapshot.h" | ||||||
| #include "iceberg/sort_order.h" | ||||||
| #include "iceberg/statistics_file.h" | ||||||
| #include "iceberg/table_properties.h" | ||||||
| #include "iceberg/table_update.h" | ||||||
| #include "iceberg/util/checked_cast.h" | ||||||
|
|
@@ -612,14 +613,16 @@ class TableMetadataBuilder::Impl { | |||||
| Status SetCurrentSchema(int32_t schema_id); | ||||||
| Status RemoveSchemas(const std::unordered_set<int32_t>& schema_ids); | ||||||
| Result<int32_t> AddSchema(const Schema& schema, int32_t new_last_column_id); | ||||||
| void SetLocation(std::string_view location); | ||||||
| Status SetLocation(std::string_view location); | ||||||
| Status AddSnapshot(std::shared_ptr<Snapshot> snapshot); | ||||||
| Status SetBranchSnapshot(int64_t snapshot_id, const std::string& branch); | ||||||
| Status SetBranchSnapshot(std::shared_ptr<Snapshot> snapshot, const std::string& branch); | ||||||
| Status SetRef(const std::string& name, std::shared_ptr<SnapshotRef> ref); | ||||||
| Status RemoveRef(const std::string& name); | ||||||
| Status RemoveSnapshots(const std::vector<int64_t>& snapshot_ids); | ||||||
| Status RemovePartitionSpecs(const std::vector<int32_t>& spec_ids); | ||||||
| Status SetStatistics(const std::shared_ptr<StatisticsFile>& statistics_file); | ||||||
| Status RemoveStatistics(int64_t snapshot_id); | ||||||
|
|
||||||
| Result<std::unique_ptr<TableMetadata>> Build(); | ||||||
|
|
||||||
|
|
@@ -1032,12 +1035,13 @@ Result<int32_t> TableMetadataBuilder::Impl::AddSchema(const Schema& schema, | |||||
| return new_schema_id; | ||||||
| } | ||||||
|
|
||||||
| void TableMetadataBuilder::Impl::SetLocation(std::string_view location) { | ||||||
| Status TableMetadataBuilder::Impl::SetLocation(std::string_view location) { | ||||||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why changing this? We don't need to return status if it never has any error. |
||||||
| if (location == metadata_.location) { | ||||||
| return; | ||||||
| return {}; | ||||||
| } | ||||||
| metadata_.location = std::string(location); | ||||||
| changes_.push_back(std::make_unique<table::SetLocation>(std::string(location))); | ||||||
| return {}; | ||||||
| } | ||||||
|
|
||||||
| Status TableMetadataBuilder::Impl::AddSnapshot(std::shared_ptr<Snapshot> snapshot) { | ||||||
|
|
@@ -1173,6 +1177,45 @@ Status TableMetadataBuilder::Impl::SetRef(const std::string& name, | |||||
| return {}; | ||||||
| } | ||||||
|
|
||||||
| Status TableMetadataBuilder::Impl::SetStatistics( | ||||||
| const std::shared_ptr<StatisticsFile>& statistics_file) { | ||||||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||
| ICEBERG_CHECK(statistics_file != nullptr, "Cannot set null statistics file"); | ||||||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
Note that PRECHECK returns InvalidArgument while CHECK returns ValidationFailed. |
||||||
|
|
||||||
| // Find and replace existing statistics for the same snapshot_id, or add new one | ||||||
| auto it = std::ranges::find_if( | ||||||
| metadata_.statistics, | ||||||
| [snapshot_id = statistics_file->snapshot_id](const auto& stat) { | ||||||
| return stat && stat->snapshot_id == snapshot_id; | ||||||
| }); | ||||||
|
|
||||||
| if (it != metadata_.statistics.end()) { | ||||||
| *it = statistics_file; | ||||||
| } else { | ||||||
| metadata_.statistics.push_back(statistics_file); | ||||||
| } | ||||||
|
|
||||||
| changes_.push_back(std::make_unique<table::SetStatistics>(statistics_file)); | ||||||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Use std::move |
||||||
| return {}; | ||||||
| } | ||||||
|
|
||||||
| Status TableMetadataBuilder::Impl::RemoveStatistics(int64_t snapshot_id) { | ||||||
| auto it = std::ranges::find_if(metadata_.statistics, [snapshot_id](const auto& stat) { | ||||||
| return stat && stat->snapshot_id == snapshot_id; | ||||||
| }); | ||||||
|
|
||||||
| if (it == metadata_.statistics.end()) { | ||||||
| return {}; | ||||||
| } | ||||||
|
|
||||||
| // Remove statistics for the given snapshot_id | ||||||
| std::erase_if(metadata_.statistics, [snapshot_id](const auto& stat) { | ||||||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can we combine find and erase in a single call?
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Or you can just call something like |
||||||
| return stat && stat->snapshot_id == snapshot_id; | ||||||
| }); | ||||||
|
|
||||||
| changes_.push_back(std::make_unique<table::RemoveStatistics>(snapshot_id)); | ||||||
| return {}; | ||||||
| } | ||||||
|
|
||||||
| std::unordered_set<int64_t> TableMetadataBuilder::Impl::IntermediateSnapshotIdSet( | ||||||
| int64_t current_snapshot_id) const { | ||||||
| std::unordered_set<int64_t> added_snapshot_ids; | ||||||
|
|
@@ -1590,11 +1633,13 @@ TableMetadataBuilder& TableMetadataBuilder::SuppressHistoricalSnapshots() { | |||||
|
|
||||||
| TableMetadataBuilder& TableMetadataBuilder::SetStatistics( | ||||||
| const std::shared_ptr<StatisticsFile>& statistics_file) { | ||||||
| throw IcebergError(std::format("{} not implemented", __FUNCTION__)); | ||||||
| ICEBERG_BUILDER_RETURN_IF_ERROR(impl_->SetStatistics(statistics_file)); | ||||||
| return *this; | ||||||
| } | ||||||
|
|
||||||
| TableMetadataBuilder& TableMetadataBuilder::RemoveStatistics(int64_t snapshot_id) { | ||||||
| throw IcebergError(std::format("{} not implemented", __FUNCTION__)); | ||||||
| ICEBERG_BUILDER_RETURN_IF_ERROR(impl_->RemoveStatistics(snapshot_id)); | ||||||
| return *this; | ||||||
| } | ||||||
|
|
||||||
| TableMetadataBuilder& TableMetadataBuilder::SetPartitionStatistics( | ||||||
|
|
@@ -1620,7 +1665,7 @@ TableMetadataBuilder& TableMetadataBuilder::RemoveProperties( | |||||
| } | ||||||
|
|
||||||
| TableMetadataBuilder& TableMetadataBuilder::SetLocation(std::string_view location) { | ||||||
| impl_->SetLocation(location); | ||||||
| ICEBERG_BUILDER_RETURN_IF_ERROR(impl_->SetLocation(location)); | ||||||
| return *this; | ||||||
| } | ||||||
|
|
||||||
|
|
||||||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -22,6 +22,7 @@ | |
| #include "iceberg/exception.h" | ||
| #include "iceberg/schema.h" | ||
| #include "iceberg/sort_order.h" | ||
| #include "iceberg/statistics_file.h" | ||
| #include "iceberg/table_metadata.h" | ||
| #include "iceberg/table_requirements.h" | ||
|
|
||
|
|
@@ -446,4 +447,50 @@ std::unique_ptr<TableUpdate> SetLocation::Clone() const { | |
| return std::make_unique<SetLocation>(location_); | ||
| } | ||
|
|
||
| // SetStatistics | ||
|
|
||
| int64_t SetStatistics::snapshot_id() const { return statistics_file_->snapshot_id; } | ||
|
|
||
| void SetStatistics::ApplyTo(TableMetadataBuilder& builder) const { | ||
| builder.SetStatistics(statistics_file_); | ||
| } | ||
|
|
||
| void SetStatistics::GenerateRequirements(TableUpdateContext& context) const { | ||
| // SetStatistics doesn't generate any requirements | ||
| } | ||
|
|
||
| bool SetStatistics::Equals(const TableUpdate& other) const { | ||
| if (other.kind() != Kind::kSetStatistics) { | ||
| return false; | ||
| } | ||
| const auto& other_set = static_cast<const SetStatistics&>(other); | ||
| return *statistics_file_ == *other_set.statistics_file_; | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. nit: check null just in case. |
||
| } | ||
|
|
||
| std::unique_ptr<TableUpdate> SetStatistics::Clone() const { | ||
| return std::make_unique<SetStatistics>(statistics_file_); | ||
| } | ||
|
|
||
| // RemoveStatistics | ||
|
|
||
| void RemoveStatistics::ApplyTo(TableMetadataBuilder& builder) const { | ||
| builder.RemoveStatistics(snapshot_id_); | ||
| } | ||
|
|
||
| void RemoveStatistics::GenerateRequirements(TableUpdateContext& context) const { | ||
| // RemoveStatistics doesn't generate any requirements | ||
| } | ||
|
|
||
| bool RemoveStatistics::Equals(const TableUpdate& other) const { | ||
| if (other.kind() != Kind::kRemoveStatistics) { | ||
| return false; | ||
| } | ||
| const auto& other_remove = static_cast<const RemoveStatistics&>(other); | ||
| return snapshot_id_ == other_remove.snapshot_id_; | ||
| } | ||
|
|
||
| std::unique_ptr<TableUpdate> RemoveStatistics::Clone() const { | ||
| return std::make_unique<RemoveStatistics>(snapshot_id_); | ||
| } | ||
|
|
||
| } // namespace iceberg::table | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Please also add
FromJsonequivalents and their test cases.