Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
59a6871
Add open_as_void option to zarr v2 driver (#6)
BrianMichell Dec 4, 2025
2aedabf
Remove default `open_as_void` from definitions
BrianMichell Jan 5, 2026
46d9902
Use derived `DataCache` for `open_as_void`
BrianMichell Jan 5, 2026
ccc4bd7
Fix compile issues for missing argument
BrianMichell Jan 6, 2026
5d4a68f
Correct tests, add argument comment for open as void value
BrianMichell Jan 6, 2026
c410f5e
Add test coverage for `GetSpecInfo`
BrianMichell Jan 6, 2026
d886c2f
Resolve feedback `https://github.com/google/tensorstore/pull/272#disc…
BrianMichell Jan 6, 2026
a42b6f5
Resolve feedback `https://github.com/google/tensorstore/pull/272#disc…
BrianMichell Jan 6, 2026
e9c15da
Resolve feedback `https://github.com/google/tensorstore/pull/272/chan…
BrianMichell Jan 7, 2026
7fb91d7
Resolve feedback `https://github.com/google/tensorstore/pull/272#disc…
BrianMichell Jan 7, 2026
389d6a9
Resolve feedback `https://github.com/google/tensorstore/pull/272/chan…
BrianMichell Jan 7, 2026
101011b
Resolve `https://github.com/google/tensorstore/pull/272/changes#r2669…
BrianMichell Jan 12, 2026
62fd8f9
Resolve `https://github.com/google/tensorstore/pull/272/changes#r2669…
BrianMichell Jan 12, 2026
9735318
Resolve `https://github.com/google/tensorstore/pull/272/changes#r2669…
BrianMichell Jan 12, 2026
a0efd69
Resolve `https://github.com/google/tensorstore/pull/272#discussion_r2…
BrianMichell Jan 13, 2026
5775f0c
Resolve `https://github.com/google/tensorstore/pull/272#discussion_r2…
BrianMichell Jan 14, 2026
eb169a0
Resolve `https://github.com/google/tensorstore/pull/272#discussion_r2…
BrianMichell Jan 14, 2026
c3fb8c0
Resolve `https://github.com/google/tensorstore/pull/272#discussion_r2…
BrianMichell Jan 16, 2026
23bff85
Resolve `https://github.com/google/tensorstore/pull/272#discussion_r2…
BrianMichell Jan 16, 2026
7d187e5
Resolve `https://github.com/google/tensorstore/pull/272#discussion_r2…
BrianMichell Jan 16, 2026
5b90443
Resolve `https://github.com/google/tensorstore/pull/272#discussion_r2…
BrianMichell Jan 16, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions tensorstore/driver/zarr/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -205,6 +205,7 @@ tensorstore_cc_library(
hdrs = ["dtype.h"],
deps = [
"//tensorstore:data_type",
"//tensorstore:index",
"//tensorstore/internal/json_binding",
"//tensorstore/internal/json_binding:bindable",
"//tensorstore/util:endian",
Expand All @@ -213,6 +214,7 @@ tensorstore_cc_library(
"//tensorstore/util:result",
"//tensorstore/util:str_cat",
"@abseil-cpp//absl/base:core_headers",
"@abseil-cpp//absl/status",
"@nlohmann_json//:json",
],
)
Expand Down Expand Up @@ -434,6 +436,10 @@ tensorstore_cc_library(
"@abseil-cpp//absl/status",
"@abseil-cpp//absl/strings:cord",
"@nlohmann_json//:json",
"@riegeli//riegeli/bytes:cord_reader",
"@riegeli//riegeli/bytes:cord_writer",
"@riegeli//riegeli/bytes:read_all",
"@riegeli//riegeli/bytes:write",
],
alwayslink = 1,
)
Expand Down
103 changes: 90 additions & 13 deletions tensorstore/driver/zarr/driver.cc
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,10 @@
#include "absl/status/status.h"
#include "absl/strings/cord.h"
#include <nlohmann/json_fwd.hpp>
#include "riegeli/bytes/cord_reader.h"
#include "riegeli/bytes/cord_writer.h"
#include "riegeli/bytes/read_all.h"
#include "riegeli/bytes/write.h"
#include "tensorstore/array.h"
#include "tensorstore/array_storage_statistics.h"
#include "tensorstore/box.h"
Expand All @@ -55,6 +59,7 @@
#include "tensorstore/internal/chunk_grid_specification.h"
#include "tensorstore/internal/grid_storage_statistics.h"
#include "tensorstore/internal/intrusive_ptr.h"
#include "tensorstore/internal/riegeli/array_endian_codec.h"
#include "tensorstore/internal/json_binding/bindable.h"
#include "tensorstore/internal/json_binding/json_binding.h"
#include "tensorstore/internal/uri_utils.h"
Expand Down Expand Up @@ -137,7 +142,8 @@ absl::Status ZarrDriverSpec::ApplyOptions(SpecOptions&& options) {
}

Result<SpecRankAndFieldInfo> ZarrDriverSpec::GetSpecInfo() const {
return GetSpecRankAndFieldInfo(partial_metadata, selected_field, schema);
return GetSpecRankAndFieldInfo(partial_metadata, selected_field, schema,
open_as_void);
}

TENSORSTORE_DEFINE_JSON_DEFAULT_BINDER(
Expand Down Expand Up @@ -171,7 +177,16 @@ TENSORSTORE_DEFINE_JSON_DEFAULT_BINDER(
jb::Member("field", jb::Projection<&ZarrDriverSpec::selected_field>(
jb::DefaultValue<jb::kNeverIncludeDefaults>(
[](auto* obj) { *obj = std::string{}; }))),
jb::Member("open_as_void",
jb::Projection<&ZarrDriverSpec::open_as_void>(
jb::DefaultValue<jb::kNeverIncludeDefaults>(
[](auto* v) { *v = false; }))),
jb::Initialize([](auto* obj) {
// Validate that field and open_as_void are mutually exclusive
if (obj->open_as_void && !obj->selected_field.empty()) {
return absl::InvalidArgumentError(
"\"field\" and \"open_as_void\" are mutually exclusive");
}
TENSORSTORE_ASSIGN_OR_RETURN(auto info, obj->GetSpecInfo());
if (info.full_rank != dynamic_rank) {
TENSORSTORE_RETURN_IF_ERROR(
Expand Down Expand Up @@ -209,8 +224,11 @@ Result<SharedArray<const void>> ZarrDriverSpec::GetFillValue(

const auto& metadata = partial_metadata;
if (metadata.dtype && metadata.fill_value) {
TENSORSTORE_ASSIGN_OR_RETURN(
size_t field_index, GetFieldIndex(*metadata.dtype, selected_field));
size_t field_index = 0; // open_as_void has a single field.
if (!open_as_void) {
TENSORSTORE_ASSIGN_OR_RETURN(
field_index, GetFieldIndex(*metadata.dtype, selected_field));
}
fill_value = (*metadata.fill_value)[field_index];
}

Expand Down Expand Up @@ -356,6 +374,7 @@ absl::Status DataCache::GetBoundSpecData(
const auto& metadata = *static_cast<const ZarrMetadata*>(metadata_ptr);
spec.selected_field = EncodeSelectedField(component_index, metadata.dtype);
spec.metadata_key = metadata_key_;
spec.open_as_void = false;
auto& pm = spec.partial_metadata;
pm.rank = metadata.rank;
pm.zarr_format = metadata.zarr_format;
Expand All @@ -382,6 +401,36 @@ Result<ChunkLayout> DataCache::GetChunkLayoutFromMetadata(
}

std::string DataCache::GetBaseKvstorePath() { return key_prefix_; }

// VoidDataCache implementation
// Uses inherited DataCache constructor and encode/decode methods.
// The void metadata (with dtype containing only the void field) is created
// in GetDataCache and passed via the initializer, so standard encode/decode
// paths work correctly.

absl::Status VoidDataCache::ValidateMetadataCompatibility(
    const void* existing_metadata_ptr, const void* new_metadata_ptr) {
  // This cache was initialized with void metadata, so the existing side
  // already carries the synthesized void dtype.  The incoming metadata is
  // converted to its void form here so that both sides agree on dtype before
  // the base-class check (which compares all fields except shape via
  // IsMetadataCompatible) runs.
  assert(new_metadata_ptr);
  const auto* incoming = static_cast<const ZarrMetadata*>(new_metadata_ptr);
  // Bind the converted metadata to a named local so that it stays alive for
  // the duration of the base-class call.
  const auto& incoming_as_void = incoming->GetVoidMetadata();
  return DataCache::ValidateMetadataCompatibility(existing_metadata_ptr,
                                                  incoming_as_void.get());
}

absl::Status VoidDataCache::GetBoundSpecData(
    internal_kvs_backed_chunk_driver::KvsDriverSpec& spec_base,
    const void* metadata_ptr, size_t component_index) {
  // Let the base class fill in the spec first.  It writes
  // `open_as_void = false`, so the flag must be set only after it returns.
  TENSORSTORE_RETURN_IF_ERROR(
      DataCache::GetBoundSpecData(spec_base, metadata_ptr, component_index));
  // A spec bound from a void cache must round-trip as a void open.
  static_cast<ZarrDriverSpec&>(spec_base).open_as_void = true;
  return absl::OkStatus();
}

// Returns the codec spec for this driver, obtained by passing the driver's
// current metadata to `internal_zarr::GetCodecSpecFromMetadata`.
Result<CodecSpec> ZarrDriver::GetCodec() {
return internal_zarr::GetCodecSpecFromMetadata(metadata());
}
Expand Down Expand Up @@ -416,6 +465,10 @@ Result<std::string> ZarrDriverSpec::ToUrl() const {
return absl::InvalidArgumentError(
"zarr2 URL syntax not supported with selected_field specified");
}
if (open_as_void) {
return absl::InvalidArgumentError(
"zarr2 URL syntax not supported with open_as_void specified");
}
TENSORSTORE_ASSIGN_OR_RETURN(auto base_url, store.ToUrl());
return tensorstore::StrCat(base_url, "|", kUrlScheme, ":");
}
Expand Down Expand Up @@ -451,7 +504,7 @@ Future<ArrayStorageStatistics> ZarrDriver::GetStorageStatistics(
/*chunk_shape=*/grid.chunk_shape,
/*shape=*/metadata->shape,
/*dimension_separator=*/
GetDimensionSeparatorChar(cache->dimension_separator_),
GetDimensionSeparatorChar(cache->dimension_separator()),
staleness_bound, request.options));
}),
std::move(promise), std::move(metadata_future));
Expand Down Expand Up @@ -483,7 +536,8 @@ class ZarrDriver::OpenState : public ZarrDriver::OpenStateBase {
TENSORSTORE_ASSIGN_OR_RETURN(
auto metadata,
internal_zarr::GetNewMetadata(spec().partial_metadata,
spec().selected_field, spec().schema),
spec().selected_field, spec().schema,
spec().open_as_void),
tensorstore::MaybeAnnotateStatus(
_, "Cannot create using specified \"metadata\" and schema"));
return metadata;
Expand All @@ -496,29 +550,52 @@ class ZarrDriver::OpenState : public ZarrDriver::OpenStateBase {
internal::EncodeCacheKey(
&result, spec.store.path,
GetDimensionSeparator(spec.partial_metadata, zarr_metadata),
zarr_metadata, spec.metadata_key);
zarr_metadata, spec.metadata_key,
spec.open_as_void ? "void" : "normal");
return result;
}

std::unique_ptr<internal_kvs_backed_chunk_driver::DataCacheBase> GetDataCache(
DataCache::Initializer&& initializer) override {
const auto& metadata =
const auto& original_metadata =
*static_cast<const ZarrMetadata*>(initializer.metadata.get());
auto dim_sep = GetDimensionSeparator(spec().partial_metadata, original_metadata);
if (spec().open_as_void) {
// Use the cached void metadata from the original. The void metadata has
// dtype.fields containing only the void field, allowing standard
// encode/decode to work.
initializer.metadata = original_metadata.GetVoidMetadata();
return std::make_unique<VoidDataCache>(
std::move(initializer), spec().store.path, dim_sep,
spec().metadata_key);
}
return std::make_unique<DataCache>(
std::move(initializer), spec().store.path,
GetDimensionSeparator(spec().partial_metadata, metadata),
std::move(initializer), spec().store.path, dim_sep,
spec().metadata_key);
}

Result<size_t> GetComponentIndex(const void* metadata_ptr,
OpenMode open_mode) override {
const auto& metadata = *static_cast<const ZarrMetadata*>(metadata_ptr);
// Validate partial_metadata against regular metadata
TENSORSTORE_RETURN_IF_ERROR(
ValidateMetadata(metadata, spec().partial_metadata));
TENSORSTORE_ASSIGN_OR_RETURN(
auto field_index, GetFieldIndex(metadata.dtype, spec().selected_field));
TENSORSTORE_RETURN_IF_ERROR(
ValidateMetadataSchema(metadata, field_index, spec().schema));
// For void access, use component index 0 since we create a special
// component for raw byte access
size_t field_index;
if (spec().open_as_void) {
field_index = 0;
// Validate schema against void metadata, which has the synthesized void
// field that matches how the data will actually be accessed
TENSORSTORE_RETURN_IF_ERROR(ValidateMetadataSchema(
*metadata.GetVoidMetadata(), field_index, spec().schema));
} else {
TENSORSTORE_ASSIGN_OR_RETURN(
field_index,
GetFieldIndex(metadata.dtype, spec().selected_field));
TENSORSTORE_RETURN_IF_ERROR(
ValidateMetadataSchema(metadata, field_index, spec().schema));
}
return field_index;
}
};
Expand Down
43 changes: 42 additions & 1 deletion tensorstore/driver/zarr/driver_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,16 +17,31 @@

#include <stddef.h>

#include <memory>
#include <string>
#include <string_view>

#include "absl/container/inlined_vector.h"
#include "absl/status/status.h"
#include "absl/strings/cord.h"
#include "tensorstore/array.h"
#include "tensorstore/array_storage_statistics.h"
#include "tensorstore/box.h"
#include "tensorstore/chunk_layout.h"
#include "tensorstore/codec_spec.h"
#include "tensorstore/driver/kvs_backed_chunk_driver.h"
#include "tensorstore/driver/zarr/metadata.h"
#include "tensorstore/driver/zarr/spec.h"
#include "tensorstore/index.h"
#include "tensorstore/index_space/index_domain.h"
#include "tensorstore/index_space/index_transform.h"
#include "tensorstore/internal/cache/chunk_cache.h"
#include "tensorstore/internal/chunk_grid_specification.h"
#include "tensorstore/internal/json_binding/bindable.h"
#include "tensorstore/util/dimension_set.h"
#include "tensorstore/util/future.h"
#include "tensorstore/util/garbage_collection/fwd.h"
#include "tensorstore/util/result.h"
#include "tensorstore/util/span.h"

namespace tensorstore {
Expand Down Expand Up @@ -63,10 +78,11 @@ class ZarrDriverSpec
ZarrPartialMetadata partial_metadata;
SelectedField selected_field;
std::string metadata_key;
bool open_as_void = false;

constexpr static auto ApplyMembers = [](auto& x, auto f) {
return f(internal::BaseCast<KvsDriverSpec>(x), x.partial_metadata,
x.selected_field, x.metadata_key);
x.selected_field, x.metadata_key, x.open_as_void);
};
absl::Status ApplyOptions(SpecOptions&& options) override;

Expand Down Expand Up @@ -137,11 +153,36 @@ class DataCache : public internal_kvs_backed_chunk_driver::DataCache {

std::string GetBaseKvstorePath() override;

/// Read-only accessor for `dimension_separator_`, which is declared in the
/// protected section of this class.  Returns the value by copy.
DimensionSeparator dimension_separator() const { return dimension_separator_; }

protected:
std::string key_prefix_;
DimensionSeparator dimension_separator_;
std::string metadata_key_;
};

/// Derived DataCache for open_as_void mode that provides raw byte access.
///
/// The cache is constructed with void metadata (obtained in driver.cc via
/// `ZarrMetadata::GetVoidMetadata()`), whose dtype.fields contains only the
/// void field, so the inherited encode/decode methods work unchanged for raw
/// byte access.  Only two virtual methods are overridden:
/// `ValidateMetadataCompatibility` and `GetBoundSpecData`.
class VoidDataCache : public DataCache {
public:
// Reuse the DataCache constructors; this class declares no members of its
// own beyond the two overrides below.
using DataCache::DataCache;

/// Converts the new metadata to void metadata and then delegates to
/// `DataCache::ValidateMetadataCompatibility`.
///
/// This ensures both existing (already void) and new metadata have the
/// same synthesized void dtype, allowing IsMetadataCompatible to work.
///
/// \param existing_metadata_ptr Metadata the cache currently holds; passed
///     through to the base class unchanged.
/// \param new_metadata_ptr Non-null pointer to a `ZarrMetadata`; converted
///     to its void form before comparison.
absl::Status ValidateMetadataCompatibility(
const void* existing_metadata_ptr,
const void* new_metadata_ptr) override;

/// Populates `spec_base` via `DataCache::GetBoundSpecData` and, if that
/// succeeds, sets `open_as_void = true` on the resulting `ZarrDriverSpec`.
/// Any error from the base-class call is returned.
absl::Status GetBoundSpecData(
internal_kvs_backed_chunk_driver::KvsDriverSpec& spec_base,
const void* metadata_ptr, size_t component_index) override;
};

class ZarrDriver;
using ZarrDriverBase = internal_kvs_backed_chunk_driver::RegisteredKvsDriver<
ZarrDriver, ZarrDriverSpec, DataCache,
Expand Down
Loading