Skip to content

Commit 0bb1ed5

Browse files
Update to zarrs 0.20 (#87)
Co-authored-by: Lachlan Deakin <[email protected]>
1 parent b4fb0b7 commit 0bb1ed5

6 files changed

Lines changed: 73 additions & 73 deletions

File tree

.pre-commit-config.yaml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -21,7 +21,7 @@ repos:
2121
language: system
2222
pass_filenames: false
2323
- repo: https://github.com/astral-sh/ruff-pre-commit
24-
rev: v0.7.2
24+
rev: v0.11.9
2525
hooks:
2626
- id: ruff
2727
args: ["--fix"]

Cargo.toml

Lines changed: 3 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -10,7 +10,7 @@ crate-type = ["cdylib", "rlib"]
1010

1111
[dependencies]
1212
pyo3 = { version = "0.23.2", features = ["abi3-py311"] }
13-
zarrs = { version = "0.19.0", features = ["async"] }
13+
zarrs = { version = "0.20.0", features = ["async", "zlib", "pcodec", "bz2"] }
1414
rayon_iter_concurrent_limit = "0.2.0"
1515
rayon = "1.10.0"
1616
# fix for https://stackoverflow.com/questions/76593417/package-openssl-was-not-found-in-the-pkg-config-search-path
@@ -19,10 +19,9 @@ numpy = "0.23.0"
1919
unsafe_cell_slice = "0.2.0"
2020
serde_json = "1.0.128"
2121
pyo3-stub-gen = "0.7.0"
22-
opendal = { version = "0.51.0", features = ["services-http"] }
22+
opendal = { version = "0.53.0", features = ["services-http"] }
2323
tokio = { version = "1.41.1", features = ["rt-multi-thread"] }
24-
zarrs_opendal = "0.5.0"
25-
zarrs_metadata = "0.3.7" # require recent zarr-python compatibility fixes (remove with zarrs 0.20)
24+
zarrs_opendal = "0.7.2"
2625
itertools = "0.9.0"
2726

2827
[profile.release]

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -100,7 +100,7 @@ select = [
100100
"W", # Warning detected by Pycodestyle
101101
"UP", # pyupgrade
102102
"I", # isort
103-
"TCH", # manage type checking blocks
103+
"TC", # manage type checking blocks
104104
"TID251", # Banned imports
105105
"ICN", # Follow import conventions
106106
"PTH", # Pathlib instead of os.path

src/chunk_item.rs

Lines changed: 6 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -10,7 +10,7 @@ use pyo3_stub_gen::derive::{gen_stub_pyclass, gen_stub_pymethods};
1010
use zarrs::{
1111
array::{ChunkRepresentation, DataType, FillValue},
1212
array_subset::ArraySubset,
13-
metadata::v3::{array::data_type::DataTypeMetadataV3, MetadataV3},
13+
metadata::v3::MetadataV3,
1414
storage::StoreKey,
1515
};
1616

@@ -146,9 +146,11 @@ fn get_chunk_representation(
146146
fill_value: Vec<u8>,
147147
) -> PyResult<ChunkRepresentation> {
148148
// Get the chunk representation
149-
let data_type =
150-
DataType::from_metadata(&DataTypeMetadataV3::from_metadata(&MetadataV3::new(dtype)))
151-
.map_py_err::<PyRuntimeError>()?;
149+
let data_type = DataType::from_metadata(
150+
&MetadataV3::new(dtype),
151+
zarrs::config::global_config().data_type_aliases_v3(),
152+
)
153+
.map_py_err::<PyRuntimeError>()?;
152154
let chunk_shape = chunk_shape
153155
.into_iter()
154156
.map(|x| NonZeroU64::new(x).expect("chunk shapes should always be non-zero"))

src/lib.rs

Lines changed: 55 additions & 58 deletions
Original file line number | Diff line number | Diff line change
@@ -22,7 +22,8 @@ use zarrs::array::codec::{
2222
ArrayPartialDecoderTraits, ArrayToBytesCodecTraits, CodecOptions, CodecOptionsBuilder,
2323
};
2424
use zarrs::array::{
25-
copy_fill_value_into, update_array_bytes, ArrayBytes, ArraySize, CodecChain, FillValue,
25+
copy_fill_value_into, update_array_bytes, ArrayBytes, ArrayBytesFixedDisjointView, ArraySize,
26+
CodecChain, FillValue,
2627
};
2728
use zarrs::array_subset::ArraySubset;
2829
use zarrs::metadata::v3::MetadataV3;
@@ -114,7 +115,7 @@ impl CodecPipelineImpl {
114115
codec_options: &CodecOptions,
115116
) -> PyResult<()> {
116117
let array_shape = item.representation().shape_u64();
117-
if !chunk_subset.inbounds(&array_shape) {
118+
if !chunk_subset.inbounds_shape(&array_shape) {
118119
return Err(PyErr::new::<PyValueError, _>(format!(
119120
"chunk subset ({chunk_subset}) is out of bounds for array shape ({array_shape:?})"
120121
)));
@@ -134,20 +135,14 @@ impl CodecPipelineImpl {
134135
let chunk_bytes_old = self.retrieve_chunk_bytes(item, codec_chain, codec_options)?;
135136

136137
// Update the chunk
137-
let chunk_bytes_new = unsafe {
138-
// SAFETY:
139-
// - chunk_bytes_old is compatible with the chunk shape and data type size (validated on decoding)
140-
// - chunk_subset is compatible with chunk_subset_bytes and the data type size (validated above)
141-
// - chunk_subset is within the bounds of the chunk shape (validated above)
142-
// - output bytes and output subset bytes are compatible (same data type)
143-
update_array_bytes(
144-
chunk_bytes_old,
145-
&array_shape,
146-
chunk_subset,
147-
&chunk_subset_bytes,
148-
data_type_size,
149-
)
150-
};
138+
let chunk_bytes_new = update_array_bytes(
139+
chunk_bytes_old,
140+
&array_shape,
141+
chunk_subset,
142+
&chunk_subset_bytes,
143+
data_type_size,
144+
)
145+
.map_py_err::<PyRuntimeError>()?;
151146

152147
// Store the updated chunk
153148
self.store_chunk_bytes(item, codec_chain, chunk_bytes_new, codec_options)
@@ -279,8 +274,8 @@ impl CodecPipelineImpl {
279274
.unique_by(|item| item.key())
280275
.collect::<Vec<_>>();
281276
let mut partial_decoder_cache: HashMap<StoreKey, Arc<dyn ArrayPartialDecoderTraits>> =
282-
HashMap::new().into();
283-
if partial_chunk_descriptions.len() > 0 {
277+
HashMap::new();
278+
if !partial_chunk_descriptions.is_empty() {
284279
let key_decoder_pairs = iter_concurrent_limit!(
285280
chunk_concurrent_limit,
286281
partial_chunk_descriptions,
@@ -308,59 +303,61 @@ impl CodecPipelineImpl {
308303
// For variable length data types, need a codepath with non `_into` methods.
309304
// Collect all the subsets and copy into value on the Python side?
310305
let update_chunk_subset = |item: chunk_item::WithSubset| {
306+
let chunk_item::WithSubset {
307+
item,
308+
subset,
309+
chunk_subset,
310+
} = item;
311+
let mut output_view = unsafe {
312+
// TODO: Is the following correct?
313+
// can we guarantee that when this function is called from Python with arbitrary arguments?
314+
// SAFETY: chunks represent disjoint array subsets
315+
ArrayBytesFixedDisjointView::new(
316+
output,
317+
// TODO: why is data_type in `item`, it should be derived from `output`, no?
318+
item.representation()
319+
.data_type()
320+
.fixed_size()
321+
.ok_or("variable length data type not supported")
322+
.map_py_err::<PyTypeError>()?,
323+
&output_shape,
324+
subset,
325+
)
326+
.map_py_err::<PyRuntimeError>()?
327+
};
328+
311329
// See zarrs::array::Array::retrieve_chunk_subset_into
312-
if is_whole_chunk(&item) {
330+
if chunk_subset.start().iter().all(|&o| o == 0)
331+
&& chunk_subset.shape() == item.representation().shape_u64()
332+
{
313333
// See zarrs::array::Array::retrieve_chunk_into
314334
if let Some(chunk_encoded) = self.stores.get(&item)? {
315335
// Decode the encoded data into the output buffer
316336
let chunk_encoded: Vec<u8> = chunk_encoded.into();
317-
unsafe {
318-
// SAFETY:
319-
// - output is an array with output_shape elements of the item.representation data type,
320-
// - item.subset is within the bounds of output_shape.
321-
self.codec_chain.decode_into(
322-
Cow::Owned(chunk_encoded),
323-
item.representation(),
324-
&output,
325-
&output_shape,
326-
&item.subset,
327-
&codec_options,
328-
)
329-
}
337+
self.codec_chain.decode_into(
338+
Cow::Owned(chunk_encoded),
339+
item.representation(),
340+
&mut output_view,
341+
&codec_options,
342+
)
330343
} else {
331344
// The chunk is missing, write the fill value
332-
unsafe {
333-
// SAFETY:
334-
// - data type and fill value are confirmed to be compatible when the ChunkRepresentation is created,
335-
// - output is an array with output_shape elements of the item.representation data type,
336-
// - item.subset is within the bounds of output_shape.
337-
copy_fill_value_into(
338-
item.representation().data_type(),
339-
item.representation().fill_value(),
340-
&output,
341-
&output_shape,
342-
&item.subset,
343-
)
344-
}
345+
copy_fill_value_into(
346+
item.representation().data_type(),
347+
item.representation().fill_value(),
348+
&mut output_view,
349+
)
345350
}
346351
} else {
347352
let key = item.key();
348353
let partial_decoder = partial_decoder_cache.get(key).ok_or_else(|| {
349354
PyRuntimeError::new_err(format!("Partial decoder not found for key: {key}"))
350355
})?;
351-
unsafe {
352-
// SAFETY:
353-
// - output is an array with output_shape elements of the item.representation data type,
354-
// - item.subset is within the bounds of output_shape.
355-
// - item.chunk_subset has the same number of elements as item.subset.
356-
partial_decoder.partial_decode_into(
357-
&item.chunk_subset,
358-
&output,
359-
&output_shape,
360-
&item.subset,
361-
&codec_options,
362-
)
363-
}
356+
partial_decoder.partial_decode_into(
357+
&chunk_subset,
358+
&mut output_view,
359+
&codec_options,
360+
)
364361
}
365362
.map_py_err::<PyValueError>()
366363
};

src/metadata_v2.rs

Lines changed: 7 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,7 @@
11
use pyo3::{exceptions::PyRuntimeError, pyfunction, PyErr, PyResult};
22
use zarrs::metadata::{
3-
v2::{array::ArrayMetadataV2Order, MetadataV2},
4-
v3::array::data_type::DataTypeMetadataV3,
3+
v2::{ArrayMetadataV2Order, MetadataV2},
4+
v3::MetadataV3,
55
};
66

77
#[pyfunction]
@@ -35,13 +35,15 @@ pub fn codec_metadata_v2_to_v3(
3535

3636
// FIXME: The array order, dimensionality, data type, and endianness are needed to exhaustively support all Zarr V2 data that zarrs can handle.
3737
// However, CodecPipeline.from_codecs does not supply this information, and CodecPipeline.evolve_from_array_spec is seemingly never called.
38-
let metadata = zarrs::metadata::v2_to_v3::codec_metadata_v2_to_v3(
38+
let metadata = zarrs::metadata_ext::v2_to_v3::codec_metadata_v2_to_v3(
3939
ArrayMetadataV2Order::C,
40-
0, // unused with C order
41-
&DataTypeMetadataV3::Bool, // FIXME
40+
0, // unused with C order
41+
&MetadataV3::new("bool"), // FIXME
4242
None,
4343
&filters,
4444
&compressor,
45+
zarrs::config::global_config().codec_aliases_v2(),
46+
zarrs::config::global_config().codec_aliases_v3(),
4547
)
4648
.map_err(|err| {
4749
// TODO: More informative error messages from zarrs for ArrayMetadataV2ToV3ConversionError

0 commit comments

Comments
 (0)