@@ -22,7 +22,8 @@ use zarrs::array::codec::{
2222 ArrayPartialDecoderTraits , ArrayToBytesCodecTraits , CodecOptions , CodecOptionsBuilder ,
2323} ;
2424use zarrs:: array:: {
25- copy_fill_value_into, update_array_bytes, ArrayBytes , ArraySize , CodecChain , FillValue ,
25+ copy_fill_value_into, update_array_bytes, ArrayBytes , ArrayBytesFixedDisjointView , ArraySize ,
26+ CodecChain , FillValue ,
2627} ;
2728use zarrs:: array_subset:: ArraySubset ;
2829use zarrs:: metadata:: v3:: MetadataV3 ;
@@ -114,7 +115,7 @@ impl CodecPipelineImpl {
114115 codec_options : & CodecOptions ,
115116 ) -> PyResult < ( ) > {
116117 let array_shape = item. representation ( ) . shape_u64 ( ) ;
117- if !chunk_subset. inbounds ( & array_shape) {
118+ if !chunk_subset. inbounds_shape ( & array_shape) {
118119 return Err ( PyErr :: new :: < PyValueError , _ > ( format ! (
119120 "chunk subset ({chunk_subset}) is out of bounds for array shape ({array_shape:?})"
120121 ) ) ) ;
@@ -134,20 +135,14 @@ impl CodecPipelineImpl {
134135 let chunk_bytes_old = self . retrieve_chunk_bytes ( item, codec_chain, codec_options) ?;
135136
136137 // Update the chunk
137- let chunk_bytes_new = unsafe {
138- // SAFETY:
139- // - chunk_bytes_old is compatible with the chunk shape and data type size (validated on decoding)
140- // - chunk_subset is compatible with chunk_subset_bytes and the data type size (validated above)
141- // - chunk_subset is within the bounds of the chunk shape (validated above)
142- // - output bytes and output subset bytes are compatible (same data type)
143- update_array_bytes (
144- chunk_bytes_old,
145- & array_shape,
146- chunk_subset,
147- & chunk_subset_bytes,
148- data_type_size,
149- )
150- } ;
138+ let chunk_bytes_new = update_array_bytes (
139+ chunk_bytes_old,
140+ & array_shape,
141+ chunk_subset,
142+ & chunk_subset_bytes,
143+ data_type_size,
144+ )
145+ . map_py_err :: < PyRuntimeError > ( ) ?;
151146
152147 // Store the updated chunk
153148 self . store_chunk_bytes ( item, codec_chain, chunk_bytes_new, codec_options)
@@ -279,8 +274,8 @@ impl CodecPipelineImpl {
279274 . unique_by ( |item| item. key ( ) )
280275 . collect :: < Vec < _ > > ( ) ;
281276 let mut partial_decoder_cache: HashMap < StoreKey , Arc < dyn ArrayPartialDecoderTraits > > =
282- HashMap :: new ( ) . into ( ) ;
283- if partial_chunk_descriptions. len ( ) > 0 {
277+ HashMap :: new ( ) ;
278+ if ! partial_chunk_descriptions. is_empty ( ) {
284279 let key_decoder_pairs = iter_concurrent_limit ! (
285280 chunk_concurrent_limit,
286281 partial_chunk_descriptions,
@@ -308,59 +303,61 @@ impl CodecPipelineImpl {
308303 // For variable length data types, need a codepath with non `_into` methods.
309304 // Collect all the subsets and copy into value on the Python side?
310305 let update_chunk_subset = |item : chunk_item:: WithSubset | {
306+ let chunk_item:: WithSubset {
307+ item,
308+ subset,
309+ chunk_subset,
310+ } = item;
311+ let mut output_view = unsafe {
312+ // TODO: Is the following correct?
313+ // can we guarantee that when this function is called from Python with arbitrary arguments?
314+ // SAFETY: chunks represent disjoint array subsets
315+ ArrayBytesFixedDisjointView :: new (
316+ output,
317+ // TODO: why is data_type in `item`, it should be derived from `output`, no?
318+ item. representation ( )
319+ . data_type ( )
320+ . fixed_size ( )
321+ . ok_or ( "variable length data type not supported" )
322+ . map_py_err :: < PyTypeError > ( ) ?,
323+ & output_shape,
324+ subset,
325+ )
326+ . map_py_err :: < PyRuntimeError > ( ) ?
327+ } ;
328+
311329 // See zarrs::array::Array::retrieve_chunk_subset_into
312- if is_whole_chunk ( & item) {
330+ if chunk_subset. start ( ) . iter ( ) . all ( |& o| o == 0 )
331+ && chunk_subset. shape ( ) == item. representation ( ) . shape_u64 ( )
332+ {
313333 // See zarrs::array::Array::retrieve_chunk_into
314334 if let Some ( chunk_encoded) = self . stores . get ( & item) ? {
315335 // Decode the encoded data into the output buffer
316336 let chunk_encoded: Vec < u8 > = chunk_encoded. into ( ) ;
317- unsafe {
318- // SAFETY:
319- // - output is an array with output_shape elements of the item.representation data type,
320- // - item.subset is within the bounds of output_shape.
321- self . codec_chain . decode_into (
322- Cow :: Owned ( chunk_encoded) ,
323- item. representation ( ) ,
324- & output,
325- & output_shape,
326- & item. subset ,
327- & codec_options,
328- )
329- }
337+ self . codec_chain . decode_into (
338+ Cow :: Owned ( chunk_encoded) ,
339+ item. representation ( ) ,
340+ & mut output_view,
341+ & codec_options,
342+ )
330343 } else {
331344 // The chunk is missing, write the fill value
332- unsafe {
333- // SAFETY:
334- // - data type and fill value are confirmed to be compatible when the ChunkRepresentation is created,
335- // - output is an array with output_shape elements of the item.representation data type,
336- // - item.subset is within the bounds of output_shape.
337- copy_fill_value_into (
338- item. representation ( ) . data_type ( ) ,
339- item. representation ( ) . fill_value ( ) ,
340- & output,
341- & output_shape,
342- & item. subset ,
343- )
344- }
345+ copy_fill_value_into (
346+ item. representation ( ) . data_type ( ) ,
347+ item. representation ( ) . fill_value ( ) ,
348+ & mut output_view,
349+ )
345350 }
346351 } else {
347352 let key = item. key ( ) ;
348353 let partial_decoder = partial_decoder_cache. get ( key) . ok_or_else ( || {
349354 PyRuntimeError :: new_err ( format ! ( "Partial decoder not found for key: {key}" ) )
350355 } ) ?;
351- unsafe {
352- // SAFETY:
353- // - output is an array with output_shape elements of the item.representation data type,
354- // - item.subset is within the bounds of output_shape.
355- // - item.chunk_subset has the same number of elements as item.subset.
356- partial_decoder. partial_decode_into (
357- & item. chunk_subset ,
358- & output,
359- & output_shape,
360- & item. subset ,
361- & codec_options,
362- )
363- }
356+ partial_decoder. partial_decode_into (
357+ & chunk_subset,
358+ & mut output_view,
359+ & codec_options,
360+ )
364361 }
365362 . map_py_err :: < PyValueError > ( )
366363 } ;
0 commit comments