1616def apply_ragged (
1717 func : callable ,
1818 arrays : list [np .ndarray ],
19- rowsize : list [int ],
19+ count : list [int ],
2020 * args : tuple ,
2121 executor : futures .Executor = futures .ThreadPoolExecutor (max_workers = None ),
2222 ** kwargs : dict ,
2323) -> Union [tuple [np .ndarray ], np .ndarray ]:
2424 """Apply a function to a ragged array.
2525
2626 The function ``func`` will be applied to each contiguous row of ``arrays`` as
27- indicated by row sizes ``rowsize ``. The output of ``func`` will be
27+ indicated by row sizes ``count ``. The output of ``func`` will be
2828 concatenated into a single ragged array.
2929
3030 By default this function uses ``concurrent.futures.ThreadPoolExecutor`` to
@@ -41,7 +41,7 @@ def apply_ragged(
4141 Function to apply to each row of each ragged array in ``arrays``.
4242 arrays : list[np.ndarray] or np.ndarray
4343 An array or a list of arrays to apply ``func`` to.
44- rowsize : list
44+ count : list
4545 List of integers specifying the number of data points in each row.
4646 *args : tuple
4747 Additional arguments to pass to ``func``.
@@ -64,31 +64,31 @@ def apply_ragged(
6464 multiple particles, the coordinates of which are found in the ragged arrays x, y, and t
6565 that share row sizes 2, 3, and 4:
6666
67- >>> rowsize = [2, 3, 4]
67+ >>> count = [2, 3, 4]
6868 >>> x = np.array([1, 2, 10, 12, 14, 30, 33, 36, 39])
6969 >>> y = np.array([0, 1, 2, 3, 4, 5, 6, 7, 8])
7070 >>> t = np.array([1, 2, 1, 2, 3, 1, 2, 3, 4])
71- >>> u1, v1 = apply_ragged(velocity_from_position, [x, y, t], rowsize , coord_system="cartesian")
71+ >>> u1, v1 = apply_ragged(velocity_from_position, [x, y, t], count , coord_system="cartesian")
7272 (array([1., 1., 2., 2., 2., 3., 3., 3., 3.]),
7373 array([1., 1., 1., 1., 1., 1., 1., 1., 1.]))
7474
7575 Raises
7676 ------
7777 ValueError
78- If the sum of ``rowsize `` does not equal the length of ``arrays``.
78+ If the sum of ``count `` does not equal the length of ``arrays``.
7979 IndexError
8080 If empty ``arrays``.
8181 """
8282 # make sure the arrays is iterable
8383 if type (arrays ) not in [list , tuple ]:
8484 arrays = [arrays ]
85- # validate rowsize
85+ # validate count
8686 for arr in arrays :
87- if not sum (rowsize ) == len (arr ):
88- raise ValueError ("The sum of rowsize must equal the length of arr." )
87+ if not sum (count ) == len (arr ):
88+ raise ValueError ("The sum of count must equal the length of arr." )
8989
9090 # split the array(s) into trajectories
91- arrays = [unpack_ragged (arr , rowsize ) for arr in arrays ]
91+ arrays = [unpack_ragged (arr , count ) for arr in arrays ]
9292 iter = [[arrays [i ][j ] for i in range (len (arrays ))] for j in range (len (arrays [0 ]))]
9393
9494 # parallel execution
@@ -180,8 +180,8 @@ def chunk(
180180 notice that you must pass the array to chunk as an array-like, not a list:
181181
182182 >>> x = np.array([1, 2, 3, 4, 5])
183- >>> rowsize = [2, 1, 2]
184- >>> apply_ragged(chunk, x, rowsize , 2)
183+ >>> count = [2, 1, 2]
184+ >>> apply_ragged(chunk, x, count , 2)
185185 array([[1, 2],
186186 [4, 5]])
187187
@@ -217,18 +217,18 @@ def chunk(
217217
218218def prune (
219219 ragged : Union [list , np .ndarray , pd .Series , xr .DataArray ],
220- rowsize : Union [list , np .ndarray , pd .Series , xr .DataArray ],
221- min_rowsize : float ,
220+ count : Union [list , np .ndarray , pd .Series , xr .DataArray ],
221+ min_count : float ,
222222) -> Tuple [np .ndarray , np .ndarray ]:
223223 """Within a ragged array, removes arrays less than a specified row size.
224224
225225 Parameters
226226 ----------
227227 ragged : np.ndarray or pd.Series or xr.DataArray
228228 A ragged array.
229- rowsize : list or np.ndarray[int] or pd.Series or xr.DataArray[int]
229+ count : list or np.ndarray[int] or pd.Series or xr.DataArray[int]
230230 The size of each row in the input ragged array.
231- min_rowsize :
231+ min_count :
232232 The minimum row size that will be kept.
233233
234234 Returns
@@ -244,7 +244,7 @@ def prune(
244244 Raises
245245 ------
246246 ValueError
247- If the sum of ``rowsize `` does not equal the length of ``arrays``.
247+ If the sum of ``count `` does not equal the length of ``arrays``.
248248 IndexError
249249 If empty ``ragged``.
250250
@@ -256,17 +256,17 @@ def prune(
256256 ragged = apply_ragged (
257257 lambda x , min_len : x if len (x ) >= min_len else np .empty (0 , dtype = x .dtype ),
258258 np .array (ragged ),
259- rowsize ,
260- min_len = min_rowsize ,
259+ count ,
260+ min_len = min_count ,
261261 )
262- rowsize = apply_ragged (
262+ count = apply_ragged (
263263 lambda x , min_len : x if x >= min_len else np .empty (0 , dtype = x .dtype ),
264- np .array (rowsize ),
265- np .ones_like (rowsize ),
266- min_len = min_rowsize ,
264+ np .array (count ),
265+ np .ones_like (count ),
266+ min_len = min_count ,
267267 )
268268
269- return ragged , rowsize
269+ return ragged , count
270270
271271
272272def regular_to_ragged (
@@ -313,14 +313,14 @@ def regular_to_ragged(
313313
314314def ragged_to_regular (
315315 ragged : Union [np .ndarray , pd .Series , xr .DataArray ],
316- rowsize : Union [list , np .ndarray , pd .Series , xr .DataArray ],
316+ count : Union [list , np .ndarray , pd .Series , xr .DataArray ],
317317 fill_value : float = np .nan ,
318318) -> np .ndarray :
319319 """Convert a ragged array to a two-dimensional array such that each contiguous segment
320320 of a ragged array is a row in the two-dimensional array. Each row of the two-dimensional
321321 array is padded with NaNs as needed. The length of the first dimension of the output
322- array is the length of ``rowsize ``. The length of the second dimension is the maximum
323- element of ``rowsize ``.
322+ array is the length of ``count ``. The length of the second dimension is the maximum
323+ element of ``count ``.
324324
325325 Note: Although this function accepts parameters of type ``xarray.DataArray``,
326326 passing NumPy arrays is recommended for performance reasons.
@@ -329,7 +329,7 @@ def ragged_to_regular(
329329 ----------
330330 ragged : np.ndarray or pd.Series or xr.DataArray
331331 A ragged array.
332- rowsize : list or np.ndarray[int] or pd.Series or xr.DataArray[int]
332+ count : list or np.ndarray[int] or pd.Series or xr.DataArray[int]
333333 The size of each row in the ragged array.
334334 fill_value : float, optional
335335 Fill value to use for the trailing elements of each row of the resulting
@@ -359,17 +359,17 @@ def ragged_to_regular(
359359 --------
360360 :func:`regular_to_ragged`
361361 """
362- res = fill_value * np .ones ((len (rowsize ), int (max (rowsize ))), dtype = ragged .dtype )
363- unpacked = unpack_ragged (ragged , rowsize )
364- for n in range (len (rowsize )):
365- res [n , : int (rowsize [n ])] = unpacked [n ]
362+ res = fill_value * np .ones ((len (count ), int (max (count ))), dtype = ragged .dtype )
363+ unpacked = unpack_ragged (ragged , count )
364+ for n in range (len (count )):
365+ res [n , : int (count [n ])] = unpacked [n ]
366366 return res
367367
368368
369369def segment (
370370 x : np .ndarray ,
371371 tolerance : Union [float , np .timedelta64 , timedelta , pd .Timedelta ],
372- rowsize : np .ndarray [int ] = None ,
372+ count : np .ndarray [int ] = None ,
373373) -> np .ndarray [int ]:
374374 """Divide an array into segments based on a tolerance value.
375375
@@ -380,7 +380,7 @@ def segment(
380380 tolerance : float, np.timedelta64, timedelta, pd.Timedelta
381381 The maximum signed difference between consecutive points in a segment.
382382 The array x will be segmented wherever differences exceed the tolerance.
383- rowsize : np.ndarray[int], optional
383+ count : np.ndarray[int], optional
384384 The size of rows if x is originally a ragged array. If present, x will be
385385 divided both by gaps that exceed the tolerance, and by the original rows
386386 of the ragged array.
@@ -401,12 +401,12 @@ def segment(
401401 array([1, 3, 2, 4, 1])
402402
403403 If the array is already previously segmented (e.g. multiple rows in
404- a ragged array), then the ``rowsize `` argument can be used to preserve
404+ a ragged array), then the ``count `` argument can be used to preserve
405405 the original segments:
406406
407407 >>> x = [0, 1, 1, 1, 2, 2, 3, 3, 3, 3, 4]
408- >>> rowsize = [3, 2, 6]
409- >>> segment(x, 0.5, rowsize )
408+ >>> count = [3, 2, 6]
409+ >>> segment(x, 0.5, count )
410410 array([1, 2, 1, 1, 1, 4, 1])
411411
412412 The tolerance can also be negative. In this case, the input array is
@@ -419,11 +419,11 @@ def segment(
419419
420420 To segment an array for both positive and negative gaps, invoke the function
421421 twice, once for a positive tolerance and once for a negative tolerance.
422- The result of the first invocation can be passed as the ``rowsize `` argument
422+ The result of the first invocation can be passed as the ``count `` argument
423423 to the outer ``segment`` invocation:
424424
425425 >>> x = [1, 1, 2, 2, 1, 1, 2, 2]
426- >>> segment(x, 0.5, rowsize =segment(x, -0.5))
426+ >>> segment(x, 0.5, count =segment(x, -0.5))
427427 array([2, 2, 2, 2])
428428
429429 If the input array contains time objects, the tolerance must be a time interval:
@@ -444,7 +444,7 @@ def segment(
444444 else :
445445 positive_tol = tolerance >= 0
446446
447- if rowsize is None :
447+ if count is None :
448448 if positive_tol :
449449 exceeds_tolerance = np .diff (x ) > tolerance
450450 else :
@@ -453,11 +453,11 @@ def segment(
453453 segment_sizes = np .append (segment_sizes , len (x ) - np .sum (segment_sizes ))
454454 return segment_sizes
455455 else :
456- if not sum (rowsize ) == len (x ):
457- raise ValueError ("The sum of rowsize must equal the length of x." )
456+ if not sum (count ) == len (x ):
457+ raise ValueError ("The sum of count must equal the length of x." )
458458 segment_sizes = []
459459 start = 0
460- for r in rowsize :
460+ for r in count :
461461 end = start + int (r )
462462 segment_sizes .append (segment (x [start :end ], tolerance ))
463463 start = end
@@ -990,7 +990,7 @@ def subset(ds: xr.Dataset, criteria: dict) -> xr.Dataset:
990990 raise ValueError (f"Unknown variable '{ key } '." )
991991
992992 # remove data when trajectories are filtered
993- traj_idx = np .insert (np .cumsum (ds ["rowsize " ].values ), 0 , 0 )
993+ traj_idx = np .insert (np .cumsum (ds ["count " ].values ), 0 , 0 )
994994 for i in np .where (~ mask_traj )[0 ]:
995995 mask_obs [slice (traj_idx [i ], traj_idx [i + 1 ])] = False
996996
@@ -1005,16 +1005,14 @@ def subset(ds: xr.Dataset, criteria: dict) -> xr.Dataset:
10051005 else :
10061006 # apply the filtering for both dimensions
10071007 ds_sub = ds .isel ({"traj" : mask_traj , "obs" : mask_obs })
1008- # update the rowsize
1009- ds_sub ["rowsize " ].values = segment (
1010- ds_sub .ids , 0.5 , rowsize = segment (ds_sub .ids , - 0.5 )
1008+ # update the count
1009+ ds_sub ["count " ].values = segment (
1010+ ds_sub .ids , 0.5 , count = segment (ds_sub .ids , - 0.5 )
10111011 )
10121012 return ds_sub
10131013
10141014
1015- def unpack_ragged (
1016- ragged_array : np .ndarray , rowsize : np .ndarray [int ]
1017- ) -> list [np .ndarray ]:
1015+ def unpack_ragged (ragged_array : np .ndarray , count : np .ndarray [int ]) -> list [np .ndarray ]:
10181016 """Unpack a ragged array into a list of regular arrays.
10191017
10201018 Unpacking a ``np.ndarray`` ragged array is about 2 orders of magnitude
@@ -1025,15 +1023,15 @@ def unpack_ragged(
10251023 ----------
10261024 ragged_array : array-like
10271025 A ragged_array to unpack
1028- rowsize : array-like
1026+ count : array-like
10291027 An array of integers whose values is the size of each row in the ragged
10301028 array
10311029
10321030 Returns
10331031 -------
10341032 list
10351033 A list of array-likes with sizes that correspond to the values in
1036- rowsize , and types that correspond to the type of ragged_array
1034+ count , and types that correspond to the type of ragged_array
10371035
10381036 Examples
10391037 --------
@@ -1042,20 +1040,20 @@ def unpack_ragged(
10421040
10431041 .. code-block:: python
10441042
1045- lon = unpack_ragged(ds.lon, ds.rowsize ) # return a list[xr.DataArray] (slower)
1046- lon = unpack_ragged(ds.lon.values, ds.rowsize ) # return a list[np.ndarray] (faster)
1043+ lon = unpack_ragged(ds.lon, ds["count"] ) # return a list[xr.DataArray] (slower)
1044+ lon = unpack_ragged(ds.lon.values, ds["count"] ) # return a list[np.ndarray] (faster)
10471045
10481046 Looping over trajectories in a ragged Xarray Dataset to compute velocities
10491047 for each:
10501048
10511049 .. code-block:: python
10521050
10531051 for lon, lat, time in list(zip(
1054- unpack_ragged(ds.lon.values, ds.rowsize ),
1055- unpack_ragged(ds.lat.values, ds.rowsize ),
1056- unpack_ragged(ds.time.values, ds.rowsize )
1052+ unpack_ragged(ds.lon.values, ds["count"] ),
1053+ unpack_ragged(ds.lat.values, ds["count"] ),
1054+ unpack_ragged(ds.time.values, ds["count"] )
10571055 )):
10581056 u, v = velocity_from_position(lon, lat, time)
10591057 """
1060- indices = np .insert (np .cumsum (np .array (rowsize )), 0 , 0 )
1058+ indices = np .insert (np .cumsum (np .array (count )), 0 , 0 )
10611059 return [ragged_array [indices [n ] : indices [n + 1 ]] for n in range (indices .size - 1 )]
0 commit comments