Skip to content

Commit 3898938

Browse files
authored
Rename rowsize to count (#212)
* Rename rowsize -> count
* Bump minor version
* Warn if rowsize not found upstream
1 parent dc4acc4 commit 3898938

File tree

8 files changed

+171
-157
lines changed

8 files changed

+171
-157
lines changed

clouddrift/adapters/gdp.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@
2020

2121
GDP_METADATA = [
2222
"ID",
23-
"rowsize",
23+
"count",
2424
"WMO",
2525
"expno",
2626
"deploy_date",
@@ -270,7 +270,7 @@ def drogue_presence(lost_time, time) -> bool:
270270
return time < lost_time
271271

272272

273-
def rowsize(index: int, **kwargs) -> int:
273+
def count(index: int, **kwargs) -> int:
274274
try:
275275
return xr.open_dataset(
276276
os.path.join(

clouddrift/adapters/gdp1h.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -486,6 +486,13 @@ def preprocess(index: int, **kwargs) -> xr.Dataset:
486486
# rename variables
487487
ds = ds.rename_vars({"longitude": "lon", "latitude": "lat"})
488488

489+
if "rowsize" in ds.variables:
490+
ds = ds.rename_vars({"rowsize": "count"})
491+
else:
492+
warnings.warn(
493+
"Variable rowsize not found in upstream GDP data; has it been renamed?"
494+
)
495+
489496
return ds
490497

491498

@@ -566,7 +573,7 @@ def to_raggedarray(
566573
name_coords=gdp.GDP_COORDS,
567574
name_meta=gdp.GDP_METADATA,
568575
name_data=GDP_DATA,
569-
rowsize_func=gdp.rowsize,
576+
count_func=gdp.count,
570577
filename_pattern=filename_pattern,
571578
tmp_path=GDP_TMP_PATH,
572579
)

clouddrift/adapters/gdp6h.py

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -273,7 +273,7 @@ def preprocess(index: int, **kwargs) -> xr.Dataset:
273273
"long_name": "Global Drifter Program Buoy ID repeated along observations",
274274
"units": "-",
275275
},
276-
"rowsize": {
276+
"count": {
277277
"long_name": "Number of observations per trajectory",
278278
"sample_dimension": "obs",
279279
"units": "-",
@@ -418,6 +418,13 @@ def preprocess(index: int, **kwargs) -> xr.Dataset:
418418
# rename variables
419419
ds = ds.rename_vars({"longitude": "lon", "latitude": "lat"})
420420

421+
if "rowsize" in ds.variables:
422+
ds = ds.rename_vars({"rowsize": "count"})
423+
else:
424+
warnings.warn(
425+
"Variable rowsize not found in upstream GDP data; has it been renamed?"
426+
)
427+
421428
return ds
422429

423430

@@ -481,7 +488,7 @@ def to_raggedarray(
481488
name_coords=gdp.GDP_COORDS,
482489
name_meta=gdp.GDP_METADATA,
483490
name_data=GDP_DATA,
484-
rowsize_func=gdp.rowsize,
491+
count_func=gdp.count,
485492
filename_pattern="drifter_{id}.nc",
486493
tmp_path=GDP_TMP_PATH,
487494
)

clouddrift/analysis.py

Lines changed: 56 additions & 58 deletions
Original file line numberDiff line numberDiff line change
@@ -16,15 +16,15 @@
1616
def apply_ragged(
1717
func: callable,
1818
arrays: list[np.ndarray],
19-
rowsize: list[int],
19+
count: list[int],
2020
*args: tuple,
2121
executor: futures.Executor = futures.ThreadPoolExecutor(max_workers=None),
2222
**kwargs: dict,
2323
) -> Union[tuple[np.ndarray], np.ndarray]:
2424
"""Apply a function to a ragged array.
2525
2626
The function ``func`` will be applied to each contiguous row of ``arrays`` as
27-
indicated by row sizes ``rowsize``. The output of ``func`` will be
27+
indicated by row sizes ``count``. The output of ``func`` will be
2828
concatenated into a single ragged array.
2929
3030
By default this function uses ``concurrent.futures.ThreadPoolExecutor`` to
@@ -41,7 +41,7 @@ def apply_ragged(
4141
Function to apply to each row of each ragged array in ``arrays``.
4242
arrays : list[np.ndarray] or np.ndarray
4343
An array or a list of arrays to apply ``func`` to.
44-
rowsize : list
44+
count : list
4545
List of integers specifying the number of data points in each row.
4646
*args : tuple
4747
Additional arguments to pass to ``func``.
@@ -64,31 +64,31 @@ def apply_ragged(
6464
multiple particles, the coordinates of which are found in the ragged arrays x, y, and t
6565
that share row sizes 2, 3, and 4:
6666
67-
>>> rowsize = [2, 3, 4]
67+
>>> count = [2, 3, 4]
6868
>>> x = np.array([1, 2, 10, 12, 14, 30, 33, 36, 39])
6969
>>> y = np.array([0, 1, 2, 3, 4, 5, 6, 7, 8])
7070
>>> t = np.array([1, 2, 1, 2, 3, 1, 2, 3, 4])
71-
>>> u1, v1 = apply_ragged(velocity_from_position, [x, y, t], rowsize, coord_system="cartesian")
71+
>>> u1, v1 = apply_ragged(velocity_from_position, [x, y, t], count, coord_system="cartesian")
7272
array([1., 1., 2., 2., 2., 3., 3., 3., 3.]),
7373
array([1., 1., 1., 1., 1., 1., 1., 1., 1.]))
7474
7575
Raises
7676
------
7777
ValueError
78-
If the sum of ``rowsize`` does not equal the length of ``arrays``.
78+
If the sum of ``count`` does not equal the length of ``arrays``.
7979
IndexError
8080
If empty ``arrays``.
8181
"""
8282
# make sure the arrays is iterable
8383
if type(arrays) not in [list, tuple]:
8484
arrays = [arrays]
85-
# validate rowsize
85+
# validate count
8686
for arr in arrays:
87-
if not sum(rowsize) == len(arr):
88-
raise ValueError("The sum of rowsize must equal the length of arr.")
87+
if not sum(count) == len(arr):
88+
raise ValueError("The sum of count must equal the length of arr.")
8989

9090
# split the array(s) into trajectories
91-
arrays = [unpack_ragged(arr, rowsize) for arr in arrays]
91+
arrays = [unpack_ragged(arr, count) for arr in arrays]
9292
iter = [[arrays[i][j] for i in range(len(arrays))] for j in range(len(arrays[0]))]
9393

9494
# parallel execution
@@ -180,8 +180,8 @@ def chunk(
180180
notice that you must pass the array to chunk as an array-like, not a list:
181181
182182
>>> x = np.array([1, 2, 3, 4, 5])
183-
>>> rowsize = [2, 1, 2]
184-
>>> apply_ragged(chunk, x, rowsize, 2)
183+
>>> count = [2, 1, 2]
184+
>>> apply_ragged(chunk, x, count, 2)
185185
array([[1, 2],
186186
[4, 5]])
187187
@@ -217,18 +217,18 @@ def chunk(
217217

218218
def prune(
219219
ragged: Union[list, np.ndarray, pd.Series, xr.DataArray],
220-
rowsize: Union[list, np.ndarray, pd.Series, xr.DataArray],
221-
min_rowsize: float,
220+
count: Union[list, np.ndarray, pd.Series, xr.DataArray],
221+
min_count: float,
222222
) -> Tuple[np.ndarray, np.ndarray]:
223223
"""Within a ragged array, removes arrays less than a specified row size.
224224
225225
Parameters
226226
----------
227227
ragged : np.ndarray or pd.Series or xr.DataArray
228228
A ragged array.
229-
rowsize : list or np.ndarray[int] or pd.Series or xr.DataArray[int]
229+
count : list or np.ndarray[int] or pd.Series or xr.DataArray[int]
230230
The size of each row in the input ragged array.
231-
min_rowsize :
231+
min_count :
232232
The minimum row size that will be kept.
233233
234234
Returns
@@ -244,7 +244,7 @@ def prune(
244244
Raises
245245
------
246246
ValueError
247-
If the sum of ``rowsize`` does not equal the length of ``arrays``.
247+
If the sum of ``count`` does not equal the length of ``arrays``.
248248
IndexError
249249
If empty ``ragged``.
250250
@@ -256,17 +256,17 @@ def prune(
256256
ragged = apply_ragged(
257257
lambda x, min_len: x if len(x) >= min_len else np.empty(0, dtype=x.dtype),
258258
np.array(ragged),
259-
rowsize,
260-
min_len=min_rowsize,
259+
count,
260+
min_len=min_count,
261261
)
262-
rowsize = apply_ragged(
262+
count = apply_ragged(
263263
lambda x, min_len: x if x >= min_len else np.empty(0, dtype=x.dtype),
264-
np.array(rowsize),
265-
np.ones_like(rowsize),
266-
min_len=min_rowsize,
264+
np.array(count),
265+
np.ones_like(count),
266+
min_len=min_count,
267267
)
268268

269-
return ragged, rowsize
269+
return ragged, count
270270

271271

272272
def regular_to_ragged(
@@ -313,14 +313,14 @@ def regular_to_ragged(
313313

314314
def ragged_to_regular(
315315
ragged: Union[np.ndarray, pd.Series, xr.DataArray],
316-
rowsize: Union[list, np.ndarray, pd.Series, xr.DataArray],
316+
count: Union[list, np.ndarray, pd.Series, xr.DataArray],
317317
fill_value: float = np.nan,
318318
) -> np.ndarray:
319319
"""Convert a ragged array to a two-dimensional array such that each contiguous segment
320320
of a ragged array is a row in the two-dimensional array. Each row of the two-dimensional
321321
array is padded with NaNs as needed. The length of the first dimension of the output
322-
array is the length of ``rowsize``. The length of the second dimension is the maximum
323-
element of ``rowsize``.
322+
array is the length of ``count``. The length of the second dimension is the maximum
323+
element of ``count``.
324324
325325
Note: Although this function accepts parameters of type ``xarray.DataArray``,
326326
passing NumPy arrays is recommended for performance reasons.
@@ -329,7 +329,7 @@ def ragged_to_regular(
329329
----------
330330
ragged : np.ndarray or pd.Series or xr.DataArray
331331
A ragged array.
332-
rowsize : list or np.ndarray[int] or pd.Series or xr.DataArray[int]
332+
count : list or np.ndarray[int] or pd.Series or xr.DataArray[int]
333333
The size of each row in the ragged array.
334334
fill_value : float, optional
335335
Fill value to use for the trailing elements of each row of the resulting
@@ -359,17 +359,17 @@ def ragged_to_regular(
359359
--------
360360
:func:`regular_to_ragged`
361361
"""
362-
res = fill_value * np.ones((len(rowsize), int(max(rowsize))), dtype=ragged.dtype)
363-
unpacked = unpack_ragged(ragged, rowsize)
364-
for n in range(len(rowsize)):
365-
res[n, : int(rowsize[n])] = unpacked[n]
362+
res = fill_value * np.ones((len(count), int(max(count))), dtype=ragged.dtype)
363+
unpacked = unpack_ragged(ragged, count)
364+
for n in range(len(count)):
365+
res[n, : int(count[n])] = unpacked[n]
366366
return res
367367

368368

369369
def segment(
370370
x: np.ndarray,
371371
tolerance: Union[float, np.timedelta64, timedelta, pd.Timedelta],
372-
rowsize: np.ndarray[int] = None,
372+
count: np.ndarray[int] = None,
373373
) -> np.ndarray[int]:
374374
"""Divide an array into segments based on a tolerance value.
375375
@@ -380,7 +380,7 @@ def segment(
380380
tolerance : float, np.timedelta64, timedelta, pd.Timedelta
381381
The maximum signed difference between consecutive points in a segment.
382382
The array x will be segmented wherever differences exceed the tolerance.
383-
rowsize : np.ndarray[int], optional
383+
count : np.ndarray[int], optional
384384
The size of rows if x is originally a ragged array. If present, x will be
385385
divided both by gaps that exceed the tolerance, and by the original rows
386386
of the ragged array.
@@ -401,12 +401,12 @@ def segment(
401401
array([1, 3, 2, 4, 1])
402402
403403
If the array is already previously segmented (e.g. multiple rows in
404-
a ragged array), then the ``rowsize`` argument can be used to preserve
404+
a ragged array), then the ``count`` argument can be used to preserve
405405
the original segments:
406406
407407
>>> x = [0, 1, 1, 1, 2, 2, 3, 3, 3, 3, 4]
408-
>>> rowsize = [3, 2, 6]
409-
>>> segment(x, 0.5, rowsize)
408+
>>> count = [3, 2, 6]
409+
>>> segment(x, 0.5, count)
410410
array([1, 2, 1, 1, 1, 4, 1])
411411
412412
The tolerance can also be negative. In this case, the input array is
@@ -419,11 +419,11 @@ def segment(
419419
420420
To segment an array for both positive and negative gaps, invoke the function
421421
twice, once for a positive tolerance and once for a negative tolerance.
422-
The result of the first invocation can be passed as the ``rowsize`` argument
422+
The result of the first invocation can be passed as the ``count`` argument
423423
to the first ``segment`` invocation:
424424
425425
>>> x = [1, 1, 2, 2, 1, 1, 2, 2]
426-
>>> segment(x, 0.5, rowsize=segment(x, -0.5))
426+
>>> segment(x, 0.5, count=segment(x, -0.5))
427427
array([2, 2, 2, 2])
428428
429429
If the input array contains time objects, the tolerance must be a time interval:
@@ -444,7 +444,7 @@ def segment(
444444
else:
445445
positive_tol = tolerance >= 0
446446

447-
if rowsize is None:
447+
if count is None:
448448
if positive_tol:
449449
exceeds_tolerance = np.diff(x) > tolerance
450450
else:
@@ -453,11 +453,11 @@ def segment(
453453
segment_sizes = np.append(segment_sizes, len(x) - np.sum(segment_sizes))
454454
return segment_sizes
455455
else:
456-
if not sum(rowsize) == len(x):
457-
raise ValueError("The sum of rowsize must equal the length of x.")
456+
if not sum(count) == len(x):
457+
raise ValueError("The sum of count must equal the length of x.")
458458
segment_sizes = []
459459
start = 0
460-
for r in rowsize:
460+
for r in count:
461461
end = start + int(r)
462462
segment_sizes.append(segment(x[start:end], tolerance))
463463
start = end
@@ -990,7 +990,7 @@ def subset(ds: xr.Dataset, criteria: dict) -> xr.Dataset:
990990
raise ValueError(f"Unknown variable '{key}'.")
991991

992992
# remove data when trajectories are filtered
993-
traj_idx = np.insert(np.cumsum(ds["rowsize"].values), 0, 0)
993+
traj_idx = np.insert(np.cumsum(ds["count"].values), 0, 0)
994994
for i in np.where(~mask_traj)[0]:
995995
mask_obs[slice(traj_idx[i], traj_idx[i + 1])] = False
996996

@@ -1005,16 +1005,14 @@ def subset(ds: xr.Dataset, criteria: dict) -> xr.Dataset:
10051005
else:
10061006
# apply the filtering for both dimensions
10071007
ds_sub = ds.isel({"traj": mask_traj, "obs": mask_obs})
1008-
# update the rowsize
1009-
ds_sub["rowsize"].values = segment(
1010-
ds_sub.ids, 0.5, rowsize=segment(ds_sub.ids, -0.5)
1008+
# update the count
1009+
ds_sub["count"].values = segment(
1010+
ds_sub.ids, 0.5, count=segment(ds_sub.ids, -0.5)
10111011
)
10121012
return ds_sub
10131013

10141014

1015-
def unpack_ragged(
1016-
ragged_array: np.ndarray, rowsize: np.ndarray[int]
1017-
) -> list[np.ndarray]:
1015+
def unpack_ragged(ragged_array: np.ndarray, count: np.ndarray[int]) -> list[np.ndarray]:
10181016
"""Unpack a ragged array into a list of regular arrays.
10191017
10201018
Unpacking a ``np.ndarray`` ragged array is about 2 orders of magnitude
@@ -1025,15 +1023,15 @@ def unpack_ragged(
10251023
----------
10261024
ragged_array : array-like
10271025
A ragged_array to unpack
1028-
rowsize : array-like
1026+
count : array-like
10291027
An array of integers whose values is the size of each row in the ragged
10301028
array
10311029
10321030
Returns
10331031
-------
10341032
list
10351033
A list of array-likes with sizes that correspond to the values in
1036-
rowsize, and types that correspond to the type of ragged_array
1034+
count, and types that correspond to the type of ragged_array
10371035
10381036
Examples
10391037
--------
@@ -1042,20 +1040,20 @@ def unpack_ragged(
10421040
10431041
.. code-block:: python
10441042
1045-
lon = unpack_ragged(ds.lon, ds.rowsize) # return a list[xr.DataArray] (slower)
1046-
lon = unpack_ragged(ds.lon.values, ds.rowsize) # return a list[np.ndarray] (faster)
1043+
lon = unpack_ragged(ds.lon, ds["count"]) # return a list[xr.DataArray] (slower)
1044+
lon = unpack_ragged(ds.lon.values, ds["count"]) # return a list[np.ndarray] (faster)
10471045
10481046
Looping over trajectories in a ragged Xarray Dataset to compute velocities
10491047
for each:
10501048
10511049
.. code-block:: python
10521050
10531051
for lon, lat, time in list(zip(
1054-
unpack_ragged(ds.lon.values, ds.rowsize),
1055-
unpack_ragged(ds.lat.values, ds.rowsize),
1056-
unpack_ragged(ds.time.values, ds.rowsize)
1052+
unpack_ragged(ds.lon.values, ds["count"]),
1053+
unpack_ragged(ds.lat.values, ds["count"]),
1054+
unpack_ragged(ds.time.values, ds["count"])
10571055
)):
10581056
u, v = velocity_from_position(lon, lat, time)
10591057
"""
1060-
indices = np.insert(np.cumsum(np.array(rowsize)), 0, 0)
1058+
indices = np.insert(np.cumsum(np.array(count)), 0, 0)
10611059
return [ragged_array[indices[n] : indices[n + 1]] for n in range(indices.size - 1)]

0 commit comments

Comments
 (0)