22This module provides functions to easily access ragged array datasets. If the datasets are
33not accessed via cloud storage platforms or are not found on the local filesystem,
44they will be downloaded from their upstream repositories and stored for later access
5- (~/.clouddrift for unix-based systems).
5+ (~/.clouddrift for UNIX-based systems).
66"""
7-
87from clouddrift import adapters
98import os
109import xarray as xr
1110
1211
13- def gdp1h () -> xr .Dataset :
12+ def gdp1h (decode_times : bool = True ) -> xr .Dataset :
1413 """Returns the latest version of the NOAA Global Drifter Program (GDP) hourly
1514 dataset as a ragged array Xarray dataset.
1615
1716 The data is accessed from a zarr archive hosted on a public AWS S3 bucket accessible at
1817 https://registry.opendata.aws/noaa-oar-hourly-gdp/. The original data source from NOAA NCEI
1918 is https://doi.org/10.25921/x46c-3620.
2019
20+ Parameters
21+ ----------
22+ decode_times : bool, optional
23+ If True, decode the time coordinate into a datetime object. If False, the time
24+ coordinate will be an int64 or float64 array of increments since the origin
25+ time indicated in the units attribute. Default is True.
26+
2127 Returns
2228 -------
2329 xarray.Dataset
@@ -31,7 +37,7 @@ def gdp1h() -> xr.Dataset:
3137 <xarray.Dataset>
3238 Dimensions: (traj: 19396, obs: 197214787)
3339 Coordinates:
34- ids (obs ) int64 ...
40+ id (traj ) int64 ...
3541 time (obs) datetime64[ns] ...
3642 Dimensions without coordinates: traj, obs
3743 Data variables: (12/60)
@@ -68,17 +74,26 @@ def gdp1h() -> xr.Dataset:
6874 :func:`gdp6h`
6975 """
7076 url = "https://noaa-oar-hourly-gdp-pds.s3.amazonaws.com/latest/gdp-v2.01.zarr"
71- return xr .open_dataset (url , engine = "zarr" )
77+ ds = xr .open_dataset (url , engine = "zarr" , decode_times = decode_times )
78+ ds = ds .rename_vars ({"ID" : "id" }).assign_coords ({"id" : ds .ID }).drop_vars (["ids" ])
79+ return ds
7280
7381
74- def gdp6h () -> xr .Dataset :
82+ def gdp6h (decode_times : bool = True ) -> xr .Dataset :
7583 """Returns the NOAA Global Drifter Program (GDP) 6-hourly dataset as a ragged array
7684 Xarray dataset.
7785
7886 The data is accessed from a public HTTPS server at NOAA's Atlantic
7987 Oceanographic and Meteorological Laboratory (AOML) accessible at
8088 https://www.aoml.noaa.gov/phod/gdp/index.php.
8189
90+ Parameters
91+ ----------
92+ decode_times : bool, optional
93+ If True, decode the time coordinate into a datetime object. If False, the time
94+ coordinate will be an int64 or float64 array of increments since the origin
95+ time indicated in the units attribute. Default is True.
96+
8297 Returns
8398 -------
8499 xarray.Dataset
@@ -92,13 +107,12 @@ def gdp6h() -> xr.Dataset:
92107 <xarray.Dataset>
93108 Dimensions: (traj: 26843, obs: 44544647)
94109 Coordinates:
95- ids (obs ) int64 ...
110+ id (traj ) int64 ...
96111 time (obs) datetime64[ns] ...
97112 lon (obs) float32 ...
98113 lat (obs) float32 ...
99114 Dimensions without coordinates: traj, obs
100115 Data variables: (12/44)
101- ID (traj) int64 ...
102116 rowsize (traj) int32 ...
103117 WMO (traj) int32 ...
104118 expno (traj) int32 ...
@@ -131,19 +145,28 @@ def gdp6h() -> xr.Dataset:
131145 :func:`gdp1h`
132146 """
133147 url = "https://www.aoml.noaa.gov/ftp/pub/phod/buoydata/gdp_jul22_ragged_6h.nc#mode=bytes"
134- return xr .open_dataset (url )
148+ ds = xr .open_dataset (url , decode_times = decode_times )
149+ ds = ds .rename_vars ({"ID" : "id" }).assign_coords ({"id" : ds .ID }).drop_vars (["ids" ])
150+ return ds
135151
136152
137- def glad () -> xr .Dataset :
153+ def glad (decode_times : bool = True ) -> xr .Dataset :
138154 """Returns the Grand LAgrangian Deployment (GLAD) dataset as a ragged array
139- Xarray dataset.
155+ Xarray dataset.
140156
141157 The function will first look for the ragged-array dataset on the local
142158 filesystem. If it is not found, the dataset will be downloaded using the
143159 corresponding adapter function and stored for later access.
144160
145161 The upstream data is available at https://doi.org/10.7266/N7VD6WC8.
146162
163+ Parameters
164+ ----------
165+ decode_times : bool, optional
166+ If True, decode the time coordinate into a datetime object. If False, the time
167+ coordinate will be an int64 or float64 array of increments since the origin
168+ time indicated in the units attribute. Default is True.
169+
147170 Returns
148171 -------
149172 xarray.Dataset
@@ -157,8 +180,8 @@ def glad() -> xr.Dataset:
157180 <xarray.Dataset>
158181 Dimensions: (obs: 1602883, traj: 297)
159182 Coordinates:
160- * time (obs) datetime64[ns] 2012-07-20T01:15:00.143960 ... 2012- ...
161- * id (traj) object 'CARTHE_001' 'CARTHE_002' ... 'CARTHE_451'
183+ time (obs) datetime64[ns] ...
184+ id (traj) object ...
162185 Data variables:
163186 latitude (obs) float32 ...
164187 longitude (obs) float32 ...
@@ -190,11 +213,11 @@ def glad() -> xr.Dataset:
190213 os .makedirs (os .path .dirname (glad_path ), exist_ok = True )
191214 ds .to_netcdf (glad_path )
192215 else :
193- ds = xr .open_dataset (glad_path )
216+ ds = xr .open_dataset (glad_path , decode_times = decode_times )
194217 return ds
195218
196219
197- def mosaic () -> xr .Dataset :
220+ def mosaic (decode_times : bool = True ) -> xr .Dataset :
198221 """Returns the MOSAiC sea-ice drift dataset as a ragged array Xarray dataset.
199222
200223 The function will first look for the ragged-array dataset on the local
@@ -214,6 +237,13 @@ def mosaic() -> xr.Dataset:
214237 for the Study of Arctic Climate (MOSAiC) expedition 2019 - 2021. Arctic Data Center.
215238 doi:10.18739/A2KP7TS83.
216239
240+ Parameters
241+ ----------
242+ decode_times : bool, optional
243+ If True, decode the time coordinate into a datetime object. If False, the time
244+ coordinate will be an int64 or float64 array of increments since the origin
245+ time indicated in the units attribute. Default is True.
246+
217247 Returns
218248 -------
219249 xarray.Dataset
@@ -257,16 +287,23 @@ def mosaic() -> xr.Dataset:
257287 os .makedirs (os .path .dirname (mosaic_path ), exist_ok = True )
258288 ds .to_netcdf (mosaic_path )
259289 else :
260- ds = xr .open_dataset (mosaic_path )
290+ ds = xr .open_dataset (mosaic_path , decode_times = decode_times )
261291 return ds
262292
263293
264- def spotters () -> xr .Dataset :
265- """Returns the SOFAR ocean drifters ragged array dataset as an Xarray dataset.
294+ def spotters (decode_times : bool = True ) -> xr .Dataset :
295+ """Returns the Sofar Ocean Spotter drifters ragged array dataset as an Xarray dataset.
266296
267297 The data is accessed from a zarr archive hosted on a public AWS S3 bucket accessible
268298 at https://sofar-spotter-archive.s3.amazonaws.com/spotter_data_bulk_zarr.
269299
300+ Parameters
301+ ----------
302+ decode_times : bool, optional
303+ If True, decode the time coordinate into a datetime object. If False, the time
304+ coordinate will be an int64 or float64 array of increments since the origin
305+ time indicated in the units attribute. Default is True.
306+
270307 Returns
271308 -------
272309 xarray.Dataset
@@ -304,10 +341,10 @@ def spotters() -> xr.Dataset:
304341 title: Sofar Spotter Data Archive - Bulk Wave Parameters
305342 """
306343 url = "https://sofar-spotter-archive.s3.amazonaws.com/spotter_data_bulk_zarr"
307- return xr .open_dataset (url , engine = "zarr" )
344+ return xr .open_dataset (url , engine = "zarr" , decode_times = decode_times )
308345
309346
310- def subsurface_floats () -> xr .Dataset :
347+ def subsurface_floats (decode_times : bool = True ) -> xr .Dataset :
311348 """Returns the subsurface floats dataset as a ragged array Xarray dataset.
312349
313350 The data is accessed from a public HTTPS server at NOAA's Atlantic
@@ -335,6 +372,13 @@ def subsurface_floats() -> xr.Dataset:
335372 compiled in a single Matlab data set. See here for more information on the variables
336373 contained in these files.
337374
375+ Parameters
376+ ----------
377+ decode_times : bool, optional
378+ If True, decode the time coordinate into a datetime object. If False, the time
379+ coordinate will be an int64 or float64 array of increments since the origin
380+ time indicated in the units attribute. Default is True.
381+
338382 Returns
339383 -------
340384 xarray.Dataset
@@ -390,23 +434,24 @@ def subsurface_floats() -> xr.Dataset:
390434 print (f"{ local_file } not found; download from upstream repository." )
391435 ds = adapters .subsurface_floats .to_xarray ()
392436 else :
393- ds = xr .open_dataset (local_file )
437+ ds = xr .open_dataset (local_file , decode_times = decode_times )
394438 return ds
395439
396440
397- def yomaha () -> xr .Dataset :
441+ def yomaha (decode_times : bool = True ) -> xr .Dataset :
398442 """Returns the YoMaHa dataset as a ragged array Xarray dataset.
399443
400444 The function will first look for the ragged-array dataset on the local
401445 filesystem. If it is not found, the dataset will be downloaded using the
402446 corresponding adapter function and stored for later access. The upstream
403447 data is available at http://apdrc.soest.hawaii.edu/projects/yomaha/.
404448
405- Reference
406- ---------
407- Lebedev, K. V., Yoshinari, H., Maximenko, N. A., & Hacker, P. W. (2007). Velocity data
408- assessed from trajectories of Argo floats at parking level and at the sea
409- surface. IPRC Technical Note, 4(2), 1-16.
449+ Parameters
450+ ----------
451+ decode_times : bool, optional
452+ If True, decode the time coordinate into a datetime object. If False, the time
453+ coordinate will be an int64 or float64 array of increments since the origin
454+ time indicated in the units attribute. Default is True.
410455
411456 Returns
412457 -------
@@ -449,6 +494,12 @@ def yomaha() -> xr.Dataset:
449494 publisher_name: Asia-Pacific Data Research Center
450495 publisher_url: http://apdrc.soest.hawaii.edu/index.php
451496 license: Creative Commons Attribution 4.0 International License..
497+
498+ Reference
499+ ---------
500+ Lebedev, K. V., Yoshinari, H., Maximenko, N. A., & Hacker, P. W. (2007). Velocity data
501+ assessed from trajectories of Argo floats at parking level and at the sea
502+ surface. IPRC Technical Note, 4(2), 1-16.
452503 """
453504 clouddrift_path = (
454505 os .path .expanduser ("~/.clouddrift" )
@@ -462,23 +513,25 @@ def yomaha() -> xr.Dataset:
462513 os .makedirs (os .path .dirname (local_file ), exist_ok = True )
463514 ds .to_netcdf (local_file )
464515 else :
465- ds = xr .open_dataset (local_file )
516+ ds = xr .open_dataset (local_file , decode_times = decode_times )
466517 return ds
467518
468519
469- def andro () -> xr .Dataset :
520+ def andro (decode_times : bool = True ) -> xr .Dataset :
470521 """Returns the ANDRO dataset as a ragged array Xarray dataset.
471522
472523 The function will first look for the ragged-array dataset on the local
473524 filesystem. If it is not found, the dataset will be downloaded using the
474525 corresponding adapter function and stored for later access. The upstream
475526 data is available at https://www.seanoe.org/data/00360/47077/.
476527
477- Reference
478- ---------
479- Ollitrault Michel, Rannou Philippe, Brion Emilie, Cabanes Cecile, Piron Anne, Reverdin Gilles,
480- Kolodziejczyk Nicolas (2022). ANDRO: An Argo-based deep displacement dataset.
481- SEANOE. https://doi.org/10.17882/47077
528+ Parameters
529+ ----------
530+ decode_times : bool, optional
531+ If True, decode the time coordinate into a datetime object. If False, the time
532+ coordinate will be an int64 or float64 array of increments since the origin
533+ time indicated in the units attribute. Default is True.
534+
482535 Returns
483536 -------
484537 xarray.Dataset
@@ -518,6 +571,12 @@ def andro() -> xr.Dataset:
518571 publisher_name: SEANOE (SEA scieNtific Open data Edition)
519572 publisher_url: https://www.seanoe.org/data/00360/47077/
520573 license: freely available
574+
575+ Reference
576+ ---------
577+ Ollitrault Michel, Rannou Philippe, Brion Emilie, Cabanes Cecile, Piron Anne, Reverdin Gilles,
578+ Kolodziejczyk Nicolas (2022). ANDRO: An Argo-based deep displacement dataset.
579+ SEANOE. https://doi.org/10.17882/47077
521580 """
522581 clouddrift_path = (
523582 os .path .expanduser ("~/.clouddrift" )
@@ -531,5 +590,5 @@ def andro() -> xr.Dataset:
531590 os .makedirs (os .path .dirname (local_file ), exist_ok = True )
532591 ds .to_netcdf (local_file )
533592 else :
534- ds = xr .open_dataset (local_file )
593+ ds = xr .open_dataset (local_file , decode_times = decode_times )
535594 return ds
0 commit comments