Skip to content

Commit e37610f

Browse files
selipot and KevinShuman authored
Fix datetime parsing by correcting day initialization in _parse_datetime_with_day_ratio (#548)
* Fix datetime parsing by correcting day initialization in _parse_datetime_with_day_ratio
* Removes gdp1h experimental link (bad link) from test
* Removes gdp1h experimental link (bad link) from test
* Removes gdp1h experimental tmp directory
* Addresses linting error
* Enhance datetime parsing with timezone support and improve docstring for to_raggedarray function
* Improve docstring for to_raggedarray function to specify NOAA as the source of GDP data

---------

Co-authored-by: Kevin <[email protected]>
1 parent 45040b6 commit e37610f

File tree

2 files changed

+41
-8
lines changed

2 files changed

+41
-8
lines changed

clouddrift/adapters/gdp/gdpsource.py

Lines changed: 40 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -367,8 +367,13 @@ def _parse_datetime_with_day_ratio(
367367
dayratio = day_with_ratio - day
368368
seconds = dayratio * _SECONDS_IN_DAY
369369
dt_ns = (
370-
datetime.datetime(year=int(year), month=int(month), day=int(1))
371-
+ datetime.timedelta(days=int(day), seconds=seconds)
370+
datetime.datetime(
371+
year=int(year),
372+
month=int(month),
373+
day=int(day),
374+
tzinfo=datetime.timezone.utc,
375+
)
376+
+ datetime.timedelta(seconds=seconds)
372377
).timestamp() * 10**9
373378
values.append(int(dt_ns))
374379
return np.array(values).astype("datetime64[ns]")
@@ -603,7 +608,39 @@ def to_raggedarray(
603608
use_fill_values: bool = True,
604609
max_chunks: int | None = None,
605610
) -> xr.Dataset:
606-
"""Get the GDP source dataset."""
611+
"""
612+
Convert GDP source data into a ragged array format and return it as an xarray Dataset.
613+
614+
This function processes drifter data from the NOAA GDP (Global Drifter Program) source,
615+
organizes it into a ragged array format, and returns the resulting dataset. It
616+
supports downloading, filtering, and parallel processing of the data.
617+
618+
Args:
619+
tmp_path (str): Path to the temporary directory for storing downloaded files.
620+
Defaults to `_TMP_PATH`.
621+
skip_download (bool): If True, skips downloading the data and assumes it is
622+
already available in `tmp_path`. Defaults to False.
623+
max (int | None): Maximum number of requests to process for testing purposes.
624+
If None, processes all requests. Defaults to None.
625+
chunk_size (int): Number of observations to process in each chunk. Defaults to 100,000.
626+
use_fill_values (bool): Whether to use fill values for missing data. Defaults to True.
627+
max_chunks (int | None): Maximum number of chunks to process. If None, processes all
628+
chunks. Defaults to None.
629+
630+
Returns:
631+
xr.Dataset: An xarray Dataset containing the processed GDP drifter data in a
632+
ragged array format. The dataset includes both observation and
633+
trajectory metadata variables, with appropriate attributes added.
634+
635+
Raises:
636+
Any exceptions raised during file operations, data processing, or async tasks
637+
will propagate to the caller.
638+
639+
Notes:
640+
- The function performs parallel processing of drifter data using asyncio.
641+
- The resulting dataset is sorted by the start date of each drifter.
642+
- Metadata attributes for variables are added based on predefined mappings.
643+
"""
607644

608645
os.makedirs(tmp_path, exist_ok=True)
609646

tests/adapters/gdp/gdp1h_integ_tests.py

Lines changed: 1 addition & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -11,7 +11,6 @@ class gdp1h_integration_tests(testutils.DisableProgressTestCase):
1111
def test_load_subset_and_create_aggregate(self):
1212
test_tasks = [
1313
(gdp1h.GDP_TMP_PATH, gdp1h.GDP_DATA_URL),
14-
(gdp1h.GDP_TMP_PATH_EXPERIMENTAL, gdp1h.GDP_DATA_URL_EXPERIMENTAL),
1514
]
1615

1716
for path, url in test_tasks:
@@ -47,7 +46,4 @@ def test_load_subset_and_create_aggregate(self):
4746

4847
@classmethod
4948
def tearDownClass(cls):
50-
[
51-
shutil.rmtree(dir)
52-
for dir in [gdp1h.GDP_TMP_PATH, gdp1h.GDP_TMP_PATH_EXPERIMENTAL]
53-
]
49+
[shutil.rmtree(dir) for dir in [gdp1h.GDP_TMP_PATH]]

0 commit comments

Comments
 (0)