Skip to content

Commit 9fb443a

Browse files
b8raoult, floriankrb, anaprietonem
authored
feat: fetch files from ecfs if path starts with ec: or ectmp: (#585)
## Description

Fetch files from ECFS when creating datasets if the path starts with `ec:` or `ectmp:`.

## What problem does this change solve?

<!-- Describe if it's a bugfix, new feature, doc update, or breaking change -->

## What issue or task does this change relate to?

<!-- link to Issue Number -->

## Additional notes ##

<!-- Include any additional information, caveats, or considerations that the reviewer should be aware of. -->

***As a contributor to the Anemoi framework, please ensure that your changes include unit tests, updates to any affected dependencies and documentation, and have been tested in a parallel setting (i.e., with multiple GPUs). As a reviewer, you are also responsible for verifying these aspects and requesting changes if they are not adequately addressed. For guidelines about those please refer to https://anemoi.readthedocs.io/en/latest/***

By opening this pull request, I affirm that all authors agree to the [Contributor License Agreement.](https://github.com/ecmwf/codex/blob/main/Legal/contributor_license_agreement.md)

---------

Co-authored-by: Florian Pinault <[email protected]>
Co-authored-by: anaprietonem <[email protected]>
Co-authored-by: Ana Prieto Nemesio <[email protected]>
1 parent f434a15 commit 9fb443a

3 files changed

Lines changed: 41 additions & 0 deletions

File tree

src/anemoi/datasets/create/ecfs.py

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
# (C) Copyright 2026- Anemoi contributors.
2+
#
3+
# This software is licensed under the terms of the Apache Licence Version 2.0
4+
# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
5+
#
6+
# In applying this licence, ECMWF does not waive the privileges and immunities
7+
# granted to it by virtue of its status as an intergovernmental organisation
8+
# nor does it submit to any jurisdiction.
9+
10+
import atexit
import hashlib
import logging
import os
import shutil
import subprocess
import tempfile
15+
16+
# Directory holding every file fetched by this process; created lazily on the
# first call to get_ecfs_file() and removed when the interpreter exits.
TMPDIR = None
LOG = logging.getLogger(__name__)


def _remove_tmpdir(directory: str, owner_pid: int) -> None:
    """Delete ``directory`` at interpreter exit, but only in the process that created it."""
    # Exit handlers registered before a fork are inherited by the child; a
    # child must not delete files that its parent may still be reading.
    if os.getpid() == owner_pid:
        shutil.rmtree(directory, ignore_errors=True)


def get_ecfs_file(path: str) -> str:
    """Fetch ``path`` with the external ``ecp`` command and return the local copy.

    The local file lives in a per-process temporary directory and is named
    after the SHA-1 hex digest of ``path``, keeping the original extension.
    Because that name is deterministic, a path that has already been fetched
    by this process is returned directly without calling ``ecp`` again.

    Parameters
    ----------
    path : str
        The remote location, passed unchanged as the first argument of ``ecp``.

    Returns
    -------
    str
        Path of the local copy.

    Raises
    ------
    subprocess.CalledProcessError
        If ``ecp`` exits with a non-zero status.
    FileNotFoundError
        If the ``ecp`` executable cannot be found, or if ``ecp`` reports
        success without having produced the destination file.
    """
    global TMPDIR
    if TMPDIR is None:
        TMPDIR = tempfile.mkdtemp()
        # The directory was previously never removed, leaking every download.
        atexit.register(_remove_tmpdir, TMPDIR, os.getpid())

    _, ext = os.path.splitext(path)
    local_name = os.path.join(TMPDIR, hashlib.sha1(path.encode()).hexdigest() + ext)

    if os.path.exists(local_name):
        # Only complete downloads are ever renamed to ``local_name`` (below),
        # so an existing file can be trusted and reused.
        LOG.debug("Reusing %s for %s", local_name, path)
        return local_name

    # Download under a scratch name and rename on success, so a failed or
    # interrupted transfer never leaves a truncated file at ``local_name``.
    partial = f"{local_name}.{os.getpid()}.part"
    LOG.info("Calling ecp %s %s", path, local_name)
    try:
        subprocess.check_call(["ecp", path, partial])
        os.replace(partial, local_name)
    finally:
        # After a successful rename the scratch file is already gone.
        try:
            os.remove(partial)
        except FileNotFoundError:
            pass
    return local_name

src/anemoi/datasets/create/sources/grib.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -120,6 +120,7 @@ def _execute(
120120
The loaded dataset.
121121
"""
122122
given_paths = path if isinstance(path, list) else [path]
123+
123124
if flavour is not None:
124125
flavour = RuleBasedFlavour(flavour)
125126

@@ -145,6 +146,12 @@ def _execute(
145146

146147
for path in _expand(paths):
147148
context.trace("📁", "PATH", path)
149+
150+
if isinstance(path, str) and (path.startswith("ec:") or path.startswith("ectmp:")):
151+
from anemoi.datasets.create.ecfs import get_ecfs_file
152+
153+
path = get_ecfs_file(path)
154+
148155
s = from_source("file", path)
149156
if flavour is not None:
150157
s = flavour.map(s)

src/anemoi/datasets/create/sources/xarray_support/__init__.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -96,6 +96,11 @@ def load_one(
9696

9797
context.trace(emoji, dataset, options, kwargs)
9898

99+
if isinstance(dataset, str) and (dataset.startswith("ec:") or dataset.startswith("ectmp:")):
100+
from anemoi.datasets.create.ecfs import get_ecfs_file
101+
102+
dataset = get_ecfs_file(dataset)
103+
99104
if isinstance(dataset, str) and dataset.endswith(".zarr"):
100105
# If the dataset is a zarr store, we need to use the zarr engine
101106
options["engine"] = "zarr"

0 commit comments

Comments
 (0)