-
Notifications
You must be signed in to change notification settings - Fork 54
feat: Add Zarr v2 metadata access and basic auth support for OIDC-protected assets #2112
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: develop
Are you sure you want to change the base?
Changes from all commits
a02c5ad
5729f6d
666e1cc
f82f17c
0052e02
2224a32
ca62273
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -18,6 +18,7 @@ | |
| from __future__ import annotations | ||
|
|
||
| import base64 | ||
| import json | ||
| import logging | ||
| import os | ||
| import re | ||
|
|
@@ -27,11 +28,15 @@ | |
|
|
||
| import orjson | ||
| import requests | ||
| from requests import RequestException | ||
| from boto3 import Session | ||
| from boto3.resources.base import ServiceResource | ||
| from requests import PreparedRequest, RequestException | ||
| from requests.auth import AuthBase | ||
| from requests.structures import CaseInsensitiveDict | ||
| from shapely import geometry | ||
| from shapely.errors import ShapelyError | ||
|
|
||
| from eodag.plugins.authentication.aws_auth import AwsAuth | ||
| from eodag.types.queryables import CommonStacMetadata | ||
| from eodag.types.stac_metadata import create_stac_metadata_model | ||
|
|
||
|
|
@@ -63,7 +68,12 @@ | |
| format_string, | ||
| get_geometry_from_various, | ||
| ) | ||
| from eodag.utils.exceptions import DownloadError, MisconfiguredError, ValidationError | ||
| from eodag.utils.exceptions import ( | ||
| DatasetCreationError, | ||
| DownloadError, | ||
| MisconfiguredError, | ||
| ValidationError, | ||
| ) | ||
| from eodag.utils.repr import dict_to_html_table | ||
|
|
||
| if TYPE_CHECKING: | ||
|
|
@@ -525,6 +535,88 @@ def stream_download( | |
| **kwargs, | ||
| ) | ||
|
|
||
def get_storage_options(
    self,
    asset_key: Optional[str] = None,
) -> dict[str, Any]:
    """Build the fsspec ``storage_options`` keyword arguments of this product.

    :param asset_key: Key of the asset whose ``href`` is used as target path.
                      When empty or ``None``, the product ``location`` is used.
    :returns: An empty dict if the product has no downloader; otherwise a dict
              holding the target under ``path``, completed with S3 settings
              when authenticated through :class:`AwsAuth`, or with ``headers``
              when authenticated through a ``requests`` auth object.
    :raises DatasetCreationError: If ``asset_key`` (or its ``href``) is missing
                                  from the product assets.
    """
    # Without a downloader nothing can be built: leave before authenticating,
    # so that no credentials are requested for a result that is discarded.
    if self.downloader is None:
        return {}

    # Resolve the target first; an unknown asset must fail before any
    # authentication round-trip is attempted.
    try:
        url = self.assets[asset_key]["href"] if asset_key else self.location
    except KeyError as e:
        raise DatasetCreationError(f"{asset_key} not found in {self} assets") from e

    auth = self.downloader_auth.authenticate() if self.downloader_auth else None

    if isinstance(auth, ServiceResource) and isinstance(
        self.downloader_auth, AwsAuth
    ):
        # AwsAuth: translate the boto3 session settings into keyword arguments
        config = self.downloader_auth.config
        auth_kwargs: dict[str, Any] = {}
        if s3_endpoint := getattr(config, "s3_endpoint", None):
            auth_kwargs["client_kwargs"] = {"endpoint_url": s3_endpoint}
        if creds := cast(Session, self.downloader_auth.s3_session).get_credentials():
            auth_kwargs["key"] = creds.access_key
            auth_kwargs["secret"] = creds.secret_key
            if creds.token:
                auth_kwargs["token"] = creds.token
            if requester_pays := getattr(config, "requester_pays", False):
                auth_kwargs["requester_pays"] = requester_pays
        else:
            # no credentials available: fall back to anonymous access
            auth_kwargs["anon"] = True
        return {"path": url, **auth_kwargs}

    if isinstance(auth, AuthBase):
        # Let the auth object process a throw-away request, then read back the
        # URL and headers it produced.
        req = PreparedRequest()
        req.url = url
        req.headers = CaseInsensitiveDict(USER_AGENT)
        auth(req)
        return {"path": req.url, "headers": dict(req.headers)}

    return {"path": url}
|
|
||
def request_asset(
    self,
    url: str,
    *,
    timeout: Optional[float] = 60,
) -> requests.Response:
    """Send a streamed GET request to ``url`` using the product's authentication headers.

    The headers are the ones :meth:`get_storage_options` builds when called
    without an asset key; any other storage option is ignored.

    :param url: URL to request.
    :param timeout: Seconds to wait for the server before giving up, passed
                    as-is to :func:`requests.get`. ``None`` waits indefinitely.
    :returns: The streamed response. Its status is not checked here.
    """
    headers = self.get_storage_options().get("headers", {})
    # A timeout prevents the call from blocking forever on an unresponsive server
    return requests.get(url, headers=headers, stream=True, timeout=timeout)
|
Comment on lines
+589
to
+595
Collaborator
We should use the stream download method from EODAG download plugins instead of creating a new method. |
||
|
|
||
| def list_zarr_files_from_metadata( | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. In zarr, we have |
||
| self, | ||
| base_url: str, | ||
| ) -> list[str]: | ||
| """List file paths from a Zarr store metadata file.""" | ||
| import fsspec # type: ignore[import-untyped] | ||
|
|
||
| headers = self.get_storage_options().get("headers", {}) | ||
| mapper = fsspec.get_mapper( | ||
| base_url, | ||
| client_kwargs={"headers": headers, "trust_env": False}, | ||
| ) | ||
|
Comment on lines
+605
to
+608
Collaborator
I am not sure about introducing fsspec in the EODAG library. My opinion:
@sbrunato what do you think? |
||
|
|
||
| if ".zmetadata" in mapper: | ||
| meta = json.loads(mapper[".zmetadata"]) | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Use |
||
| return [".zmetadata", *meta["metadata"].keys()] | ||
|
|
||
| # TODO: Support Zarr v3 when test data becomes available. | ||
| # Zarr v2 uses `.zmetadata`, while Zarr v3 exposes `zarr.json`. | ||
| # The implementation should be straightforward once we can validate it | ||
| # against real examples. | ||
| raise ValueError(f"No Zarr metadata file found at {base_url}") | ||
|
Comment on lines
+614
to
+618
Collaborator
With Destination Earth, we do have a Zarr v3 store: DanubeHis data. I don't understand what is blocking here. |
||
|
|
||
| def _init_progress_bar( | ||
| self, | ||
| progress_callback: Optional[ProgressCallback], | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -59,6 +59,9 @@ dependencies = [ | |
| "typing_extensions >= 4.8.0", | ||
| "urllib3", | ||
| "zipstream-ng", | ||
| "fsspec", | ||
| "aiohttp", | ||
| "requests" | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
|
||
| ] | ||
| dynamic = ["version"] | ||
|
|
||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
You are not creating an xarray here. The exception class is not accurate.