Skip to content
2 changes: 1 addition & 1 deletion clouddrift/adapters/glad.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@

def get_dataframe() -> pd.DataFrame:
"""Get the GLAD dataset as a pandas DataFrame."""
url = "https://data.gulfresearchinitiative.org/pelagos-symfony/api/file/download/169841"
url = "https://data.gulfresearchinitiative.org/api/file/download/169841"
# The GRIIDC server doesn't provide a Content-Length header, so we hardcode
# the expected data length here.
file_size = 155330876
Expand Down
7 changes: 5 additions & 2 deletions clouddrift/adapters/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,9 @@
from tqdm import tqdm

_DISABLE_SHOW_PROGRESS = False # Exists purely to de-noise our test suite output; it should never be used or configured outside of the tests.
_BROWSER_HEADERS = {
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.0.0 Safari/537.36"
Comment on lines +26 to +27
Copy link

Copilot AI Aug 30, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[nitpick] The hardcoded User-Agent string may become outdated over time. Consider using a more generic or library-generated User-Agent, or add a comment explaining why this specific version is required.

Suggested change
_BROWSER_HEADERS = {
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.0.0 Safari/537.36"
"User-Agent": requests.utils.default_user_agent()

Copilot uses AI. Check for mistakes.
}


def _before_call(rcs: RetryCallState):
Expand Down Expand Up @@ -147,7 +150,7 @@ def _download_with_progress(

# Get last modified time of the remote file
try:
res = requests.head(url, timeout=5)
res = requests.head(url, headers=_BROWSER_HEADERS, timeout=10)
remote_last_modified_str = res.headers.get("Last-Modified")
if remote_last_modified_str:
remote_last_modified = datetime.strptime(
Expand All @@ -173,7 +176,7 @@ def _download_with_progress(
buffer: BufferedWriter | BufferedIOBase | None = None

try:
resp = requests.get(url, timeout=10, stream=True)
resp = requests.get(url, headers=_BROWSER_HEADERS, timeout=60, stream=True)
temp_output = f"{output}.part" if isinstance(output, str) else None

if isinstance(output, str) and temp_output is not None:
Expand Down
8 changes: 4 additions & 4 deletions tests/adapters/utils_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,7 @@ def test_download_new_update_buffer(self):

# Assertions to verify the behavior
self.requests_mock.get.assert_called_with(
"some.url.com", timeout=10, stream=True
"some.url.com", headers=utils._BROWSER_HEADERS, timeout=60, stream=True
)
buffer.write.assert_has_calls(
[call(b"a"), call(b"b"), call(b"c"), call(b"d")]
Expand Down Expand Up @@ -153,7 +153,7 @@ def test_download_new_update_file(self):

# Assertions to verify the behavior
self.requests_mock.get.assert_called_with(
"some.url.com", timeout=10, stream=True
"some.url.com", headers=utils._BROWSER_HEADERS, timeout=60, stream=True
)
self.open_mock.assert_called_with(output_file + ".part", "wb")
handle = self.open_mock()
Expand Down Expand Up @@ -255,7 +255,7 @@ def test_progress_mechanism_enabled_file(self):
tqdm_mock.assert_called_once()
self.bar_mock.update.assert_has_calls([call(1), call(1), call(1), call(1)])
self.requests_mock.get.assert_called_with(
"some.url.com", timeout=10, stream=True
"some.url.com", headers=utils._BROWSER_HEADERS, timeout=60, stream=True
)
self.open_mock.assert_called_with(output_file + ".part", "wb")

Expand Down Expand Up @@ -285,7 +285,7 @@ def test_progress_mechanism_disabled_file(self):
tqdm_mock.assert_not_called()
self.bar_mock.update.assert_not_called()
self.requests_mock.get.assert_called_with(
"some.url.com", timeout=10, stream=True
"some.url.com", headers=utils._BROWSER_HEADERS, timeout=60, stream=True
)
self.open_mock.assert_called_with(output_file + ".part", "wb")

Expand Down
Loading