Skip to content

Commit 537411d

Browse files
🐛 fix url for glad dataset (#577)
1 parent 34dac75 commit 537411d

File tree

3 files changed

+10
-7
lines changed

3 files changed

+10
-7
lines changed

clouddrift/adapters/glad.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@
2525

2626
def get_dataframe() -> pd.DataFrame:
2727
"""Get the GLAD dataset as a pandas DataFrame."""
28-
url = "https://data.gulfresearchinitiative.org/pelagos-symfony/api/file/download/169841"
28+
url = "https://data.gulfresearchinitiative.org/api/file/download/169841"
2929
# GRIIDC server doesn't provide Content-Length header, so we'll hardcode
3030
# the expected data length here.
3131
file_size = 155330876

clouddrift/adapters/utils.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,9 @@
2323
from tqdm import tqdm
2424

2525
_DISABLE_SHOW_PROGRESS = False # Purely to de-noise our test suite output; should never be used or configured outside of that.
26+
_BROWSER_HEADERS = {
27+
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.0.0 Safari/537.36"
28+
}
2629

2730

2831
def _before_call(rcs: RetryCallState):
@@ -147,7 +150,7 @@ def _download_with_progress(
147150

148151
# Get last modified time of the remote file
149152
try:
150-
res = requests.head(url, timeout=5)
153+
res = requests.head(url, headers=_BROWSER_HEADERS, timeout=10)
151154
remote_last_modified_str = res.headers.get("Last-Modified")
152155
if remote_last_modified_str:
153156
remote_last_modified = datetime.strptime(
@@ -173,7 +176,7 @@ def _download_with_progress(
173176
buffer: BufferedWriter | BufferedIOBase | None = None
174177

175178
try:
176-
resp = requests.get(url, timeout=10, stream=True)
179+
resp = requests.get(url, headers=_BROWSER_HEADERS, timeout=60, stream=True)
177180
temp_output = f"{output}.part" if isinstance(output, str) else None
178181

179182
if isinstance(output, str) and temp_output is not None:

tests/adapters/utils_test.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -117,7 +117,7 @@ def test_download_new_update_buffer(self):
117117

118118
# Assertions to verify the behavior
119119
self.requests_mock.get.assert_called_with(
120-
"some.url.com", timeout=10, stream=True
120+
"some.url.com", headers=utils._BROWSER_HEADERS, timeout=60, stream=True
121121
)
122122
buffer.write.assert_has_calls(
123123
[call(b"a"), call(b"b"), call(b"c"), call(b"d")]
@@ -153,7 +153,7 @@ def test_download_new_update_file(self):
153153

154154
# Assertions to verify the behavior
155155
self.requests_mock.get.assert_called_with(
156-
"some.url.com", timeout=10, stream=True
156+
"some.url.com", headers=utils._BROWSER_HEADERS, timeout=60, stream=True
157157
)
158158
self.open_mock.assert_called_with(output_file + ".part", "wb")
159159
handle = self.open_mock()
@@ -255,7 +255,7 @@ def test_progress_mechanism_enabled_file(self):
255255
tqdm_mock.assert_called_once()
256256
self.bar_mock.update.assert_has_calls([call(1), call(1), call(1), call(1)])
257257
self.requests_mock.get.assert_called_with(
258-
"some.url.com", timeout=10, stream=True
258+
"some.url.com", headers=utils._BROWSER_HEADERS, timeout=60, stream=True
259259
)
260260
self.open_mock.assert_called_with(output_file + ".part", "wb")
261261

@@ -285,7 +285,7 @@ def test_progress_mechanism_disabled_file(self):
285285
tqdm_mock.assert_not_called()
286286
self.bar_mock.update.assert_not_called()
287287
self.requests_mock.get.assert_called_with(
288-
"some.url.com", timeout=10, stream=True
288+
"some.url.com", headers=utils._BROWSER_HEADERS, timeout=60, stream=True
289289
)
290290
self.open_mock.assert_called_with(output_file + ".part", "wb")
291291

0 commit comments

Comments
 (0)