|
1 | 1 | import hashlib |
2 | 2 | import os |
3 | 3 | import re |
4 | | -from typing import Dict, List, Optional, Tuple |
| 4 | +from typing import Dict, List, Optional, Set, Tuple |
5 | 5 |
|
6 | 6 | import yaml |
7 | 7 |
|
@@ -215,3 +215,209 @@ def _get_step_name(image: str, step_key: str, test_names: List[str]) -> str: |
215 | 215 | for test_name in test_names[:2]: |
216 | 216 | step_name += f" {test_name}" |
217 | 217 | return step_name |
| 218 | + |
| 219 | + |
def collect_needed_variants(
    tests: List[Test],
) -> Tuple[Set[str], Set[str], Dict[str, Optional[Set[str]]]]:
    """Work out which build variants the selected tests depend on.

    Each test contributes its Python version and one image type. The image
    type is "ray-ml" or "ray-llm" when the test reports using that BYOD
    image, "ray-cpu" when its tag suffix is exactly "cpu", and "ray-cuda"
    otherwise.

    Returns:
        A ``(needed_python, needed_image_types, cuda_needs)`` tuple.

        needed_python: Python version strings reported by the tests.
        needed_image_types: Subset of
            {"ray-cpu", "ray-cuda", "ray-ml", "ray-llm"}.
        cuda_needs: For every non-CPU image type that was seen, either the
            set of full platform strings looked up in
            ``_SHORT_PLATFORM_MAP``, or ``None`` meaning "keep every CUDA
            variant". ``None`` is recorded as soon as one test's tag suffix
            has no entry in the map, and it is sticky: later tests of the
            same image type cannot narrow it again.
    """
    python_versions: Set[str] = set()
    image_types: Set[str] = set()
    cuda_by_type: Dict[str, Optional[Set[str]]] = {}

    for test in tests:
        python_versions.add(test.get_python_version())
        suffix = test.get_tag_suffix()

        if test.use_byod_ml_image():
            image_type = "ray-ml"
        elif test.use_byod_llm_image():
            image_type = "ray-llm"
        elif suffix == "cpu":
            image_type = "ray-cpu"
        else:
            image_type = "ray-cuda"

        image_types.add(image_type)
        if image_type == "ray-cpu":
            # CPU images carry no CUDA requirement.
            continue

        platform = _SHORT_PLATFORM_MAP.get(suffix)
        if not platform:
            # Unknown suffix: fall back to keeping all CUDA variants.
            cuda_by_type[image_type] = None
            continue

        known_platforms = cuda_by_type.setdefault(image_type, set())
        # A previously recorded None ("keep all") must not be narrowed.
        if known_platforms is not None:
            known_platforms.add(platform)

    return python_versions, image_types, cuda_by_type
| 264 | + |
| 265 | + |
| 266 | +def _get_step_image_type(step: dict) -> Optional[str]: |
| 267 | + """Determine the image type category of a build step. |
| 268 | +
|
| 269 | + Returns one of "ray-cpu", "ray-cuda", "ray-ml", "ray-llm", or None. |
| 270 | + """ |
| 271 | + name = step.get("name", "") |
| 272 | + key = step.get("key", "") |
| 273 | + image_type = step.get("env", {}).get("IMAGE_TYPE", "") |
| 274 | + |
| 275 | + if image_type == "ray-llm" or "llm" in name or "llm" in key: |
| 276 | + return "ray-llm" |
| 277 | + if image_type == "ray-ml" or "ray-ml" in name or "ml" in key: |
| 278 | + return "ray-ml" |
| 279 | + if "cpu" in name or "cpu" in key: |
| 280 | + return "ray-cpu" |
| 281 | + if "cuda" in name or "cuda" in key: |
| 282 | + return "ray-cuda" |
| 283 | + return None |
| 284 | + |
| 285 | + |
| 286 | +def _filter_array_dimension( |
| 287 | + values: List[str], |
| 288 | + allowed: Set[str], |
| 289 | + prefix: str = "", |
| 290 | +) -> List[str]: |
| 291 | + """Filter an array dimension to only allowed values. |
| 292 | +
|
| 293 | + ``prefix`` is prepended to each allowed value before comparison. |
| 294 | + For cuda arrays whose values lack the "cu" prefix, pass prefix="cu". |
| 295 | + """ |
| 296 | + return [v for v in values if f"{prefix}{v}" in allowed] |
| 297 | + |
| 298 | + |
| 299 | +def _global_cuda_filter( |
| 300 | + cuda_needs: Dict[str, Optional[Set[str]]], |
| 301 | +) -> Optional[Set[str]]: |
| 302 | + """Merge per-image-type CUDA needs into one global filter. |
| 303 | +
|
| 304 | + Returns a set of full platform strings, or ``None`` (keep all CUDA). |
| 305 | + An empty set means no CUDA is needed at all. |
| 306 | + """ |
| 307 | + result: Set[str] = set() |
| 308 | + for platforms in cuda_needs.values(): |
| 309 | + if platforms is None: |
| 310 | + return None |
| 311 | + result.update(platforms) |
| 312 | + return result |
| 313 | + |
| 314 | + |
def _adjustment_is_needed(
    adjustment: dict,
    needed_python: Set[str],
    cuda_filter: Optional[Set[str]],
    cuda_versions: Optional[Set[str]],
) -> bool:
    """Return True if an array adjustment survives the python/CUDA filters.

    ``cuda_filter`` holds full platform strings and ``cuda_versions`` the
    same strings without the "cu" prefix; both are ``None`` when CUDA
    filtering is disabled for the step.
    """
    selector = adjustment.get("with", {})
    if "python" in selector and selector["python"] not in needed_python:
        return False
    if cuda_filter is None:
        return True
    if "platform" in selector and selector["platform"] not in cuda_filter:
        return False
    if "cuda" in selector and selector["cuda"] not in cuda_versions:
        return False
    return True


def filter_release_build_yaml(
    path: str,
    needed_python: Set[str],
    needed_image_types: Set[str],
    cuda_needs: Dict[str, Optional[Set[str]]],
    *,
    filter_by_image_type: bool = True,
) -> None:
    """Filter a rayci YAML file to only include needed variants.

    Modifies the file in-place. Array dimensions (python, platform, cuda)
    and adjustments are trimmed to the needed values. Steps left with no
    viable combinations are removed. Steps without an ``array`` are kept
    unchanged. An empty YAML file is left untouched.

    When *filter_by_image_type* is True (the default, used for
    ``build.rayci.yml``), steps whose image type is not in
    *needed_image_types* are removed entirely and CUDA filtering uses
    per-image-type needs from *cuda_needs*.

    When False (used for shared files like ``_images.rayci.yml`` and
    ``_wheel-build.rayci.yml`` whose steps have cross-type dependencies),
    no steps are removed by image type. CUDA filtering uses a global
    filter, the union of all *cuda_needs* values.

    Args:
        path: Path of the YAML file to rewrite.
        needed_python: Python version strings to keep.
        needed_image_types: Image type categories to keep.
        cuda_needs: Per-image-type CUDA platforms to keep; a ``None``
            value means "keep all CUDA variants" for that image type.
        filter_by_image_type: Selects per-image-type or global filtering
            as described above.
    """
    with open(path, encoding="utf-8") as f:
        data = yaml.safe_load(f)

    # safe_load() yields None for an empty document; nothing to filter.
    if data is None:
        return

    global_cuda = None if filter_by_image_type else _global_cuda_filter(cuda_needs)

    kept_steps = []
    for step in data.get("steps", []):
        img_type = _get_step_image_type(step)

        # Steps with an unrecognised image type (None) are never dropped here.
        if (
            filter_by_image_type
            and img_type is not None
            and img_type not in needed_image_types
        ):
            continue

        array = step.get("array")
        if not array:
            kept_steps.append(step)
            continue

        # Filter python dimension.
        if "python" in array:
            array["python"] = [v for v in array["python"] if v in needed_python]

        # Determine which CUDA platforms to keep. None disables CUDA
        # filtering for this step (unknown image type, image type without
        # recorded needs, or an explicit "keep all").
        if filter_by_image_type:
            cuda_filter = cuda_needs.get(img_type) if img_type else None
        else:
            cuda_filter = global_cuda

        cuda_versions: Optional[Set[str]] = None
        if cuda_filter is not None:
            # cuda array values don't have the "cu" prefix.
            cuda_versions = {p.removeprefix("cu") for p in cuda_filter}
            if "platform" in array:
                array["platform"] = [v for v in array["platform"] if v in cuda_filter]
            if "cuda" in array:
                array["cuda"] = [v for v in array["cuda"] if v in cuda_versions]

        # Filter adjustments; drop the key entirely when none survive.
        if "adjustments" in array:
            kept_adjustments = [
                adj
                for adj in array["adjustments"]
                if _adjustment_is_needed(adj, needed_python, cuda_filter, cuda_versions)
            ]
            if kept_adjustments:
                array["adjustments"] = kept_adjustments
            else:
                del array["adjustments"]

        # If base arrays were emptied but adjustments remain, reconstruct
        # minimal base arrays from adjustment values so rayci can expand them.
        remaining_adj = array.get("adjustments", [])
        if remaining_adj:
            for dim in list(array):
                dim_values = array[dim]
                if dim == "adjustments" or not isinstance(dim_values, list):
                    continue
                if not dim_values:
                    array[dim] = sorted(
                        {
                            adj["with"][dim]
                            for adj in remaining_adj
                            if dim in adj.get("with", {})
                        }
                    )

        # Keep the step if the base array still expands to at least one
        # combination, or if any adjustment remains.
        base_dims = [
            v for k, v in array.items() if k != "adjustments" and isinstance(v, list)
        ]
        has_base_combos = bool(base_dims) and all(base_dims)

        if has_base_combos or remaining_adj:
            kept_steps.append(step)

    data["steps"] = kept_steps
    with open(path, "w", encoding="utf-8") as f:
        yaml.dump(data, f, default_flow_style=False, sort_keys=False)
0 commit comments