ray-project
diff --git a/release/ray_release/custom_byod_build_init_helper.py b/release/ray_release/custom_byod_build_init_helper.py
Lines changed: 207 additions & 1 deletion
@@ -1,7 +1,7 @@
 import hashlib
 import os
 import re
-from typing import Dict, List, Optional, Tuple
+from typing import Dict, List, Optional, Set, Tuple
 
 import yaml
 
@@ -215,3 +215,209 @@ def _get_step_name(image: str, step_key: str, test_names: List[str]) -> str:
     for test_name in test_names[:2]:
         step_name += f" {test_name}"
     return step_name
+
+
+def collect_needed_variants(
+    tests: List[Test],
+) -> Tuple[Set[str], Set[str], Dict[str, Optional[Set[str]]]]:
+    """Derive the build variants required by the selected tests.
+
+    Returns:
+        A ``(needed_python, needed_image_types, cuda_needs)`` tuple.
+
+        needed_python: every test's ``get_python_version()`` value
+            (e.g., {"3.10", "3.12"}).
+        needed_image_types: the image categories in use, drawn from
+            "ray-cpu", "ray-cuda", "ray-ml", "ray-llm".
+        cuda_needs: maps each non-CPU image type to the CUDA platforms it
+            needs.  A set of full platform strings
+            (e.g., {"cu12.3.2-cudnn9"}) restricts the build to those
+            platforms.  ``None`` means "keep every CUDA variant" and is
+            recorded as soon as one test's tag suffix has no entry in
+            ``_SHORT_PLATFORM_MAP`` (e.g. "gpu" for ray-ml).
+    """
+    python_versions: Set[str] = set()
+    image_types: Set[str] = set()
+    cuda_by_type: Dict[str, Optional[Set[str]]] = {}
+
+    for test in tests:
+        python_versions.add(test.get_python_version())
+        suffix = test.get_tag_suffix()
+
+        # ML / LLM images take precedence over the cpu/cuda split.
+        if test.use_byod_ml_image():
+            kind = "ray-ml"
+        elif test.use_byod_llm_image():
+            kind = "ray-llm"
+        elif suffix == "cpu":
+            kind = "ray-cpu"
+        else:
+            kind = "ray-cuda"
+
+        image_types.add(kind)
+        if kind == "ray-cpu":
+            # Plain CPU images carry no CUDA requirement.
+            continue
+
+        platform = _SHORT_PLATFORM_MAP.get(suffix)
+        if not platform:
+            # Can't determine a specific CUDA platform; keep all variants.
+            cuda_by_type[kind] = None
+            continue
+
+        # ``setdefault`` hands back an existing ``None`` untouched, so a
+        # previously recorded "keep all" marker is never narrowed again.
+        known_platforms = cuda_by_type.setdefault(kind, set())
+        if known_platforms is not None:
+            known_platforms.add(platform)
+
+    return python_versions, image_types, cuda_by_type
+
+
+def _get_step_image_type(step: dict) -> Optional[str]:
+    """Classify a build step into an image type category.
+
+    The step's ``env.IMAGE_TYPE`` value, ``name`` and ``key`` are inspected
+    in a fixed precedence order: llm, then ml, then cpu, then cuda.  All
+    name/key checks are plain substring matches; note that the ml check on
+    ``key`` looks for the bare substring "ml".
+
+    Returns one of "ray-llm", "ray-ml", "ray-cpu", "ray-cuda", or ``None``
+    when nothing matches.
+    """
+    step_name = step.get("name", "")
+    step_key = step.get("key", "")
+    declared_type = step.get("env", {}).get("IMAGE_TYPE", "")
+
+    if declared_type == "ray-llm" or "llm" in step_name or "llm" in step_key:
+        category: Optional[str] = "ray-llm"
+    elif declared_type == "ray-ml" or "ray-ml" in step_name or "ml" in step_key:
+        category = "ray-ml"
+    elif "cpu" in step_name or "cpu" in step_key:
+        category = "ray-cpu"
+    elif "cuda" in step_name or "cuda" in step_key:
+        category = "ray-cuda"
+    else:
+        category = None
+    return category
+
+
+def _filter_array_dimension(
+    values: List[str],
+    allowed: Set[str],
+    prefix: str = "",
+) -> List[str]:
+    """Return the entries of ``values`` that are permitted by ``allowed``.
+
+    Each candidate value is formatted as ``prefix`` followed by the value,
+    and that combined string is looked up in ``allowed``; the value itself
+    (without the prefix) is what gets returned.  For cuda arrays whose
+    values lack the "cu" prefix while ``allowed`` holds full platform
+    strings, pass prefix="cu".  Input order and duplicates are preserved.
+    """
+    kept: List[str] = []
+    for candidate in values:
+        if f"{prefix}{candidate}" in allowed:
+            kept.append(candidate)
+    return kept
+
+
+def _global_cuda_filter(
+    cuda_needs: Dict[str, Optional[Set[str]]],
+) -> Optional[Set[str]]:
+    """Collapse per-image-type CUDA needs into a single filter.
+
+    Returns ``None`` (keep all CUDA variants) as soon as any image type
+    asks for everything; otherwise returns a new set holding the union of
+    every image type's platform strings.  An empty set means no CUDA is
+    needed at all.
+    """
+    per_type = list(cuda_needs.values())
+    if any(platforms is None for platforms in per_type):
+        return None
+    return set().union(*per_type)
+
+
+def filter_release_build_yaml(
+    path: str,
+    needed_python: Set[str],
+    needed_image_types: Set[str],
+    cuda_needs: Dict[str, Optional[Set[str]]],
+    *,
+    filter_by_image_type: bool = True,
+) -> None:
+    """Filter a rayci YAML file to only include needed variants.
+
+    Modifies the file in-place.  Array dimensions (python, platform, cuda)
+    and adjustments are trimmed to the needed values.  Steps left with no
+    viable combinations are removed.  A file that is empty or whose top
+    level is not a mapping is left untouched.
+
+    When *filter_by_image_type* is True (the default, used for
+    ``build.rayci.yml``), steps whose image type is not in
+    *needed_image_types* are removed entirely and CUDA filtering uses
+    per-image-type needs from *cuda_needs*.
+
+    When False (used for shared files like ``_images.rayci.yml`` and
+    ``_wheel-build.rayci.yml`` whose steps have cross-type dependencies),
+    no steps are removed by image type.  CUDA filtering uses a global
+    filter — the union of all *cuda_needs* values.
+
+    Args:
+        path: Path of the YAML file to rewrite.
+        needed_python: Python version strings to keep.
+        needed_image_types: Image type categories to keep.
+        cuda_needs: Per-image-type CUDA platforms; a ``None`` value means
+            "keep every CUDA variant" for that image type.
+        filter_by_image_type: Selects per-image-type vs. global filtering
+            as described above.
+    """
+    with open(path) as f:
+        data = yaml.safe_load(f)
+
+    # An empty file parses to ``None``; anything that is not a mapping has
+    # no "steps" to filter, so leave the file as-is instead of crashing.
+    if not isinstance(data, dict):
+        return
+
+    global_cuda = _global_cuda_filter(cuda_needs) if not filter_by_image_type else None
+
+    filtered_steps = []
+    # ``steps:`` with no value parses to ``None``; treat it as "no steps".
+    for step in data.get("steps") or []:
+        img_type = _get_step_image_type(step)
+
+        if (
+            filter_by_image_type
+            and img_type is not None
+            and img_type not in needed_image_types
+        ):
+            continue
+
+        array = step.get("array")
+        if not array:
+            # Steps without an array have nothing to trim; keep them whole.
+            filtered_steps.append(step)
+            continue
+
+        # Filter python dimension.
+        if "python" in array:
+            array["python"] = [v for v in array["python"] if v in needed_python]
+
+        # Determine which CUDA platforms to keep.  ``None`` (unknown image
+        # type, no entry in cuda_needs, or an explicit "keep all") disables
+        # CUDA filtering for this step.
+        if filter_by_image_type:
+            cuda_filter = cuda_needs.get(img_type) if img_type else None
+        else:
+            cuda_filter = global_cuda
+        should_filter_cuda = cuda_filter is not None
+
+        # cuda array values don't have the "cu" prefix.  Build the
+        # prefix-less lookup set once per step rather than per adjustment.
+        cuda_no_prefix: Set[str] = (
+            {p.removeprefix("cu") for p in cuda_filter} if should_filter_cuda else set()
+        )
+
+        if should_filter_cuda:
+            if "platform" in array:
+                array["platform"] = [v for v in array["platform"] if v in cuda_filter]
+            if "cuda" in array:
+                array["cuda"] = [v for v in array["cuda"] if v in cuda_no_prefix]
+
+        # Filter adjustments: drop any whose "with" values are not needed.
+        if "adjustments" in array:
+            filtered_adj = []
+            for adj in array["adjustments"]:
+                w = adj.get("with", {})
+                if "python" in w and w["python"] not in needed_python:
+                    continue
+                if should_filter_cuda:
+                    if "platform" in w and w["platform"] not in cuda_filter:
+                        continue
+                    if "cuda" in w and w["cuda"] not in cuda_no_prefix:
+                        continue
+                filtered_adj.append(adj)
+            if filtered_adj:
+                array["adjustments"] = filtered_adj
+            else:
+                del array["adjustments"]
+
+        # If base arrays were emptied but adjustments remain, reconstruct
+        # minimal base arrays from adjustment values so rayci can expand them.
+        remaining_adj = array.get("adjustments", [])
+        for dim in list(array.keys()):
+            if dim == "adjustments" or not isinstance(array[dim], list):
+                continue
+            if len(array[dim]) == 0 and remaining_adj:
+                values = sorted(
+                    {a["with"][dim] for a in remaining_adj if dim in a.get("with", {})}
+                )
+                array[dim] = values
+
+        # Check if any combinations remain.
+        base_dims = [
+            v for k, v in array.items() if k != "adjustments" and isinstance(v, list)
+        ]
+        has_base_combos = bool(base_dims) and all(len(d) > 0 for d in base_dims)
+        has_adjustments = bool(array.get("adjustments"))
+
+        if has_base_combos or has_adjustments:
+            filtered_steps.append(step)
+
+    data["steps"] = filtered_steps
+    with open(path, "w") as f:
+        yaml.dump(data, f, default_flow_style=False, sort_keys=False)