Skip to content

Commit 0be9745

Browse files
[CI] Add filter_release_build_yaml to trim build steps by test needs
Add collect_needed_variants() to extract python versions, image types, and CUDA platforms from selected tests. Add filter_release_build_yaml() to rewrite build.rayci.yml in-place, removing steps for unneeded image types and trimming array dimensions (python, platform, cuda) and adjustments to only needed values. Steps left with no viable combinations are removed entirely. This is the library/logic layer — the init script integration comes in the next commit. Topic: release-build-filter Relative: array-release Labels: draft Signed-off-by: andrew <andrew@anyscale.com>
1 parent 4fbf70f commit 0be9745

File tree

2 files changed

+660
-1
lines changed

2 files changed

+660
-1
lines changed

release/ray_release/custom_byod_build_init_helper.py

Lines changed: 207 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
import hashlib
22
import os
33
import re
4-
from typing import Dict, List, Optional, Tuple
4+
from typing import Dict, List, Optional, Set, Tuple
55

66
import yaml
77

@@ -215,3 +215,209 @@ def _get_step_name(image: str, step_key: str, test_names: List[str]) -> str:
215215
for test_name in test_names[:2]:
216216
step_name += f" {test_name}"
217217
return step_name
218+
219+
220+
def collect_needed_variants(
    tests: List[Test],
) -> Tuple[Set[str], Set[str], Dict[str, Optional[Set[str]]]]:
    """Gather the build variants required by the selected tests.

    Args:
        tests: The tests whose images must be built.

    Returns:
        A ``(needed_python, needed_image_types, cuda_needs)`` tuple.

        needed_python: Python version strings (e.g., {"3.10", "3.12"}).
        needed_image_types: Image type categories, drawn from
            "ray-cpu", "ray-cuda", "ray-ml" and "ray-llm".
        cuda_needs: CUDA platform needs keyed by image type. A set of
            full platform strings (e.g., {"cu12.3.2-cudnn9"}) means only
            those platforms are needed. ``None`` means every CUDA variant
            must be kept, which happens as soon as one test's tag suffix
            has no entry in ``_SHORT_PLATFORM_MAP`` (e.g. "gpu" for
            ray-ml). "ray-cpu" never appears as a key.
    """
    python_versions: Set[str] = set()
    image_types: Set[str] = set()
    cuda_by_type: Dict[str, Optional[Set[str]]] = {}

    for test in tests:
        python_versions.add(test.get_python_version())
        suffix = test.get_tag_suffix()

        # ML and LLM images take precedence over the cpu/cuda split.
        if test.use_byod_ml_image():
            image_type = "ray-ml"
        elif test.use_byod_llm_image():
            image_type = "ray-llm"
        elif suffix == "cpu":
            image_type = "ray-cpu"
        else:
            image_type = "ray-cuda"

        image_types.add(image_type)
        if image_type == "ray-cpu":
            # Plain CPU images carry no CUDA requirement.
            continue

        platform = _SHORT_PLATFORM_MAP.get(suffix)
        if not platform:
            # Can't determine a specific CUDA platform; keep all variants.
            cuda_by_type[image_type] = None
            continue

        if image_type in cuda_by_type and cuda_by_type[image_type] is None:
            # An earlier test already forced "keep all" for this type.
            continue
        cuda_by_type.setdefault(image_type, set()).add(platform)

    return python_versions, image_types, cuda_by_type
264+
265+
266+
def _get_step_image_type(step: dict) -> Optional[str]:
267+
"""Determine the image type category of a build step.
268+
269+
Returns one of "ray-cpu", "ray-cuda", "ray-ml", "ray-llm", or None.
270+
"""
271+
name = step.get("name", "")
272+
key = step.get("key", "")
273+
image_type = step.get("env", {}).get("IMAGE_TYPE", "")
274+
275+
if image_type == "ray-llm" or "llm" in name or "llm" in key:
276+
return "ray-llm"
277+
if image_type == "ray-ml" or "ray-ml" in name or "ml" in key:
278+
return "ray-ml"
279+
if "cpu" in name or "cpu" in key:
280+
return "ray-cpu"
281+
if "cuda" in name or "cuda" in key:
282+
return "ray-cuda"
283+
return None
284+
285+
286+
def _filter_array_dimension(
287+
values: List[str],
288+
allowed: Set[str],
289+
prefix: str = "",
290+
) -> List[str]:
291+
"""Filter an array dimension to only allowed values.
292+
293+
``prefix`` is prepended to each allowed value before comparison.
294+
For cuda arrays whose values lack the "cu" prefix, pass prefix="cu".
295+
"""
296+
return [v for v in values if f"{prefix}{v}" in allowed]
297+
298+
299+
def _global_cuda_filter(
300+
cuda_needs: Dict[str, Optional[Set[str]]],
301+
) -> Optional[Set[str]]:
302+
"""Merge per-image-type CUDA needs into one global filter.
303+
304+
Returns a set of full platform strings, or ``None`` (keep all CUDA).
305+
An empty set means no CUDA is needed at all.
306+
"""
307+
result: Set[str] = set()
308+
for platforms in cuda_needs.values():
309+
if platforms is None:
310+
return None
311+
result.update(platforms)
312+
return result
313+
314+
315+
def _keep_adjustment(
    adjustment: dict,
    needed_python: Set[str],
    cuda_filter: Optional[Set[str]],
    cuda_no_prefix: Set[str],
) -> bool:
    """Return True if every filtered ``with`` value of an adjustment is needed.

    Args:
        adjustment: One entry of a step's ``array.adjustments`` list.
        needed_python: Python versions to keep.
        cuda_filter: Full CUDA platform strings to keep, or ``None`` to
            skip CUDA filtering.
        cuda_no_prefix: ``cuda_filter`` with the leading "cu" stripped,
            for matching ``cuda`` values. Ignored when ``cuda_filter`` is
            ``None``.
    """
    # ``with: null`` is treated the same as a missing ``with``.
    selector = adjustment.get("with") or {}
    if "python" in selector and selector["python"] not in needed_python:
        return False
    if cuda_filter is None:
        return True
    if "platform" in selector and selector["platform"] not in cuda_filter:
        return False
    if "cuda" in selector and selector["cuda"] not in cuda_no_prefix:
        return False
    return True


def _trim_step_array(
    array: dict,
    needed_python: Set[str],
    cuda_filter: Optional[Set[str]],
) -> bool:
    """Trim one step's ``array`` mapping in place.

    Args:
        array: The step's ``array`` mapping; it is modified in place.
        needed_python: Python versions to keep.
        cuda_filter: Full CUDA platform strings to keep, or ``None`` to
            leave the ``platform`` and ``cuda`` values untouched.

    Returns:
        True if the step still has something to build: either every list
        dimension is non-empty, or at least one adjustment survived.
    """
    if "python" in array:
        array["python"] = [v for v in array["python"] if v in needed_python]

    cuda_no_prefix: Set[str] = set()
    if cuda_filter is not None:
        # cuda values don't have the "cu" prefix; strip it once up front.
        cuda_no_prefix = {p.removeprefix("cu") for p in cuda_filter}
        # NOTE(review): any non-CUDA entry in a ``platform`` array (e.g.
        # "cpu", if one exists) is dropped here as well, because only
        # members of cuda_filter are kept. Confirm no filtered file mixes
        # cpu and CUDA platforms in one array.
        if "platform" in array:
            array["platform"] = [v for v in array["platform"] if v in cuda_filter]
        if "cuda" in array:
            array["cuda"] = [v for v in array["cuda"] if v in cuda_no_prefix]

    if "adjustments" in array:
        kept_adjustments = [
            adj
            for adj in array["adjustments"] or []
            if _keep_adjustment(adj, needed_python, cuda_filter, cuda_no_prefix)
        ]
        if kept_adjustments:
            array["adjustments"] = kept_adjustments
        else:
            del array["adjustments"]

    # If base arrays were emptied but adjustments remain, reconstruct
    # minimal base arrays from adjustment values so rayci can expand them.
    remaining = array.get("adjustments", [])
    for dim in list(array):
        if dim == "adjustments" or not isinstance(array[dim], list):
            continue
        if not array[dim] and remaining:
            array[dim] = sorted(
                {
                    adj["with"][dim]
                    for adj in remaining
                    if dim in (adj.get("with") or {})
                }
            )

    base_dims = [
        values
        for dim, values in array.items()
        if dim != "adjustments" and isinstance(values, list)
    ]
    has_base_combos = bool(base_dims) and all(base_dims)
    return has_base_combos or bool(array.get("adjustments"))


def filter_release_build_yaml(
    path: str,
    needed_python: Set[str],
    needed_image_types: Set[str],
    cuda_needs: Dict[str, Optional[Set[str]]],
    *,
    filter_by_image_type: bool = True,
) -> None:
    """Filter a rayci YAML file to only include needed variants.

    Modifies the file in-place. Array dimensions (python, platform, cuda)
    and adjustments are trimmed to the needed values. Steps left with no
    viable combinations are removed. Steps without an ``array`` are kept
    as they are, and step entries that are not mappings are passed
    through untouched. A file whose top-level document is empty or not a
    mapping is left unmodified.

    When *filter_by_image_type* is True (the default, used for
    ``build.rayci.yml``), steps whose image type is not in
    *needed_image_types* are removed entirely and CUDA filtering uses
    per-image-type needs from *cuda_needs*. Steps whose image type cannot
    be determined are kept and get no CUDA filtering.

    When False (used for shared files like ``_images.rayci.yml`` and
    ``_wheel-build.rayci.yml`` whose steps have cross-type dependencies),
    no steps are removed by image type. CUDA filtering uses a global
    filter — the union of all *cuda_needs* values.

    Args:
        path: Path of the YAML file to rewrite.
        needed_python: Python version strings to keep.
        needed_image_types: Image type categories to keep.
        cuda_needs: CUDA platform needs keyed by image type; ``None`` as
            a value means "keep every CUDA variant" for that type.
        filter_by_image_type: Selects per-image-type or global filtering,
            as described above.
    """
    with open(path, encoding="utf-8") as f:
        data = yaml.safe_load(f)

    # safe_load returns None for an empty document; there is nothing to
    # filter then, so leave the file exactly as it was.
    if not isinstance(data, dict):
        return

    global_cuda = None if filter_by_image_type else _global_cuda_filter(cuda_needs)

    kept_steps = []
    # ``steps: null`` is treated as an empty step list.
    for step in data.get("steps") or []:
        if not isinstance(step, dict):
            # Not a mapping, so there is nothing to classify or trim.
            kept_steps.append(step)
            continue

        img_type = _get_step_image_type(step)
        if filter_by_image_type:
            if img_type is not None and img_type not in needed_image_types:
                continue
            # None covers an unknown image type, a type with no recorded
            # CUDA needs, and an explicit "keep all CUDA variants".
            cuda_filter = cuda_needs.get(img_type) if img_type else None
        else:
            cuda_filter = global_cuda

        array = step.get("array")
        if not array or _trim_step_array(array, needed_python, cuda_filter):
            kept_steps.append(step)

    data["steps"] = kept_steps
    with open(path, "w", encoding="utf-8") as f:
        yaml.dump(data, f, default_flow_style=False, sort_keys=False)

0 commit comments

Comments
 (0)