fix(p0-7): run_validation default-safe output + symlink-resistant containment (#433)

0xmariowu · web-flow · commit 872de347733a · 2026-04-26T13:18:36.000Z
* test(run-validation): add failing scaffold for default-no-delete output (F009 S1)

* fix(run-validation): default output uses run-<ts> subdir, no delete (F009 S2)

* feat(run-validation): add --clean-output opt-in for delete (F009 S3)

* fix(run-validation): containment check restricts --clean-output to repo reports/ (F009 S4)

* fix(run-validation): double-resolve symlinks in containment check (F009 S5)

* style(run-validation): format output guard tests (F009)

* fix(test-e2b-output): mark E2B stub modules as packages with __path__

* fix(papers): align per-source-timeout source loop with F006a delegate refactor
diff --git a/autosearch/skills/channels/papers/methods/via_paper_search.py b/autosearch/skills/channels/papers/methods/via_paper_search.py
@@ -3,6 +3,7 @@
 
 import asyncio
 import os
+from concurrent.futures import ThreadPoolExecutor
 from datetime import UTC, datetime
 
 import structlog
@@ -41,14 +42,31 @@ async def search(
         return []
     fetched_at = datetime.now(UTC)
     source_items = list(active_sources.items())
-    tasks = [
-        asyncio.wait_for(
-            asyncio.to_thread(_search_source, searcher_cls, query.text, max_results_per_source),
-            timeout=per_source_timeout_seconds,
-        )
-        for _, searcher_cls in source_items
-    ]
-    results = await asyncio.gather(*tasks, return_exceptions=True)
+    if not source_items:
+        return []
+
+    loop = asyncio.get_running_loop()
+    executor = ThreadPoolExecutor(
+        max_workers=len(source_items),
+        thread_name_prefix="papers-source",
+    )
+    try:
+        tasks = [
+            asyncio.wait_for(
+                loop.run_in_executor(
+                    executor,
+                    _search_source,
+                    searcher_cls,
+                    query.text,
+                    max_results_per_source,
+                ),
+                timeout=per_source_timeout_seconds,
+            )
+            for _, searcher_cls in source_items
+        ]
+        results = await asyncio.gather(*tasks, return_exceptions=True)
+    finally:
+        executor.shutdown(wait=False, cancel_futures=True)
 
     evidence_by_source: dict[str, list[Evidence]] = {}
     for (source_name, _), result in zip(source_items, results, strict=True):
diff --git a/scripts/e2b/run_validation.py b/scripts/e2b/run_validation.py
@@ -46,6 +46,11 @@ def parse_args() -> argparse.Namespace:
         "--source-dir", help="Pack this directory to a temp tarball and use it as --tarball"
     )
     parser.add_argument("--phase", help="Optional CSV filter for phases to run")
+    parser.add_argument(
+        "--clean-output",
+        action="store_true",
+        help="Delete the base output directory before creating this run's report directory",
+    )
     return parser.parse_args()
 
 
@@ -389,23 +394,50 @@ def execute_phase(
     return phase_summary
 
 
+def get_reports_root() -> Path:
+    return Path(__file__).resolve().parents[2] / "reports"
+
+
+def _path_is_inside(child: Path, parent: Path) -> bool:
+    return child != parent and child.is_relative_to(parent)
+
+
 def clean_output_dir(output_dir: Path, console: Console) -> None:
-    resolved = output_dir.expanduser().resolve()
-    dangerous_paths = {
-        Path("/"),
-        Path.home().resolve(),
-        Path.home().resolve().parent,
-        Path.cwd().resolve(),
-    }
-    if resolved in dangerous_paths:
-        raise ValueError(f"Refusing to wipe dangerous path: {resolved}")
-    if len(resolved.parts) < 3:
-        raise ValueError(f"Output dir must be at least 2 levels deep: {resolved}")
+    reports_root = get_reports_root().expanduser().resolve()
+    clean_target = output_dir.expanduser().resolve()
+    if not _path_is_inside(clean_target, reports_root):
+        raise ValueError(
+            f"Refusing to clean output outside repo reports/: {clean_target} "
+            f"is not inside {reports_root}"
+        )
 
-    if output_dir.exists():
-        console.print(f"[yellow]WARN[/] wiping existing output directory {resolved}")
-        shutil.rmtree(resolved)
-    resolved.mkdir(parents=True, exist_ok=True)
+    if clean_target.exists():
+        console.print(f"[yellow]WARN[/] wiping existing output directory {clean_target}")
+        shutil.rmtree(clean_target)
+    clean_target.mkdir(parents=True, exist_ok=True)
+
+
+def create_run_output_dir(
+    base_output_dir: Path,
+    *,
+    clean_output: bool,
+    console: Console,
+) -> Path:
+    if clean_output:
+        clean_output_dir(base_output_dir, console)
+    else:
+        base_output_dir.mkdir(parents=True, exist_ok=True)
+
+    for _attempt in range(100):
+        timestamp = datetime.now(timezone.utc).strftime("%Y%m%dT%H%M%S%fZ")
+        run_output_dir = base_output_dir / f"run-{timestamp}"
+        try:
+            run_output_dir.mkdir()
+        except FileExistsError:
+            time.sleep(0.001)
+            continue
+        return run_output_dir
+    raise ValueError(f"Unable to create a unique run output directory under {base_output_dir}")
 
 
 def select_phases(all_phases: list[PhaseSpec], phase_filter: str | None) -> list[PhaseSpec]:
@@ -456,7 +488,7 @@ def main() -> int:
         args = parse_args()
         matrix_path = Path(args.matrix).expanduser().resolve()
         secrets_path = Path(args.secrets).expanduser()
-        output_dir = Path(args.output).expanduser()
+        base_output_dir = Path(args.output).expanduser()
         tarball = Path(args.tarball).expanduser().resolve() if args.tarball else None
         source_dir = Path(args.source_dir).expanduser().resolve() if args.source_dir else None
 
@@ -471,7 +503,11 @@ def main() -> int:
         if "E2B_API_KEY" in secrets and "E2B_API_KEY" not in os.environ:
             os.environ["E2B_API_KEY"] = secrets["E2B_API_KEY"]
 
-        clean_output_dir(output_dir, stderr_console)
+        output_dir = create_run_output_dir(
+            base_output_dir,
+            clean_output=args.clean_output,
+            console=stderr_console,
+        )
 
         started_at = datetime.now(timezone.utc).isoformat()
         with Progress(
diff --git a/tests/scripts/test_e2b_run_validation_output.py b/tests/scripts/test_e2b_run_validation_output.py
@@ -0,0 +1,228 @@
+from __future__ import annotations
+
+import importlib
+import sys
+from pathlib import Path
+from types import ModuleType, SimpleNamespace
+
+
+ROOT = Path(__file__).resolve().parents[2]
+E2B_SCRIPT_DIR = ROOT / "scripts" / "e2b"
+
+
+def _install_e2b_stubs(monkeypatch) -> None:
+    e2b_module = ModuleType("e2b")
+    sandbox_module = ModuleType("e2b.sandbox")
+    commands_module = ModuleType("e2b.sandbox.commands")
+    command_handle_module = ModuleType("e2b.sandbox.commands.command_handle")
+    interpreter_module = ModuleType("e2b_code_interpreter")
+    e2b_module.__path__ = []
+    sandbox_module.__path__ = []
+    commands_module.__path__ = []
+
+    class CommandExitException(Exception):
+        exit_code = 1
+        stdout = ""
+        stderr = ""
+        error = "stubbed command failure"
+
+    class Sandbox:
+        @classmethod
+        def create(cls, **_kwargs):
+            raise AssertionError("Sandbox.create should be mocked by these tests")
+
+    command_handle_module.CommandExitException = CommandExitException
+    interpreter_module.Sandbox = Sandbox
+
+    monkeypatch.setitem(sys.modules, "e2b", e2b_module)
+    monkeypatch.setitem(sys.modules, "e2b.sandbox", sandbox_module)
+    monkeypatch.setitem(sys.modules, "e2b.sandbox.commands", commands_module)
+    monkeypatch.setitem(
+        sys.modules,
+        "e2b.sandbox.commands.command_handle",
+        command_handle_module,
+    )
+    monkeypatch.setitem(sys.modules, "e2b_code_interpreter", interpreter_module)
+
+
+def test_e2b_stubs_support_nested_command_handle_import(monkeypatch) -> None:
+    _install_e2b_stubs(monkeypatch)
+
+    command_handle = importlib.import_module("e2b.sandbox.commands.command_handle")
+
+    assert sys.modules["e2b"].__path__ == []
+    assert sys.modules["e2b.sandbox"].__path__ == []
+    assert sys.modules["e2b.sandbox.commands"].__path__ == []
+    assert command_handle.CommandExitException.__name__ == "CommandExitException"
+
+
+def _load_run_validation(monkeypatch):
+    _install_e2b_stubs(monkeypatch)
+    monkeypatch.syspath_prepend(str(E2B_SCRIPT_DIR))
+    sys.modules.pop("run_validation", None)
+    return importlib.import_module("run_validation")
+
+
+def _patch_successful_run(monkeypatch, run_validation):
+    phase = run_validation.PhaseSpec(
+        id="smoke",
+        timeout=1,
+        parallel=1,
+        tasks=[run_validation.TaskSpec(id="noop", cmd="true")],
+    )
+    output_dirs: list[Path] = []
+
+    def fake_execute_phase(**kwargs):
+        output_dirs.append(kwargs["output_dir"])
+        return {
+            "phase": kwargs["phase"].id,
+            "parallel": 1,
+            "timeout": 1,
+            "template": "default",
+            "wall_seconds": 0.0,
+            "passed": 1,
+            "failed": 0,
+            "sandboxes": [],
+        }
+
+    monkeypatch.setattr(
+        run_validation, "load_matrix", lambda _path: SimpleNamespace(phases=[phase])
+    )
+    monkeypatch.setattr(run_validation, "load_secrets", lambda _path: {})
+    monkeypatch.setattr(run_validation, "execute_phase", fake_execute_phase)
+    return output_dirs
+
+
+def _run_main(monkeypatch, run_validation, matrix_path: Path, output_dir: Path, *extra: str) -> int:
+    monkeypatch.setattr(
+        sys,
+        "argv",
+        [
+            "run_validation.py",
+            "--project",
+            "autosearch-test",
+            "--matrix",
+            str(matrix_path),
+            "--secrets",
+            str(matrix_path.parent / "secrets.env"),
+            "--output",
+            str(output_dir),
+            "--parallel",
+            "1",
+            *extra,
+        ],
+    )
+    return run_validation.main()
+
+
+def _matrix_path(tmp_path: Path) -> Path:
+    matrix_path = tmp_path / "matrix.yaml"
+    matrix_path.write_text("phases: {}\n", encoding="utf-8")
+    return matrix_path
+
+
+def test_default_does_not_delete_existing(tmp_path, monkeypatch) -> None:
+    run_validation = _load_run_validation(monkeypatch)
+    output_dir = tmp_path / "validation-output"
+    output_dir.mkdir()
+    marker = output_dir / "marker.txt"
+    marker.write_text("keep me", encoding="utf-8")
+    matrix_path = _matrix_path(tmp_path)
+    output_dirs = _patch_successful_run(monkeypatch, run_validation)
+
+    assert _run_main(monkeypatch, run_validation, matrix_path, output_dir) == 0
+
+    assert marker.read_text(encoding="utf-8") == "keep me"
+    assert len(output_dirs) == 1
+    assert output_dirs[0].parent == output_dir
+    assert output_dirs[0].name.startswith("run-")
+
+
+def test_clean_output_flag_required_for_delete(tmp_path, monkeypatch) -> None:
+    run_validation = _load_run_validation(monkeypatch)
+    reports_root = tmp_path / "reports"
+    output_dir = reports_root / "validation-output"
+    output_dir.mkdir(parents=True)
+    marker = output_dir / "marker.txt"
+    marker.write_text("keep me", encoding="utf-8")
+    matrix_path = _matrix_path(tmp_path)
+    output_dirs = _patch_successful_run(monkeypatch, run_validation)
+    monkeypatch.setattr(run_validation, "get_reports_root", lambda: reports_root, raising=False)
+
+    assert _run_main(monkeypatch, run_validation, matrix_path, output_dir) == 0
+    assert marker.read_text(encoding="utf-8") == "keep me"
+
+    marker.write_text("delete me", encoding="utf-8")
+    assert _run_main(monkeypatch, run_validation, matrix_path, output_dir, "--clean-output") == 0
+
+    assert not marker.exists()
+    assert output_dir.exists()
+    assert len(output_dirs) == 2
+    assert output_dirs[-1].parent == output_dir
+    assert output_dirs[-1].name.startswith("run-")
+
+
+def test_output_outside_reports_root_rejected(tmp_path, monkeypatch, capsys) -> None:
+    run_validation = _load_run_validation(monkeypatch)
+    reports_root = tmp_path / "reports"
+    reports_root.mkdir()
+    output_dir = tmp_path / "outside"
+    output_dir.mkdir()
+    marker = output_dir / "marker.txt"
+    marker.write_text("keep me", encoding="utf-8")
+    matrix_path = _matrix_path(tmp_path)
+    output_dirs = _patch_successful_run(monkeypatch, run_validation)
+    monkeypatch.setattr(run_validation, "get_reports_root", lambda: reports_root, raising=False)
+
+    assert _run_main(monkeypatch, run_validation, matrix_path, output_dir, "--clean-output") == 2
+
+    assert marker.read_text(encoding="utf-8") == "keep me"
+    assert output_dirs == []
+    assert "outside repo reports" in capsys.readouterr().err
+
+
+def test_symlink_escape_rejected(tmp_path, monkeypatch, capsys) -> None:
+    run_validation = _load_run_validation(monkeypatch)
+    reports_root = tmp_path / "reports"
+    reports_root.mkdir()
+    outside = tmp_path / "outside"
+    outside.mkdir()
+    marker = outside / "marker.txt"
+    marker.write_text("keep me", encoding="utf-8")
+    output_link = reports_root / "escape"
+    output_link.symlink_to(outside, target_is_directory=True)
+    matrix_path = _matrix_path(tmp_path)
+    output_dirs = _patch_successful_run(monkeypatch, run_validation)
+    monkeypatch.setattr(run_validation, "get_reports_root", lambda: reports_root, raising=False)
+
+    assert _run_main(monkeypatch, run_validation, matrix_path, output_link, "--clean-output") == 2
+
+    assert marker.read_text(encoding="utf-8") == "keep me"
+    assert output_dirs == []
+    assert "outside repo reports" in capsys.readouterr().err
+
+
+def test_reports_root_symlink_resolved_consistently(tmp_path, monkeypatch) -> None:
+    run_validation = _load_run_validation(monkeypatch)
+    real_reports_root = tmp_path / "real-reports"
+    real_reports_root.mkdir()
+    reports_root_link = tmp_path / "reports-link"
+    reports_root_link.symlink_to(real_reports_root, target_is_directory=True)
+    output_dir = reports_root_link / "validation-output"
+    output_dir.mkdir()
+    marker = output_dir / "marker.txt"
+    marker.write_text("delete me", encoding="utf-8")
+    matrix_path = _matrix_path(tmp_path)
+    output_dirs = _patch_successful_run(monkeypatch, run_validation)
+    monkeypatch.setattr(
+        run_validation,
+        "get_reports_root",
+        lambda: reports_root_link,
+        raising=False,
+    )
+
+    assert _run_main(monkeypatch, run_validation, matrix_path, output_dir, "--clean-output") == 0
+
+    assert not marker.exists()
+    assert len(output_dirs) == 1
+    assert output_dirs[0].resolve().is_relative_to(real_reports_root.resolve())