TimmyOVO
diff --git a/‎.gitignore‎
Lines changed: 0 additions & 6 deletions b/‎.gitignore‎
Lines changed: 0 additions & 6 deletions
diff --git a/‎benchsuite/README.md‎
Lines changed: 175 additions & 0 deletions b/‎benchsuite/README.md‎
Lines changed: 175 additions & 0 deletions
diff --git a/‎benchsuite/__init__.py‎
Lines changed: 2 additions & 0 deletions b/‎benchsuite/__init__.py‎
Lines changed: 2 additions & 0 deletions
diff --git a/‎benchsuite/cli.py‎
Lines changed: 182 additions & 0 deletions b/‎benchsuite/cli.py‎
Lines changed: 182 additions & 0 deletions
@@ -2,12 +2,6 @@
 *.iml
 target
 .DS_Store
-DeepSeek-OCR
-DeepSeek-OCR-2
-PaddleOCR-VL
-dots.ocr
-baselines/sample
-baselines/fixtures
 __pycache__
 .venv
 .hf-cache
 
@@ -0,0 +1,175 @@
+# Benchsuite
+
+统一的基准与门禁子项目，采用“统一入口 + 模型适配器（package）”结构。
+
+## 设计
+
+- 统一入口：`python -m benchsuite.cli`
+- 子命令：
+  - `gate`：strict token 对齐门禁
+  - `bench-python`：Python 侧单次基准
+  - `bench-rust`：Rust CLI 侧单次基准
+  - `perf`：按 model/device/precision/case matrix 自动跑 Python+Rust，对比并保存 run 历史
+  - `matrix-gate`：按 model/device/precision/case matrix 执行 strict gate（prompt+token）
+- 模型适配器：`benchsuite/models/<model>.py`
+  - 当前实现：`glm.py`（`GlmAdapter`）
+
+## 安装
+
+在仓库根目录：
+
+```bash
+python -m pip install -e '.[bench]'
+```
+
+安装后可用统一命令：
+
+```bash
+benchsuite --help
+```
+
+也可以继续用模块调用：
+
+```bash
+python -m benchsuite.cli --help
+```
+
+## 离线约束
+
+所有子命令统一设置：
+
+- `HF_HUB_OFFLINE=1`
+- `TRANSFORMERS_OFFLINE=1`
+- `HF_HOME=.hf-cache`
+- `TRANSFORMERS_CACHE=.hf-cache`
+- `DEEPSEEK_OCR_CONFIG_DIR=.cli-config`
+- `DEEPSEEK_OCR_CACHE_DIR=.cli-cache`
+
+## 用法
+
+### 1) strict token gate
+
+```bash
+python -m benchsuite.cli gate \
+  --model glm-ocr \
+  --baseline baselines/glm/matrix_v20/formula__image__n8/baseline.json \
+  --rust baselines/glm/matrix_v33/formula__image__n8/rust_output.json \
+  --output baselines/glm/matrix_v33/formula__image__n8/compare.json
+```
+
+### 2) 单次 Python / Rust 基准
+
+```bash
+python -m benchsuite.cli bench-python \
+  --model glm-ocr \
+  --model-dir .cli-cache/models/glm-ocr \
+  --image baselines/sample/images/test.png \
+  --prompt "Formula Recognition:" \
+  --device cpu \
+  --dtype f32 \
+  --max-new-tokens 8 \
+  --output baselines/glm/perf_py_v22/formula__test__n8/cpu_f32/bench.json
+
+python -m benchsuite.cli bench-rust \
+  --model glm-ocr \
+  --cli target/release/deepseek-ocr-cli \
+  --image baselines/sample/images/test.png \
+  --prompt "Formula Recognition:" \
+  --device cpu \
+  --dtype f32 \
+  --max-new-tokens 8 \
+  --output baselines/glm/perf_rs_v22/formula__test__n8/cpu_f32/bench.json
+```
+
+注意：`--device` 的可选值在两侧不同：`bench-python` 接受 `cpu` / `mps`，`bench-rust` 接受 `cpu` / `metal`。
+
+### 3) 一键 perf 矩阵（自动跑两边 + 自动对比 + 历史 run 对比）
+
+```bash
+python -m benchsuite.cli perf \
+  --run v23 \
+  --include-models glm-ocr \
+  --include-devices cpu mps \
+  --include-precision f32 f16
+```
+
+输出包括：
+
+- `baselines/benchsuite/runs/<run>/perf/summary.json`（结构化结果）
+- `baselines/benchsuite/runs/<run>/perf/report.txt`（可读对比表）
+- `baselines/benchsuite/runs/<run>/perf/<model>/<case>/<device_dtype>/{python,rust,compare}.json`
+
+你也可以显式指定单个 case：
+
+```bash
+python -m benchsuite.cli perf \
+  --run adhoc \
+  --include-models glm-ocr \
+  --include-devices cpu \
+  --include-precision f32 \
+  --image baselines/sample/images/test.png \
+  --prompt "Formula Recognition:" \
+  --max-new-tokens 64
+```
+
+快速迭代（只跑前 N 个 case）：
+
+```bash
+python -m benchsuite.cli perf \
+  --run smoke \
+  --include-models glm-ocr \
+  --include-devices cpu \
+  --include-precision f32 \
+  --limit 1
+```
+
+### 4) 一键 matrix strict gate（默认 24-case）
+
+```bash
+python -m benchsuite.cli matrix-gate \
+  --run gate_v34 \
+  --include-models glm-ocr \
+  --include-devices cpu mps \
+  --include-precision f32 f16
+```
+
+输出包括：
+
+- `baselines/benchsuite/runs/<run>/matrix/summary.json`
+- `baselines/benchsuite/runs/<run>/matrix/report.txt`
+- `baselines/benchsuite/runs/<run>/matrix/<model>/<case>/<device_dtype>/{python,compare}.json`
+
+常用筛选：
+
+```bash
+python -m benchsuite.cli matrix-gate \
+  --run smoke \
+  --include-models glm-ocr \
+  --include-devices cpu \
+  --include-precision f32 \
+  --limit 1
+
+python -m benchsuite.cli matrix-gate \
+  --run formula_only \
+  --include-models glm-ocr \
+  --include-devices cpu \
+  --include-precision f32 \
+  --cases formula__image__n8 formula__test__n8
+```
+
+ad-hoc 单条输入（不走内建 matrix）：
+
+```bash
+python -m benchsuite.cli matrix-gate \
+  --run adhoc_gate \
+  --include-models glm-ocr \
+  --include-devices cpu \
+  --include-precision f32 \
+  --image baselines/sample/images/test.png \
+  --prompt "Formula Recognition:" \
+  --max-new-tokens 8
+```
+
+## 扩展新模型
+
+1. 新建 `benchsuite/models/<name>.py`，实现 `<Name>Adapter`
+2. 在 `benchsuite/registry.py` 注册名称
+3. 复用统一入口，无需再新增零散脚本
@@ -0,0 +1,2 @@
+"""Unified benchmark/gate toolkit for OCR backends."""
+
@@ -0,0 +1,182 @@
+#!/usr/bin/env python3
+from __future__ import annotations
+
+import argparse
+from pathlib import Path
+
+from benchsuite.common import repo_root, write_json
+from benchsuite.orchestrator import BenchOrchestrator
+from benchsuite.registry import get_adapter
+
+# tqdm is optional: if importing it fails for any reason, ``tqdm`` is set to
+# ``None`` and the progress helper in this module returns no bar.
+try:
+    from tqdm.auto import tqdm
+except Exception:  # pragma: no cover
+    tqdm = None
+
+
+# Orchestrator instance created once at import time; the ``perf`` and
+# ``matrix-gate`` handlers delegate to it.
+_ORCHESTRATOR = BenchOrchestrator()
+
+
+def _single_job_progress(desc: str):
+    if tqdm is None:
+        return None
+    return tqdm(total=1, desc=desc, unit="job")
+
+
+def _run_gate(args: argparse.Namespace) -> int:
+    adapter = get_adapter(args.model)
+    report = adapter.compare_tokens(args.baseline, args.rust)
+    out = args.output if args.output else args.rust.parent / "compare.json"
+    write_json(out, report)
+    print(out)
+    return 0 if report["match"] else 1
+
+
+def _run_bench_python(args: argparse.Namespace) -> int:
+    adapter = get_adapter(args.model)
+    pbar = _single_job_progress("bench-python")
+    try:
+        payload = adapter.run_python_bench(
+            model_dir=args.model_dir,
+            image=args.image,
+            prompt=args.prompt,
+            max_new_tokens=args.max_new_tokens,
+            py_device=args.device,
+            py_dtype=args.dtype,
+            output=args.output,
+            repo_root=repo_root(),
+        )
+        if pbar is not None:
+            pbar.update(1)
+    finally:
+        if pbar is not None:
+            pbar.close()
+    print(args.output)
+    if args.print_json:
+        import json
+
+        print(json.dumps(payload, ensure_ascii=False))
+    return 0
+
+
+def _run_bench_rust(args: argparse.Namespace) -> int:
+    adapter = get_adapter(args.model)
+    pbar = _single_job_progress("bench-rust")
+    try:
+        payload = adapter.run_rust_bench(
+            cli=args.cli,
+            image=args.image,
+            prompt=args.prompt,
+            max_new_tokens=args.max_new_tokens,
+            rs_device=args.device,
+            rs_dtype=args.dtype,
+            output=args.output,
+            repo_root=repo_root(),
+        )
+        if pbar is not None:
+            pbar.update(1)
+    finally:
+        if pbar is not None:
+            pbar.close()
+    print(args.output)
+    if args.print_json:
+        import json
+
+        print(json.dumps(payload, ensure_ascii=False))
+    return 0
+
+
+def _run_perf(args: argparse.Namespace) -> int:
+    return _ORCHESTRATOR.run_perf(args)
+
+
+def _run_matrix_gate(args: argparse.Namespace) -> int:
+    return _ORCHESTRATOR.run_matrix_gate(args)
+
+
+def build_parser() -> argparse.ArgumentParser:
+    parser = argparse.ArgumentParser(
+        prog="python -m benchsuite.cli",
+        description="Unified benchmark + gate CLI with model adapters",
+    )
+    sub = parser.add_subparsers(dest="command", required=True)
+
+    p = sub.add_parser("gate", help="strict token gate: baseline vs rust output")
+    p.add_argument("--model", default="glm-ocr")
+    p.add_argument("--baseline", required=True, type=Path)
+    p.add_argument("--rust", required=True, type=Path)
+    p.add_argument("--output", type=Path)
+    p.set_defaults(func=_run_gate)
+
+    p = sub.add_parser("bench-python", help="run python benchmark for one model case")
+    p.add_argument("--model", default="glm-ocr")
+    p.add_argument("--model-dir", required=True, type=Path)
+    p.add_argument("--image", required=True, type=Path)
+    p.add_argument("--prompt", required=True)
+    p.add_argument("--device", required=True, choices=["cpu", "mps"])
+    p.add_argument("--dtype", required=True, choices=["f32", "f16"])
+    p.add_argument("--max-new-tokens", type=int, default=64)
+    p.add_argument("--output", required=True, type=Path)
+    p.add_argument("--print-json", action="store_true")
+    p.set_defaults(func=_run_bench_python)
+
+    p = sub.add_parser("bench-rust", help="run rust benchmark for one model case")
+    p.add_argument("--model", default="glm-ocr")
+    p.add_argument("--cli", default=Path("target/release/deepseek-ocr-cli"), type=Path)
+    p.add_argument("--image", required=True, type=Path)
+    p.add_argument("--prompt", required=True)
+    p.add_argument("--device", required=True, choices=["cpu", "metal"])
+    p.add_argument("--dtype", required=True, choices=["f32", "f16"])
+    p.add_argument("--max-new-tokens", type=int, required=True)
+    p.add_argument("--output", required=True, type=Path)
+    p.add_argument("--print-json", action="store_true")
+    p.set_defaults(func=_run_bench_rust)
+
+    p = sub.add_parser("perf", help="one-command run: py+rust compare with history")
+    p.add_argument("--run", help="run id used under baselines/*/runs/<run>")
+    p.add_argument("--tag", default="latest")
+    p.add_argument("--include-models", nargs="*", default=[])
+    p.add_argument("--include-devices", nargs="*", default=[])
+    p.add_argument("--include-precision", nargs="*", default=[])
+    p.add_argument("--cli", default=Path("target/release/deepseek-ocr-cli"), type=Path)
+    p.add_argument("--model-dir", type=Path)
+    p.add_argument("--case-name")
+    p.add_argument("--baseline-json", type=Path)
+    p.add_argument("--matrix-source", type=Path)
+    p.add_argument("--image", type=Path)
+    p.add_argument("--prompt")
+    p.add_argument("--max-new-tokens", type=int)
+    p.add_argument("--cases", nargs="*")
+    p.add_argument("--limit", type=int)
+    p.add_argument("--output-root", type=Path)
+    p.set_defaults(func=_run_perf)
+
+    p = sub.add_parser("matrix-gate", help="one-command strict matrix gate run")
+    p.add_argument("--run", help="run id used under baselines/*/runs/<run>")
+    p.add_argument("--tag", default="latest")
+    p.add_argument("--include-models", nargs="*", default=[])
+    p.add_argument("--include-devices", nargs="*", default=[])
+    p.add_argument("--include-precision", nargs="*", default=[])
+    p.add_argument("--cli", default=Path("target/release/deepseek-ocr-cli"), type=Path)
+    p.add_argument("--model-dir", type=Path)
+    p.add_argument("--source-matrix", type=Path)
+    p.add_argument("--output-root", type=Path)
+    p.add_argument("--case-name", default="adhoc")
+    p.add_argument("--image", type=Path)
+    p.add_argument("--prompt")
+    p.add_argument("--max-new-tokens", type=int)
+    p.add_argument("--cases", nargs="*")
+    p.add_argument("--limit", type=int)
+    p.set_defaults(func=_run_matrix_gate)
+
+    return parser
+
+
+def main() -> int:
+    parser = build_parser()
+    args = parser.parse_args()
+    return int(args.func(args))
+
+
+# Script entry point: exit the process with main()'s return value as status.
+if __name__ == "__main__":
+    raise SystemExit(main())
Original file line number	Diff line number	Diff line change
`@@ -0,0 +1,2 @@`
	`1`	`+"""Unified benchmark/gate toolkit for OCR backends."""`
	`2`	`+`