Skip to content

Commit c84f59d

Browse files
committed
add sample results, minor fixes
1 parent 6932213 commit c84f59d

15 files changed

Lines changed: 257 additions & 71 deletions

benchmark/main.py

Lines changed: 4 additions & 32 deletions
Original file line number | Diff line number | Diff line change
@@ -1,46 +1,18 @@
11
# Import benchmark modules
2-
from benchmark.operators import (
3-
classify_video_zero_shot,
4-
cluster_embeddings,
5-
detect_lewd_images,
6-
detect_text_in_image_tesseract,
7-
dimension_reduction,
8-
image_vec_rep_resnet,
9-
vid_vec_rep_clip,
10-
video_hash_tmk,
11-
)
2+
from benchmark.operators import all_operators
123
from benchmark.report import BenchmarkReport
134

145

156
def main():
167
report = BenchmarkReport()
178

18-
# List of benchmark modules to run
19-
benchmark_modules = [
20-
("vid_vec_rep_clip", vid_vec_rep_clip),
21-
("video_hash_tmk", video_hash_tmk),
22-
("image_vec_rep_resnet", image_vec_rep_resnet),
23-
("detect_text_in_image_tesseract", detect_text_in_image_tesseract),
24-
("detect_lewd_images", detect_lewd_images),
25-
("classify_video_zero_shot", classify_video_zero_shot),
26-
("dimension_reduction", dimension_reduction),
27-
("cluster_embeddings", cluster_embeddings),
28-
]
29-
30-
for name, module in benchmark_modules:
31-
print(f"\n=== Benchmarking {name} ===")
32-
results = module.benchmark()
33-
for result in results:
34-
report.add(result)
9+
for operator in all_operators:
10+
results = operator.benchmark()
11+
report.extend(results)
3512

3613
report.save_json()
3714
report.save_markdown()
3815

3916

4017
if __name__ == "__main__":
4118
main()
42-
43-
44-
import operators
45-
for op in operators.__all__:
46-
print(f"Loaded operator: {op}")

benchmark/operators/__init__.py

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1,21 @@
1-
# Benchmark modules for individual operators
1+
from . import (
2+
classify_video_zero_shot,
3+
cluster_embeddings,
4+
detect_lewd_images,
5+
detect_text_in_image_tesseract,
6+
dimension_reduction,
7+
image_vec_rep_resnet,
8+
vid_vec_rep_clip,
9+
video_hash_tmk,
10+
)
11+
12+
all_operators = [
13+
classify_video_zero_shot,
14+
cluster_embeddings,
15+
detect_lewd_images,
16+
detect_text_in_image_tesseract,
17+
dimension_reduction,
18+
image_vec_rep_resnet,
19+
vid_vec_rep_clip,
20+
video_hash_tmk,
21+
]

benchmark/operators/classify_video_zero_shot.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
from operators.classify_video_zero_shot import VideoClassifier
33

44

5-
def benchmark():
5+
def benchmark() -> list[dict]:
66
"""Benchmark the VideoClassifier operator."""
77
test_data = [
88
{"file": "test/image1.jpg", "labels": ["label1", "label2"]},

benchmark/operators/cluster_embeddings.py

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
from operators.cluster_embeddings import ClusterEmbeddings
44

55

6-
def benchmark():
6+
def benchmark() -> list[dict]:
77
"""Benchmark the ClusterEmbeddings operator."""
88
results = []
99

@@ -29,8 +29,11 @@ def benchmark():
2929
# Test with different clustering methods
3030
for method in ["kmeans", "dbscan", "agglomerative"]:
3131
runtime_kwargs = {
32-
"embeddings": embeddings,
33-
"method": method,
32+
"input_data": [
33+
{"payload": f"sample_{i}", "embedding": embedding.tolist()}
34+
for i, embedding in enumerate(embeddings)
35+
],
36+
"modality": "video",
3437
}
3538

3639
# For kmeans, specify the number of clusters

benchmark/operators/detect_lewd_images.py

Lines changed: 1 addition & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
from operators.detect_lewd_images import LewdImageDetector
33

44

5-
def benchmark():
5+
def benchmark() -> list[dict]:
66
"""Benchmark the LewdImageDetector operator."""
77
test_data = [
88
{"file": "test_images/image1.jpg"},
@@ -18,12 +18,4 @@ def benchmark():
1818
)
1919
results.append(result)
2020

21-
if result["status"] == "success":
22-
print(
23-
f" Time: {result['execution']['execution_time_seconds']:.2f}s, "
24-
f"Memory: {result['execution']['memory_change_mb']:.2f}MB"
25-
)
26-
else:
27-
print(f" Failed: {result.get('error', 'Unknown error')}")
28-
2921
return results

benchmark/operators/detect_text_in_image_tesseract.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
from operators.detect_text_in_image_tesseract import ImageTextDetector
33

44

5-
def benchmark():
5+
def benchmark() -> list[dict]:
66
"""Benchmark the ImageTextDetector operator."""
77
test_data = [
88
{"file": "test_images/image1.jpg"},
@@ -18,5 +18,4 @@ def benchmark():
1818
)
1919
results.append(result)
2020

21-
2221
return results

benchmark/operators/dimension_reduction.py

Lines changed: 18 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,11 @@
1+
import random
2+
13
from benchmark.data_generator import DataGenerator
24
from benchmark.profiler import Profiler
35
from operators.dimension_reduction import DimensionReduction
46

57

6-
def benchmark():
8+
def benchmark() -> list[dict]:
79
"""Benchmark the DimensionReduction operator."""
810
results = []
911

@@ -18,18 +20,28 @@ def benchmark():
1820
for dataset_name, embeddings in test_datasets.items():
1921
print(f"Processing: {dataset_name} (shape: {embeddings.shape})")
2022

21-
# Test with different reduction methods
22-
for method in ["tsne", "pca", "umap"]:
23+
# Test with different reduction methods (only supported ones)
24+
for method in ["tsne", "umap"]:
25+
# Convert embeddings to the format expected by the operator
2326
runtime_kwargs = {
24-
"embeddings": embeddings,
25-
"method": method,
26-
"n_components": 2,
27+
"input_data": [
28+
{"payload": f"sample_{i}", "embedding": embedding.tolist()}
29+
for i, embedding in enumerate(embeddings)
30+
],
31+
}
32+
33+
operator_kwargs = {
34+
"model_type": method,
35+
"params": {
36+
"n_components": random.randint(2, 3),
37+
},
2738
}
2839

2940
result = Profiler.benchmark_operator(
3041
operator_class=DimensionReduction,
3142
operator_name=f"dimension_reduction_{method}",
3243
runtime_kwargs=runtime_kwargs,
44+
operator_kwargs=operator_kwargs,
3345
)
3446

3547
# Add dataset info to result

benchmark/operators/image_vec_rep_resnet.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
from operators.image_vec_rep_resnet import ImageVecRepResnet
33

44

5-
def benchmark():
5+
def benchmark() -> list[dict]:
66
"""Benchmark the ImageVecRepResnet operator."""
77
test_data = [
88
{"file": "test_images/image1.jpg"},

benchmark/operators/vid_vec_rep_clip.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
from operators.vid_vec_rep_clip import VidVecRepClip
33

44

5-
def benchmark():
5+
def benchmark() -> list[dict]:
66
"""Benchmark the VidVecRepClip operator."""
77
test_data = [
88
{"file": "test_videos/video1.mp4"},
@@ -12,12 +12,11 @@ def benchmark():
1212
results = []
1313

1414
for test_item in test_data:
15-
1615
result = Profiler.benchmark_operator(
1716
operator_class=VidVecRepClip,
1817
operator_name="vid_vec_rep_clip",
1918
runtime_kwargs=test_item,
2019
)
2120
results.append(result)
22-
21+
2322
return results

benchmark/operators/video_hash_tmk.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
from operators.video_hash_tmk import VideoHashTmk
33

44

5-
def benchmark():
5+
def benchmark() -> list[dict]:
66
"""Benchmark the VideoHashTmk operator."""
77
test_data = [
88
{"file": "test_videos/video1.mp4"},

0 commit comments

Comments
 (0)