-
Notifications
You must be signed in to change notification settings - Fork 198
Expand file tree
/
Copy pathremote_multimodal_dual_automation.rs
More file actions
122 lines (108 loc) · 4.7 KB
/
remote_multimodal_dual_automation.rs
File metadata and controls
122 lines (108 loc) · 4.7 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
//! Dual-model multi-round automation example.
//!
//! Shows dual-model routing for multi-round browser automation (not just
//! extraction). A vision model handles the first round and stagnation,
//! while a cheaper text model drives mid-round actions via HTML context.
//!
//! Run with:
//! ```bash
//! OPEN_ROUTER=your-api-key cargo run --example remote_multimodal_dual_automation --features "spider/sync spider/chrome spider/agent_chrome"
//! ```
//!
//! Example output:
//! === Page Received ===
//! URL: https://books.toscrape.com/
//!
//! === AI Results ===
//! Result 1:
//! Content: {"extracted":{"title":"A Light in the Attic","price":"£51.77","availability":"In stock (22 available)"},"steps":[{"Click":{"selector":"article.product_pod h3 a"}}]}
//! Tokens: 3541 prompt + 296 completion = 3837 total (2 LLM calls)
//!
//! === Completed in 5.31s ===
extern crate spider;
use spider::features::automation::{ModelEndpoint, RemoteMultimodalConfigs, VisionRouteMode};
use spider::tokio;
use spider::website::Website;
#[tokio::main]
async fn main() {
    env_logger::init();

    // Read the OpenRouter credential up front; the example cannot run without it.
    let api_key =
        std::env::var("OPEN_ROUTER").expect("OPEN_ROUTER environment variable must be set");

    // Target page whose data requires multi-round interaction to reach.
    let url = "https://books.toscrape.com/";

    // Build the entire dual-model automation config in one fluent chain.
    //
    // VisionFirst routes the opening rounds (screenshot-based) and any
    // stagnation/stuck recoveries to the vision model, while stable
    // mid-rounds are driven by the cheaper text model via HTML context.
    //
    // Each ModelEndpoint can point at a different provider by setting
    // `api_url`/`api_key`, e.g.:
    //
    //   ModelEndpoint::new("gpt-4o")
    //       .with_api_url("https://api.openai.com/v1/chat/completions")
    //       .with_api_key("sk-openai-...")
    //
    // Endpoint fields left as None inherit from this parent config.
    let config = RemoteMultimodalConfigs::new(
        "https://openrouter.ai/api/v1/chat/completions",
        "qwen/qwen-2.5-vl-72b-instruct",
    )
    .with_api_key(&api_key)
    .with_dual_models(
        ModelEndpoint::new("qwen/qwen-2.5-vl-72b-instruct"), // screenshot-driven rounds
        ModelEndpoint::new("qwen/qwen-2.5-72b-instruct"),    // HTML-driven rounds
    )
    .with_vision_route_mode(VisionRouteMode::VisionFirst)
    .with_extra_ai_data(true)
    .with_include_html(true)
    .with_include_title(true)
    .with_include_url(true)
    .with_max_rounds(4)
    .with_request_json_object(true)
    .with_extraction_prompt(
        "Navigate to the first book in the catalog. Extract its title, price, and availability.",
    )
    .with_user_message_extra(
        "Click on the first book link to navigate to its detail page, then extract the book data.",
    );

    // Crawl a single page with the multimodal automation attached.
    let mut site: Website = Website::new(url)
        .with_limit(1)
        .with_remote_multimodal(Some(config))
        .build()
        .unwrap();

    // Subscribe before crawling so no page events are missed, and print
    // any AI extraction results (with token usage) as pages arrive.
    let mut events = site.subscribe(16);
    let printer = tokio::spawn(async move {
        while let Ok(page) = events.recv().await {
            println!("=== Page Received ===");
            println!("URL: {}", page.get_url());
            let Some(ref results) = page.extra_remote_multimodal_data else {
                continue;
            };
            println!("\n=== AI Results ===");
            for (idx, res) in results.iter().enumerate() {
                println!("Result {}:", idx + 1);
                println!(" Content: {}", res.content_output);
                if let Some(ref tok) = res.usage {
                    println!(
                        " Tokens: {} prompt + {} completion = {} total ({} LLM calls)",
                        tok.prompt_tokens,
                        tok.completion_tokens,
                        tok.total_tokens,
                        tok.llm_calls,
                    );
                }
            }
        }
    });

    // Run the crawl, then close the channel so the printer task drains and exits.
    let started = tokio::time::Instant::now();
    site.crawl().await;
    site.unsubscribe();
    let _ = printer.await;
    println!("\n=== Completed in {:?} ===", started.elapsed());
}