-
Notifications
You must be signed in to change notification settings - Fork 195
Expand file tree
/
Copy pathparallel_backends.rs
More file actions
76 lines (63 loc) · 2.18 KB
/
parallel_backends.rs
File metadata and controls
76 lines (63 loc) · 2.18 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
//! Race a CDP backend alongside the primary HTTP crawl.
//!
//! ```bash
//! # Start a CDP server first (e.g. navi, Chrome, etc.):
//! # navi --listen 127.0.0.1:9222
//!
//! cargo run --example parallel_backends --features "spider/parallel_backends_full spider/sync"
//! ```
extern crate spider;
use spider::configuration::{BackendEndpoint, BackendEngine, ParallelBackendsConfig};
use spider::tokio;
use spider::website::Website;
use std::time::Instant;
/// Crawls `https://choosealicense.com` with a single CDP backend configured
/// at `ws://127.0.0.1:9222`, prints one line per page received on the
/// subscription, and finishes with a tally of which source produced each page.
///
/// Pages that carry no `backend_source` are printed as `unknown` and tallied
/// separately, so they do not inflate the backend win count.
#[tokio::main]
async fn main() {
    env_logger::init();

    let mut website = Website::new("https://choosealicense.com");

    // Configure a CDP backend running on localhost:9222.
    website.configuration.parallel_backends = Some(ParallelBackendsConfig {
        backends: vec![BackendEndpoint {
            engine: BackendEngine::Cdp,
            endpoint: Some("ws://127.0.0.1:9222".to_string()),
            binary_path: None,
            protocol: None, // inferred as CDP from engine
            proxy: None,
        }],
        grace_period_ms: 500,
        enabled: true,
        fast_accept_threshold: 80,
        max_consecutive_errors: 5,
        connect_timeout_ms: 5000,
        ..Default::default()
    });

    // Subscribe to see which backend won each page.
    let mut rx = website.subscribe(100);

    // The subscriber runs on its own task so it can drain pages while the
    // crawl is in progress; it returns the tallies once the loop ends.
    let handle = tokio::spawn(async move {
        let mut primary_wins = 0u32;
        let mut backend_wins = 0u32;
        let mut unattributed = 0u32;

        // NOTE(review): any `recv()` error ends this loop. If the receiver can
        // report a lag error as well as closure, counting would stop early —
        // confirm against the subscription type.
        while let Ok(page) = rx.recv().await {
            let source = page.backend_source.as_deref();
            let url = page.get_url();
            let status = page.status_code.as_u16();

            // Only a page explicitly tagged with a non-primary source counts
            // as a backend win; an untagged page is neither.
            match source {
                Some("primary") => primary_wins += 1,
                Some(_) => backend_wins += 1,
                None => unattributed += 1,
            }

            let source = source.unwrap_or("unknown");
            println!("[{source}] {status} {url}");
        }

        (primary_wins, backend_wins, unattributed)
    });

    let start = Instant::now();
    website.crawl().await;
    let duration = start.elapsed();

    // Signal subscriber to finish.
    drop(website);

    match handle.await {
        Ok((primary_wins, backend_wins, unattributed)) => {
            println!(
                "\nCrawled in {:?} — primary won: {}, backend won: {}",
                duration, primary_wins, backend_wins
            );
            if unattributed > 0 {
                println!("pages with no reported backend source: {}", unattributed);
            }
        }
        // Surface a panicked or cancelled subscriber instead of exiting silently.
        Err(err) => eprintln!("subscriber task did not complete: {err}"),
    }
}