subscribe_multiple.rs
//! `cargo run --example subscribe_multiple`
extern crate spider;

use spider::{tokio, website::Website};
use tokio::io::AsyncWriteExt;

#[tokio::main]
async fn main() {
    let mut website: Website = Website::new("https://example.com?target=1");
    // Subscribe before cloning so both crawlers publish to the same broadcast channel.
    let mut rx2: tokio::sync::broadcast::Receiver<spider::page::Page> =
        website.subscribe(0).unwrap();
    // The clone shares the subscription; only the start URL changes.
    let mut website2 = website.clone();
    website2.set_url_only("https://example.com?target=2");
    // Usually you want to use another proxy for the second crawl.
    // website2.with_proxies(Some(vec!["http://myproxy.com".into()]));

    let mut stdout = tokio::io::stdout();
    // Print the URL of every page received from either crawl, then hand
    // stdout back so it can be reused for the summary below.
    let sub = async move {
        while let Ok(res) = rx2.recv().await {
            let _ = stdout
                .write_all(format!("- {}\n", res.get_url()).as_bytes())
                .await;
        }
        stdout
    };
    let start = std::time::Instant::now();

    // Run both crawls concurrently; unsubscribing closes the channel so
    // the subscriber loop above can finish.
    let c1 = async {
        website.crawl().await;
        website.unsubscribe();
    };
    let c2 = async {
        website2.crawl().await;
        website2.unsubscribe();
    };

    // You can also use a select to cancel a crawl if you want to see which proxy comes first.
    let (mut stdout, _crawl_one, _crawl_two) = tokio::join!(sub, c1, c2);
    let duration = start.elapsed();
    let _ = stdout
        .write_all(
            format!(
                "Time elapsed in website.crawl() and website2.crawl() is: {:?} for total pages: {:?}",
                duration,
                website.get_size().await + website2.get_size().await
            )
            .as_bytes(),
        )
        .await;
}
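
The comment before the `tokio::join!` mentions racing the crawls with a select instead of joining them. A minimal sketch of that variant, assuming the same `website` and `website2` setup as above (this snippet is not part of the original example): whichever crawl finishes first wins, and dropping the other future cancels it.

// Hypothetical variant, not in the original file: race the two crawls
// with `tokio::select!` instead of `tokio::join!`. The arm that completes
// first is taken; the losing future is dropped, cancelling its crawl.
let winner = tokio::select! {
    _ = website.crawl() => "website (target=1)",
    _ = website2.crawl() => "website2 (target=2)",
};
println!("first crawl to finish: {}", winner);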