Skip to content

spider-rs/spider

Spider

Crates.io Downloads Documentation

Website | Guides | API | Examples | Discord

The fastest web crawler and scraper for Rust.

Quick Start

[dependencies]
spider = { version = "2", features = ["spider_cloud"] }
use spider::{
    configuration::{SpiderCloudConfig, SpiderCloudMode, SpiderCloudReturnFormat},
    tokio, // re-export
    website::Website,
};

#[tokio::main]
async fn main() {
    // Spider Cloud configuration — sign up at https://spider.cloud for a free key.
    // Smart mode lets the service pick the fetch strategy; results come back as Markdown.
    let config = SpiderCloudConfig::new("YOUR_API_KEY")
        .with_mode(SpiderCloudMode::Smart)
        .with_return_format(SpiderCloudReturnFormat::Markdown);

    // Build the crawler: cap at 10 pages and route fetching through Spider Cloud.
    let mut site = Website::new("https://example.com")
        .with_limit(10)
        .with_spider_cloud_config(config)
        .build()
        .unwrap();

    // Broadcast channel (capacity 16) that receives every crawled page.
    let mut pages = site.subscribe(16);

    // Detached consumer task: print each page as it arrives, until the
    // channel closes when the crawl finishes and we unsubscribe.
    tokio::spawn(async move {
        while let Ok(page) = pages.recv().await {
            let status = page.status_code;
            let url = page.get_url();
            let markdown = page.get_content();

            println!("[{status}] {url}\n---\n{markdown}\n");
        }
    });

    // Run the crawl to completion, then drop the subscription so the
    // consumer task's recv() loop terminates.
    site.crawl().await;
    site.unsubscribe();
}

Also supports headless Chrome, WebDriver, and AI automation.

Install

| Package | Command |
| --- | --- |
| spider | `cargo add spider` |
| spider_cli | `cargo install spider_cli` |
| spider-nodejs | `npm i @spider-rs/spider-rs` |
| spider-py | `pip install spider_rs` |
| Spider Cloud | Managed crawling — free credits on signup |

License

MIT