# Nightly ClickGrab Analysis
#
# Pipeline: (1) scan newly observed ClickFix domains from the gist feed,
# (2) run advanced threat analysis and generate blog content,
# (3) build the static site and push results, (4) report a summary.
# NOTE(review): emoji in echo strings were mojibake in the pasted source;
# restored to the most plausible originals — confirm against repo history.
name: Nightly ClickGrab Analysis

on:
  schedule:
    # Runs "At 01:00 UTC every day"
    - cron: '0 1 * * *'
  workflow_dispatch:

env:
  # Env values are always strings to the runner; quote to avoid YAML bool typing.
  FORCE_JAVASCRIPT_ACTIONS_TO_NODE24: "true"

permissions:
  contents: write

jobs:
  # Job 1: Fetch and scan URLs from feeds
  fetch_and_scan:
    name: Fetch & Scan URLs
    runs-on: ubuntu-latest
    timeout-minutes: 45
    outputs:
      scan_date: ${{ steps.set_date.outputs.date }}
      has_results: ${{ steps.check_results.outputs.has_results }}
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4
        with:
          fetch-depth: 0  # Need full history for cache file
          lfs: true

      - name: Set date output
        id: set_date
        run: echo "date=$(date +%Y-%m-%d)" >> $GITHUB_OUTPUT

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.11'
          cache: 'pip'

      - name: Install Python dependencies
        run: |
          python -m pip install --upgrade pip
          pip install -r requirements.txt

      - name: Create required directories
        run: |
          mkdir -p nightly_reports
          mkdir -p analysis
          echo "Created required directories"

      - name: Run ClickGrab Scan (URLs only, new domains only)
        id: run_scan
        timeout-minutes: 40
        run: |
          TODAY=$(date +%Y-%m-%d)
          echo "🔍 Scanning URLs for $TODAY"
          # Set encoding for proper UTF-8 handling
          export PYTHONIOENCODING=utf-8
          # Run scan - the caching mechanism in collect_clickfix_gist_urls() ensures
          # we only scan NEW domains that weren't in the last run's cache
          python clickgrab.py \
            --download \
            --clickfix-gist \
            --output-dir nightly_reports \
            --format json \
            --limit 100 \
            --export-intel \
            --debug
          echo "✅ ClickGrab scan complete"

      - name: Check if we have results
        id: check_results
        run: |
          if [ -n "$(find nightly_reports -name '*.json' -type f 2>/dev/null)" ]; then
            echo "has_results=true" >> $GITHUB_OUTPUT
            echo "✅ Found scan results"
          else
            echo "has_results=false" >> $GITHUB_OUTPUT
            echo "⚠️ No new domains to scan (cache hit)"
          fi

      - name: Standardize report filenames
        if: steps.check_results.outputs.has_results == 'true'
        run: |
          TODAY=$(date +%Y-%m-%d)
          # Find the latest JSON report
          LATEST_JSON=$(find nightly_reports -name "*.json" -type f -printf "%T@ %p\n" | sort -n | tail -1 | cut -d' ' -f2-)
          if [ -n "$LATEST_JSON" ]; then
            echo "📄 Found report: $LATEST_JSON"
            # Create standardized filename
            cp "$LATEST_JSON" "nightly_reports/clickgrab_report_${TODAY}.json"
            cp "$LATEST_JSON" "latest_consolidated_report.json"
            # Show summary
            echo "=== Report Summary ==="
            jq -r '.summary // {} | to_entries | .[] | "\(.key): \(.value)"' "$LATEST_JSON" 2>/dev/null || echo "No summary"
            echo "Total sites: $(jq -r '.sites | length' "$LATEST_JSON" 2>/dev/null || echo '0')"
            echo "===================="
          fi

      - name: Upload scan results
        if: steps.check_results.outputs.has_results == 'true'
        uses: actions/upload-artifact@v4
        with:
          name: scan-results-${{ steps.set_date.outputs.date }}
          path: |
            nightly_reports/*.json
            nightly_reports/threat_intel/*.json
            nightly_reports/threat_intel/*.csv
            latest_consolidated_report.json
            latest_threat_intel.json
            analysis/clickfix_gist_cache.json
          retention-days: 7

  # Job 2: Advanced analysis and blog generation
  analyze_and_blog:
    name: Analyze & Generate Blog
    needs: fetch_and_scan
    if: needs.fetch_and_scan.outputs.has_results == 'true'
    runs-on: ubuntu-latest
    timeout-minutes: 20
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4
        with:
          lfs: true

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.11'
          cache: 'pip'

      - name: Install Python dependencies
        run: |
          python -m pip install --upgrade pip
          pip install -r requirements.txt

      - name: Create directories
        run: |
          mkdir -p nightly_reports
          mkdir -p analysis

      - name: Download scan results
        uses: actions/download-artifact@v4
        with:
          name: scan-results-${{ needs.fetch_and_scan.outputs.scan_date }}
          path: .

      - name: Run advanced analysis
        timeout-minutes: 15
        run: |
          TODAY="${{ needs.fetch_and_scan.outputs.scan_date }}"
          echo "🔍 Running advanced threat analysis for $TODAY"
          # Generate blog content from scan results
          python bin/analyze.py -d "$TODAY" -v
          echo "✅ Advanced analysis complete"

      - name: Upload analysis results
        uses: actions/upload-artifact@v4
        with:
          name: analysis-results-${{ needs.fetch_and_scan.outputs.scan_date }}
          path: |
            analysis/blog_data_${{ needs.fetch_and_scan.outputs.scan_date }}.json
            analysis/report_${{ needs.fetch_and_scan.outputs.scan_date }}.md
          retention-days: 7

  # Job 3: Build static site and publish
  build_and_publish:
    name: Build Site & Publish
    needs: [fetch_and_scan, analyze_and_blog]
    if: needs.fetch_and_scan.outputs.has_results == 'true'
    runs-on: ubuntu-latest
    timeout-minutes: 30
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4
        with:
          fetch-depth: 1
          lfs: true

      - name: Set up Git LFS
        run: git lfs install

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.11'
          cache: 'pip'

      - name: Install Python dependencies
        run: |
          python -m pip install --upgrade pip
          pip install -r requirements.txt

      - name: Create directories
        run: |
          mkdir -p nightly_reports
          mkdir -p analysis
          mkdir -p docs/assets/images
          mkdir -p public

      - name: Copy logo files
        run: |
          if [ -f "assets/logo.png" ]; then
            cp assets/logo.png docs/assets/images/logo.png
            echo "✅ Copied logo"
          elif [ -f "assets/images/logo.png" ]; then
            cp assets/images/logo.png docs/assets/images/logo.png
            echo "✅ Copied logo"
          else
            echo "⚠️ Logo not found"
          fi

      - name: Download scan results
        uses: actions/download-artifact@v4
        with:
          name: scan-results-${{ needs.fetch_and_scan.outputs.scan_date }}
          path: .

      - name: Download analysis results
        uses: actions/download-artifact@v4
        with:
          name: analysis-results-${{ needs.fetch_and_scan.outputs.scan_date }}
          path: analysis/

      - name: Build static website
        timeout-minutes: 5
        run: |
          echo "🏗️ Building static website"
          echo "Note: Using simplified report templates (full data in JSON)"
          echo "Building: index, reports, analysis, techniques, blog posts"
          # Generate complete website with Jinja2 templates
          # Use unbuffered output so we see progress in real-time
          time python -u bin/build.py
          echo "✅ Website build complete"

      - name: Verify sync to docs
        run: |
          # build.py already synced public/ to docs/ using rsync
          if [ -d "docs" ] && [ -f "docs/index.html" ]; then
            echo "✅ Docs directory verified"
            echo "Reports: $(ls docs/reports 2>/dev/null | wc -l || echo 0)"
            echo "Analysis: $(ls docs/analysis 2>/dev/null | wc -l || echo 0)"
          else
            echo "❌ Docs directory not found or incomplete"
            exit 1
          fi

      - name: Verify generated files
        run: |
          echo "🔍 Verifying generated files"
          # Check key files
          [ -f "docs/index.html" ] && echo "✅ index.html" || echo "❌ Missing index.html"
          [ -f "docs/analysis.html" ] && echo "✅ analysis.html" || echo "❌ Missing analysis.html"
          [ -f "docs/techniques.html" ] && echo "✅ techniques.html" || echo "❌ Missing techniques.html"
          [ -f "latest_consolidated_report.json" ] && echo "✅ latest_consolidated_report.json" || echo "❌ Missing report"
          echo ""
          echo "📄 Sample generated files:"
          find docs -type f -name "*.html" | head -5

      - name: Commit and push results
        run: |
          git config --global user.name 'github-actions[bot]'
          git config --global user.email 'github-actions[bot]@users.noreply.github.com'
          TODAY="${{ needs.fetch_and_scan.outputs.scan_date }}"
          # Ensure LFS is tracking patterns from .gitattributes
          git lfs track "nightly_reports/*.json" "nightly_reports/*.html" \
            "nightly_reports/threat_intel/*.json" "nightly_reports/threat_intel/*.csv" \
            "docs/nightly_reports/*.json" "analysis/*.json" "analysis/*.md" \
            "latest_consolidated_report.json" "latest_threat_intel.json"
          git add .gitattributes
          # Add today's generated files only (avoid re-staging old large files)
          git add latest_consolidated_report.json
          git add latest_threat_intel.json || true
          git add "nightly_reports/clickgrab_report_${TODAY}"* || true
          git add "nightly_reports/clickgrab_report_$(date +%Y%m%d)"* || true
          git add nightly_reports/threat_intel/ || true
          git add "analysis/blog_data_${TODAY}.json" || true
          git add "analysis/report_${TODAY}.md" || true
          git add docs/
          git add public/ || true
          # Commit if changes exist
          if ! git diff --staged --quiet; then
            echo "📝 Committing changes"
            git commit -m "chore: nightly analysis results ($TODAY)" \
              -m "- Scanned new ClickFix domains from gist" \
              -m "- Advanced threat intelligence analysis" \
              -m "- Updated static website" \
              -m "- Auto-generated blog posts"
            git push
            echo "✅ Successfully pushed changes"
          else
            echo "ℹ️ No changes to commit"
          fi

  # Summary job (always runs to report status)
  summary:
    name: Workflow Summary
    needs: [fetch_and_scan, analyze_and_blog, build_and_publish]
    if: always()
    runs-on: ubuntu-latest
    steps:
      - name: Report status
        run: |
          echo "=== Nightly Analysis Summary ==="
          echo "Date: ${{ needs.fetch_and_scan.outputs.scan_date }}"
          echo "Scan job: ${{ needs.fetch_and_scan.result }}"
          echo "Analysis job: ${{ needs.analyze_and_blog.result }}"
          echo "Build job: ${{ needs.build_and_publish.result }}"
          echo ""
          if [ "${{ needs.fetch_and_scan.outputs.has_results }}" == "true" ]; then
            echo "✅ New domains were scanned and processed"
          else
            echo "ℹ️ No new domains found (cache hit - all domains already analyzed)"
          fi
          echo "==============================="