# Workflow file for run: Nightly ClickGrab Analysis #444
name: Nightly ClickGrab Analysis

on:
  schedule:
    # Runs "At 01:00 UTC every day"
    - cron: '0 1 * * *'
  # Allow manual runs from the Actions tab.
  workflow_dispatch:

env:
  # Env var values are strings; quote so YAML doesn't coerce to a boolean.
  FORCE_JAVASCRIPT_ACTIONS_TO_NODE24: "true"

# Workflow pushes generated reports/site back to the repository.
permissions:
  contents: write
jobs:
  # Job 1: Fetch and scan URLs from feeds
  fetch_and_scan:
    name: Fetch & Scan URLs
    runs-on: ubuntu-latest
    timeout-minutes: 45
    outputs:
      scan_date: ${{ steps.set_date.outputs.date }}
      has_results: ${{ steps.check_results.outputs.has_results }}
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4
        with:
          fetch-depth: 0  # Need full history for cache file
          lfs: true

      - name: Set date output
        id: set_date
        # Quote $GITHUB_OUTPUT to be safe against word splitting.
        run: echo "date=$(date +%Y-%m-%d)" >> "$GITHUB_OUTPUT"

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.11'
          cache: 'pip'

      - name: Install Python dependencies
        run: |
          python -m pip install --upgrade pip
          pip install -r requirements.txt

      - name: Create required directories
        run: |
          mkdir -p nightly_reports
          mkdir -p analysis
          echo "Created required directories"

      - name: Run ClickGrab Scan (URLs only, new domains only)
        id: run_scan
        timeout-minutes: 40
        run: |
          TODAY=$(date +%Y-%m-%d)
          echo "πŸ” Scanning URLs for $TODAY"
          # Set encoding for proper UTF-8 handling
          export PYTHONIOENCODING=utf-8
          # Run scan - the caching mechanism in collect_clickfix_gist_urls() ensures
          # we only scan NEW domains that weren't in the last run's cache
          python clickgrab.py \
            --download \
            --clickfix-gist \
            --output-dir nightly_reports \
            --format json \
            --limit 100 \
            --export-intel \
            --debug
          echo "βœ… ClickGrab scan complete"

      - name: Check if we have results
        id: check_results
        run: |
          if [ -n "$(find nightly_reports -name '*.json' -type f 2>/dev/null)" ]; then
            echo "has_results=true" >> "$GITHUB_OUTPUT"
            echo "βœ… Found scan results"
          else
            echo "has_results=false" >> "$GITHUB_OUTPUT"
            echo "⚠️ No new domains to scan (cache hit)"
          fi

      - name: Standardize report filenames
        if: steps.check_results.outputs.has_results == 'true'
        run: |
          TODAY=$(date +%Y-%m-%d)
          # Find the latest JSON report
          LATEST_JSON=$(find nightly_reports -name "*.json" -type f -printf "%T@ %p\n" | sort -n | tail -1 | cut -d' ' -f2-)
          if [ -n "$LATEST_JSON" ]; then
            echo "πŸ“„ Found report: $LATEST_JSON"
            # Create standardized filename
            cp "$LATEST_JSON" "nightly_reports/clickgrab_report_${TODAY}.json"
            cp "$LATEST_JSON" "latest_consolidated_report.json"
            # Show summary
            echo "=== Report Summary ==="
            jq -r '.summary // {} | to_entries | .[] | "\(.key): \(.value)"' "$LATEST_JSON" 2>/dev/null || echo "No summary"
            echo "Total sites: $(jq -r '.sites | length' "$LATEST_JSON" 2>/dev/null || echo '0')"
            echo "===================="
          fi

      - name: Upload scan results
        if: steps.check_results.outputs.has_results == 'true'
        uses: actions/upload-artifact@v4
        with:
          name: scan-results-${{ steps.set_date.outputs.date }}
          path: |
            nightly_reports/*.json
            nightly_reports/threat_intel/*.json
            nightly_reports/threat_intel/*.csv
            latest_consolidated_report.json
            latest_threat_intel.json
            analysis/clickfix_gist_cache.json
          retention-days: 7
# Job 2: Advanced analysis and blog generation
analyze_and_blog:
name: Analyze & Generate Blog
needs: fetch_and_scan
if: needs.fetch_and_scan.outputs.has_results == 'true'
runs-on: ubuntu-latest
timeout-minutes: 20
steps:
- name: Checkout repository
uses: actions/checkout@v4
with:
lfs: true
- name: Set up Python
uses: actions/setup-python@v5
with:
python-version: '3.11'
cache: 'pip'
- name: Install Python dependencies
run: |
python -m pip install --upgrade pip
pip install -r requirements.txt
- name: Create directories
run: |
mkdir -p nightly_reports
mkdir -p analysis
- name: Download scan results
uses: actions/download-artifact@v4
with:
name: scan-results-${{ needs.fetch_and_scan.outputs.scan_date }}
path: .
- name: Run advanced analysis
timeout-minutes: 15
run: |
TODAY="${{ needs.fetch_and_scan.outputs.scan_date }}"
echo "πŸ“Š Running advanced threat analysis for $TODAY"
# Generate blog content from scan results
python bin/analyze.py -d "$TODAY" -v
echo "βœ… Advanced analysis complete"
- name: Upload analysis results
uses: actions/upload-artifact@v4
with:
name: analysis-results-${{ needs.fetch_and_scan.outputs.scan_date }}
path: |
analysis/blog_data_${{ needs.fetch_and_scan.outputs.scan_date }}.json
analysis/report_${{ needs.fetch_and_scan.outputs.scan_date }}.md
retention-days: 7
# Job 3: Build static site and publish
build_and_publish:
name: Build Site & Publish
needs: [fetch_and_scan, analyze_and_blog]
if: needs.fetch_and_scan.outputs.has_results == 'true'
runs-on: ubuntu-latest
timeout-minutes: 30
steps:
- name: Checkout repository
uses: actions/checkout@v4
with:
fetch-depth: 1
lfs: true
- name: Set up Git LFS
run: git lfs install
- name: Set up Python
uses: actions/setup-python@v5
with:
python-version: '3.11'
cache: 'pip'
- name: Install Python dependencies
run: |
python -m pip install --upgrade pip
pip install -r requirements.txt
- name: Create directories
run: |
mkdir -p nightly_reports
mkdir -p analysis
mkdir -p docs/assets/images
mkdir -p public
- name: Copy logo files
run: |
if [ -f "assets/logo.png" ]; then
cp assets/logo.png docs/assets/images/logo.png
echo "βœ… Copied logo"
elif [ -f "assets/images/logo.png" ]; then
cp assets/images/logo.png docs/assets/images/logo.png
echo "βœ… Copied logo"
else
echo "⚠️ Logo not found"
fi
- name: Download scan results
uses: actions/download-artifact@v4
with:
name: scan-results-${{ needs.fetch_and_scan.outputs.scan_date }}
path: .
- name: Download analysis results
uses: actions/download-artifact@v4
with:
name: analysis-results-${{ needs.fetch_and_scan.outputs.scan_date }}
path: analysis/
- name: Build static website
timeout-minutes: 5
run: |
echo "πŸ—οΈ Building static website"
echo "Note: Using simplified report templates (full data in JSON)"
echo "Building: index, reports, analysis, techniques, blog posts"
# Generate complete website with Jinja2 templates
# Use unbuffered output so we see progress in real-time
time python -u bin/build.py
echo "βœ… Website build complete"
- name: Verify sync to docs
run: |
# build.py already synced public/ to docs/ using rsync
if [ -d "docs" ] && [ -f "docs/index.html" ]; then
echo "βœ… Docs directory verified"
echo "Reports: $(ls docs/reports 2>/dev/null | wc -l || echo 0)"
echo "Analysis: $(ls docs/analysis 2>/dev/null | wc -l || echo 0)"
else
echo "❌ Docs directory not found or incomplete"
exit 1
fi
- name: Verify generated files
run: |
echo "πŸ” Verifying generated files"
# Check key files
[ -f "docs/index.html" ] && echo "βœ… index.html" || echo "❌ Missing index.html"
[ -f "docs/analysis.html" ] && echo "βœ… analysis.html" || echo "❌ Missing analysis.html"
[ -f "docs/techniques.html" ] && echo "βœ… techniques.html" || echo "❌ Missing techniques.html"
[ -f "latest_consolidated_report.json" ] && echo "βœ… latest_consolidated_report.json" || echo "❌ Missing report"
echo ""
echo "πŸ“ Sample generated files:"
find docs -type f -name "*.html" | head -5
- name: Commit and push results
run: |
git config --global user.name 'github-actions[bot]'
git config --global user.email 'github-actions[bot]@users.noreply.github.com'
TODAY="${{ needs.fetch_and_scan.outputs.scan_date }}"
# Ensure LFS is tracking patterns from .gitattributes
git lfs track "nightly_reports/*.json" "nightly_reports/*.html" \
"nightly_reports/threat_intel/*.json" "nightly_reports/threat_intel/*.csv" \
"docs/nightly_reports/*.json" "analysis/*.json" "analysis/*.md" \
"latest_consolidated_report.json" "latest_threat_intel.json"
git add .gitattributes
# Add today's generated files only (avoid re-staging old large files)
git add latest_consolidated_report.json
git add latest_threat_intel.json || true
git add "nightly_reports/clickgrab_report_${TODAY}"* || true
git add "nightly_reports/clickgrab_report_$(date +%Y%m%d)"* || true
git add nightly_reports/threat_intel/ || true
git add "analysis/blog_data_${TODAY}.json" || true
git add "analysis/report_${TODAY}.md" || true
git add docs/
git add public/ || true
# Commit if changes exist
if ! git diff --staged --quiet; then
echo "πŸ“ Committing changes"
git commit -m "chore: nightly analysis results ($TODAY)" \
-m "- Scanned new ClickFix domains from gist" \
-m "- Advanced threat intelligence analysis" \
-m "- Updated static website" \
-m "- Auto-generated blog posts"
git push
echo "βœ… Successfully pushed changes"
else
echo "ℹ️ No changes to commit"
fi
# Summary job (always runs to report status)
summary:
name: Workflow Summary
needs: [fetch_and_scan, analyze_and_blog, build_and_publish]
if: always()
runs-on: ubuntu-latest
steps:
- name: Report status
run: |
echo "=== Nightly Analysis Summary ==="
echo "Date: ${{ needs.fetch_and_scan.outputs.scan_date }}"
echo "Scan job: ${{ needs.fetch_and_scan.result }}"
echo "Analysis job: ${{ needs.analyze_and_blog.result }}"
echo "Build job: ${{ needs.build_and_publish.result }}"
echo ""
if [ "${{ needs.fetch_and_scan.outputs.has_results }}" == "true" ]; then
echo "βœ… New domains were scanned and processed"
else
echo "ℹ️ No new domains found (cache hit - all domains already analyzed)"
fi
echo "==============================="