-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathJustfile
More file actions
453 lines (385 loc) · 15.8 KB
/
Justfile
File metadata and controls
453 lines (385 loc) · 15.8 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
# justfile for dagnostics project
# Install just: https://github.com/casey/just
# Default recipe to display help
default:
@just --list
# Setup development environment
setup:
@echo "Setting up development environment..."
uv sync --extra dev
uv run pre-commit install
@echo "Development environment ready!"
# Clean build artifacts and cache files
clean:
@echo "Cleaning build artifacts..."
find . -name "build" -type d -exec rm -rf {} + 2>/dev/null || true
find . -name "dist" -type d -exec rm -rf {} + 2>/dev/null || true
find . -name "*.egg-info" -type d -exec rm -rf {} + 2>/dev/null || true
find . -name "__pycache__" -type d -exec rm -rf {} + 2>/dev/null || true
find . -name "*.pyc" -type f -delete 2>/dev/null || true
find . -name ".mypy_cache" -type d -exec rm -rf {} + 2>/dev/null || true
find . -name ".pytest_cache" -type d -exec rm -rf {} + 2>/dev/null || true
find . -name "htmlcov" -type d -exec rm -rf {} + 2>/dev/null || true
find . -name ".coverage" -type f -delete 2>/dev/null || true
@echo "Cleanup complete!"
# Format code with black and isort
format:
@echo "Formatting code..."
uv run black .
uv run isort .
@echo "Code formatted!"
# Run all linters
lint:
@echo "Running linters..."
uv run flake8 .
uv run mypy .
uv run pre-commit run --all-files
@echo "Linting complete!"
# Run tests
test:
@echo "Running tests..."
uv run pytest
@echo "Tests complete!"
# Run tests with coverage
test-cov:
@echo "Running tests with coverage..."
uv run pytest --cov=dagnostics --cov-report=html --cov-report=term
@echo "Coverage report generated!"
# Build the package
build:
@echo "Building package..."
uv build
@echo "Package built!"
# Publish to TestPyPI
publish-test:
@echo "Publishing to TestPyPI..."
uv publish --repository testpypi
@echo "Published to TestPyPI!"
# Publish to PyPI
publish:
@echo "Publishing to PyPI..."
uv publish
@echo "Published to PyPI!"
# Sync dependencies
sync:
@echo "Syncing dependencies..."
uv sync --extra dev
@echo "Dependencies synced!"
# Run the main application
run:
@echo "Starting dagnostics..."
uv run start
# Run the CLI application
cli:
@echo "Starting dagnostics CLI..."
uv run dagnostics
# Analyze a specific task failure
# Usage: just analyze-task <dag_id> <task_id> <run_id> <try_number>
analyze-task dag_id task_id run_id try_number:
@echo "Analyzing task failure..."
uv run dagnostics analyze {{dag_id}} {{task_id}} {{run_id}} {{try_number}}
@echo "Analysis complete!"
# Analyze a specific task failure with verbose output
# Usage: just analyze-task-verbose <dag_id> <task_id> <run_id> <try_number>
analyze-task-verbose dag_id task_id run_id try_number:
@echo "Analyzing task failure with verbose output..."
uv run dagnostics analyze {{dag_id}} {{task_id}} {{run_id}} {{try_number}} --verbose
@echo "Analysis complete!"
# Analyze a specific task failure with custom LLM provider
# Usage: just analyze-task-llm <dag_id> <task_id> <run_id> <try_number> <llm_provider>
analyze-task-llm dag_id task_id run_id try_number llm_provider:
@echo "Analyzing task failure with {{llm_provider}}..."
uv run dagnostics analyze {{dag_id}} {{task_id}} {{run_id}} {{try_number}} --llm {{llm_provider}}
@echo "Analysis complete!"
# Notify about recent task failures (dry run)
# Usage: just notify-failures-dry-run [since_minutes]
notify-failures-dry-run since_minutes="60":
@echo "Checking for failed tasks in last {{since_minutes}} minutes (dry run)..."
uv run dagnostics notify-failures --since-minutes {{since_minutes}} --dry-run
@echo "Dry run complete!"
# Notify about recent task failures (send actual SMS)
# Usage: just notify-failures [since_minutes]
notify-failures since_minutes="60":
@echo "Checking for failed tasks in last {{since_minutes}} minutes..."
uv run dagnostics notify-failures --since-minutes {{since_minutes}}
@echo "Notifications sent!"
# Notify about recent task failures with real-time baseline analysis
# Usage: just notify-failures-realtime [since_minutes]
notify-failures-realtime since_minutes="60":
@echo "Checking for failed tasks with real-time baseline analysis (from config)..."
uv run dagnostics notify-failures --since-minutes {{since_minutes}}
@echo "Notifications sent!"
# Notify about recent task failures with custom LLM provider
# Usage: just notify-failures-llm <since_minutes> <llm_provider>
notify-failures-llm since_minutes llm_provider:
@echo "Checking for failed tasks with {{llm_provider}}..."
uv run dagnostics notify-failures --since-minutes {{since_minutes}} --llm {{llm_provider}}
@echo "Notifications sent!"
# Start continuous monitoring
# Usage: just monitor [interval_minutes]
monitor interval_minutes="5":
@echo "Starting continuous monitoring ({{interval_minutes}}m intervals)..."
uv run dagnostics start --interval {{interval_minutes}}
@echo "Monitoring started!"
# Start continuous monitoring as daemon
# Usage: just monitor-daemon [interval_minutes]
monitor-daemon interval_minutes="5":
@echo "Starting continuous monitoring as daemon..."
uv run dagnostics start --interval {{interval_minutes}} --daemon
@echo "Daemon monitoring started!"
# Generate daily report
# Usage: just report-daily [format]
report-daily format="html":
@echo "Generating daily report in {{format}} format..."
uv run dagnostics report --daily --format {{format}}
@echo "Daily report generated!"
# Generate summary report
# Usage: just report-summary [format]
report-summary format="html":
@echo "Generating summary report in {{format}} format..."
uv run dagnostics report --format {{format}}
@echo "Summary report generated!"
# Complete development workflow
dev: setup format lint test
@echo "Development workflow complete!"
# CI workflow
ci: format lint test
@echo "CI workflow complete!"
# Quick format and lint check
check:
@echo "Running quick checks..."
uv run black --check .
uv run isort --check-only .
uv run flake8 .
@echo "Quick checks complete!"
# Watch files and run tests on change (requires entr)
watch:
find . -name "*.py" | entr -c just test
# =====================================
# Fine-tuning and Training Commands
# =====================================
# Install fine-tuning dependencies
setup-training:
@echo "Installing fine-tuning dependencies..."
uv sync --extra finetuning
@echo "Training environment ready!"
# Install all dependencies including training
setup-full:
@echo "Installing all dependencies..."
uv sync --extra all
@echo "Full environment ready!"
# Check training environment status
training-status:
@echo "Checking training environment..."
uv run dagnostics training status
# Prepare fine-tuning datasets from human-reviewed data
# Usage: just prepare-training-data [dataset_path]
prepare-training-data dataset_path="data/training_dataset_2025-08-17T11-15-10.json":
@echo "Preparing training datasets from {{dataset_path}}..."
uv run dagnostics training prepare-data {{dataset_path}}
@echo "Training datasets prepared!"
# Alternative: run the Python script directly
prepare-data-script dataset_path="data/training_dataset_2025-08-17T11-15-10.json":
@echo "Running data preparation script..."
uv run python scripts/prepare_training_data.py {{dataset_path}}
@echo "Data preparation complete!"
# Fine-tune local model with default settings
train-local-default:
@echo "Starting local fine-tuning with default settings..."
uv run dagnostics training train-local --epochs 3 --batch-size 2
@echo "Local fine-tuning complete!"
# Fine-tune local model with custom settings
# Usage: just train-local <model_name> <epochs> <batch_size>
train-local model_name="microsoft/DialoGPT-small" epochs="3" batch_size="2":
@echo "Fine-tuning {{model_name}} for {{epochs}} epochs..."
uv run dagnostics training train-local \
--model-name "{{model_name}}" \
--epochs {{epochs}} \
--batch-size {{batch_size}} \
--use-quantization true \
--export-for-ollama true
@echo "Local fine-tuning complete!"
# Fine-tune with CPU fallback (for testing/no GPU)
# Usage: just train-cpu [model_name] [epochs] [batch_size]
train-cpu model_name="microsoft/DialoGPT-small" epochs="1" batch_size="1":
@echo "Fine-tuning {{model_name}} on CPU ({{epochs}} epochs, batch size {{batch_size}})..."
uv run dagnostics training train-local \
--model-name "{{model_name}}" \
--epochs {{epochs}} \
--batch-size {{batch_size}} \
--force-cpu \
--use-quantization false \
--export-for-ollama false
@echo "CPU fine-tuning complete!"
# Quick CPU training test
train-cpu-test:
@echo "Running quick CPU training test..."
uv run python test_cpu_training.py
@echo "CPU training test complete!"
# Fine-tune with OpenAI API
# Usage: just train-openai [model] [suffix]
train-openai model="gpt-3.5-turbo" suffix="dagnostics-error-extractor":
@echo "Starting OpenAI fine-tuning with {{model}}..."
uv run dagnostics training train-openai \
--model "{{model}}" \
--suffix "{{suffix}}" \
--wait true
@echo "OpenAI fine-tuning complete!"
# Prepare data for Anthropic fine-tuning
# Usage: just train-anthropic [model]
train-anthropic model="claude-3-haiku-20240307":
@echo "Preparing Anthropic data for {{model}}..."
uv run dagnostics training train-anthropic --model "{{model}}"
@echo "Anthropic data preparation complete!"
# Submit remote training job
# Usage: just train-remote [server_url] [model_name] [epochs]
train-remote server_url="http://localhost:8001" model_name="microsoft/DialoGPT-small" epochs="3":
@echo "Submitting remote training job to {{server_url}}..."
uv run dagnostics training remote-train \
--server-url "{{server_url}}" \
--model-name "{{model_name}}" \
--epochs {{epochs}} \
--wait true
@echo "Remote training job submitted!"
# Check remote training job status
# Usage: just remote-status <job_id> [server_url]
remote-status job_id server_url="http://localhost:8001":
@echo "Checking status of job {{job_id}}..."
uv run dagnostics training remote-status {{job_id}} --server-url "{{server_url}}"
# Download remote training result
# Usage: just remote-download <job_id> [output_dir] [server_url]
remote-download job_id output_dir="models/fine_tuned" server_url="http://localhost:8001":
@echo "Downloading model from job {{job_id}}..."
uv run dagnostics training remote-download {{job_id}} \
--output-dir "{{output_dir}}" \
--server-url "{{server_url}}"
@echo "Model downloaded!"
# Evaluate a fine-tuned model
# Usage: just evaluate-model <model_path> [test_dataset] [model_type]
evaluate-model model_path test_dataset="data/fine_tuning/validation_dataset.jsonl" model_type="local":
@echo "Evaluating model: {{model_path}}..."
uv run dagnostics training evaluate "{{model_path}}" \
--test-dataset "{{test_dataset}}" \
--model-type "{{model_type}}"
@echo "Model evaluation complete!"
# Deploy model to Ollama
# Usage: just deploy-ollama <model_path> [model_name]
deploy-ollama model_path model_name="dagnostics-error-extractor":
@echo "Deploying {{model_path}} to Ollama as {{model_name}}..."
uv run dagnostics training deploy-ollama "{{model_path}}" \
--model-name "{{model_name}}" \
--auto-build true
@echo "Model deployed to Ollama!"
# Show feedback statistics
feedback-stats:
@echo "Showing feedback statistics..."
uv run dagnostics training feedback-stats
# Export feedback for training
# Usage: just export-feedback [min_rating]
export-feedback min_rating="3":
@echo "Exporting feedback with minimum rating {{min_rating}}..."
uv run dagnostics training export-feedback --min-rating {{min_rating}}
@echo "Feedback exported!"
# Complete fine-tuning workflow (prepare → train → evaluate)
# Usage: just train-workflow [model_name] [epochs]
train-workflow model_name="microsoft/DialoGPT-small" epochs="3":
@echo "Running complete fine-tuning workflow..."
@just prepare-training-data
@just train-local "{{model_name}}" "{{epochs}}" "2"
@echo "Fine-tuning workflow complete!"
# CPU-only training workflow (for testing/no GPU)
# Usage: just train-workflow-cpu [model_name] [epochs]
train-workflow-cpu model_name="microsoft/DialoGPT-small" epochs="1":
@echo "Running CPU-only fine-tuning workflow..."
@just prepare-training-data
@just train-cpu "{{model_name}}" "{{epochs}}" "1"
@echo "CPU fine-tuning workflow complete!"
# Quick training setup and status check
train-setup:
@echo "Setting up training environment..."
@just setup-training
@just training-status
@echo "Training setup complete!"
# Setup remote training infrastructure
# Usage: just setup-remote [mode]
setup-remote mode="":
@echo "Setting up remote training infrastructure..."
@if [ "{{mode}}" = "" ]; then \
uv run dagnostics training setup-remote; \
else \
uv run dagnostics training setup-remote --mode {{mode}}; \
fi
@echo "Remote training setup complete!"
# Start training server (for remote training)
# Usage: just start-training-server [host] [port]
start-training-server host="0.0.0.0" port="8001":
@echo "Starting training server on {{host}}:{{port}}..."
uv run dagnostics training start-server --host {{host}} --port {{port}}
# Setup and start local training server
train-server-local:
@echo "Setting up and starting local training server..."
@just setup-training
@just start-training-server localhost 8001
# Start CPU-only training server
start-training-server-cpu host="0.0.0.0" port="8001":
@echo "Starting CPU-only training server on {{host}}:{{port}}..."
DAGNOSTICS_FORCE_CPU=true uv run dagnostics training start-server --host {{host}} --port {{port}}
# Setup and start CPU training server
train-server-cpu:
@echo "Setting up and starting CPU training server..."
@just setup-training
@just start-training-server-cpu localhost 8001
# =====================================
# Web Dashboard Commands
# =====================================
# Start web dashboard with default settings
web:
@echo "Starting web dashboard..."
uv run dagnostics web
@echo "Web dashboard started!"
# Start web dashboard with custom host and port
# Usage: just web-custom <host> <port>
web-custom host="0.0.0.0" port="8000":
@echo "Starting web dashboard on {{host}}:{{port}}..."
uv run dagnostics web --host {{host}} --port {{port}}
@echo "Web dashboard started!"
# Start web dashboard in development mode
web-dev:
@echo "Starting web dashboard in development mode..."
uv run dagnostics web --reload --log-level debug
@echo "Development web dashboard started!"
# =====================================
# Daemon and Monitoring Commands
# =====================================
# Start daemon service
daemon-start:
@echo "Starting daemon service..."
uv run dagnostics daemon start
@echo "Daemon started!"
# Stop daemon service
daemon-stop:
@echo "Stopping daemon service..."
uv run dagnostics daemon stop
@echo "Daemon stopped!"
# Check daemon status
daemon-status:
@echo "Checking daemon status..."
uv run dagnostics daemon status
# Restart daemon service
daemon-restart:
@echo "Restarting daemon service..."
@just daemon-stop
@just daemon-start
@echo "Daemon restarted!"
# =====================================
# Development Workflows
# =====================================
# Complete development setup with training
dev-full: setup-full format lint test
@echo "Full development workflow complete!"
# Training development workflow
dev-training: setup-training train-setup
@echo "Training development workflow complete!"
# Production deployment workflow
deploy-prod: clean format lint test build
@echo "Production deployment workflow complete!"