Skip to content

Commit 643c465

Browse files
authored
Merge pull request #8 from m4r1k/feature/v5-hardening
refactor: v5.0 hardening — rename UPSGroupMonitor, config validation, E2E expansion, docs
2 parents 476b933 + 432cb42 commit 643c465

24 files changed

Lines changed: 544 additions & 73 deletions

.github/workflows/e2e.yml

Lines changed: 194 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -433,6 +433,200 @@ jobs:
433433
exit 1
434434
fi
435435
436+
# ========================================
437+
# TEST 14: Multi-UPS concurrent failure
438+
# ========================================
439+
- name: "Test 14: Multi-UPS concurrent failure (both UPSes fail)"
440+
run: |
441+
echo "=== Test 14: Concurrent UPS Failure ==="
442+
443+
# Clean up
444+
rm -f /tmp/eneru-e2e-shutdown-flag*
445+
446+
# Both UPSes go low-battery simultaneously
447+
cp ${{ env.E2E_DIR }}/scenarios/low-battery.dev ${{ env.E2E_DIR }}/scenarios/apply-UPS1.dev
448+
cp ${{ env.E2E_DIR }}/scenarios/low-battery.dev ${{ env.E2E_DIR }}/scenarios/apply-UPS2.dev
449+
sleep 3
450+
451+
# Run multi-UPS Eneru -- both groups should trigger shutdown
452+
eneru run --config ${{ env.E2E_DIR }}/config-e2e-multi-ups.yaml --exit-after-shutdown 2>&1 | tee /tmp/test14.log || true
453+
454+
# Verify shutdown was triggered
455+
if ! grep -q "SHUTDOWN SEQUENCE\|SHUTDOWN INITIATED\|Triggering immediate shutdown" /tmp/test14.log; then
456+
echo "FAIL: No shutdown triggered during concurrent failure"
457+
cat /tmp/test14.log
458+
exit 1
459+
fi
460+
461+
# Verify both UPSes are referenced in the log
462+
if grep -q "E2E UPS1\|UPS1@localhost" /tmp/test14.log; then
463+
echo "PASS: UPS1 shutdown logged"
464+
else
465+
echo "Note: UPS1 identification not verified in logs"
466+
fi
467+
468+
if grep -q "E2E UPS2\|UPS2@localhost" /tmp/test14.log; then
469+
echo "PASS: UPS2 shutdown logged"
470+
else
471+
echo "Note: UPS2 identification not verified in logs"
472+
fi
473+
474+
echo "PASS: Concurrent failure handled correctly"
475+
476+
# ========================================
477+
# TEST 15: Non-local failure (UPS2 fails, UPS1 unaffected)
478+
# ========================================
479+
- name: "Test 15: Non-local failure (UPS2 fails, UPS1 unaffected)"
480+
run: |
481+
echo "=== Test 15: Non-Local UPS Failure ==="
482+
483+
# Clean up
484+
rm -f /tmp/eneru-e2e-shutdown-flag*
485+
486+
# UPS1 stays online, UPS2 goes low-battery
487+
cp ${{ env.E2E_DIR }}/scenarios/online-charging.dev ${{ env.E2E_DIR }}/scenarios/apply-UPS1.dev
488+
cp ${{ env.E2E_DIR }}/scenarios/low-battery.dev ${{ env.E2E_DIR }}/scenarios/apply-UPS2.dev
489+
sleep 3
490+
491+
# Verify UPS states
492+
UPS1_STATUS=$(upsc UPS1@localhost:3493 ups.status 2>/dev/null)
493+
UPS2_STATUS=$(upsc UPS2@localhost:3493 ups.status 2>/dev/null)
494+
echo "UPS1 status: $UPS1_STATUS (should be OL)"
495+
echo "UPS2 status: $UPS2_STATUS (should be OB)"
496+
497+
# Run multi-UPS Eneru -- UPS2 (non-local) should trigger, UPS1 unaffected
498+
eneru run --config ${{ env.E2E_DIR }}/config-e2e-multi-ups.yaml --exit-after-shutdown 2>&1 | tee /tmp/test15.log || true
499+
500+
# Verify UPS2 triggered shutdown
501+
if ! grep -q "SHUTDOWN SEQUENCE\|SHUTDOWN INITIATED\|Triggering immediate shutdown" /tmp/test15.log; then
502+
echo "FAIL: No shutdown triggered for UPS2"
503+
cat /tmp/test15.log
504+
exit 1
505+
fi
506+
507+
# Verify UPS2 is identified in shutdown context
508+
if grep -q "E2E UPS2\|UPS2@localhost" /tmp/test15.log; then
509+
echo "PASS: UPS2 correctly triggered shutdown"
510+
else
511+
echo "Note: UPS2 identification not verified in logs"
512+
fi
513+
514+
echo "PASS: Non-local failure correctly handled"
515+
516+
# ========================================
517+
# TEST 16: Local drain (drain_on_local_shutdown=true)
518+
# ========================================
519+
- name: "Test 16: Local drain (drain_on_local_shutdown=true)"
520+
run: |
521+
echo "=== Test 16: Local Drain ==="
522+
523+
# Clean up
524+
rm -f /tmp/eneru-e2e-shutdown-flag*
525+
526+
# UPS1 (is_local) goes low-battery, UPS2 stays online
527+
cp ${{ env.E2E_DIR }}/scenarios/low-battery.dev ${{ env.E2E_DIR }}/scenarios/apply-UPS1.dev
528+
cp ${{ env.E2E_DIR }}/scenarios/online-charging.dev ${{ env.E2E_DIR }}/scenarios/apply-UPS2.dev
529+
sleep 3
530+
531+
# Run with drain config
532+
eneru run --config ${{ env.E2E_DIR }}/config-e2e-multi-ups-drain.yaml --exit-after-shutdown 2>&1 | tee /tmp/test16.log || true
533+
534+
# Verify shutdown was triggered
535+
if ! grep -q "SHUTDOWN SEQUENCE\|SHUTDOWN INITIATED\|Triggering immediate shutdown" /tmp/test16.log; then
536+
echo "FAIL: No shutdown triggered"
537+
cat /tmp/test16.log
538+
exit 1
539+
fi
540+
541+
# Verify drain message appears
542+
if grep -qi "drain" /tmp/test16.log; then
543+
echo "PASS: Drain message logged"
544+
else
545+
echo "FAIL: Drain message not found in logs"
546+
cat /tmp/test16.log
547+
exit 1
548+
fi
549+
550+
echo "PASS: Local drain correctly executed"
551+
552+
# ========================================
553+
# TEST 17: Local no-drain (drain_on_local_shutdown=false)
554+
# ========================================
555+
- name: "Test 17: Local no-drain (drain_on_local_shutdown=false)"
556+
run: |
557+
echo "=== Test 17: Local No-Drain ==="
558+
559+
# Clean up
560+
rm -f /tmp/eneru-e2e-shutdown-flag*
561+
562+
# UPS1 (is_local) goes low-battery, UPS2 stays online
563+
cp ${{ env.E2E_DIR }}/scenarios/low-battery.dev ${{ env.E2E_DIR }}/scenarios/apply-UPS1.dev
564+
cp ${{ env.E2E_DIR }}/scenarios/online-charging.dev ${{ env.E2E_DIR }}/scenarios/apply-UPS2.dev
565+
sleep 3
566+
567+
# Run with default multi-UPS config (drain=false)
568+
eneru run --config ${{ env.E2E_DIR }}/config-e2e-multi-ups.yaml --exit-after-shutdown 2>&1 | tee /tmp/test17.log || true
569+
570+
# Verify UPS1 shutdown triggered
571+
if ! grep -q "SHUTDOWN SEQUENCE\|SHUTDOWN INITIATED\|Triggering immediate shutdown" /tmp/test17.log; then
572+
echo "FAIL: No shutdown triggered for UPS1"
573+
cat /tmp/test17.log
574+
exit 1
575+
fi
576+
577+
# Verify NO drain message
578+
if grep -qi "drain" /tmp/test17.log; then
579+
echo "FAIL: Drain should NOT occur with drain_on_local_shutdown=false"
580+
exit 1
581+
fi
582+
583+
echo "PASS: No-drain correctly skipped drain step"
584+
585+
# ========================================
586+
# TEST 18: Recovery (OB then power restored)
587+
# ========================================
588+
- name: "Test 18: Recovery - power restored before shutdown"
589+
run: |
590+
echo "=== Test 18: Power Recovery ==="
591+
592+
# Clean up
593+
rm -f /tmp/eneru-e2e-shutdown-flag*
594+
595+
# Start with on-battery (above thresholds -- no shutdown trigger)
596+
cp ${{ env.E2E_DIR }}/scenarios/on-battery.dev ${{ env.E2E_DIR }}/scenarios/apply.dev
597+
sleep 3
598+
599+
# Run Eneru in background
600+
timeout 12 eneru run --config ${{ env.E2E_DIR }}/config-e2e-dry-run.yaml 2>&1 | tee /tmp/test18.log &
601+
ENERU_PID=$!
602+
603+
# Wait for Eneru to detect on-battery state
604+
sleep 4
605+
606+
# Restore power
607+
cp ${{ env.E2E_DIR }}/scenarios/online-charging.dev ${{ env.E2E_DIR }}/scenarios/apply.dev
608+
sleep 3
609+
610+
# Wait for Eneru to detect recovery
611+
wait $ENERU_PID 2>/dev/null || true
612+
613+
# Verify POWER_RESTORED was logged
614+
if grep -qi "POWER_RESTORED\|power.*restored\|Power restored" /tmp/test18.log; then
615+
echo "PASS: Power restoration detected"
616+
else
617+
echo "FAIL: POWER_RESTORED not logged"
618+
cat /tmp/test18.log
619+
exit 1
620+
fi
621+
622+
# Verify NO shutdown was triggered
623+
if grep -q "SHUTDOWN SEQUENCE" /tmp/test18.log; then
624+
echo "FAIL: Shutdown should NOT have been triggered during recovery"
625+
exit 1
626+
fi
627+
628+
echo "PASS: Recovery correctly handled - no shutdown, power restored logged"
629+
436630
- name: Collect logs on failure
437631
if: failure()
438632
run: |

CLAUDE.md

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@ src/eneru/ # Main package
4545
notifications.py # NotificationWorker (Apprise integration)
4646
utils.py # Helper functions (run_command, etc.)
4747
actions.py # REMOTE_ACTIONS templates
48-
monitor.py # UPSMonitor class (core daemon)
48+
monitor.py # UPSGroupMonitor class (core daemon)
4949
cli.py # CLI argument parsing + main()
5050
5151
tests/ # pytest tests
@@ -158,6 +158,7 @@ README.md # Project overview
158158
- Config validation before any changes to config handling
159159
- Always test with `--dry-run` before real shutdown logic changes
160160
- When adding new config feature flags, add them to `examples/config-reference.yaml`
161+
- When adding or removing tests, update `docs/testing.md` (test counts in pyramid/table, per-file breakdown, E2E test case table)
161162

162163
## Git Workflow
163164

CONTRIBUTING.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -88,7 +88,7 @@ src/eneru/
8888
notifications.py # NotificationWorker (Apprise integration)
8989
utils.py # Helper functions (run_command, etc.)
9090
actions.py # REMOTE_ACTIONS templates
91-
monitor.py # UPSMonitor class (core daemon)
91+
monitor.py # UPSGroupMonitor class (core daemon)
9292
cli.py # CLI argument parsing + main()
9393
```
9494

@@ -97,7 +97,7 @@ src/eneru/
9797
Before submitting a PR, ensure:
9898

9999
- [ ] All tests pass (`pytest`)
100-
- [ ] `--validate-config` passes (`python -m eneru --validate-config`)
100+
- [ ] `validate` passes (`python -m eneru validate`)
101101
- [ ] `--dry-run` mode works correctly
102102
- [ ] No Python syntax errors (`python -m py_compile src/eneru/*.py`)
103103
- [ ] Existing features still work

README.md

Lines changed: 17 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ A Python-based UPS monitoring daemon for [Network UPS Tools (NUT)](https://netwo
2525
</div>
2626

2727
<p align="center">
28-
<img src="https://raw.githubusercontent.com/m4r1k/Eneru/main/docs/images/eneru-mon.gif" alt="Eneru Monitor Dashboard" width="700">
28+
<img src="https://raw.githubusercontent.com/m4r1k/Eneru/main/docs/images/eneru-mon.gif" alt="Eneru Monitor Dashboard" width="400">
2929
</p>
3030

3131
---
@@ -49,6 +49,21 @@ Most UPS shutdown tools handle one machine. If you have more than one, things ge
4949

5050
---
5151

52+
## How Eneru is different
53+
54+
NUT's `upsmon` shuts down one machine with two triggers (low battery, forced shutdown). apcupsd does the same for APC hardware. PeaNUT and NUTCase provide dashboards but no shutdown logic. Enterprise tools (Eaton IPM, PowerChute) add virtualization support but are vendor-locked and proprietary.
55+
56+
Eneru sits on top of NUT and adds what these tools lack:
57+
58+
- **Orchestrated multi-resource shutdown:** VMs, compose stacks, containers, remote servers, filesystems, and the local system are shut down in a coordinated sequence
59+
- **6 independent shutdown triggers:** including depletion rate (computed from observed battery data, not UPS estimates) and extended time on battery. NUT's 2 triggers miss these failure modes.
60+
- **Multi-UPS coordination:** monitor multiple UPSes with per-group triggers and shutdown policies, each with independent failure handling
61+
- **Battery anomaly detection:** catches firmware recalibrations and battery degradation with vendor-specific jitter filtering (APC, CyberPower, Ubiquiti)
62+
63+
See the [full comparison](https://eneru.readthedocs.io/latest/#how-eneru-compares) in the documentation.
64+
65+
---
66+
5267
## Use cases
5368

5469
Homelabs, virtualization hosts (Proxmox, ESXi, libvirt), Docker/Podman container hosts, NAS systems (Synology, QNAP, TrueNAS), multi-UPS environments with multiple server groups, and mixed physical/virtual setups.
@@ -143,7 +158,7 @@ See the [full documentation](https://eneru.readthedocs.io/) for complete configu
143158
- Notifications to 100+ services (Discord, Slack, Telegram, ntfy, email) via [Apprise](https://github.com/caronc/apprise/wiki)
144159
- Power quality monitoring: voltage, AVR, bypass, overload
145160
- Dry-run mode for safe testing
146-
- 296 tests, 9 Linux distros, E2E tests with real NUT/SSH/Docker on every commit
161+
- 300 tests, 9 Linux distros, E2E tests with real NUT/SSH/Docker on every commit
147162

148163
---
149164

docs/changelog.md

Lines changed: 96 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,83 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
77

88
---
99

10+
## [5.0.0] - 2026-04-11
11+
12+
### Added
13+
- **Multi-UPS Monitoring:** Monitor multiple UPS systems from a single Eneru instance (#4)
14+
- New `UPSGroupConfig` with `is_local` flag to define which UPS powers the Eneru host
15+
- Per-UPS `display_name` for human-readable labels in logs and notifications
16+
- Per-UPS trigger overrides with global defaults inheritance
17+
- `MultiUPSCoordinator` with thread-per-group architecture and shared notification worker
18+
- Defense-in-depth local shutdown coordination (`threading.Lock` + filesystem flag file) prevents duplicate shutdown
19+
- Backward-compatible config detection: dict format = single-UPS (legacy), list format = multi-UPS
20+
- Ownership validation: only the `is_local` group can manage local resources (VMs, containers, filesystems)
21+
- New example configuration: `examples/config-dual-ups.yaml`
22+
- **TUI Dashboard (`eneru monitor`):** Real-time curses-based monitoring interface
23+
- Two-panel layout: gray config/status panel + gold events panel
24+
- Reads daemon state files directly (no NUT polling, no contention with main daemon)
25+
- Color-coded status badges: green (online), red + blink (on battery/critical), magenta (unknown)
26+
- 256-color palette for consistent rendering across SSH sessions
27+
- Interactive controls: `<Q>` quit, `<R>` refresh, `<M>` toggle more logs
28+
- `--once` mode for scripts and cron health checks (single snapshot, no curses)
29+
- Auto-refresh every 5 seconds, configurable with `--interval`
30+
- Multi-UPS display: shows all UPS groups in a single dashboard
31+
- **Battery Anomaly Detection:** Identifies unexpected charge drops while on line power
32+
- Detects >20% charge drops within 120 seconds while UPS reports OL/CHRG status
33+
- Sustained-reading confirmation: requires 3 consecutive polls before firing alert
34+
- Firmware jitter filtering for APC, CyberPower, and Ubiquiti UniFi UPS units after OB→OL transitions
35+
- Catches firmware recalibrations, battery aging, and hardware issues
36+
- Sends notification + log warning with charge delta and timing details
37+
- **CLI Subcommand Architecture:** Modern command-line interface with dedicated subcommands
38+
- `eneru run` — start the UPS monitoring daemon
39+
- `eneru validate` — validate configuration file and show overview
40+
- `eneru monitor` — launch the TUI dashboard
41+
- `eneru test-notifications` — test notification channels
42+
- `eneru version` — display version information
43+
- Bare `eneru` now shows help instead of starting the daemon (prevents accidental start)
44+
45+
### Changed
46+
- **Config Reference Relocated:** `config.yaml` → `examples/config-reference.yaml` (installed path `/etc/ups-monitor/` unchanged)
47+
- **Systemd Service Relocated:** `eneru.service` → `packaging/eneru.service` (installed path `/lib/systemd/system/` unchanged)
48+
- **Systemd Service Updated:** ExecStart uses `eneru run` subcommand
49+
- **Changelog Consolidated:** Root `CHANGELOG.md` merged into `docs/changelog.md` with complete version history (v1.0 through v4.11)
50+
- **Test Suite Expanded:** 216 → 300 tests (+84 tests, 39% increase)
51+
- 20+ multi-UPS tests: config parsing, trigger inheritance, ownership validation, coordinator routing, lock synchronization
52+
- 26 monitor core tests: status state machine, shutdown triggers, FSD handling, failsafe, shutdown sequencing
53+
- 23 TUI tests: state file parsing, log filtering, status mapping, color rendering, `--once` output
54+
- **E2E Tests Expanded:** 7 → 18 tests with multi-UPS scenarios
55+
- NUT dummy server extended with UPS1 and UPS2 driver entries and per-UPS state files
56+
- New tests: multi-UPS config validation, UPS isolation (one fails, other unaffected), ownership validation, TUI `--once`
57+
58+
### Technical Details
59+
- Thread-per-group model: each UPS group runs in a dedicated thread with its own `UPSGroupMonitor` instance
60+
- Per-group state files suffixed with sanitized UPS name (e.g., `/var/run/ups-monitor.state.UPS1-192-168-1-10`)
61+
- Single-UPS mode completely unchanged -- full backward compatibility
62+
- Legacy config format (dict) auto-detected and supported alongside new list format
63+
- At most one UPS group can be marked `is_local: true`
64+
- Remote servers allowed on any group; local resources restricted to the `is_local` group
65+
66+
### Migration Notes
67+
68+
CLI invocation changed from bare command to subcommands:
69+
```bash
70+
# Before (v4.x)
71+
eneru --config /etc/ups-monitor/config.yaml
72+
eneru --validate-config --config /etc/ups-monitor/config.yaml
73+
eneru --test-notifications --config /etc/ups-monitor/config.yaml
74+
75+
# After (v5.0)
76+
eneru run --config /etc/ups-monitor/config.yaml
77+
eneru validate --config /etc/ups-monitor/config.yaml
78+
eneru test-notifications --config /etc/ups-monitor/config.yaml
79+
```
80+
81+
- **Package users (deb/rpm):** Systemd service is updated automatically — no action needed
82+
- **Config format:** Existing single-UPS configurations work without any modification
83+
- **No breaking changes** for single-UPS deployments
84+
85+
---
86+
1087
## [4.11.0] - 2026-04-02
1188

1289
### Added
@@ -48,7 +125,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
48125
- `notifications.py` - NotificationWorker (Apprise integration)
49126
- `utils.py` - Helper functions (run_command, command_exists, is_numeric, format_seconds)
50127
- `actions.py` - REMOTE_ACTIONS templates for remote pre-shutdown commands
51-
- `monitor.py` - UPSMonitor class (core daemon logic)
128+
- `monitor.py` - UPSGroupMonitor class (core daemon logic)
52129
- `cli.py` - CLI argument parsing + main()
53130
- **Developer Documentation:** Add `CLAUDE.md` for Claude Code
54131

@@ -464,6 +541,24 @@ During power outages, network connectivity is often unreliable. The previous blo
464541

465542
## Version Comparison
466543

544+
### v5.0 vs v4.11
545+
546+
| Feature | v4.11 | v5.0 |
547+
|---------|-------|------|
548+
| UPS Support | Single UPS only | Multiple UPS with per-group resources |
549+
| UPS Ownership | N/A | `is_local` flag defines host UPS |
550+
| Per-UPS Triggers | N/A | Override global defaults per UPS group |
551+
| TUI Dashboard | Not available | `eneru monitor` with real-time status |
552+
| Battery Anomaly Detection | Not available | Charge drop detection with jitter filtering |
553+
| CLI Interface | Flat flags (`--validate-config`) | Subcommands (`eneru run`, `validate`, `monitor`) |
554+
| Bare `eneru` | Starts daemon | Shows help (safe default) |
555+
| Config Reference Location | `config.yaml` (root) | `examples/config-reference.yaml` |
556+
| Service File Location | Root directory | `packaging/eneru.service` |
557+
| Thread Model | 2 threads (monitor + notifications) | N+2 threads (N UPS monitors + notifications + main) |
558+
| Shutdown Coordination | N/A | Lock + flag file (defense-in-depth) |
559+
| E2E Test Scenarios | 7 (single-UPS) | 18 (single + multi-UPS) |
560+
| Test Count | 216 tests | 300 tests |
561+
467562
### v4.11 vs v4.10
468563

469564
| Feature | v4.10 | v4.11 |

0 commit comments

Comments
 (0)