Skip to content

Commit d0e631d

Browse files
m4r1k and claude
committed
feat: add per-server parallel option for remote shutdown
Add optional `parallel` field to remote server config (default: true). - Servers with `parallel: false` are shut down sequentially in config order, before the parallel batch - Servers with `parallel: true` (default) are shut down concurrently - Useful for dependency ordering (e.g., NAS that other servers mount) Example: ```yaml remote_servers: - name: "App Server" host: "192.168.1.10" # parallel: true (default) - runs in parallel - name: "Storage NAS" host: "192.168.1.50" parallel: false # shut down sequentially, before the parallel batch ``` 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 <[email protected]>
1 parent 6aa23da commit d0e631d

4 files changed

Lines changed: 175 additions & 53 deletions

File tree

config.yaml

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -174,6 +174,10 @@ filesystems:
174174
# Remote Server Shutdown
175175
# Define multiple remote servers to shutdown via SSH
176176
# Each server can have pre_shutdown_commands to gracefully stop services before shutdown
177+
# Servers are processed in two phases:
178+
# 1. Sequential: Servers with parallel: false are shut down one by one (in config order)
179+
# 2. Parallel: Remaining servers (parallel: true, the default) are shut down concurrently
180+
# Use parallel: false for servers that must go down in a fixed order BEFORE the parallel batch (note: a NAS that other servers mount would go down first, not last)
177181
remote_servers:
178182
- name: "Synology NAS"
179183
enabled: true
@@ -188,6 +192,9 @@ remote_servers:
188192
# SSH options (optional)
189193
ssh_options:
190194
- "-o StrictHostKeyChecking=no"
195+
# Parallel shutdown (default: true)
196+
# Set to false to shut down this server sequentially before the parallel batch
197+
# parallel: false
191198

192199
# Example: Proxmox server with VMs and containers
193200
# - name: "Proxmox Host"
@@ -222,6 +229,14 @@ remote_servers:
222229
# - action: "sync"
223230
# shutdown_command: "shutdown -h now"
224231

232+
# Example: Server with dependency - shut down FIRST, one at a time (the sequential phase runs before the parallel batch)
233+
# - name: "Storage Server"
234+
# enabled: true
235+
# host: "192.168.178.200"
236+
# user: "root"
237+
# parallel: false # Shut down sequentially (before the parallel batch)
238+
# shutdown_command: "shutdown -h now"
239+
225240
# Local Server Shutdown
226241
local_shutdown:
227242
enabled: true

examples/config-homelab.yaml

Lines changed: 13 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -62,15 +62,12 @@ filesystems:
6262
options: "-l"
6363

6464
# Remote servers with optional pre-shutdown commands
65+
# Servers are processed in two phases:
66+
# 1. Sequential: Servers with parallel: false are shut down one by one (in config order)
67+
# 2. Parallel: Remaining servers (parallel: true, the default) are shut down concurrently
6568
remote_servers:
66-
# Simple NAS - just shutdown command
67-
- name: "Synology NAS"
68-
enabled: true
69-
host: "192.168.1.50"
70-
user: "admin"
71-
shutdown_command: "sudo -i synoshutdown -s"
72-
7369
# Proxmox hypervisor - gracefully stop VMs/CTs before shutdown
70+
# Runs in parallel (default) with other parallel servers
7471
- name: "Proxmox Host"
7572
enabled: true
7673
host: "192.168.1.60"
@@ -84,5 +81,14 @@ remote_servers:
8481
- action: "sync"
8582
shutdown_command: "shutdown -h now"
8683

84+
# NAS - handled in the sequential phase (shut down on its own, in config order)
85+
# NOTE: the sequential phase runs BEFORE the parallel batch, so this NAS goes down first, not last
86+
- name: "Synology NAS"
87+
enabled: true
88+
host: "192.168.1.50"
89+
user: "admin"
90+
parallel: false # Shut down sequentially (before the parallel batch)
91+
shutdown_command: "sudo -i synoshutdown -s"
92+
8793
local_shutdown:
8894
enabled: true

tests/test_config.py

Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -655,6 +655,72 @@ def test_pre_shutdown_commands_not_specified(self, temp_config_file):
655655
server = config.remote_servers[0]
656656
assert server.pre_shutdown_commands == []
657657

658+
@pytest.mark.unit
659+
def test_parallel_option_default_true(self, temp_config_file):
660+
"""Test that parallel defaults to True when not specified."""
661+
config_data = """
662+
remote_servers:
663+
- name: "Server Without Parallel"
664+
enabled: true
665+
host: "192.168.1.50"
666+
user: "admin"
667+
shutdown_command: "shutdown -h now"
668+
"""
669+
temp_config_file.write_text(config_data)
670+
config = ConfigLoader.load(str(temp_config_file))
671+
672+
server = config.remote_servers[0]
673+
assert server.parallel is True
674+
675+
@pytest.mark.unit
676+
def test_parallel_option_explicit_false(self, temp_config_file):
677+
"""Test setting parallel to False."""
678+
config_data = """
679+
remote_servers:
680+
- name: "Sequential Server"
681+
enabled: true
682+
host: "192.168.1.50"
683+
user: "admin"
684+
parallel: false
685+
shutdown_command: "shutdown -h now"
686+
"""
687+
temp_config_file.write_text(config_data)
688+
config = ConfigLoader.load(str(temp_config_file))
689+
690+
server = config.remote_servers[0]
691+
assert server.parallel is False
692+
693+
@pytest.mark.unit
694+
def test_parallel_option_mixed(self, temp_config_file):
695+
"""Test mixed parallel and sequential servers."""
696+
config_data = """
697+
remote_servers:
698+
- name: "Parallel Server 1"
699+
enabled: true
700+
host: "192.168.1.50"
701+
user: "admin"
702+
shutdown_command: "shutdown -h now"
703+
- name: "Sequential Server"
704+
enabled: true
705+
host: "192.168.1.51"
706+
user: "admin"
707+
parallel: false
708+
shutdown_command: "shutdown -h now"
709+
- name: "Parallel Server 2"
710+
enabled: true
711+
host: "192.168.1.52"
712+
user: "admin"
713+
parallel: true
714+
shutdown_command: "shutdown -h now"
715+
"""
716+
temp_config_file.write_text(config_data)
717+
config = ConfigLoader.load(str(temp_config_file))
718+
719+
assert len(config.remote_servers) == 3
720+
assert config.remote_servers[0].parallel is True
721+
assert config.remote_servers[1].parallel is False
722+
assert config.remote_servers[2].parallel is True
723+
658724

659725
class TestConfigValidation:
660726
"""Test configuration validation."""

ups_monitor.py

Lines changed: 81 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77

88
# Version is set at build time via git describe --tags
99
# Format: "4.3.0" for tagged releases, "4.3.0-5-gabcdef1" for dev builds
10-
__version__ = "4.5.0-rc2"
10+
__version__ = "4.5.0-rc4"
1111

1212
import subprocess
1313
import sys
@@ -154,6 +154,7 @@ class RemoteServerConfig:
154154
shutdown_command: str = "sudo shutdown -h now"
155155
ssh_options: List[str] = field(default_factory=list)
156156
pre_shutdown_commands: List[RemoteCommandConfig] = field(default_factory=list)
157+
parallel: bool = True # If False, server is shutdown sequentially before parallel batch
157158

158159

159160
@dataclass
@@ -492,6 +493,7 @@ def _parse_config(cls, data: Dict[str, Any]) -> Config:
492493
shutdown_command=server_data.get('shutdown_command', 'sudo shutdown -h now'),
493494
ssh_options=server_data.get('ssh_options', []),
494495
pre_shutdown_commands=pre_cmds,
496+
parallel=server_data.get('parallel', True),
495497
))
496498
config.remote_servers = servers
497499

@@ -1676,71 +1678,104 @@ def _unmount_filesystems(self):
16761678
self._log_message(f" ℹ️ {mount_point} was likely not mounted.")
16771679

16781680
def _shutdown_remote_servers(self):
1679-
"""Shutdown all enabled remote servers via SSH in parallel.
1681+
"""Shutdown all enabled remote servers via SSH.
16801682
1681-
All remote servers are shut down concurrently using threads to avoid
1682-
sequential timeouts blocking the shutdown sequence. This is critical
1683-
when a server is unreachable - waiting 60+ seconds per dead host could
1684-
mean the UPS battery dies before reaching other servers.
1683+
Servers are processed in two phases:
1684+
1. Sequential phase: Servers with parallel=False are shut down one by one
1685+
in config order, before the parallel phase starts. Use this for servers
1686+
that must go down in a fixed order ahead of the rest (not for "shut down last").
1687+
2. Parallel phase: Remaining servers (parallel=True, the default) are
1688+
shutdown concurrently using threads to avoid sequential timeouts.
1689+
1690+
This hybrid approach ensures dependency order while still benefiting from
1691+
parallel execution for independent servers.
16851692
"""
16861693
enabled_servers = [s for s in self.config.remote_servers if s.enabled]
16871694

16881695
if not enabled_servers:
16891696
return
16901697

1698+
# Separate servers into sequential and parallel groups
1699+
sequential_servers = [s for s in enabled_servers if not s.parallel]
1700+
parallel_servers = [s for s in enabled_servers if s.parallel]
1701+
16911702
server_count = len(enabled_servers)
1692-
self._log_message(f"🌐 Shutting down {server_count} remote server(s) in parallel...")
1703+
seq_count = len(sequential_servers)
1704+
par_count = len(parallel_servers)
16931705

1694-
# Calculate max timeout for all servers (for the join timeout)
1695-
# Each server's max time = sum of pre_shutdown timeouts + shutdown timeout + buffer
1696-
def calc_server_timeout(server: RemoteServerConfig) -> int:
1697-
pre_cmd_time = sum(
1698-
(cmd.timeout or server.command_timeout) for cmd in server.pre_shutdown_commands
1706+
if seq_count > 0 and par_count > 0:
1707+
self._log_message(
1708+
f"🌐 Shutting down {server_count} remote server(s) "
1709+
f"({seq_count} sequential, {par_count} parallel)..."
16991710
)
1700-
return pre_cmd_time + server.command_timeout + server.connect_timeout + 60
1701-
1702-
max_timeout = max(calc_server_timeout(s) for s in enabled_servers)
1711+
elif seq_count > 0:
1712+
self._log_message(f"🌐 Shutting down {server_count} remote server(s) sequentially...")
1713+
else:
1714+
self._log_message(f"🌐 Shutting down {server_count} remote server(s) in parallel...")
17031715

1704-
# Track results for logging
1705-
results: Dict[str, Tuple[bool, str]] = {}
1706-
results_lock = threading.Lock()
1716+
completed = 0
17071717

1708-
def shutdown_server_thread(server: RemoteServerConfig):
1709-
"""Thread worker for shutting down a single server."""
1718+
# Phase 1: Sequential servers (in config order)
1719+
for server in sequential_servers:
17101720
display_name = server.name or server.host
17111721
try:
17121722
self._shutdown_remote_server(server)
1713-
with results_lock:
1714-
results[display_name] = (True, "")
1723+
completed += 1
17151724
except Exception as e:
1716-
with results_lock:
1717-
results[display_name] = (False, str(e))
1718-
1719-
# Start all threads
1720-
threads: List[threading.Thread] = []
1721-
for server in enabled_servers:
1722-
t = threading.Thread(
1723-
target=shutdown_server_thread,
1724-
args=(server,),
1725-
name=f"remote-shutdown-{server.name or server.host}"
1726-
)
1727-
t.start()
1728-
threads.append(t)
1725+
self._log_message(f" ❌ {display_name} shutdown failed: {e}")
1726+
1727+
# Phase 2: Parallel servers
1728+
if parallel_servers:
1729+
# Calculate max timeout for parallel servers (for the join timeout)
1730+
def calc_server_timeout(server: RemoteServerConfig) -> int:
1731+
pre_cmd_time = sum(
1732+
(cmd.timeout or server.command_timeout) for cmd in server.pre_shutdown_commands
1733+
)
1734+
return pre_cmd_time + server.command_timeout + server.connect_timeout + 60
17291735

1730-
# Wait for all threads to complete with global timeout
1731-
for t in threads:
1732-
t.join(timeout=max_timeout)
1736+
max_timeout = max(calc_server_timeout(s) for s in parallel_servers)
17331737

1734-
# Check for any threads that are still running (timed out)
1735-
still_running = [t for t in threads if t.is_alive()]
1736-
if still_running:
1737-
self._log_message(
1738-
f" ⚠️ {len(still_running)} remote shutdown(s) still in progress "
1739-
"(continuing with local shutdown)"
1740-
)
1738+
# Track results for logging
1739+
results: Dict[str, Tuple[bool, str]] = {}
1740+
results_lock = threading.Lock()
1741+
1742+
def shutdown_server_thread(server: RemoteServerConfig):
1743+
"""Thread worker for shutting down a single server."""
1744+
display_name = server.name or server.host
1745+
try:
1746+
self._shutdown_remote_server(server)
1747+
with results_lock:
1748+
results[display_name] = (True, "")
1749+
except Exception as e:
1750+
with results_lock:
1751+
results[display_name] = (False, str(e))
1752+
1753+
# Start all threads
1754+
threads: List[threading.Thread] = []
1755+
for server in parallel_servers:
1756+
t = threading.Thread(
1757+
target=shutdown_server_thread,
1758+
args=(server,),
1759+
name=f"remote-shutdown-{server.name or server.host}"
1760+
)
1761+
t.start()
1762+
threads.append(t)
1763+
1764+
# Wait for all threads to complete with global timeout
1765+
for t in threads:
1766+
t.join(timeout=max_timeout)
1767+
1768+
# Check for any threads that are still running (timed out)
1769+
still_running = [t for t in threads if t.is_alive()]
1770+
if still_running:
1771+
self._log_message(
1772+
f" ⚠️ {len(still_running)} remote shutdown(s) still in progress "
1773+
"(continuing with local shutdown)"
1774+
)
1775+
1776+
completed += par_count - len(still_running)
17411777

17421778
# Log summary
1743-
completed = server_count - len(still_running)
17441779
self._log_message(f" ✅ Remote shutdown complete ({completed}/{server_count} servers)")
17451780

17461781
def _run_remote_command(

0 commit comments

Comments
 (0)