@@ -4,11 +4,15 @@ import (
44 "context"
55 "errors"
66 "log/slog"
7+ "net"
8+ "os"
79 "os/signal"
10+ "strings"
811 "syscall"
912 "time"
1013
1114 "github.com/henrygd/beszel/agent/health"
15+ "github.com/henrygd/beszel/agent/utils"
1216 "github.com/henrygd/beszel/internal/entities/system"
1317)
1418
@@ -111,13 +115,36 @@ func (c *ConnectionManager) Start(serverOptions ServerOptions) error {
111115 _ = health .Update ()
112116 case <- sigCtx .Done ():
113117 slog .Info ("Shutting down" , "cause" , context .Cause (sigCtx ))
114- _ = c .agent .StopServer ()
115- c .closeWebSocket ()
116- return health .CleanUp ()
118+ return c .stop ()
117119 }
118120 }
119121}
120122
123+ // stop does not stop the connection manager itself, just any active connections. The manager will attempt to reconnect after stopping, so this should only be called immediately before shutting down the entire agent.
124+ //
125+ // If we need or want to expose a graceful Stop method in the future, do something like this to actually stop the manager:
126+ //
127+ // func (c *ConnectionManager) Start(serverOptions ServerOptions) error {
128+ // ctx, cancel := context.WithCancel(context.Background())
129+ // c.cancel = cancel
130+ //
131+ // for {
132+ // select {
133+ // case <-ctx.Done():
134+ // return c.stop()
135+ // }
136+ // }
137+ // }
138+ //
139+ // func (c *ConnectionManager) Stop() {
140+ // c.cancel()
141+ // }
142+ func (c * ConnectionManager ) stop () error {
143+ _ = c .agent .StopServer ()
144+ c .closeWebSocket ()
145+ return health .CleanUp ()
146+ }
147+
121148// handleEvent processes connection events and updates the connection state accordingly.
122149func (c * ConnectionManager ) handleEvent (event ConnectionEvent ) {
123150 switch event {
@@ -185,9 +212,16 @@ func (c *ConnectionManager) connect() {
185212
186213 // Try WebSocket first, if it fails, start SSH server
187214 err := c .startWebSocketConnection ()
188- if err != nil && c .State == Disconnected {
189- c .startSSHServer ()
190- c .startWsTicker ()
215+ if err != nil {
216+ if shouldExitOnErr (err ) {
217+ time .Sleep (2 * time .Second ) // prevent tight restart loop
218+ _ = c .stop ()
219+ os .Exit (1 )
220+ }
221+ if c .State == Disconnected {
222+ c .startSSHServer ()
223+ c .startWsTicker ()
224+ }
191225 }
192226}
193227
@@ -224,3 +258,14 @@ func (c *ConnectionManager) closeWebSocket() {
224258 c .wsClient .Close ()
225259 }
226260}
261+
262+ // shouldExitOnErr checks if the error is a DNS resolution failure and if the
263+ // EXIT_ON_DNS_ERROR env var is set. https://github.com/henrygd/beszel/issues/1924.
264+ func shouldExitOnErr (err error ) bool {
265+ if val , _ := utils .GetEnv ("EXIT_ON_DNS_ERROR" ); val == "true" {
266+ if opErr , ok := errors.AsType [* net.OpError ](err ); ok {
267+ return strings .Contains (opErr .Err .Error (), "lookup" )
268+ }
269+ }
270+ return false
271+ }
0 commit comments