Skip to content

Commit fc17ffa

Browse files
authored
Merge branch 'henrygd:main' into feat/build-more-arm
2 parents 147b2a6 + cd9ea51 commit fc17ffa

10 files changed

Lines changed: 1643 additions & 695 deletions

File tree

agent/connection_manager.go

Lines changed: 51 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -4,11 +4,15 @@ import (
44
"context"
55
"errors"
66
"log/slog"
7+
"net"
8+
"os"
79
"os/signal"
10+
"strings"
811
"syscall"
912
"time"
1013

1114
"github.com/henrygd/beszel/agent/health"
15+
"github.com/henrygd/beszel/agent/utils"
1216
"github.com/henrygd/beszel/internal/entities/system"
1317
)
1418

@@ -111,13 +115,36 @@ func (c *ConnectionManager) Start(serverOptions ServerOptions) error {
111115
_ = health.Update()
112116
case <-sigCtx.Done():
113117
slog.Info("Shutting down", "cause", context.Cause(sigCtx))
114-
_ = c.agent.StopServer()
115-
c.closeWebSocket()
116-
return health.CleanUp()
118+
return c.stop()
117119
}
118120
}
119121
}
120122

123+
// stop stops the agent's server, closes the WebSocket connection, and runs the health cleanup. It does not stop the connection manager itself: the manager will attempt to reconnect after stopping, so this should only be called immediately before shutting down the entire agent.
124+
//
125+
// If we need or want to expose a graceful Stop method in the future, do something like this to actually stop the manager:
126+
//
127+
// func (c *ConnectionManager) Start(serverOptions ServerOptions) error {
128+
// ctx, cancel := context.WithCancel(context.Background())
129+
// c.cancel = cancel
130+
//
131+
// for {
132+
// select {
133+
// case <-ctx.Done():
134+
// return c.stop()
135+
// }
136+
// }
137+
// }
138+
//
139+
// func (c *ConnectionManager) Stop() {
140+
// c.cancel()
141+
// }
142+
func (c *ConnectionManager) stop() error {
143+
_ = c.agent.StopServer()
144+
c.closeWebSocket()
145+
return health.CleanUp()
146+
}
147+
121148
// handleEvent processes connection events and updates the connection state accordingly.
122149
func (c *ConnectionManager) handleEvent(event ConnectionEvent) {
123150
switch event {
@@ -185,9 +212,16 @@ func (c *ConnectionManager) connect() {
185212

186213
// Try WebSocket first; if it fails, exit when shouldExitOnErr says so, otherwise start the SSH server if still disconnected
187214
err := c.startWebSocketConnection()
188-
if err != nil && c.State == Disconnected {
189-
c.startSSHServer()
190-
c.startWsTicker()
215+
if err != nil {
216+
if shouldExitOnErr(err) {
217+
time.Sleep(2 * time.Second) // prevent tight restart loop
218+
_ = c.stop()
219+
os.Exit(1)
220+
}
221+
if c.State == Disconnected {
222+
c.startSSHServer()
223+
c.startWsTicker()
224+
}
191225
}
192226
}
193227

@@ -224,3 +258,14 @@ func (c *ConnectionManager) closeWebSocket() {
224258
c.wsClient.Close()
225259
}
226260
}
261+
262+
// shouldExitOnErr reports whether err is a *net.OpError whose underlying error message
263+
// contains "lookup" (a DNS resolution failure) while the EXIT_ON_DNS_ERROR env var is "true". See https://github.com/henrygd/beszel/issues/1924.
264+
func shouldExitOnErr(err error) bool {
265+
if val, _ := utils.GetEnv("EXIT_ON_DNS_ERROR"); val == "true" {
266+
if opErr, ok := errors.AsType[*net.OpError](err); ok {
267+
return strings.Contains(opErr.Err.Error(), "lookup")
268+
}
269+
}
270+
return false
271+
}

agent/connection_manager_test.go

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ package agent
44

55
import (
66
"crypto/ed25519"
7+
"errors"
78
"fmt"
89
"net"
910
"net/url"
@@ -298,3 +299,65 @@ func TestConnectionManager_ConnectFlow(t *testing.T) {
298299
cm.connect()
299300
}, "Connect should not panic without WebSocket client")
300301
}
302+
303+
func TestShouldExitOnErr(t *testing.T) {
304+
createDialErr := func(msg string) error {
305+
return &net.OpError{
306+
Op: "dial",
307+
Net: "tcp",
308+
Err: errors.New(msg),
309+
}
310+
}
311+
312+
tests := []struct {
313+
name string
314+
err error
315+
envValue string
316+
expected bool
317+
}{
318+
{
319+
name: "no env var",
320+
err: createDialErr("lookup lkahsdfasdf: no such host"),
321+
envValue: "",
322+
expected: false,
323+
},
324+
{
325+
name: "env var false",
326+
err: createDialErr("lookup lkahsdfasdf: no such host"),
327+
envValue: "false",
328+
expected: false,
329+
},
330+
{
331+
name: "env var true, matching error",
332+
err: createDialErr("lookup lkahsdfasdf: no such host"),
333+
envValue: "true",
334+
expected: true,
335+
},
336+
{
337+
name: "env var true, matching error with extra context",
338+
err: createDialErr("lookup beszel.server.lan on [::1]:53: read udp [::1]:44557->[::1]:53: read: connection refused"),
339+
envValue: "true",
340+
expected: true,
341+
},
342+
{
343+
name: "env var true, non-matching error",
344+
err: errors.New("connection refused"),
345+
envValue: "true",
346+
expected: false,
347+
},
348+
{
349+
name: "env var true, dial but not lookup",
350+
err: createDialErr("connection timeout"),
351+
envValue: "true",
352+
expected: false,
353+
},
354+
}
355+
356+
for _, tt := range tests {
357+
t.Run(tt.name, func(t *testing.T) {
358+
t.Setenv("EXIT_ON_DNS_ERROR", tt.envValue)
359+
result := shouldExitOnErr(tt.err)
360+
assert.Equal(t, tt.expected, result)
361+
})
362+
}
363+
}

internal/cmd/agent/agent.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -195,6 +195,6 @@ func main() {
195195
}
196196

197197
if err := a.Start(serverConfig); err != nil {
198-
log.Fatal("Failed to start server: ", err)
198+
log.Fatal("Failed to start: ", err)
199199
}
200200
}

0 commit comments

Comments
 (0)