Skip to content

Commit 83b7095

Browse files
authored
Merge pull request #224 from safing/fix/metrics-modules-api
Fix metrics, modules, api
2 parents 7872911 + be48ba3 commit 83b7095

10 files changed

Lines changed: 133 additions & 116 deletions

File tree

api/authentication.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -151,7 +151,7 @@ func authenticateRequest(w http.ResponseWriter, r *http.Request, targetHandler h
151151
switch requiredPermission { //nolint:exhaustive
152152
case NotFound:
153153
// Not found.
154-
tracer.Trace("api: authenticated handler reported: not found")
154+
tracer.Debug("api: no API endpoint registered for this path")
155155
http.Error(w, "Not found.", http.StatusNotFound)
156156
return nil
157157
case NotSupported:

api/router.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -235,6 +235,7 @@ func (mh *mainHandler) handle(w http.ResponseWriter, r *http.Request) error {
235235
http.Error(lrw, "Method not allowed.", http.StatusMethodNotAllowed)
236236
return nil
237237
default:
238+
tracer.Debug("api: no handler registered for this path")
238239
http.Error(lrw, "Not found.", http.StatusNotFound)
239240
return nil
240241
}

metrics/api.go

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,6 @@ import (
1111
"github.com/safing/portbase/api"
1212
"github.com/safing/portbase/config"
1313
"github.com/safing/portbase/log"
14-
"github.com/safing/portbase/utils"
1514
)
1615

1716
func registerAPI() error {
@@ -140,11 +139,7 @@ func writeMetricsTo(ctx context.Context, url string) error {
140139
)
141140
}
142141

143-
var metricsPusherDone = utils.NewBroadcastFlag()
144-
145142
func metricsWriter(ctx context.Context) error {
146-
defer metricsPusherDone.NotifyAndReset()
147-
148143
pushURL := pushOption()
149144
ticker := module.NewSleepyTicker(1*time.Minute, 0)
150145
defer ticker.Stop()

metrics/metrics_host.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ import (
1616

1717
const hostStatTTL = 1 * time.Second
1818

19-
func registeHostMetrics() (err error) {
19+
func registerHostMetrics() (err error) {
2020
// Register load average metrics.
2121
_, err = NewGauge("host/load/avg/1", nil, getFloat64HostStat(LoadAvg1), &Options{Name: "Host Load Avg 1min", Permission: api.PermitUser})
2222
if err != nil {

metrics/metrics_info.go

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,10 +3,13 @@ package metrics
33
import (
44
"runtime"
55
"strings"
6+
"sync/atomic"
67

78
"github.com/safing/portbase/info"
89
)
910

11+
var reportedStart atomic.Bool
12+
1013
func registerInfoMetric() error {
1114
meta := info.GetInfo()
1215
_, err := NewGauge(
@@ -26,6 +29,10 @@ func registerInfoMetric() error {
2629
"comment": commentOption(),
2730
},
2831
func() float64 {
32+
// Report as 0 the first time in order to detect (re)starts.
33+
if reportedStart.CompareAndSwap(false, true) {
34+
return 0
35+
}
2936
return 1
3037
},
3138
nil,

metrics/metrics_logs.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ import (
55
"github.com/safing/portbase/log"
66
)
77

8-
func registeLogMetrics() (err error) {
8+
func registerLogMetrics() (err error) {
99
_, err = NewFetchingCounter(
1010
"logs/warning/total",
1111
nil,

metrics/module.go

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,6 @@ import (
55
"fmt"
66
"sort"
77
"sync"
8-
"time"
98

109
"github.com/safing/portbase/modules"
1110
)
@@ -59,11 +58,11 @@ func start() error {
5958
return err
6059
}
6160

62-
if err := registeHostMetrics(); err != nil {
61+
if err := registerHostMetrics(); err != nil {
6362
return err
6463
}
6564

66-
if err := registeLogMetrics(); err != nil {
65+
if err := registerLogMetrics(); err != nil {
6766
return err
6867
}
6968

@@ -82,16 +81,13 @@ func stop() error {
8281
// Wait until the metrics pusher is done, as it may have started reporting
8382
// and may report a higher number than we store to disk. For persistent
8483
// metrics it can then happen that the first report is lower than the
85-
// previous report, making prometheus think that al that happened since the
84+
// previous report, making prometheus think that all that happened since the
8685
// last report, due to the automatic restart detection.
87-
done := metricsPusherDone.NewFlag()
88-
done.Refresh()
89-
if !done.IsSet() {
90-
select {
91-
case <-done.Signal():
92-
case <-time.After(10 * time.Second):
93-
}
94-
}
86+
87+
// The registry is read locked when writing metrics.
88+
// Write lock the registry to make sure all writes are finished.
89+
registryLock.Lock()
90+
registryLock.Unlock() //nolint:staticcheck
9591

9692
storePersistentMetrics()
9793

@@ -120,6 +116,10 @@ func register(m Metric) error {
120116
// Set flag that first metric is now registered.
121117
firstMetricRegistered = true
122118

119+
if module.Status() < modules.StatusStarting {
120+
return fmt.Errorf("registering metric %q too early", m.ID())
121+
}
122+
123123
return nil
124124
}
125125

metrics/persistence.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ var (
2525
})
2626

2727
// ErrAlreadyInitialized is returned when trying to initialize an option
28-
// more than once.
28+
// more than once or if the time window for initializing is over.
2929
ErrAlreadyInitialized = errors.New("already initialized")
3030
)
3131

@@ -55,7 +55,7 @@ func EnableMetricPersistence(key string) error {
5555

5656
// Load metrics from storage.
5757
var err error
58-
storage, err = getMetricsStorage(key)
58+
storage, err = getMetricsStorage(storageKey)
5959
switch {
6060
case err == nil:
6161
// Continue.

modules/modules.go

Lines changed: 43 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -57,10 +57,11 @@ type Module struct { //nolint:maligned
5757
// start
5858
startComplete chan struct{}
5959
// stop
60-
Ctx context.Context
61-
cancelCtx func()
62-
stopFlag *abool.AtomicBool
63-
stopComplete chan struct{}
60+
Ctx context.Context
61+
cancelCtx func()
62+
stopFlag *abool.AtomicBool
63+
stopCompleted *abool.AtomicBool
64+
stopComplete chan struct{}
6465

6566
// workers/tasks
6667
ctrlFuncRunning *abool.AtomicBool
@@ -255,12 +256,10 @@ func (m *Module) checkIfStopComplete() {
255256
atomic.LoadInt32(m.taskCnt) == 0 &&
256257
atomic.LoadInt32(m.microTaskCnt) == 0 {
257258

258-
m.Lock()
259-
defer m.Unlock()
260-
261-
if m.stopComplete != nil {
259+
if m.stopCompleted.SetToIf(false, true) {
260+
m.Lock()
261+
defer m.Unlock()
262262
close(m.stopComplete)
263-
m.stopComplete = nil
264263
}
265264
}
266265
}
@@ -283,60 +282,56 @@ func (m *Module) stop(reports chan *report) {
283282
// Reset start/stop signal channels.
284283
m.startComplete = make(chan struct{})
285284
m.stopComplete = make(chan struct{})
285+
m.stopCompleted.SetTo(false)
286286

287-
// Make a copy of the stop channel.
288-
stopComplete := m.stopComplete
289-
290-
// Set status and cancel context.
287+
// Set status.
291288
m.status = StatusStopping
292-
m.stopFlag.Set()
293-
m.cancelCtx()
294289

295-
go m.stopAllTasks(reports, stopComplete)
290+
go m.stopAllTasks(reports)
296291
}
297292

298-
func (m *Module) stopAllTasks(reports chan *report, stopComplete chan struct{}) {
299-
// start shutdown function
300-
var stopFnError error
301-
stopFuncRunning := abool.New()
302-
if m.stopFn != nil {
303-
stopFuncRunning.Set()
304-
go func() {
305-
stopFnError = m.runCtrlFn("stop module", m.stopFn)
306-
stopFuncRunning.UnSet()
307-
m.checkIfStopComplete()
308-
}()
309-
} else {
310-
m.checkIfStopComplete()
311-
}
293+
func (m *Module) stopAllTasks(reports chan *report) {
294+
// Manually set the control function flag in order to stop completion by race
295+
// condition before stop function has even started.
296+
m.ctrlFuncRunning.Set()
297+
298+
// Set stop flag for everyone checking this flag before we activate any stop trigger.
299+
m.stopFlag.Set()
300+
301+
// Cancel the context to notify all workers and tasks.
302+
m.cancelCtx()
303+
304+
// Start stop function.
305+
stopFnError := m.startCtrlFn("stop module", m.stopFn)
312306

313307
// wait for results
314308
select {
315-
case <-stopComplete:
316-
// case <-time.After(moduleStopTimeout):
309+
case <-m.stopComplete:
310+
// Complete!
317311
case <-time.After(moduleStopTimeout):
318312
log.Warningf(
319-
"%s: timed out while waiting for stopfn/workers/tasks to finish: stopFn=%v/%v workers=%d tasks=%d microtasks=%d, continuing shutdown...",
313+
"%s: timed out while waiting for stopfn/workers/tasks to finish: stopFn=%v workers=%d tasks=%d microtasks=%d, continuing shutdown...",
320314
m.Name,
321-
stopFuncRunning.IsSet(), m.ctrlFuncRunning.IsSet(),
315+
m.ctrlFuncRunning.IsSet(),
322316
atomic.LoadInt32(m.workerCnt),
323317
atomic.LoadInt32(m.taskCnt),
324318
atomic.LoadInt32(m.microTaskCnt),
325319
)
326320
}
327321

328-
// collect error
322+
// Check for stop fn status.
329323
var err error
330-
if stopFuncRunning.IsNotSet() && stopFnError != nil {
331-
err = stopFnError
332-
}
333-
// set status
334-
if err != nil {
335-
m.Error(
336-
fmt.Sprintf("%s:stop-failed", m.Name),
337-
fmt.Sprintf("Stopping module %s failed", m.Name),
338-
fmt.Sprintf("Failed to stop module: %s", err.Error()),
339-
)
324+
select {
325+
case err = <-stopFnError:
326+
if err != nil {
327+
// Set error as module error.
328+
m.Error(
329+
fmt.Sprintf("%s:stop-failed", m.Name),
330+
fmt.Sprintf("Stopping module %s failed", m.Name),
331+
fmt.Sprintf("Failed to stop module: %s", err.Error()),
332+
)
333+
}
334+
default:
340335
}
341336

342337
// Always set to offline in order to let other modules shutdown in order.
@@ -384,7 +379,7 @@ func initNewModule(name string, prep, start, stop func() error, dependencies ...
384379
Name: name,
385380
enabled: abool.NewBool(false),
386381
enabledAsDependency: abool.NewBool(false),
387-
sleepMode: abool.NewBool(true),
382+
sleepMode: abool.NewBool(true), // Change (for init) is triggered below.
388383
sleepWaitingChannel: make(chan time.Time),
389384
prepFn: prep,
390385
startFn: start,
@@ -393,6 +388,7 @@ func initNewModule(name string, prep, start, stop func() error, dependencies ...
393388
Ctx: ctx,
394389
cancelCtx: cancelCtx,
395390
stopFlag: abool.NewBool(false),
391+
stopCompleted: abool.NewBool(true),
396392
ctrlFuncRunning: abool.NewBool(false),
397393
workerCnt: &workerCnt,
398394
taskCnt: &taskCnt,
@@ -401,7 +397,7 @@ func initNewModule(name string, prep, start, stop func() error, dependencies ...
401397
depNames: dependencies,
402398
}
403399

404-
// Sleep mode is disabled by default
400+
// Sleep mode is disabled by default.
405401
newModule.Sleep(false)
406402

407403
return newModule

0 commit comments

Comments
 (0)