Skip to content

Commit d5ccf9c

Browse files
authored
feat: move metrics server config to the policy file (#1572)
* feat(config): add metrics bind config to policy file with flag hack Signed-off-by: Xe Iaso <[email protected]> * feat(internal): move SetupListener from main Signed-off-by: Xe Iaso <[email protected]> * fix(main): use internal.SetupListener Signed-off-by: Xe Iaso <[email protected]> * fix(config): add metrics socket mode Signed-off-by: Xe Iaso <[email protected]> * feat: move metrics server to a dedicated package Signed-off-by: Xe Iaso <[email protected]> * doc: add metrics server configuration docs Signed-off-by: Xe Iaso <[email protected]> * doc(default-config): add vague references to metrics server Signed-off-by: Xe Iaso <[email protected]> * chore: spelling Signed-off-by: Xe Iaso <[email protected]> --------- Signed-off-by: Xe Iaso <[email protected]>
1 parent 3a6e368 commit d5ccf9c

45 files changed

Lines changed: 703 additions & 143 deletions

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

.github/actions/spelling/allow.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,3 +37,4 @@ unipromos
3737
Samsung
3838
wenet
3939
qwertiko
40+
setuplistener

cmd/anubis/main.go

Lines changed: 26 additions & 130 deletions
Original file line numberDiff line numberDiff line change
@@ -17,12 +17,10 @@ import (
1717
"net"
1818
"net/http"
1919
"net/http/httputil"
20-
"net/http/pprof"
2120
"net/url"
2221
"os"
2322
"os/signal"
2423
"path/filepath"
25-
"strconv"
2624
"strings"
2725
"sync"
2826
"syscall"
@@ -33,12 +31,12 @@ import (
3331
"github.com/TecharoHQ/anubis/internal"
3432
libanubis "github.com/TecharoHQ/anubis/lib"
3533
"github.com/TecharoHQ/anubis/lib/config"
34+
"github.com/TecharoHQ/anubis/lib/metrics"
3635
botPolicy "github.com/TecharoHQ/anubis/lib/policy"
3736
"github.com/TecharoHQ/anubis/lib/thoth"
3837
"github.com/TecharoHQ/anubis/web"
3938
"github.com/facebookgo/flagenv"
4039
_ "github.com/joho/godotenv/autoload"
41-
"github.com/prometheus/client_golang/prometheus/promhttp"
4240
healthv1 "google.golang.org/grpc/health/grpc_health_v1"
4341
)
4442

@@ -119,33 +117,6 @@ func doHealthCheck() error {
119117
return nil
120118
}
121119

122-
// parseBindNetFromAddr determines the bind network and address from the given address.
123-
func parseBindNetFromAddr(address string) (string, string) {
124-
defaultScheme := "http://"
125-
if !strings.Contains(address, "://") {
126-
if strings.HasPrefix(address, ":") {
127-
address = defaultScheme + "localhost" + address
128-
} else {
129-
address = defaultScheme + address
130-
}
131-
}
132-
133-
bindUri, err := url.Parse(address)
134-
if err != nil {
135-
log.Fatal(fmt.Errorf("failed to parse bind URL: %w", err))
136-
}
137-
138-
switch bindUri.Scheme {
139-
case "unix":
140-
return "unix", bindUri.Path
141-
case "tcp", "http", "https":
142-
return "tcp", bindUri.Host
143-
default:
144-
log.Fatal(fmt.Errorf("unsupported network scheme %s in address %s", bindUri.Scheme, address))
145-
}
146-
return "", address
147-
}
148-
149120
func parseSameSite(s string) http.SameSite {
150121
switch strings.ToLower(s) {
151122
case "none":
@@ -162,53 +133,6 @@ func parseSameSite(s string) http.SameSite {
162133
return http.SameSiteDefaultMode
163134
}
164135

165-
func setupListener(network string, address string) (net.Listener, string) {
166-
formattedAddress := ""
167-
168-
if network == "" {
169-
// keep compatibility
170-
network, address = parseBindNetFromAddr(address)
171-
}
172-
173-
switch network {
174-
case "unix":
175-
formattedAddress = "unix:" + address
176-
case "tcp":
177-
if strings.HasPrefix(address, ":") { // assume it's just a port e.g. :4259
178-
formattedAddress = "http://localhost" + address
179-
} else {
180-
formattedAddress = "http://" + address
181-
}
182-
default:
183-
formattedAddress = fmt.Sprintf(`(%s) %s`, network, address)
184-
}
185-
186-
listener, err := net.Listen(network, address)
187-
if err != nil {
188-
log.Fatal(fmt.Errorf("failed to bind to %s: %w", formattedAddress, err))
189-
}
190-
191-
// additional permission handling for unix sockets
192-
if network == "unix" {
193-
mode, err := strconv.ParseUint(*socketMode, 8, 0)
194-
if err != nil {
195-
listener.Close()
196-
log.Fatal(fmt.Errorf("could not parse socket mode %s: %w", *socketMode, err))
197-
}
198-
199-
err = os.Chmod(address, os.FileMode(mode))
200-
if err != nil {
201-
err := listener.Close()
202-
if err != nil {
203-
log.Printf("failed to close listener: %v", err)
204-
}
205-
log.Fatal(fmt.Errorf("could not change socket mode: %w", err))
206-
}
207-
}
208-
209-
return listener, formattedAddress
210-
}
211-
212136
func makeReverseProxy(target string, targetSNI string, targetHost string, insecureSkipVerify bool, targetDisableKeepAlive bool) (http.Handler, error) {
213137
targetUri, err := url.Parse(target)
214138
if err != nil {
@@ -304,11 +228,6 @@ func main() {
304228

305229
wg := new(sync.WaitGroup)
306230

307-
if *metricsBind != "" {
308-
wg.Add(1)
309-
go metricsServer(ctx, *lg.With("subsystem", "metrics"), wg.Done)
310-
}
311-
312231
var rp http.Handler
313232
// when using anubis via Systemd and environment variables, it is not possible to set target to an empty string, only to a space
314233
if strings.TrimSpace(*target) != "" {
@@ -348,6 +267,26 @@ func main() {
348267
lg.Debug("swapped to new logger")
349268
slog.SetDefault(lg)
350269

270+
if *metricsBind != "" || policy.Metrics != nil {
271+
wg.Add(1)
272+
273+
ms := &metrics.Server{
274+
Config: policy.Metrics,
275+
Log: lg,
276+
}
277+
278+
if policy.Metrics == nil {
279+
lg.Debug("migrating flags to metrics config", "bind", *metricsBind, "network", *metricsBindNetwork, "socket-mode", *socketMode)
280+
ms.Config = &config.Metrics{
281+
Bind: *metricsBind,
282+
Network: *metricsBindNetwork,
283+
SocketMode: *socketMode,
284+
}
285+
}
286+
287+
go ms.Run(ctx, wg.Done)
288+
}
289+
351290
// Warn if persistent storage is used without a configured signing key
352291
if policy.Store.IsPersistent() {
353292
if *hs512Secret == "" && *ed25519PrivateKeyHex == "" && *ed25519PrivateKeyHexFile == "" {
@@ -484,7 +423,11 @@ func main() {
484423
h = internal.JA4H(h)
485424

486425
srv := http.Server{Handler: h, ErrorLog: internal.GetFilteredHTTPLogger()}
487-
listener, listenerUrl := setupListener(*bindNetwork, *bind)
426+
listener, listenerUrl, err := internal.SetupListener(*bindNetwork, *bind, *socketMode)
427+
if err != nil {
428+
log.Fatalf("SetupListener(%q, %q, %q): %v", *bindNetwork, *bind, *socketMode, err)
429+
}
430+
488431
lg.Info(
489432
"listening",
490433
"url", listenerUrl,
@@ -519,53 +462,6 @@ func main() {
519462
wg.Wait()
520463
}
521464

522-
func metricsServer(ctx context.Context, lg slog.Logger, done func()) {
523-
defer done()
524-
525-
mux := http.NewServeMux()
526-
mux.HandleFunc("GET /debug/pprof/", pprof.Index)
527-
mux.HandleFunc("GET /debug/pprof/cmdline", pprof.Cmdline)
528-
mux.HandleFunc("GET /debug/pprof/profile", pprof.Profile)
529-
mux.HandleFunc("GET /debug/pprof/symbol", pprof.Symbol)
530-
mux.HandleFunc("GET /debug/pprof/trace", pprof.Trace)
531-
mux.Handle("/metrics", promhttp.Handler())
532-
mux.HandleFunc("/healthz", func(w http.ResponseWriter, r *http.Request) {
533-
st, ok := internal.GetHealth("anubis")
534-
if !ok {
535-
slog.Error("health service anubis does not exist, file a bug")
536-
}
537-
538-
switch st {
539-
case healthv1.HealthCheckResponse_NOT_SERVING:
540-
http.Error(w, "NOT OK", http.StatusInternalServerError)
541-
return
542-
case healthv1.HealthCheckResponse_SERVING:
543-
fmt.Fprintln(w, "OK")
544-
return
545-
default:
546-
http.Error(w, "UNKNOWN", http.StatusFailedDependency)
547-
return
548-
}
549-
})
550-
551-
srv := http.Server{Handler: mux, ErrorLog: internal.GetFilteredHTTPLogger()}
552-
listener, metricsUrl := setupListener(*metricsBindNetwork, *metricsBind)
553-
lg.Debug("listening for metrics", "url", metricsUrl)
554-
555-
go func() {
556-
<-ctx.Done()
557-
c, cancel := context.WithTimeout(context.Background(), 5*time.Second)
558-
defer cancel()
559-
if err := srv.Shutdown(c); err != nil {
560-
log.Printf("cannot shut down: %v", err)
561-
}
562-
}()
563-
564-
if err := srv.Serve(listener); !errors.Is(err, http.ErrServerClosed) {
565-
log.Fatal(err)
566-
}
567-
}
568-
569465
func extractEmbedFS(fsys embed.FS, root string, destDir string) error {
570466
return fs.WalkDir(fsys, root, func(path string, d fs.DirEntry, err error) error {
571467
if err != nil {

data/botPolicies.yaml

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -166,6 +166,15 @@ status_codes:
166166
CHALLENGE: 200
167167
DENY: 200
168168

169+
# # Configuration for the metrics server. See the docs for more information:
170+
# #
171+
# # https://anubis.techaro.lol/docs/admin/policies#metrics-server
172+
# #
173+
# # This is commented out by default so that command line flags take precedence.
174+
# metrics:
175+
# bind: ":9090"
176+
# network: "tcp"
177+
169178
# Anubis can store temporary data in one of a few backends. See the storage
170179
# backends section of the docs for more information:
171180
#

docs/docs/CHANGELOG.md

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,14 +11,15 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
1111

1212
## [Unreleased]
1313

14+
<!-- This changes the project to: -->
15+
16+
- Move metrics server configuration to [the policy file](./admin/policies.mdx#metrics-server).
1417
- Expose [pprof endpoints](https://pkg.go.dev/net/http/pprof) on the metrics listener to enable profiling Anubis in production.
1518
- fix: prevent nil pointer panic in challenge validation when threshold rules match during PassChallenge (#1463)
1619
- Instruct reverse proxies to not cache error pages.
1720
- Fixed mixed tab/space indentation in Caddy documentation code block
1821
- Improve error messages and fix broken REDIRECT_DOMAINS link in docs ([#1193](https://github.com/TecharoHQ/anubis/issues/1193))
1922
- Add Bulgarian locale ([#1394](https://github.com/TecharoHQ/anubis/pull/1394))
20-
21-
<!-- This changes the project to: -->
2223
- Fix CEL internal errors when iterating `headers`/`query` map wrappers by implementing map iterators for `HTTPHeaders` and `URLValues` ([#1465](https://github.com/TecharoHQ/anubis/pull/1465)).
2324

2425
## v1.25.0: Necron

docs/docs/admin/installation.mdx

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -87,15 +87,15 @@ Anubis uses these environment variables for configuration:
8787
| `ED25519_PRIVATE_KEY_HEX_FILE` | unset | Path to a file containing the hex-encoded ed25519 private key. Only one of this or its sister option may be set. **Required when using persistent storage backends** (like bbolt) to ensure challenges survive service restarts. When running multiple instances on the same base domain, the key must be the same across all instances. |
8888
| `ERROR_TITLE` | unset | <EO /> If set, override the translation stack to show a custom title for error pages such as "Something went wrong!". See [Customizing messages](./botstopper.mdx#customizing-messages) for more details. |
8989
| `JWT_RESTRICTION_HEADER` | `X-Real-IP` | If set, the JWT is only valid if the current value of this header matches the value when the JWT was created. You can use it e.g. to restrict a JWT to the source IP of the user using `X-Real-IP`. |
90-
| `METRICS_BIND` | `:9090` | The network address that Anubis serves Prometheus metrics on. See `BIND` for more information. |
91-
| `METRICS_BIND_NETWORK` | `tcp` | The address family that the Anubis metrics server listens on. See `BIND_NETWORK` for more information. |
90+
| `METRICS_BIND` | `:9090` | The legacy configuration value for the network address that Anubis serves Prometheus metrics on. Please migrate this to [the policy file](./policies.mdx#metrics-server) as soon as possible. |
91+
| `METRICS_BIND_NETWORK` | `tcp` | The legacy configuration value for the address family that Anubis serves Prometheus metrics on. Please migrate this to [the policy file](./policies.mdx#metrics-server) as soon as possible. |
9292
| `OG_EXPIRY_TIME` | `24h` | The expiration time for the Open Graph tag cache. Prefer using [the policy file](./configuration/open-graph.mdx) to configure the Open Graph subsystem. |
9393
| `OG_PASSTHROUGH` | `false` | If set to `true`, Anubis will enable Open Graph tag passthrough. Prefer using [the policy file](./configuration/open-graph.mdx) to configure the Open Graph subsystem. |
9494
| `OG_CACHE_CONSIDER_HOST` | `false` | If set to `true`, Anubis will consider the host in the Open Graph tag cache key. Prefer using [the policy file](./configuration/open-graph.mdx) to configure the Open Graph subsystem. |
9595
| `OVERLAY_FOLDER` | unset | <EO /> If set, treat the given path as an [overlay folder](./botstopper.mdx#custom-images-and-css), allowing you to customize CSS, fonts, images, and add other assets to BotStopper deployments. |
9696
| `POLICY_FNAME` | unset | The file containing [bot policy configuration](./policies.mdx). See the bot policy documentation for more details. If unset, the default bot policy configuration is used. |
9797
| `PUBLIC_URL` | unset | The externally accessible URL for this Anubis instance, used for constructing redirect URLs (e.g., for Traefik forwardAuth). Leave it unset when Anubis terminates traffic directly (sidecar/standalone deployments) or redirect building will fail with `redir=null`. |
98-
| `REDIRECT_DOMAINS` | unset | Comma-separated list of domain names that Anubis should allow redirects to when passing a challenge. See [Redirect Domain Configuration](./configuration/redirect-domains.mdx) for more details. |
98+
| `REDIRECT_DOMAINS` | unset | Comma-separated list of domain names that Anubis should allow redirects to when passing a challenge. See [Redirect Domain Configuration](./configuration/redirect-domains.mdx) for more details. |
9999
| `SERVE_ROBOTS_TXT` | `false` | If set to `true`, Anubis will serve a default `robots.txt` file that disallows all known AI scrapers by name and then additionally disallows every scraper. This is useful if facts and circumstances make it difficult to change the underlying service to serve such a `robots.txt` file. |
100100
| `SLOG_LEVEL` | `INFO` | The log level for structured logging. Valid values are `DEBUG`, `INFO`, `WARN`, and `ERROR`. Set to `DEBUG` to see all requests, evaluations, and detailed diagnostic information. |
101101
| `SOCKET_MODE` | `0770` | _Only used when at least one of the `*_BIND_NETWORK` variables is set to `unix`._ The socket mode (permissions) for Unix domain sockets. |

docs/docs/admin/policies.mdx

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -117,6 +117,27 @@ remote_addresses:
117117
- 100.64.0.0/10
118118
```
119119

120+
## Metrics server
121+
122+
Anubis includes support for [Prometheus-style metrics](https://prometheus.io/docs/introduction/overview/), allowing systems administrators to monitor Anubis' performance and effectiveness. This is a separate HTTP server with metrics, health checking, and debug routes.
123+
124+
Anubis' metrics server is configured with the `metrics` block in the configuration file:
125+
126+
```yaml
127+
metrics:
128+
bind: ":9090"
129+
network: "tcp"
130+
```
131+
132+
If you want to bind metrics to a Unix socket, make sure to set the network to `unix` and add a socket mode:
133+
134+
```yaml
135+
metrics:
136+
bind: "/tmp/anubis_metrics.sock"
137+
network: unix
138+
socketMode: "0700" # must be a string
139+
```
140+
120141
## Imprint / Impressum support
121142

122143
Anubis has support for showing imprint / impressum information. This is defined in the `impressum` block of your configuration. See [Imprint / Impressum configuration](./configuration/impressum.mdx) for more information.

0 commit comments

Comments
 (0)