diff --git a/Makefile b/Makefile index 71ddd5d22..63c0340c8 100644 --- a/Makefile +++ b/Makefile @@ -155,3 +155,8 @@ build/ve/bin/activate: scripts/requirements.txt scripts/requirements-dev.txt .PHONY: yamllint yamllint: ve build/ve/bin/yamllint -d '{extends: default, rules: {line-length: disable}}' schemas/*.yml + +# Build the ECS MCP server. +.PHONY: ecs-mcp +ecs-mcp: + go -C go-ecs build -o ../build github.com/elastic/ecs/go-ecs/cmd/ecs-mcp diff --git a/go-ecs/cmd/ecs-mcp/main.go b/go-ecs/cmd/ecs-mcp/main.go new file mode 100644 index 000000000..c631b78c0 --- /dev/null +++ b/go-ecs/cmd/ecs-mcp/main.go @@ -0,0 +1,301 @@ +// Licensed to Elasticsearch B.V. under one or more agreements. +// Elasticsearch B.V. licenses this file to you under the Apache 2.0 License. +// See the LICENSE file in the project root for more information. + +package main + +import ( + "bytes" + "context" + "errors" + "flag" + "fmt" + "log/slog" + "net/http" + "os" + "os/signal" + "path/filepath" + "runtime/debug" + "strconv" + "strings" + "time" + + "github.com/Masterminds/semver/v3" + "github.com/gorilla/handlers" + "github.com/modelcontextprotocol/go-sdk/mcp" + + "github.com/elastic/ecs/go-ecs/internal/field" + "github.com/elastic/ecs/go-ecs/internal/git" + ecsmcp "github.com/elastic/ecs/go-ecs/internal/mcp" + "github.com/elastic/ecs/go-ecs/internal/store" + + _ "modernc.org/sqlite" +) + +var ( + dbFile string + ecsDir string + listen string + certFile string + keyFile string + insecure bool + enableDebug bool + showVersion bool +) + +func parseArgs() { + flag.StringVar(&dbFile, "db", "", "path to database file (when omitted, creates a temporary db that is removed on exit)") + flag.StringVar(&ecsDir, "dir", "", "path to local checkout of ECS") + flag.StringVar(&listen, "listen", "", "listen for HTTP requests on this address, instead of stdin/stdout") + flag.StringVar(&certFile, "cert", "cert.pem", "path to TLS certificate file") + flag.StringVar(&keyFile, "key", "key.pem", "path to TLS private key file") + flag.BoolVar(&insecure, "insecure", false, "disable TLS") + flag.BoolVar(&showVersion, "version", false, "print version information and exit") + flag.BoolVar(&enableDebug, "debug", false, "enable debug logging") + + flag.Parse() +} + +func readEnv() { + getStringEnv("ECS_MCP_DIR", &ecsDir) + getStringEnv("ECS_MCP_LISTEN", &listen) + getStringEnv("ECS_MCP_CERT_FILE", &certFile) + getStringEnv("ECS_MCP_KEY_FILE", &keyFile) + getBoolEnv("ECS_MCP_INSECURE", &insecure) + getBoolEnv("ECS_MCP_DEBUG", &enableDebug) +} + +func getStringEnv(key string, target *string) { + if value, ok := os.LookupEnv(key); ok { + *target = value + } +} + +func getBoolEnv(key string, target *bool) { + if value, ok := os.LookupEnv(key); ok { + if v, err := strconv.ParseBool(value); err == nil { + *target = v + } else { + slog.Warn("Unable to parse boolean from environment variable", slog.String("env", key)) + } + } +} + +func getVersion() (modVer, vcsRef string) { + info, ok := debug.ReadBuildInfo() + if !ok { + return "", "" + } + + modVer = info.Main.Version + vcsRef = "unknown" + for _, setting := range info.Settings { + if setting.Key == "vcs.revision" { + vcsRef = setting.Value + break + } + } + + return modVer, vcsRef +} + +func getTags(ctx context.Context, gitRepo *git.Repo) ([]string, error) { + minVersion := semver.MustParse("v1.12.0") + + rawTags, err := gitRepo.Tags(ctx) + if err != nil { + return nil, err + } + + var tags []string + for _, tag := range rawTags { + if !strings.HasPrefix(tag, "v") { + continue + } + ver, err := semver.NewVersion(tag) + if err != nil || ver.LessThan(minVersion) { + continue + } + + tags = append(tags, tag) + } + + return tags, nil +} + +func getSchemas(ctx context.Context, repo *git.Repo) ([]*field.Schema, error) { + var schemas []*field.Schema + + tags, err := getTags(ctx, repo) + if err != nil { + return nil, err + } + + seenVersions := map[string]struct{}{} + + for _, tag := range tags { + ref, err := repo.TagToHash(ctx, tag) + if err != nil { + return nil, err + } + + versionRaw, err := repo.ReadFile(ctx, ref, "version") + if err != nil { + return nil, err + } + version := string(bytes.TrimSpace(versionRaw)) + + if _, ok := seenVersions[version]; ok { + continue + } + seenVersions[version] = struct{}{} + + defRaw, err := repo.ReadFile(ctx, ref, "generated/ecs/ecs_nested.yml") + if err != nil { + return nil, err + } + schema, err := field.Parse(defRaw) + if err != nil { + return nil, err + } + schema.Version = version + schemas = append(schemas, schema) + } + + return schemas, nil +} + +// Main fetches the ECS schema, loads it into a SQLite database, and runs the +// MCP server over either HTTP or stdio depending on command-line flags. +func Main() error { + ctx, cancel := signal.NotifyContext(context.Background(), os.Interrupt) + defer cancel() + + repo, err := git.NewRepo(ctx, ecsDir, "https://github.com/elastic/ecs.git") + if err != nil { + return err + } + + schemas, err := getSchemas(ctx, repo) + if err != nil { + return err + } + + // Store fields. + if dbFile == "" { + dbFile = filepath.Join(os.TempDir(), fmt.Sprintf("ecs-mcp-%d.db", os.Getpid())) + slog.Info("Using temporary database file", slog.String("path", dbFile)) + defer os.Remove(dbFile) + } + + db, err := store.NewDB(ctx, dbFile, schemas) + if err != nil { + return err + } + defer db.Close() + + // Run MCP server. + modVer, vcsRef := getVersion() + mcpSrv := mcp.NewServer(&mcp.Implementation{ + Name: "ecs-mcp", + Version: modVer + "(" + vcsRef + ")", + }, nil) + ecsmcp.AddTools(mcpSrv, store.DDL, db) + ecsmcp.AddPrompts(mcpSrv) + + if listen != "" { + var handler http.Handler = mcp.NewStreamableHTTPHandler( + func(r *http.Request) *mcp.Server { return mcpSrv }, + &mcp.StreamableHTTPOptions{ + Stateless: true, + }, + ) + handler = handlers.CombinedLoggingHandler(os.Stderr, handler) + + httpSrv := &http.Server{ + Addr: listen, + Handler: handler, + } + doneCh := make(chan struct{}) + + go func() { + timeoutCtx, timeoutCancel := context.WithTimeout(context.Background(), 5*time.Second) + defer timeoutCancel() + + <-ctx.Done() + + _ = httpSrv.Shutdown(timeoutCtx) + close(doneCh) + }() + + srvURL := listen + if strings.HasPrefix(listen, ":") { + srvURL = "localhost" + srvURL + } + if insecure { + srvURL = "http://" + srvURL + } else { + srvURL = "https://" + srvURL + } + + slog.Info("Starting server", slog.String("listen", httpSrv.Addr), slog.String("url", srvURL)) + + if insecure { + err = httpSrv.ListenAndServe() + } else { + err = httpSrv.ListenAndServeTLS(certFile, keyFile) + } + if err != nil { + if errors.Is(err, http.ErrServerClosed) { + err = nil + } + cancel() + } + <-doneCh + + slog.Info("Server shut down", slog.String("listen", httpSrv.Addr)) + + return err + } + + t := &mcp.LoggingTransport{ + Transport: &mcp.StdioTransport{}, + Writer: os.Stderr, + } + + if err = mcpSrv.Run(ctx, t); err != nil && !errors.Is(err, context.Canceled) { + return fmt.Errorf("failed to run stdio server: %w", err) + } + + return nil +} + +func main() { + parseArgs() + readEnv() + + if showVersion { + modVer, vcsRef := getVersion() + _, _ = fmt.Fprintf(os.Stderr, "ecs-mcp version %s [commit %v]\n", modVer, vcsRef) + os.Exit(0) + } + + level := slog.LevelInfo + if enableDebug { + level = slog.LevelDebug + } + logHandler := slog.NewJSONHandler(os.Stderr, &slog.HandlerOptions{Level: level}) + slog.SetDefault(slog.New(logHandler)) + + var err error + if ecsDir == "" { + if ecsDir, err = os.Getwd(); err != nil { + slog.Error("Failed to get current working directory", slog.String("error", err.Error())) + os.Exit(1) + } + } + if err = Main(); err != nil { + slog.Error("Error running app", slog.String("error", err.Error())) + os.Exit(1) + } +} diff --git a/go-ecs/docs/ecs-mcp.md b/go-ecs/docs/ecs-mcp.md new file mode 100644 index 000000000..5ef172153 --- /dev/null +++ b/go-ecs/docs/ecs-mcp.md @@ -0,0 +1,135 @@ +# ecs-mcp + +A [Model Context Protocol](https://modelcontextprotocol.io/) server that exposes the +[Elastic Common Schema (ECS)](https://www.elastic.co/guide/en/ecs/current/index.html) to +LLM-powered tools. It loads every tagged ECS release into a local SQLite database and +serves tools for searching, matching, and querying the schema at a specific version — +so agents can reliably align vendor or custom fields with ECS without hallucinating +field names. + +## Features + +- **All tagged ECS releases in one database** — reads every tag (>= v1.12.0) from a + local checkout of `elastic/ecs` and loads each release into a single SQLite database + with a full-text search index over field names and descriptions. Tool callers select + which version to query at call time. +- **MCP tools for exploration** — inspect the database schema, run ad-hoc SQL, check + whether a list of field names exists in a given ECS version, or full-text search + field definitions for that version. +- **MCP prompt: `ecs_guide`** — a ready-to-use guide that primes the model on how to + combine the tools effectively, including how to select a version. +- **Flexible transports** — runs over stdio (for local MCP clients) or streamable HTTP + with optional TLS (for shared deployments). + +## Installation + +### Build from source + +Requires Go 1.26 or later. + +```sh +git clone https://github.com/elastic/ecs +cd ecs +make ecs-mcp +``` + +The resulting `ecs-mcp` binary is self-contained. + +### Running the server + +On startup the server reads all tagged ECS releases from the local checkout at +`-dir` (cloned on first run if the path does not yet exist) and loads each one +into the database. The server then runs over stdio by default: + +```sh +./ecs-mcp -dir /path/to/ecs +``` + +`-dir` is required. + +Useful flags (also settable via environment variable): + +| Flag | Env | Description | +|-------------|---------------------|-------------------------------------------------------------------------------------------------------------| +| `-dir` | `ECS_MCP_DIR` | Path to a local checkout of `elastic/ecs`. Cloned on if it does not exist. **Required.** | +| `-db` | — | Path to a SQLite file to persist the loaded schema. When omitted, a temp DB is created and removed on exit. | +| `-listen` | `ECS_MCP_LISTEN` | Run an HTTP server on this address (e.g. `localhost:8443`) instead of stdio. | +| `-cert` | `ECS_MCP_CERT_FILE` | TLS certificate for HTTP mode. Default `cert.pem`. | +| `-key` | `ECS_MCP_KEY_FILE` | TLS key for HTTP mode. Default `key.pem`. | +| `-insecure` | `ECS_MCP_INSECURE` | Disable TLS in HTTP mode. | +| `-debug` | `ECS_MCP_DEBUG` | Enable debug logging. | +| `-version` | — | Print version information and exit. | + +## MCP server setup + +### Claude / Claude Code + +Add an entry to your MCP client config (`claude_desktop_config.json` for the desktop app, +`~/.claude.json` or a project `.mcp.json` for Claude Code): + +```json +{ + "mcpServers": { + "ecs": { + "command": "/absolute/path/to/ecs-mcp", + "args": ["-dir", "/absolute/path/to/ecs"] + } + } +} +``` + +Or from the Claude Code CLI: + +```sh +claude mcp add ecs /absolute/path/to/ecs-mcp -- -dir /absolute/path/to/ecs +``` + +Restart the client and confirm the `ecs` server is connected. The `ecs_guide` prompt and +`ecs_*` tools should be available. + +### Cursor + +Add the server to `~/.cursor/mcp.json` (global) or `.cursor/mcp.json` in your project: + +```json +{ + "mcpServers": { + "ecs": { + "command": "/absolute/path/to/ecs-mcp", + "args": ["-dir", "/absolute/path/to/ecs"] + } + } +} +``` + +Reload Cursor and verify the server shows as connected under Settings → MCP. + +### HTTP transport + +To share one instance across multiple clients, run with `-listen`: + +```sh +./ecs-mcp -listen :8443 -cert cert.pem -key key.pem +# or, run without HTTPS: +./ecs-mcp -listen :8080 -insecure +``` + +Then point clients that support streamable HTTP MCP at `https://host:8443/`. + +## MCP tools + +Every tool that returns field data requires an ECS `version` argument (e.g. +`"9.3.0"`), since the database holds every tagged release. To list the versions +that are loaded, run `SELECT DISTINCT version FROM fields ORDER BY version;` +via `ecs_execute_sql_query`. + +| Tool | Arguments | Purpose | +|---|---|---| +| `ecs_get_sql_tables` | — | Returns the full DDL for the loaded ECS database — the catalog of tables, columns, and types. Call this first so subsequent queries target real columns. | +| `ecs_execute_sql_query` | `statement` | Executes an arbitrary read-only SQLite query against the ECS database. Useful for targeted lookups, fieldset membership checks, filtering by `level`, etc. Queries against `fields`/`fieldsets`/`expected_event_types` should filter by `version`, otherwise rows from every loaded release are merged. | +| `ecs_match_fields` | `version`, `field_names` | Given up to 500 dotted field names and an ECS `version`, returns each annotated with whether it exists in that version of ECS, plus the ECS type and description for matches. Ideal for bulk alignment checks. | +| `ecs_search_fields` | `version`, `query`, optional `limit` | Full-text search across ECS field names and descriptions for the given `version`. Accepts plain keywords, dotted paths, or camelCase identifiers — `crowdstrike.fdr.ProcessTTYAttached` is tokenized automatically and surfaces `process.tty`-family fields. | + +In addition, the server registers the `ecs_guide` prompt, which returns a short +walkthrough of how to combine the tools when mapping fields to ECS, including +how to select a version. diff --git a/go-ecs/go.mod b/go-ecs/go.mod new file mode 100644 index 000000000..b431fab93 --- /dev/null +++ b/go-ecs/go.mod @@ -0,0 +1,29 @@ +module github.com/elastic/ecs/go-ecs + +go 1.26 + +require ( + github.com/goccy/go-yaml v1.19.2 + github.com/gorilla/handlers v1.5.2 + github.com/modelcontextprotocol/go-sdk v1.5.0 + modernc.org/sqlite v1.50.0 +) + +require ( + github.com/Masterminds/semver/v3 v3.4.0 // indirect + github.com/dustin/go-humanize v1.0.1 // indirect + github.com/felixge/httpsnoop v1.0.3 // indirect + github.com/google/jsonschema-go v0.4.2 // indirect + github.com/google/uuid v1.6.0 // indirect + github.com/mattn/go-isatty v0.0.20 // indirect + github.com/ncruces/go-strftime v1.0.0 // indirect + github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec // indirect + github.com/segmentio/asm v1.1.3 // indirect + github.com/segmentio/encoding v0.5.4 // indirect + github.com/yosida95/uritemplate/v3 v3.0.2 // indirect + golang.org/x/oauth2 v0.35.0 // indirect + golang.org/x/sys v0.42.0 // indirect + modernc.org/libc v1.72.0 // indirect + modernc.org/mathutil v1.7.1 // indirect + modernc.org/memory v1.11.0 // indirect +) diff --git a/go-ecs/go.sum b/go-ecs/go.sum new file mode 100644 index 000000000..e86722eab --- /dev/null +++ b/go-ecs/go.sum @@ -0,0 +1,75 @@ +github.com/Masterminds/semver/v3 v3.4.0 h1:Zog+i5UMtVoCU8oKka5P7i9q9HgrJeGzI9SA1Xbatp0= +github.com/Masterminds/semver/v3 v3.4.0/go.mod h1:4V+yj/TJE1HU9XfppCwVMZq3I84lprf4nC11bSS5beM= +github.com/dustin/go-humanize v1.0.1 h1:GzkhY7T5VNhEkwH0PVJgjz+fX1rhBrR7pRT3mDkpeCY= +github.com/dustin/go-humanize v1.0.1/go.mod h1:Mu1zIs6XwVuF/gI1OepvI0qD18qycQx+mFykh5fBlto= +github.com/felixge/httpsnoop v1.0.3 h1:s/nj+GCswXYzN5v2DpNMuMQYe+0DDwt5WVCU6CWBdXk= +github.com/felixge/httpsnoop v1.0.3/go.mod h1:m8KPJKqk1gH5J9DgRY2ASl2lWCfGKXixSwevea8zH2U= +github.com/goccy/go-yaml v1.19.2 h1:PmFC1S6h8ljIz6gMRBopkjP1TVT7xuwrButHID66PoM= +github.com/goccy/go-yaml v1.19.2/go.mod h1:XBurs7gK8ATbW4ZPGKgcbrY1Br56PdM69F7LkFRi1kA= +github.com/golang-jwt/jwt/v5 v5.3.1 h1:kYf81DTWFe7t+1VvL7eS+jKFVWaUnK9cB1qbwn63YCY= +github.com/golang-jwt/jwt/v5 v5.3.1/go.mod h1:fxCRLWMO43lRc8nhHWY6LGqRcf+1gQWArsqaEUEa5bE= +github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8= +github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU= +github.com/google/jsonschema-go v0.4.2 h1:tmrUohrwoLZZS/P3x7ex0WAVknEkBZM46iALbcqoRA8= +github.com/google/jsonschema-go v0.4.2/go.mod h1:r5quNTdLOYEz95Ru18zA0ydNbBuYoo9tgaYcxEYhJVE= +github.com/google/pprof v0.0.0-20250317173921-a4b03ec1a45e h1:ijClszYn+mADRFY17kjQEVQ1XRhq2/JR1M3sGqeJoxs= +github.com/google/pprof v0.0.0-20250317173921-a4b03ec1a45e/go.mod h1:boTsfXsheKC2y+lKOCMpSfarhxDeIzfZG1jqGcPl3cA= +github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= +github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= +github.com/gorilla/handlers v1.5.2 h1:cLTUSsNkgcwhgRqvCNmdbRWG0A3N4F+M2nWKdScwyEE= +github.com/gorilla/handlers v1.5.2/go.mod h1:dX+xVpaxdSw+q0Qek8SSsl3dfMk3jNddUkMzo0GtH0w= +github.com/hashicorp/golang-lru/v2 v2.0.7 h1:a+bsQ5rvGLjzHuww6tVxozPZFVghXaHOwFs4luLUK2k= +github.com/hashicorp/golang-lru/v2 v2.0.7/go.mod h1:QeFd9opnmA6QUJc5vARoKUSoFhyfM2/ZepoAG6RGpeM= +github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWEY= +github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y= +github.com/modelcontextprotocol/go-sdk v1.5.0 h1:CHU0FIX9kpueNkxuYtfYQn1Z0slhFzBZuq+x6IiblIU= +github.com/modelcontextprotocol/go-sdk v1.5.0/go.mod h1:gggDIhoemhWs3BGkGwd1umzEXCEMMvAnhTrnbXJKKKA= +github.com/ncruces/go-strftime v1.0.0 h1:HMFp8mLCTPp341M/ZnA4qaf7ZlsbTc+miZjCLOFAw7w= +github.com/ncruces/go-strftime v1.0.0/go.mod h1:Fwc5htZGVVkseilnfgOVb9mKy6w1naJmn9CehxcKcls= +github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec h1:W09IVJc94icq4NjY3clb7Lk8O1qJ8BdBEF8z0ibU0rE= +github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec/go.mod h1:qqbHyh8v60DhA7CoWK5oRCqLrMHRGoxYCSS9EjAz6Eo= +github.com/segmentio/asm v1.1.3 h1:WM03sfUOENvvKexOLp+pCqgb/WDjsi7EK8gIsICtzhc= +github.com/segmentio/asm v1.1.3/go.mod h1:Ld3L4ZXGNcSLRg4JBsZ3//1+f/TjYl0Mzen/DQy1EJg= +github.com/segmentio/encoding v0.5.4 h1:OW1VRern8Nw6ITAtwSZ7Idrl3MXCFwXHPgqESYfvNt0= +github.com/segmentio/encoding v0.5.4/go.mod h1:HS1ZKa3kSN32ZHVZ7ZLPLXWvOVIiZtyJnO1gPH1sKt0= +github.com/yosida95/uritemplate/v3 v3.0.2 h1:Ed3Oyj9yrmi9087+NczuL5BwkIc4wvTb5zIM+UJPGz4= +github.com/yosida95/uritemplate/v3 v3.0.2/go.mod h1:ILOh0sOhIJR3+L/8afwt/kE++YT040gmv5BQTMR2HP4= +golang.org/x/mod v0.33.0 h1:tHFzIWbBifEmbwtGz65eaWyGiGZatSrT9prnU8DbVL8= +golang.org/x/mod v0.33.0/go.mod h1:swjeQEj+6r7fODbD2cqrnje9PnziFuw4bmLbBZFrQ5w= +golang.org/x/oauth2 v0.35.0 h1:Mv2mzuHuZuY2+bkyWXIHMfhNdJAdwW3FuWeCPYN5GVQ= +golang.org/x/oauth2 v0.35.0/go.mod h1:lzm5WQJQwKZ3nwavOZ3IS5Aulzxi68dUSgRHujetwEA= +golang.org/x/sync v0.20.0 h1:e0PTpb7pjO8GAtTs2dQ6jYa5BWYlMuX047Dco/pItO4= +golang.org/x/sync v0.20.0/go.mod h1:9xrNwdLfx4jkKbNva9FpL6vEN7evnE43NNNJQ2LF3+0= +golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.42.0 h1:omrd2nAlyT5ESRdCLYdm3+fMfNFE/+Rf4bDIQImRJeo= +golang.org/x/sys v0.42.0/go.mod h1:4GL1E5IUh+htKOUEOaiffhrAeqysfVGipDYzABqnCmw= +golang.org/x/tools v0.42.0 h1:uNgphsn75Tdz5Ji2q36v/nsFSfR/9BRFvqhGBaJGd5k= +golang.org/x/tools v0.42.0/go.mod h1:Ma6lCIwGZvHK6XtgbswSoWroEkhugApmsXyrUmBhfr0= +modernc.org/cc/v4 v4.27.3 h1:uNCgn37E5U09mTv1XgskEVUJ8ADKpmFMPxzGJ0TSo+U= +modernc.org/cc/v4 v4.27.3/go.mod h1:3YjcbCqhoTTHPycJDRl2WZKKFj0nwcOIPBfEZK0Hdk8= +modernc.org/ccgo/v4 v4.32.4 h1:L5OB8rpEX4ZsXEQwGozRfJyJSFHbbNVOoQ59DU9/KuU= +modernc.org/ccgo/v4 v4.32.4/go.mod h1:lY7f+fiTDHfcv6YlRgSkxYfhs+UvOEEzj49jAn2TOx0= +modernc.org/fileutil v1.4.0 h1:j6ZzNTftVS054gi281TyLjHPp6CPHr2KCxEXjEbD6SM= +modernc.org/fileutil v1.4.0/go.mod h1:EqdKFDxiByqxLk8ozOxObDSfcVOv/54xDs/DUHdvCUU= +modernc.org/gc/v2 v2.6.5 h1:nyqdV8q46KvTpZlsw66kWqwXRHdjIlJOhG6kxiV/9xI= +modernc.org/gc/v2 v2.6.5/go.mod h1:YgIahr1ypgfe7chRuJi2gD7DBQiKSLMPgBQe9oIiito= +modernc.org/gc/v3 v3.1.2 h1:ZtDCnhonXSZexk/AYsegNRV1lJGgaNZJuKjJSWKyEqo= +modernc.org/gc/v3 v3.1.2/go.mod h1:HFK/6AGESC7Ex+EZJhJ2Gni6cTaYpSMmU/cT9RmlfYY= +modernc.org/goabi0 v0.2.0 h1:HvEowk7LxcPd0eq6mVOAEMai46V+i7Jrj13t4AzuNks= +modernc.org/goabi0 v0.2.0/go.mod h1:CEFRnnJhKvWT1c1JTI3Avm+tgOWbkOu5oPA8eH8LnMI= +modernc.org/libc v1.72.0 h1:IEu559v9a0XWjw0DPoVKtXpO2qt5NVLAnFaBbjq+n8c= +modernc.org/libc v1.72.0/go.mod h1:tTU8DL8A+XLVkEY3x5E/tO7s2Q/q42EtnNWda/L5QhQ= +modernc.org/mathutil v1.7.1 h1:GCZVGXdaN8gTqB1Mf/usp1Y/hSqgI2vAGGP4jZMCxOU= +modernc.org/mathutil v1.7.1/go.mod h1:4p5IwJITfppl0G4sUEDtCr4DthTaT47/N3aT6MhfgJg= +modernc.org/memory v1.11.0 h1:o4QC8aMQzmcwCK3t3Ux/ZHmwFPzE6hf2Y5LbkRs+hbI= +modernc.org/memory v1.11.0/go.mod h1:/JP4VbVC+K5sU2wZi9bHoq2MAkCnrt2r98UGeSK7Mjw= +modernc.org/opt v0.1.4 h1:2kNGMRiUjrp4LcaPuLY2PzUfqM/w9N23quVwhKt5Qm8= +modernc.org/opt v0.1.4/go.mod h1:03fq9lsNfvkYSfxrfUhZCWPk1lm4cq4N+Bh//bEtgns= +modernc.org/sortutil v1.2.1 h1:+xyoGf15mM3NMlPDnFqrteY07klSFxLElE2PVuWIJ7w= +modernc.org/sortutil v1.2.1/go.mod h1:7ZI3a3REbai7gzCLcotuw9AC4VZVpYMjDzETGsSMqJE= +modernc.org/sqlite v1.50.0 h1:eMowQSWLK0MeiQTdmz3lqoF5dqclujdlIKeJA11+7oM= +modernc.org/sqlite v1.50.0/go.mod h1:m0w8xhwYUVY3H6pSDwc3gkJ/irZT/0YEXwBlhaxQEew= +modernc.org/strutil v1.2.1 h1:UneZBkQA+DX2Rp35KcM69cSsNES9ly8mQWD71HKlOA0= +modernc.org/strutil v1.2.1/go.mod h1:EHkiggD70koQxjVdSBM3JKM7k6L0FbGE5eymy9i3B9A= +modernc.org/token v1.1.0 h1:Xl7Ap9dKaEs5kLoOQeQmPWevfnk/DM5qcLcYlA8ys6Y= +modernc.org/token v1.1.0/go.mod h1:UGzOrNV1mAFSEB63lOFHIpNRUVMvYTc6yu1SMY/XTDM= diff --git a/go-ecs/internal/field/field.go b/go-ecs/internal/field/field.go new file mode 100644 index 000000000..2ea328687 --- /dev/null +++ b/go-ecs/internal/field/field.go @@ -0,0 +1,31 @@ +// Licensed to Elasticsearch B.V. under one or more agreements. +// Elasticsearch B.V. licenses this file to you under the Apache 2.0 License. +// See the LICENSE file in the project root for more information. + +package field + +// Field represents a single ECS field. +type Field struct { + Name string + Type string + Level string + Short string + Description string + AllowedValues []AllowedValue + IsArray bool + Fieldsets []*Fieldset + Example string +} + +// AllowedValue represents a single value permitted for an ECS field. +type AllowedValue struct { + Name string + Description string +} + +// ExpectedEventType describes the event types expected to accompany a given +// event category (e.g. event.category). +type ExpectedEventType struct { + Category string + Types []string +} diff --git a/go-ecs/internal/field/fieldset.go b/go-ecs/internal/field/fieldset.go new file mode 100644 index 000000000..1fdecd5bb --- /dev/null +++ b/go-ecs/internal/field/fieldset.go @@ -0,0 +1,13 @@ +// Licensed to Elasticsearch B.V. under one or more agreements. +// Elasticsearch B.V. licenses this file to you under the Apache 2.0 License. +// See the LICENSE file in the project root for more information. + +package field + +// Fieldset represents an ECS fieldset. +type Fieldset struct { + Name string + Short string + Description string + TopLevel bool +} diff --git a/go-ecs/internal/field/parse.go b/go-ecs/internal/field/parse.go new file mode 100644 index 000000000..824d68ed5 --- /dev/null +++ b/go-ecs/internal/field/parse.go @@ -0,0 +1,156 @@ +// Licensed to Elasticsearch B.V. under one or more agreements. +// Elasticsearch B.V. licenses this file to you under the Apache 2.0 License. +// See the LICENSE file in the project root for more information. + +package field + +import ( + "slices" + "sort" + + "github.com/goccy/go-yaml" +) + +type allowedValueDef struct { + Name string `yaml:"name"` + Description string `yaml:"description"` + ExpectedEventTypes []string `yaml:"expected_event_types"` +} + +type fieldDef struct { + Type string `yaml:"type"` + Level string `yaml:"level"` + Short string `yaml:"short"` + Example string `yaml:"example"` + Description string `yaml:"description"` + AllowedValues []allowedValueDef `yaml:"allowed_values"` + Normalize any `yaml:"normalize"` + OriginalFieldset string `yaml:"original_fieldset"` +} + +type fieldsetDef struct { + Fields map[string]fieldDef `yaml:"fields"` + Description string `yaml:"description"` + Short string `yaml:"short"` + + Reusable *struct { + TopLevel bool `yaml:"top_level"` + } `yaml:"reusable"` +} + +func (f *fieldsetDef) allowedAtRoot() bool { + if f.Reusable != nil && !f.Reusable.TopLevel { + return false + } + + return true +} + +// Parse will parse Fieldsets and Fields from the provided data. The data is +// expected to be in the format of a generated ecs_nested.yml file. +func Parse(data []byte) (*Schema, error) { + var schema Schema + + var raw map[string]fieldsetDef + if err := yaml.Unmarshal(data, &raw); err != nil { + return nil, err + } + + fieldsetMap := map[string]*Fieldset{} + fieldMap := map[string]*Field{} + + // Pass 1: Get fieldsets + for kfs, vfs := range raw { + fs := &Fieldset{ + Name: kfs, + Short: vfs.Short, + Description: vfs.Description, + TopLevel: true, + } + if vfs.Reusable != nil { + fs.TopLevel = vfs.Reusable.TopLevel + } + fieldsetMap[kfs] = fs + } + + // Pass 2: Get fields + for kfs, vfs := range raw { + if !vfs.allowedAtRoot() { + continue + } + for kf, vf := range vfs.Fields { + f := &Field{ + Name: kf, + Type: vf.Type, + Level: vf.Level, + Short: vf.Short, + Description: vf.Description, + Fieldsets: []*Fieldset{fieldsetMap[kfs]}, + Example: vf.Example, + } + switch v := vf.Normalize.(type) { + case []any: + slices.ContainsFunc(v, func(item any) bool { + switch vi := item.(type) { + case string: + if vi == "array" { + f.IsArray = true + return true + } + } + return false + }) + case string: + f.IsArray = v == "array" + } + if vf.OriginalFieldset != "" { + fs := fieldsetMap[vf.OriginalFieldset] + if !slices.Contains(f.Fieldsets, fs) { + f.Fieldsets = append(f.Fieldsets, fieldsetMap[vf.OriginalFieldset]) + } + } + if len(vf.AllowedValues) > 0 { + f.AllowedValues = make([]AllowedValue, 0, len(vf.AllowedValues)) + for _, v := range vf.AllowedValues { + f.AllowedValues = append(f.AllowedValues, AllowedValue{ + Name: v.Name, + Description: v.Description, + }) + if len(v.ExpectedEventTypes) > 0 { + eet := &ExpectedEventType{ + Category: v.Name, + Types: make([]string, 0, len(v.ExpectedEventTypes)), + } + for _, eev := range v.ExpectedEventTypes { + eet.Types = append(eet.Types, eev) + } + schema.ExpectedEventTypes = append(schema.ExpectedEventTypes, eet) + } + } + } + + fieldMap[kf] = f + } + } + + schema.Fieldsets = make([]*Fieldset, 0, len(fieldsetMap)) + for _, fieldset := range fieldsetMap { + schema.Fieldsets = append(schema.Fieldsets, fieldset) + } + sort.Slice(schema.Fieldsets, func(i, j int) bool { + return schema.Fieldsets[i].Name < schema.Fieldsets[j].Name + }) + + schema.Fields = make([]*Field, 0, len(fieldMap)) + for _, field := range fieldMap { + schema.Fields = append(schema.Fields, field) + } + sort.Slice(schema.Fields, func(i, j int) bool { + return schema.Fields[i].Name < schema.Fields[j].Name + }) + sort.Slice(schema.ExpectedEventTypes, func(i, j int) bool { + return schema.ExpectedEventTypes[i].Category < schema.ExpectedEventTypes[j].Category + }) + + return &schema, nil +} diff --git a/go-ecs/internal/field/schema.go b/go-ecs/internal/field/schema.go new file mode 100644 index 000000000..300d60b3e --- /dev/null +++ b/go-ecs/internal/field/schema.go @@ -0,0 +1,14 @@ +// Licensed to Elasticsearch B.V. under one or more agreements. +// Elasticsearch B.V. licenses this file to you under the Apache 2.0 License. +// See the LICENSE file in the project root for more information. + +package field + +// Schema is a parsed ECS schema, comprising its fieldsets, fields, and the +// expected event types for each event category. +type Schema struct { + Version string + Fieldsets []*Fieldset + Fields []*Field + ExpectedEventTypes []*ExpectedEventType +} diff --git a/go-ecs/internal/git/repo.go b/go-ecs/internal/git/repo.go new file mode 100644 index 000000000..09e2dab33 --- /dev/null +++ b/go-ecs/internal/git/repo.go @@ -0,0 +1,160 @@ +// Licensed to Elasticsearch B.V. under one or more agreements. +// Elasticsearch B.V. licenses this file to you under the Apache 2.0 License. +// See the LICENSE file in the project root for more information. + +package git + +import ( + "bufio" + "bytes" + "context" + "fmt" + "log/slog" + "os" + "os/exec" + "path/filepath" + "strings" + "time" + + "github.com/goccy/go-yaml" +) + +func getEnv() []string { + return append(os.Environ(), + "GIT_TERMINAL_PROMPT=0", // Disable HTTPS credential prompts. + "GIT_SSH_COMMAND=ssh -o BatchMode=yes", // Disable SSH passphrase/password prompts. + "GIT_CONFIG_COUNT=1", // Disable gc to prevent zombie child processes. + "GIT_CONFIG_KEY_0=gc.auto", + "GIT_CONFIG_VALUE_0=0", + ) +} + +// Repo is a git repository. +type Repo struct { + name string + dir string +} + +// Name is the name of the repository. +func (r *Repo) Name() string { + return r.name +} + +// Dir is the path to the repository. +func (r *Repo) Dir() string { + return r.dir +} + +// RunCommand runs an arbitrary git command against the repo. +// +// git -C args... +func (r *Repo) RunCommand(ctx context.Context, args ...string) ([]byte, error) { + cmdArgs := []string{"-C", r.dir} + cmdArgs = append(cmdArgs, args...) + + cmd := exec.CommandContext(ctx, "git", cmdArgs...) + cmd.Env = getEnv() + + slog.Debug("Running git command", slog.String("cmd", cmd.String())) + + output, err := cmd.CombinedOutput() + if err != nil { + return nil, fmt.Errorf("git: command failed [%s]: %w: %s", cmd.String(), err, output) + } + + return output, nil +} + +// Tags lists the tags for the repo. +func (r *Repo) Tags(ctx context.Context) ([]string, error) { + output, err := r.RunCommand(ctx, "tag") + if err != nil { + return nil, err + } + + var tags []string + + scanner := bufio.NewScanner(bytes.NewReader(output)) + for scanner.Scan() { + tags = append(tags, scanner.Text()) + } + if err = scanner.Err(); err != nil { + return nil, fmt.Errorf("git: failed to scan output: %w", err) + } + + return tags, nil +} + +// TagToHash converts a tag to a commit hash. +func (r *Repo) TagToHash(ctx context.Context, tag string) (string, error) { + output, err := r.RunCommand(ctx, "rev-parse", tag) + if err != nil { + return "", err + } + + return strings.TrimSpace(string(output)), nil +} + +// ReadFile will read and return the contents of a file at the given ref. +func (r *Repo) ReadFile(ctx context.Context, ref, filename string) ([]byte, error) { + return r.RunCommand(ctx, "show", ref+":"+filename) +} + +// ParseYAMLFile will parse the yaml file at the given ref. +func (r *Repo) ParseYAMLFile(ctx context.Context, ref, filename string, v any) error { + data, err := r.ReadFile(ctx, ref, filename) + if err != nil { + return err + } + + return yaml.Unmarshal(data, v) +} + +func (r *Repo) clone(ctx context.Context, remote string) error { + if remote == "" { + return fmt.Errorf("git: no remote specified") + } + + parentDir := filepath.Dir(r.dir) + + slog.Info("Cloning repo...", slog.String("repo", r.name), slog.String("dir", r.dir)) + + if err := os.MkdirAll(parentDir, 0755); err != nil { + return fmt.Errorf("git: could not create directory %q: %w", parentDir, err) + } + + start := time.Now() + + cmd := exec.CommandContext(ctx, "git", "clone", + "--bare", + remote, + r.dir) + cmd.Env = getEnv() + + output, err := cmd.CombinedOutput() + if err != nil { + return fmt.Errorf("git: clone failed %w: %s", err, output) + } + + slog.Debug("Git clone complete", slog.String("repo", r.name), slog.String("duration", time.Since(start).String())) + + return nil +} + +// NewRepo creates a new Repo. It will clone the repo as a bare repository if +// it does not already exist, or it will run fetch on the existing repository. +func NewRepo(ctx context.Context, dir, remote string) (*Repo, error) { + r := Repo{ + dir: dir, + } + + if _, err := os.Stat(dir); os.IsNotExist(err) { + if err = r.clone(ctx, remote); err != nil { + return nil, err + } + } else if err != nil { + return nil, err + } + + return &r, nil +} diff --git a/go-ecs/internal/mcp/ecs_guide.md b/go-ecs/internal/mcp/ecs_guide.md new file mode 100644 index 000000000..dbd918119 --- /dev/null +++ b/go-ecs/internal/mcp/ecs_guide.md @@ -0,0 +1,46 @@ +You have access to a set of tools for exploring Elastic Common Schema (ECS). Here is how to use them effectively. + +## Overview + +The server loads every tagged ECS release (>= v1.12.0) from the configured ECS +repository into a single SQLite database. Every field, fieldset, and expected +event type row is tagged with the ECS `version` it came from, so all of the +tools — except `ecs_get_sql_tables` — require a `version` to disambiguate which +release of ECS to consult. + +Pick one version per workflow and reuse it. Do not mix results from different +versions in the same mapping. + +## Tools + +- **ecs_get_sql_tables** — Returns the complete database schema with all table definitions, columns, and types. Note that `fields`, `fieldsets`, and `expected_event_types` all carry a `version` column. +- **ecs_match_fields** — Check whether field names exist in a specific ECS version. Requires `version` (e.g. `"9.3.0"`) and a list of dotted field names. Returns each annotated with whether it exists in that version, plus the ECS data type and description for matches. +- **ecs_search_fields** — Full-text search across ECS field definitions for a specific version. Requires `version` and a `query`. Plain keywords, dotted field names, or camelCase identifiers are automatically split into search tokens — e.g., `crowdstrike.fdr.ProcessTTYAttached` finds `process.tty` and related fields. +- **ecs_execute_sql_query** — Executes arbitrary read-only SQLite queries. The most powerful tool for both discovery and analytics. You MUST filter by `version` in every query that touches `fields`, `fieldsets`, `field_fieldsets`, or `expected_event_types` — otherwise you will get rows merged across every loaded ECS release. + +## Discovering available versions + +To see which ECS versions are loaded, run: + +```sql +SELECT DISTINCT version FROM fields ORDER BY version; +``` + +If the user does not specify a version, ask — do not default silently. If they +name a product that pins a specific ECS release (e.g. a Fleet integration that +declares `ecs.reference: git@v9.3.0`), use that version. + +## ECS Field Mapping Workflow + +When reviewing whether package fields align with ECS: + +1. **Choose a version** — Confirm or ask the user which ECS version to map against, then reuse it for every tool call in the workflow. +2. **Discover** — Use `ecs_search_fields` (with `version`) to find ECS fields related to a concept. For example, given a custom field like `crowdstrike.fdr.ProcessTTYAttached`, search for "process tty" or "terminal" to discover that `process.tty` exists in ECS. +3. **Match** — Use `ecs_match_fields` (with `version`) and a list of field names from a package to identify which ones already exist in ECS for that version. +4. **Recommend** — Fields that match ECS should use `external: ecs` in their field definition to inherit the upstream ECS definition and avoid drift. + +## Tips + +- The `fields` table has flattened dotted-path field names with resolved ECS definitions. Always scope `SELECT` statements with `WHERE version = ''`. +- To join `fields` to `fieldsets` via `field_fieldsets`, filter both `fields.version` and `fieldsets.version` to the same release — rows are not cross-linked between versions. +- The docs FTS index uses porter stemming, so "authenticate" also matches "authentication". diff --git a/go-ecs/internal/mcp/mcp.go b/go-ecs/internal/mcp/mcp.go new file mode 100644 index 000000000..c2ad6badf --- /dev/null +++ b/go-ecs/internal/mcp/mcp.go @@ -0,0 +1,250 @@ +// Licensed to Elasticsearch B.V. under one or more agreements. +// Elasticsearch B.V. licenses this file to you under the Apache 2.0 License. +// See the LICENSE file in the project root for more information. + +package mcp + +import ( + "context" + "database/sql" + "encoding/json" + "fmt" + "log/slog" + + "github.com/modelcontextprotocol/go-sdk/mcp" + + "github.com/elastic/ecs/go-ecs/internal/store" + + _ "embed" +) + +//go:embed ecs_guide.md +var guidePromptText string + +const ( + defaultFTSLimit = 25 + maxMatchFieldNames = 500 +) + +type execQueryArgs struct { + Statement string `json:"statement" jsonschema:"SQLite query to execute"` +} + +type matchFieldsArgs struct { + FieldNames []string `json:"field_names" jsonschema:"List of dotted field names to check against ECS (max 500)"` + Version string `json:"version" jsonschema:"The ECS version"` +} + +type searchFieldsArgs struct { + Query string `json:"query" jsonschema:"Search query - plain keywords, dotted field names, or camelCase identifiers (e.g. process terminal, crowdstrike.fdr.ProcessTTYAttached, \"source address\", network AND bytes)"` + Version string `json:"version" jsonschema:"The ECS version"` + Limit int `json:"limit,omitempty" jsonschema:"Maximum number of results to return (default 25)"` +} + +type tools struct { + ddl string + db *sql.DB +} + +func (t *tools) getSQLSchema(_ context.Context, _ *mcp.CallToolRequest, _ any) (*mcp.CallToolResult, any, error) { + return &mcp.CallToolResult{ + Content: []mcp.Content{ + &mcp.TextContent{Text: t.ddl}, + }, + }, nil, nil +} + +func (t *tools) executeQuery(ctx context.Context, _ *mcp.CallToolRequest, args execQueryArgs) (*mcp.CallToolResult, any, error) { + slog.InfoContext(ctx, "Executing query", slog.String("statement", args.Statement)) + + rows, err := t.db.QueryContext(ctx, args.Statement) + if err != nil { + slog.ErrorContext(ctx, "Error executing query", slog.String("statement", args.Statement), slog.String("error", err.Error())) + return mcpErrorf("failed to execute query: %v", err), nil, nil + } + defer rows.Close() + + columns, err := rows.Columns() + if err != nil { + slog.ErrorContext(ctx, "Error getting columns", slog.String("error", err.Error())) + return mcpErrorf("failed to get columns: %v", err), nil, nil + } + + var result []map[string]any + for rows.Next() { + values := make([]any, len(columns)) + pointers := make([]any, len(columns)) + for i := range values { + pointers[i] = &values[i] + } + + if err = rows.Scan(pointers...); err != nil { + slog.ErrorContext(ctx, "Error scanning row", slog.String("error", err.Error())) + return mcpErrorf("failed to scan row: %v", err), nil, nil + } + + row := make(map[string]any) + for i, column := range columns { + val := values[i] + if b, ok := val.([]byte); ok { + row[column] = string(b) + } else { + row[column] = val + } + } + result = append(result, row) + } + + jsonRows, err := json.Marshal(result) + if err != nil { + slog.ErrorContext(ctx, "Error marshaling results", slog.String("error", err.Error())) + return mcpErrorf("failed to marshal result: %v", err), nil, nil + } + + slog.InfoContext(ctx, "Query executed successfully", slog.Int("row_count", len(result))) + return &mcp.CallToolResult{ + Content: []mcp.Content{ + &mcp.TextContent{Text: string(jsonRows)}, + }, + }, nil, nil +} + +func (t *tools) matchFields(ctx context.Context, _ *mcp.CallToolRequest, args matchFieldsArgs) (*mcp.CallToolResult, any, error) { + if len(args.FieldNames) == 0 { + return mcpErrorf("field_names must not be empty"), nil, nil + } + if len(args.FieldNames) > maxMatchFieldNames { + return mcpErrorf("field_names exceeds maximum of %d", maxMatchFieldNames), nil, nil + } + + q := store.New(t.db) + matched, err := q.MatchFields(ctx, args.Version, args.FieldNames) + if err != nil { + return mcpErrorf("failed to match fields: %v", err), nil, nil + } + + type result struct { + Name string `json:"name"` + IsECS bool `json:"is_ecs"` + Type string `json:"type"` + Description string `json:"description"` + } + results := make([]result, len(args.FieldNames)) + for i, name := range args.FieldNames { + if m, ok := matched[name]; ok { + results[i] = result{Name: name, IsECS: true, Type: m.Type, Description: m.Description} + } else { + results[i] = result{Name: name} + } + } + + jsonData, err := json.Marshal(results) + if err != nil { + return mcpErrorf("failed to marshal results: %v", err), nil, nil + } + + return &mcp.CallToolResult{ + Content: []mcp.Content{ + &mcp.TextContent{Text: string(jsonData)}, + }, + }, nil, nil +} + +func (t *tools) searchFields(ctx context.Context, _ *mcp.CallToolRequest, args searchFieldsArgs) (*mcp.CallToolResult, any, error) { + limit := args.Limit + if limit <= 0 { + limit = defaultFTSLimit + } + + q := store.New(t.db) + matched, err := q.SearchFields(ctx, args.Version, args.Query, limit) + if err != nil { + return mcpErrorf("failed to search fields: %v", err), nil, nil + } + + jsonData, err := json.Marshal(matched) + if err != nil { + return mcpErrorf("failed to marshal results: %v", err), nil, nil + } + + return &mcp.CallToolResult{ + Content: []mcp.Content{ + &mcp.TextContent{Text: string(jsonData)}, + }, + }, nil, nil +} + +// AddTools registers the ECS MCP tools on the server. +func AddTools(s *mcp.Server, ddl string, db *sql.DB) { + t := &tools{ + ddl: ddl, + db: db, + } + + mcp.AddTool(s, &mcp.Tool{ + Annotations: &mcp.ToolAnnotations{ + ReadOnlyHint: true, + }, + Description: "Call this tool first. Returns the complete catalog of available tables and columns", + Name: "ecs_get_sql_tables", + Title: "Get ECS SQL tables", + }, t.getSQLSchema) + + mcp.AddTool(s, &mcp.Tool{ + Annotations: &mcp.ToolAnnotations{ + ReadOnlyHint: true, + }, + Description: "Call this tool to execute an arbitrary SQLite query. Be sure you have called ecs_get_sql_tables() first to understand the structure of the data.", + Name: "ecs_execute_sql_query", + Title: "Execute SQL query", + }, t.executeQuery) + + mcp.AddTool(s, &mcp.Tool{ + Annotations: &mcp.ToolAnnotations{ + ReadOnlyHint: true, + }, + Description: "Check whether field names exist in ECS (Elastic Common Schema).", + Name: "ecs_match_fields", + Title: "Match ECS fields", + }, t.matchFields) + + mcp.AddTool(s, &mcp.Tool{ + Annotations: &mcp.ToolAnnotations{ + ReadOnlyHint: true, + }, + Description: "Full-text search across ECS (Elastic Common Schema) field definitions.", + Name: "ecs_search_fields", + Title: "Search ECS fields", + }, t.searchFields) +} + +// AddPrompts registers all MCP prompts on the server. +func AddPrompts(s *mcp.Server) { + s.AddPrompt(&mcp.Prompt{ + Name: "ecs_guide", + Title: "ECS Guide", + Description: "How to use the ecs tools together to explore Elastic Common Schema (ECS).", + }, guidePromptHandler) +} + +func guidePromptHandler(_ context.Context, _ *mcp.GetPromptRequest) (*mcp.GetPromptResult, error) { + return &mcp.GetPromptResult{ + Description: "Guide for using the ecs MCP tools", + Messages: []*mcp.PromptMessage{ + { + Role: "user", + Content: &mcp.TextContent{Text: guidePromptText}, + }, + }, + }, nil +} + +func mcpErrorf(format string, args ...any) *mcp.CallToolResult { + return &mcp.CallToolResult{ + Content: []mcp.Content{ + &mcp.TextContent{ + Text: fmt.Sprintf("ERROR: "+format, args...), + }, + }, + } +} diff --git a/go-ecs/internal/store/db.go b/go-ecs/internal/store/db.go new file mode 100644 index 000000000..bb0916935 --- /dev/null +++ b/go-ecs/internal/store/db.go @@ -0,0 +1,35 @@ +// Licensed to Elasticsearch B.V. under one or more agreements. +// Elasticsearch B.V. licenses this file to you under the Apache 2.0 License. +// See the LICENSE file in the project root for more information. + +// Code generated by sqlc. DO NOT EDIT. +// versions: +// sqlc v1.30.0 + +package store + +import ( + "context" + "database/sql" +) + +type DBTX interface { + ExecContext(context.Context, string, ...interface{}) (sql.Result, error) + PrepareContext(context.Context, string) (*sql.Stmt, error) + QueryContext(context.Context, string, ...interface{}) (*sql.Rows, error) + QueryRowContext(context.Context, string, ...interface{}) *sql.Row +} + +func New(db DBTX) *Queries { + return &Queries{db: db} +} + +type Queries struct { + db DBTX +} + +func (q *Queries) WithTx(tx *sql.Tx) *Queries { + return &Queries{ + db: tx, + } +} diff --git a/go-ecs/internal/store/models.go b/go-ecs/internal/store/models.go new file mode 100644 index 000000000..0dda5d2b3 --- /dev/null +++ b/go-ecs/internal/store/models.go @@ -0,0 +1,56 @@ +// Licensed to Elasticsearch B.V. under one or more agreements. +// Elasticsearch B.V. licenses this file to you under the Apache 2.0 License. +// See the LICENSE file in the project root for more information. + +// Code generated by sqlc. DO NOT EDIT. +// versions: +// sqlc v1.30.0 + +package store + +import ( + "database/sql" +) + +type ExpectedEventType struct { + Category string + Type string + Version string +} + +type Field struct { + ID int64 + Name string + Version string + Type string + Level string + Short sql.NullString + Description sql.NullString + IsArray bool + Example sql.NullString + SearchText string +} + +type FieldAllowedValue struct { + FieldID int64 + Name string + Description string +} + +type FieldFieldset struct { + FieldID int64 + FieldsetID int64 +} + +type FieldsFt struct { + SearchText string +} + +type Fieldset struct { + ID int64 + Name string + Version string + Short sql.NullString + Description sql.NullString + TopLevel bool +} diff --git a/go-ecs/internal/store/queries.go b/go-ecs/internal/store/queries.go new file mode 100644 index 000000000..10fb9858f --- /dev/null +++ b/go-ecs/internal/store/queries.go @@ -0,0 +1,216 @@ +// Licensed to Elasticsearch B.V. under one or more agreements. +// Elasticsearch B.V. licenses this file to you under the Apache 2.0 License. +// See the LICENSE file in the project root for more information. + +package store + +import ( + "context" + "strings" +) + +// MatchedField is the subset of a field's metadata returned when confirming +// that a name corresponds to an ECS field. +type MatchedField struct { + Type string + Description string +} + +// SearchResult is a single row returned from a full-text search over ECS +// fields. +type SearchResult struct { + Name string `json:"name"` + Type string `json:"type"` + Description string `json:"description"` + IsArray bool `json:"is_array"` +} + +// MatchFields looks up the given field names in the ECS catalog and returns a +// map keyed by name for those that exist. Names that do not correspond to an +// ECS field are omitted from the result. +func (q *Queries) MatchFields(ctx context.Context, version string, names []string) (map[string]MatchedField, error) { + if len(names) == 0 { + return nil, nil + } + + var sb strings.Builder + sb.WriteString("SELECT name, type, description FROM fields WHERE version = ? AND name ") + if len(names) > 1 { + sb.WriteString("IN (") + sb.WriteString(strings.Repeat("?,", len(names)-1)) + sb.WriteString("?)") + } else { + sb.WriteString("= ?") + } + + stmt := sb.String() + args := make([]any, len(names)+1) + args[0] = version + for i, v := range names { + args[i+1] = v + } + + rows, err := q.db.QueryContext(ctx, stmt, args...) + if err != nil { + return nil, err + } + defer rows.Close() + + matches := make(map[string]MatchedField) + for rows.Next() { + var name string + var match MatchedField + if err = rows.Scan(&name, &match.Type, &match.Description); err != nil { + return nil, err + } + matches[name] = match + } + if rows.Err() != nil { + return nil, err + } + + return matches, nil +} + +// SearchFields runs a full-text search over ECS field names and descriptions +// and returns up to limit results ordered by FTS5 rank. If limit is zero or +// negative, a default of 25 is used. The query is normalized before matching +// (dots are stripped, camelCase tokens are split, and plain terms are joined +// with OR) so that dotted field names and camelCase identifiers match as +// expected. +func (q *Queries) SearchFields(ctx context.Context, version string, query string, limit int) ([]SearchResult, error) { + if limit <= 0 { + limit = 25 + } + + //Normalize the query for FTS5 matching: + //1. Sanitize (replace dots with spaces) + //2. Split camelCase tokens (e.g. "ProcessTTYAttached" → "Process TTY Attached") + //3. Join plain terms with OR for additive discovery ranking + sanitized := sanitizeFTSQuery(query) + sanitized = splitCamelCase(query) + sanitized = implicitOR(query) + + stmt := `SELECT ef.name, ef.type, ef.description, ef.is_array +FROM fields_fts +JOIN fields ef ON ef.id = fields_fts.rowid +WHERE ef.version = ? AND fields_fts MATCH ? +ORDER BY rank +LIMIT ?` + + rows, err := q.db.QueryContext(ctx, stmt, version, sanitized, limit) + if err != nil { + return nil, err + } + defer rows.Close() + + var results []SearchResult + for rows.Next() { + var result SearchResult + if err = rows.Scan(&result.Name, &result.Type, &result.Description, &result.IsArray); err != nil { + return nil, err + } + results = append(results, result) + } + if rows.Err() != nil { + return nil, err + } + + return results, nil +} + +// sanitizeFTSQuery replaces characters that cause FTS5 syntax errors +// with spaces. Dots in particular are common in field names like +// "source.nat.ip" and would otherwise cause "syntax error near '.'". +func sanitizeFTSQuery(query string) string { + return strings.ReplaceAll(query, ".", " ") +} + +// splitCamelCase splits camelCase and PascalCase tokens within a query +// into separate words. For example "ProcessTTYAttached" becomes +// "Process TTY Attached" and "sourceIP" becomes "source IP". +// Tokens that are already all-lowercase or all-uppercase are unchanged. +// Underscores are also treated as word boundaries. +func splitCamelCase(query string) string { + tokens := strings.Fields(query) + changed := false + for i, tok := range tokens { + split := splitCamelCaseToken(tok) + if split != tok { + tokens[i] = split + changed = true + } + } + if !changed { + return query + } + return strings.Join(tokens, " ") +} + +func splitCamelCaseToken(s string) string { + // Replace underscores with spaces first. + s = strings.ReplaceAll(s, "_", " ") + if strings.Contains(s, " ") { + // Recursively process each sub-token from underscore splitting. + parts := strings.Fields(s) + for i, p := range parts { + parts[i] = splitCamelCaseToken(p) + } + return strings.Join(parts, " ") + } + + var buf strings.Builder + runes := []rune(s) + for i, r := range runes { + if i > 0 && isUpperRune(r) { + prev := runes[i-1] + if isLowerRune(prev) { + // lowUpp boundary: "sourceIP" → "source IP" + buf.WriteRune(' ') + } else if isUpperRune(prev) && i+1 < len(runes) && isLowerRune(runes[i+1]) { + // UPPUpp+low boundary: "TTYAttached" → "TTY Attached" + buf.WriteRune(' ') + } + } + buf.WriteRune(r) + } + return buf.String() +} + +func isUpperRune(r rune) bool { return r >= 'A' && r <= 'Z' } +func isLowerRune(r rune) bool { return r >= 'a' && r <= 'z' } + +// fts5Operators are the reserved keywords in FTS5 query syntax. +var fts5Operators = map[string]bool{ + "AND": true, + "OR": true, + "NOT": true, + "NEAR": true, +} + +// implicitOR rewrites a plain FTS5 query so space-separated terms use OR +// instead of FTS5's default implicit AND. This makes discovery searches +// additive: fields matching more terms rank higher, but a single matching +// term is enough to return a result. +// +// Queries that already contain FTS5 operators (AND, OR, NOT, NEAR), +// phrase quotes, or prefix wildcards are returned unchanged. +func implicitOR(query string) string { + // If the query contains FTS5 syntax characters, pass through as-is. + if strings.ContainsAny(query, `"()*`) { + return query + } + + tokens := strings.Fields(query) + for _, tok := range tokens { + if fts5Operators[tok] { + return query + } + } + + if len(tokens) <= 1 { + return query + } + + return strings.Join(tokens, " OR ") +} diff --git a/go-ecs/internal/store/queries.sql.go b/go-ecs/internal/store/queries.sql.go new file mode 100644 index 000000000..7c9afb98d --- /dev/null +++ b/go-ecs/internal/store/queries.sql.go @@ -0,0 +1,129 @@ +// Licensed to Elasticsearch B.V. under one or more agreements. +// Elasticsearch B.V. licenses this file to you under the Apache 2.0 License. +// See the LICENSE file in the project root for more information. + +// Code generated by sqlc. DO NOT EDIT. +// versions: +// sqlc v1.30.0 +// source: queries.sql + +package store + +import ( + "context" + "database/sql" +) + +const insertExpectedEventType = `-- name: InsertExpectedEventType :exec +INSERT INTO + expected_event_types (category, type, version) +VALUES (?, ?, ?) +` + +type InsertExpectedEventTypeParams struct { + Category string + Type string + Version string +} + +func (q *Queries) InsertExpectedEventType(ctx context.Context, arg InsertExpectedEventTypeParams) error { + _, err := q.db.ExecContext(ctx, insertExpectedEventType, arg.Category, arg.Type, arg.Version) + return err +} + +const insertField = `-- name: InsertField :one +INSERT INTO + fields (name, version, type, level, short, description, is_array, example, search_text) + VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?) + RETURNING id +` + +type InsertFieldParams struct { + Name string + Version string + Type string + Level string + Short sql.NullString + Description sql.NullString + IsArray bool + Example sql.NullString + SearchText string +} + +func (q *Queries) InsertField(ctx context.Context, arg InsertFieldParams) (int64, error) { + row := q.db.QueryRowContext(ctx, insertField, + arg.Name, + arg.Version, + arg.Type, + arg.Level, + arg.Short, + arg.Description, + arg.IsArray, + arg.Example, + arg.SearchText, + ) + var id int64 + err := row.Scan(&id) + return id, err +} + +const insertFieldAllowedValue = `-- name: InsertFieldAllowedValue :exec +INSERT INTO + field_allowed_values (field_id, name, description) +VALUES (?, ?, ?) +` + +type InsertFieldAllowedValueParams struct { + FieldID int64 + Name string + Description string +} + +func (q *Queries) InsertFieldAllowedValue(ctx context.Context, arg InsertFieldAllowedValueParams) error { + _, err := q.db.ExecContext(ctx, insertFieldAllowedValue, arg.FieldID, arg.Name, arg.Description) + return err +} + +const insertFieldFieldset = `-- name: InsertFieldFieldset :exec +INSERT INTO + field_fieldsets (fieldset_id, field_id) +VALUES (?, ?) +` + +type InsertFieldFieldsetParams struct { + FieldsetID int64 + FieldID int64 +} + +func (q *Queries) InsertFieldFieldset(ctx context.Context, arg InsertFieldFieldsetParams) error { + _, err := q.db.ExecContext(ctx, insertFieldFieldset, arg.FieldsetID, arg.FieldID) + return err +} + +const insertFieldset = `-- name: InsertFieldset :one +INSERT INTO + fieldsets (name, version, short, description, top_level) +VALUES (?, ?, ?, ?, ?) +RETURNING id +` + +type InsertFieldsetParams struct { + Name string + Version string + Short sql.NullString + Description sql.NullString + TopLevel bool +} + +func (q *Queries) InsertFieldset(ctx context.Context, arg InsertFieldsetParams) (int64, error) { + row := q.db.QueryRowContext(ctx, insertFieldset, + arg.Name, + arg.Version, + arg.Short, + arg.Description, + arg.TopLevel, + ) + var id int64 + err := row.Scan(&id) + return id, err +} diff --git a/go-ecs/internal/store/sql/queries.sql b/go-ecs/internal/store/sql/queries.sql new file mode 100644 index 000000000..24b961d24 --- /dev/null +++ b/go-ecs/internal/store/sql/queries.sql @@ -0,0 +1,26 @@ +-- name: InsertFieldset :one +INSERT INTO + fieldsets (name, version, short, description, top_level) +VALUES (?, ?, ?, ?, ?) +RETURNING id; + +-- name: InsertField :one +INSERT INTO + fields (name, version, type, level, short, description, is_array, example, search_text) + VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?) + RETURNING id; + +-- name: InsertFieldFieldset :exec +INSERT INTO + field_fieldsets (fieldset_id, field_id) +VALUES (?, ?); + +-- name: InsertFieldAllowedValue :exec +INSERT INTO + field_allowed_values (field_id, name, description) +VALUES (?, ?, ?); + +-- name: InsertExpectedEventType :exec +INSERT INTO + expected_event_types (category, type, version) +VALUES (?, ?, ?); diff --git a/go-ecs/internal/store/sql/schema.sql b/go-ecs/internal/store/sql/schema.sql new file mode 100644 index 000000000..48dec7950 --- /dev/null +++ b/go-ecs/internal/store/sql/schema.sql @@ -0,0 +1,53 @@ +-- Table storing all ECS fields. +CREATE TABLE IF NOT EXISTS fields ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + name TEXT NOT NULL, + version TEXT NOT NULL, + type TEXT NOT NULL, + level TEXT NOT NULL, + short TEXT, + description TEXT, + is_array BOOLEAN NOT NULL, + example TEXT, + search_text TEXT NOT NULL +); + +-- Table storing ECS fieldsets. +CREATE TABLE IF NOT EXISTS fieldsets ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + name TEXT NOT NULL, + version TEXT NOT NULL, + short TEXT, + description TEXT, + top_level BOOLEAN NOT NULL +); + +-- Table storing ECS field to fieldset connections. +CREATE TABLE IF NOT EXISTS field_fieldsets ( + field_id INTEGER NOT NULL, + fieldset_id INTEGER NOT NULL, + FOREIGN KEY (field_id) REFERENCES fields(id), + FOREIGN KEY (fieldset_id) REFERENCES fieldsets(id) +); + +-- Table storing ECS field allowed values. +CREATE TABLE IF NOT EXISTS field_allowed_values ( + field_id INTEGER NOT NULL, + name TEXT NOT NULL, + description TEXT NOT NULL, + FOREIGN KEY (field_id) REFERENCES fields(id) +); + +-- Table storing ECS expected event.category and event.type values. +CREATE TABLE IF NOT EXISTS expected_event_types ( + category TEXT NOT NULL, + type TEXT NOT NULL, + version TEXT NOT NULL +); + +CREATE VIRTUAL TABLE IF NOT EXISTS fields_fts USING fts5( + search_text, + content=fields, + content_rowid=id, + tokenize='porter unicode61' +); diff --git a/go-ecs/internal/store/sql/sqlc.yml b/go-ecs/internal/store/sql/sqlc.yml new file mode 100644 index 000000000..32733a119 --- /dev/null +++ b/go-ecs/internal/store/sql/sqlc.yml @@ -0,0 +1,10 @@ +--- +version: "2" +sql: + - engine: sqlite + queries: queries.sql + schema: schema.sql + gen: + go: + package: store + out: ../ diff --git a/go-ecs/internal/store/store.go b/go-ecs/internal/store/store.go new file mode 100644 index 000000000..b92bd6b2d --- /dev/null +++ b/go-ecs/internal/store/store.go @@ -0,0 +1,197 @@ +// Licensed to Elasticsearch B.V. under one or more agreements. +// Elasticsearch B.V. licenses this file to you under the Apache 2.0 License. +// See the LICENSE file in the project root for more information. + +package store + +import ( + "context" + "database/sql" + "fmt" + "log/slog" + "os" + "strings" + + "github.com/elastic/ecs/go-ecs/internal/field" + + _ "embed" +) + +//go:generate sqlc generate -f sql/sqlc.yml + +//go:embed sql/schema.sql +var DDL string + +const batchSize = 10000 + +// NewDB creates a fresh SQLite database at dbFile (removing any existing file), +// populates it with the given ECS schema, and returns a read-only handle. +func NewDB(ctx context.Context, dbFile string, schemas []*field.Schema) (*sql.DB, error) { + // Remove existing DB if it exists. Create a new DB. + datasource := fmt.Sprintf("file:%s", dbFile) + if err := os.Remove(dbFile); err != nil && !os.IsNotExist(err) { + return nil, fmt.Errorf("failed to remove existing DB: %w", err) + } + db, err := sql.Open("sqlite", datasource) + if err != nil { + return nil, fmt.Errorf("failed to open new DB: %w", err) + } + + // Create tables. + if _, err = db.ExecContext(ctx, DDL); err != nil { + db.Close() + return nil, fmt.Errorf("failed to create tables: %w", err) + } + + // Write to DB. + slog.Info("Adding schemas to database") + + for _, schema := range schemas { + slog.Debug("Adding schema to database", slog.String("version", schema.Version), slog.Int("fieldsets", len(schema.Fieldsets)), slog.Int("fields", len(schema.Fields))) + + fieldsetIDs := make(map[string]int64, len(schema.Fieldsets)) + + if err = runBatchedTransaction(ctx, db, schema.Fieldsets, func(ctx context.Context, q *Queries, fs *field.Fieldset) error { + fieldsetIDs[fs.Name], err = q.InsertFieldset(ctx, InsertFieldsetParams{ + Name: fs.Name, + Version: schema.Version, + Short: nullString(fs.Short), + Description: nullString(fs.Description), + TopLevel: fs.TopLevel, + }) + + return err + }); err != nil { + return nil, err + } + + if err = runBatchedTransaction(ctx, db, schema.Fields, func(ctx context.Context, q *Queries, v *field.Field) error { + searchText := strings.ReplaceAll(v.Name, ".", " ") + " " + v.Description + + insertParams := InsertFieldParams{ + Name: v.Name, + Version: schema.Version, + Type: v.Type, + Level: v.Level, + Short: nullString(v.Short), + Description: nullString(v.Description), + IsArray: v.IsArray, + Example: nullString(v.Example), + SearchText: searchText, + } + + var id int64 + if id, err = q.InsertField(ctx, insertParams); err != nil { + slog.Error("Failed to insert field", slog.String("version", schema.Version), slog.String("field", v.Name), slog.String("error", err.Error())) + return err + } + + for _, fs := range v.Fieldsets { + if err = q.InsertFieldFieldset(ctx, InsertFieldFieldsetParams{ + FieldsetID: fieldsetIDs[fs.Name], + FieldID: id, + }); err != nil { + slog.Error("Failed to insert field fieldset reference", slog.String("version", schema.Version), slog.String("field", v.Name), slog.String("error", err.Error())) + return err + } + } + for _, av := range v.AllowedValues { + if err = q.InsertFieldAllowedValue(ctx, InsertFieldAllowedValueParams{ + FieldID: id, + Name: av.Name, + Description: av.Description, + }); err != nil { + slog.Error("Failed to insert field allowed value", slog.String("version", schema.Version), slog.String("field", v.Name), slog.String("error", err.Error())) + return err + } + } + + return nil + }); err != nil { + return nil, err + } + + if err = runBatchedTransaction(ctx, db, schema.ExpectedEventTypes, func(ctx context.Context, q *Queries, v *field.ExpectedEventType) error { + for _, vv := range v.Types { + if err = q.InsertExpectedEventType(ctx, InsertExpectedEventTypeParams{ + Category: v.Category, + Type: vv, + Version: schema.Version, + }); err != nil { + slog.Error("Failed to insert field allowed value", slog.String("version", schema.Version), slog.String("category", v.Category), slog.String("type", vv), slog.String("error", err.Error())) + return err + } + } + + return nil + }); err != nil { + return nil, err + } + + } + + if _, err = db.ExecContext(ctx, `INSERT INTO fields_fts(fields_fts) VALUES('rebuild')`); err != nil { + db.Close() + return nil, fmt.Errorf("rebuilding ECS fields FTS: %w", err) + } + + db.Close() + + slog.Info("Initialized database") + + // Open DB as read-only. + datasource = fmt.Sprintf("file:%s?mode=ro", dbFile) + db, err = sql.Open("sqlite", datasource) + if err != nil { + return nil, fmt.Errorf("failed to open read-only DB: %w", err) + } + + return db, nil +} + +// nullString converts a string to a sql.NullString. A string is null if it is empty. +func nullString(s string) sql.NullString { + if s == "" { + return sql.NullString{} + } + + return sql.NullString{String: s, Valid: true} +} + +func runTransaction(ctx context.Context, db *sql.DB, fn func(context.Context, *Queries) error) error { + q := Queries{db: db} + + tx, err := db.BeginTx(ctx, nil) + if err != nil { + return fmt.Errorf("failed to begin transaction: %w", err) + } + + qtx := q.WithTx(tx) + + if err = fn(ctx, qtx); err != nil { + _ = tx.Rollback() + return err + } + + return tx.Commit() +} + +func runBatchedTransaction[V any](ctx context.Context, db *sql.DB, items []V, fn func(ctx context.Context, q *Queries, item V) error) error { + var i int + for i < len(items) { + if err := runTransaction(ctx, db, func(ctx context.Context, queries *Queries) error { + for bi := 0; bi < batchSize && i < len(items); bi++ { + if err := fn(ctx, queries, items[i]); err != nil { + return err + } + i++ + } + + return nil + }); err != nil { + return err + } + } + + return nil +}