Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
33 commits
Select commit Hold shift + click to select a range
7c20add
Add skeleton code, install stac-validator
jonhealy1 Mar 26, 2026
7d7b87b
move item validation
jonhealy1 Mar 26, 2026
d8f8b7d
add tests
jonhealy1 Mar 26, 2026
fb6ea42
add validator to cicd
jonhealy1 Mar 26, 2026
7142259
fix name
jonhealy1 Mar 26, 2026
01497b0
update changelog, description
jonhealy1 Mar 26, 2026
d8c4737
Add SCHEMA_CACHE_SIZE env var
jonhealy1 Mar 27, 2026
75ac1ab
remove debug print
jonhealy1 Mar 27, 2026
53c7fa0
Merge branch 'main' into add-stac-validator
jonhealy1 Mar 27, 2026
0de3107
consolidate queue function, fix tests
jonhealy1 Mar 27, 2026
d03f58f
return 400 fix
jonhealy1 Mar 27, 2026
d3bfd3c
update tests
jonhealy1 Mar 27, 2026
5712d5e
global validator
jonhealy1 Mar 27, 2026
9b46afc
ensure validation is async
jonhealy1 Mar 27, 2026
ae63a4d
return 400, clean up
jonhealy1 Mar 28, 2026
b5c64e2
Enable validation in queue
jonhealy1 Mar 28, 2026
ae048e0
update changelog
jonhealy1 Mar 28, 2026
b6dd0f6
Merge branch 'main' into add-stac-validator-queue
jonhealy1 Mar 28, 2026
51a756e
add gostac-validator
jonhealy1 Mar 29, 2026
fece196
update tests
jonhealy1 Mar 29, 2026
66d09fc
add go tests
jonhealy1 Mar 29, 2026
bf0f6b5
update go container env
jonhealy1 Mar 29, 2026
01099f5
Merge branch 'main' into add-stac-go-validators
jonhealy1 Mar 31, 2026
69bc6d6
Merge branch 'main' into add-stac-go-validators
jonhealy1 May 8, 2026
4e3b269
add go validator service
jonhealy1 May 8, 2026
52f4cd5
add stac-validator fast service
jonhealy1 May 9, 2026
29e8135
streamline logic
jonhealy1 May 9, 2026
e8b8e6c
update cicd, tests
jonhealy1 May 9, 2026
eff371a
Merge branch 'main' into add-fast-validator-service
jonhealy1 May 9, 2026
e464346
update cicd
jonhealy1 May 9, 2026
546192e
Merge branch 'add-fast-validator-service' of https://github.com/jonhe…
jonhealy1 May 9, 2026
166ff59
switch env vars
jonhealy1 May 9, 2026
5381772
update tests
jonhealy1 May 9, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 9 additions & 2 deletions .github/workflows/cicd.yml
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,11 @@ jobs:
ports:
- 6379:6379

stac-validator:
image: ghcr.io/staclabs/stac-validator:latest
ports:
- 8000:8000

strategy:
matrix:
python-version: [ "3.12", "3.13", "3.14"]
Expand Down Expand Up @@ -115,6 +120,8 @@ jobs:
DATABASE_REFRESH: true
ES_VERIFY_CERTS: false
REDIS_ENABLE: true
REDIS_HOST: localhost
REDIS_HOST: 127.0.0.1
REDIS_PORT: 6379
BACKEND: ${{ matrix.backend }}
ENABLE_FAST_VALIDATOR: false
FAST_VALIDATOR_URL: http://localhost:8000/validate
BACKEND: ${{ matrix.backend }}
16 changes: 15 additions & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ run_es = docker compose \
-e PY_IGNORE_IMPORTMISMATCH=1 \
-e APP_HOST=${APP_HOST} \
-e APP_PORT=${ES_APP_PORT} \
-e ENABLE_FAST_VALIDATOR=false \
app-elasticsearch

run_os = docker compose \
Expand All @@ -22,6 +23,7 @@ run_os = docker compose \
-e PY_IGNORE_IMPORTMISMATCH=1 \
-e APP_HOST=${APP_HOST} \
-e APP_PORT=${OS_APP_PORT} \
-e ENABLE_FAST_VALIDATOR=false \
app-opensearch

.PHONY: image-es-os
Expand Down Expand Up @@ -67,37 +69,49 @@ docker-shell-os:

.PHONY: test-elasticsearch
test-elasticsearch: image-es-os
docker compose up -d elasticsearch stac-validator
-$(run_es) /bin/bash -c 'export && ./scripts/wait-for-it-es.sh elasticsearch:9200 && cd stac_fastapi/tests/ && pytest'
docker compose down

.PHONY: test-elasticsearch-catalogs
test-elasticsearch-catalogs: image-es-os
docker compose up -d elasticsearch stac-validator
-$(run_es) /bin/bash -c 'export && ./scripts/wait-for-it-es.sh elasticsearch:9200 && cd stac_fastapi/tests/ && pytest extensions/test_catalogs.py -v'
docker compose down

.PHONY: test-elasticsearch-validator
test-elasticsearch-validator: image-es-os
docker compose up -d elasticsearch stac-validator
-$(run_es) /bin/bash -c 'export && ./scripts/wait-for-it-es.sh elasticsearch:9200 && cd stac_fastapi/tests/ && pytest api/test_api_stac_validator.py -v'
docker compose down

.PHONY: test-opensearch
test-opensearch: image-es-os
docker compose up -d opensearch stac-validator
-$(run_os) /bin/bash -c 'export && ./scripts/wait-for-it-es.sh opensearch:9202 && cd stac_fastapi/tests/ && pytest'
docker compose down

.PHONY: test-opensearch-catalogs
test-opensearch-catalogs: image-es-os
docker compose up -d opensearch stac-validator
-$(run_os) /bin/bash -c 'export && ./scripts/wait-for-it-es.sh opensearch:9202 && cd stac_fastapi/tests/ && pytest extensions/test_catalogs.py -v'
docker compose down

.PHONY: test-datetime-filtering-es
test-datetime-filtering-es: image-es-os
docker compose up -d elasticsearch stac-validator
-$(run_es) /bin/bash -c 'export ENABLE_DATETIME_INDEX_FILTERING=true && ./scripts/wait-for-it-es.sh elasticsearch:9200 && cd stac_fastapi/tests/ && pytest -s --cov=stac_fastapi --cov-report=term-missing -m datetime_filtering'
docker compose down

.PHONY: test-datetime-filtering-os
test-datetime-filtering-os: image-es-os
docker compose up -d opensearch stac-validator
-$(run_os) /bin/bash -c 'export ENABLE_DATETIME_INDEX_FILTERING=true && ./scripts/wait-for-it-es.sh opensearch:9202 && cd stac_fastapi/tests/ && pytest -s --cov=stac_fastapi --cov-report=term-missing -m datetime_filtering'
docker compose down

.PHONY: test
test: image-es-os
docker compose up -d elasticsearch opensearch redis
docker compose up -d elasticsearch opensearch redis stac-validator

-$(run_es) /bin/bash -c 'export && ./scripts/wait-for-it-es.sh elasticsearch:9200 && ./scripts/wait-for-it-es.sh opensearch:9202 && cd stac_fastapi/tests/ && pytest'

Expand Down
65 changes: 63 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -24,20 +24,21 @@ The following organizations have contributed time and/or funding to support the

## Latest News

- **05/09/2026: Fast Python Validator & Batch Processing.** Added `ENABLE_FAST_VALIDATOR` to offload STAC schema validation to the [fast Python validator](https://github.com/staclabs/stac-validator) microservice. Bulk insertions are now validated in large batches without blocking the FastAPI event loop, and invalid items are routed to a Dead Letter Queue (DLQ) when the Redis background worker is used.
- **03/19/2026: SKOS to STAC Ingestion Demo.** 📓 Check out the interactive [SKOS-catalogs-ingestion-demo.ipynb](https://github.com/StacLabs/sfeos-tools/blob/main/demo-notebooks/SKOS-catalogs-ingestion-demo.ipynb) notebook! This tutorial demonstrates automated semantic ingestion from SKOS/RDF-XML files into hierarchical STAC catalogs, showcasing poly-hierarchy, contextual breadcrumbs, and data safety features of the Multi-Tenant Catalogs extension. Thanks to support from CloudFerro!
- **01/11/2026: Hierarchical Catalog Support.** Sub-catalogs are now fully supported! Catalogs can now contain other catalogs for unlimited nesting levels. This enables complex organizational hierarchies with multi-parent support for both catalogs and collections.
- **01/09/2026: Custom Index Mappings.** You can now customize Elasticsearch/OpenSearch index mappings directly via environment variables without changing source code. Use `STAC_FASTAPI_ES_CUSTOM_MAPPINGS` to merge custom field definitions (e.g., for STAC extensions like SAR or Cube) or `STAC_FASTAPI_ES_MAPPINGS_FILE` to load mappings from a JSON file. See [Custom Index Mappings](#custom-index-mappings) for details.
- **12/09/2025: Multi-Tenant Catalogs.** The [`STAC API - Multi-Tenant Catalogs Endpoint Extension`](https://github.com/stac-api-extensions/multi-tenant-catalogs) is now in main! This enables a registry of catalogs and supports **poly-hierarchy** (collections belonging to multiple catalogs simultaneously). Enable it via `ENABLE_CATALOGS_ROUTE`. _Coming next: Support for nested sub-catalogs._
- **11/07/2025:** 🌍 The SFEOS STAC Viewer is now available at: https://healy-hyperspatial.github.io/sfeos-web. Use this site to examine your data and test your STAC API!
- **10/24/2025:** Added `previous_token` pagination using Redis for efficient navigation. This feature allows users to navigate backwards through large result sets by storing pagination state in Redis. To use this feature, ensure Redis is configured (see [Redis for navigation](#redis-for-navigation)) and set `REDIS_ENABLE=true` in your environment.
- **10/23/2025:** The `EXCLUDED_FROM_QUERYABLES` environment variable was added to exclude fields from the `queryables` endpoint. See [docs](#excluding-fields-from-queryables).
- **10/15/2025:** 🚀 SFEOS Tools v0.1.0 Released! - The new `sfeos-tools` CLI is now available on [PyPI](https://pypi.org/project/sfeos-tools/)
- **10/15/2025:** Added `reindex` command to **[SFEOS-tools](https://github.com/Healy-Hyperspatial/sfeos-tools)** for zero-downtime index updates when changing mappings or settings. The new `reindex` command makes it easy to apply mapping changes, update index settings, or migrate to new index structures without any service interruption, ensuring high availability of your STAC API during maintenance operations.

<details style="border: 1px solid #eaecef; border-radius: 6px; padding: 10px; margin-bottom: 16px; background-color: #f9f9f9;">
<summary style="cursor: pointer; font-weight: bold; margin: -10px -10px 0; padding: 10px; background-color: #f0f0f0; border-bottom: 1px solid #eaecef; border-top-left-radius: 6px; border-top-right-radius: 6px;">View Older News (Click to Expand)</summary>

-------------
- **10/15/2025:** 🚀 SFEOS Tools v0.1.0 Released! - The new `sfeos-tools` CLI is now available on [PyPI](https://pypi.org/project/sfeos-tools/)
- **10/15/2025:** Added `reindex` command to **[SFEOS-tools](https://github.com/Healy-Hyperspatial/sfeos-tools)** for zero-downtime index updates when changing mappings or settings. The new `reindex` command makes it easy to apply mapping changes, update index settings, or migrate to new index structures without any service interruption, ensuring high availability of your STAC API during maintenance operations.
- **10/12/2025:** Collections search **bbox** functionality added! The collections search extension now supports bbox queries. Collections will need to be updated via the API or with the new **[SFEOS-tools](https://github.com/Healy-Hyperspatial/sfeos-tools)** CLI package to support geospatial discoverability. 🙏 Thanks again to **CloudFerro** for their sponsorship of this work!
- **10/04/2025:** The **[CloudFerro](https://cloudferro.com/)** logo has been added to the sponsors and supporters list above. Their sponsorship of the ongoing collections search extension work has been invaluable. This is in addition to the many other important changes and updates their developers have added to the project.
- **09/25/2025:** v6.5.0 adds a new GET/POST /collections-search endpoint (disabled by default via ENABLE_COLLECTIONS_SEARCH_ROUTE) to avoid conflicts with the Transactions Extension, and enhances collections search with structured filtering (CQL2 JSON/text), query, and datetime filtering. These changes make collection discovery more powerful and configurable while preserving compatibility with transaction-enabled deployments.
Expand Down Expand Up @@ -106,6 +107,7 @@ This project is built on the following technologies: STAC, stac-fastapi, FastAPI
- [Using Pre-built Docker Images](#using-pre-built-docker-images)
- [Using Docker Compose](#using-docker-compose)
- [Configuration Reference](#configuration-reference)
- [STAC Validation](#stac-validation)
- [Free-Text Search (`q` parameter)](#free-text-search-q-parameter)
- [Queryables Endpoint](#queryables-endpoint)
- [Root Queryables Configuration](#root-queryables-configuration)
Expand Down Expand Up @@ -741,6 +743,8 @@ You can customize additional settings in your `.env` file:
| `ENABLE_COLLECTIONS_SEARCH_ROUTE` | Enable the custom `/collections-search` endpoint (both GET and POST methods). When disabled, the custom endpoint will not be available, but collection search extensions will still be available on the core `/collections` endpoint if `ENABLE_COLLECTIONS_SEARCH` is true. | `false` | Optional |
| `ENABLE_TRANSACTIONS_EXTENSIONS` | Enables or disables the Transactions and Bulk Transactions API extensions. This is useful for deployments where mutating the catalog via the API should be prevented. If set to `true`, the POST `/collections` route for search will be unavailable in the API. | `true` | Optional |
| `ENABLE_CATALOGS_ROUTE` | Enable the **/catalogs** endpoint for hierarchical catalog browsing and navigation. **Note:** Requires the catalogs extension to be installed via `stac-fastapi-elasticsearch[catalogs]`, `stac-fastapi-opensearch[catalogs]`, or `stac-fastapi-core[catalogs]`. See [Catalogs Route](#catalogs-route) for installation instructions. | `false` | Optional |
| `ENABLE_FAST_VALIDATOR` | Enables the high-performance fast Python validator microservice to validate STAC items and collections on ingestion. Highly recommended for bulk insertions as it validates massive batches concurrently without blocking the API. Use with `FAST_VALIDATOR_URL`. | `false` | Optional |
| `FAST_VALIDATOR_URL` | The full endpoint URL of the fast Python STAC validator service. Used when `ENABLE_FAST_VALIDATOR` is true. | `http://stac-validator:8000/validate` | Optional |
| `STAC_INDEX_ASSETS` | Controls if Assets are indexed when added to Elasticsearch/Opensearch. This allows asset fields to be included in search queries. | `false` | Optional |

### 5. Limits & Performance
Expand Down Expand Up @@ -794,6 +798,63 @@ You can customize additional settings in your `.env` file:
> [!NOTE]
> The variables `ES_HOST`, `ES_PORT`, `ES_USE_SSL`, `ES_VERIFY_CERTS` and `ES_TIMEOUT` apply to both Elasticsearch and OpenSearch backends, so there is no need to rename the key names to `OS_` even if you're using OpenSearch.

## STAC Validation

STAC FastAPI provides a flexible, two-tier validation architecture for STAC items and collections on ingestion: native Pydantic validation, which is always on, and an optional fast validator microservice for full JSON Schema validation. This ensures data quality and compliance with the STAC specification while allowing you to balance strict schema enforcement with high-throughput ingestion performance.

### 1. Native Pydantic Validation (Always Enabled)

All STAC items and collections are validated using **Pydantic** (via `stac-pydantic`) at the API routing layer. This validation:

- Enforces required STAC fields and correct data types.
- Validates spatial and temporal properties.
- Provides extremely fast, built-in validation without external dependencies.

This validation is always enabled and happens automatically before data reaches the database or the Redis queue.

### 2. High-Speed Fast Python Validator (Recommended for Production)

For deployments that require strict STAC schema validation (including STAC Extensions like SAR, EO, and Point Cloud) but also need to process massive bulk insertions, SFEOS provides integration with the **fast Python STAC Validator** microservice.

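The fast validator natively accepts arrays of STAC items (a `FeatureCollection`) and validates them concurrently. Because a whole batch is sent in a single request and validated outside the API process, bulk ingestion avoids per-item network round-trips and does not block the FastAPI event loop on CPU-bound schema checks.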

#### Enabling the Fast Validator

To use the Fast Validator, you must run the validator microservice alongside your API. The Docker Compose configuration includes this by default:

```bash
# Start the stack with the fast validator sidecar container included
docker compose up
```

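Then enable it in the API's environment. The bundled `compose.yml` already sets both variables for the API containers; the `stac-validator` hostname below resolves only inside the Compose network, so use a host-reachable address if the API runs elsewhere: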

```bash
export ENABLE_FAST_VALIDATOR=true
export FAST_VALIDATOR_URL=http://stac-validator:8000/validate
```

#### Batch Error Responses

When the fast validator is enabled, a bulk insertion (such as POSTing a `FeatureCollection`) that contains invalid items is rejected with a `400 Bad Request`. The response body maps each failing item's `id` to its schema validation error:

```json
{
"detail": {
"message": "Bulk insertion rejected. 2 items failed validation.",
"errors": {
"landsat-scene-1": "Fast Validator Rejected STAC: 'properties.datetime' is required (at /properties)",
"landsat-scene-2": "Fast Validator Rejected STAC: additional properties 'eo:bands' not allowed (at /properties)"
}
}
}
```

#### Performance Considerations

- **Pydantic validation**: Very fast and always enabled.
- **Fast validator** (`ENABLE_FAST_VALIDATOR`): Adds minimal overhead and validates batches concurrently. **Highly recommended** for production deployments, especially when `ENABLE_REDIS_QUEUE=true`, so that invalid data is rejected before it reaches the queue.

## Free-Text Search (`q` parameter)

The free-text search feature allows users to discover items and collections using keywords or phrases. By default, the search targets core fields: `id`, `collection`, `properties.title`, `properties.description`, and `properties.keywords`.
Expand Down
23 changes: 19 additions & 4 deletions compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,8 @@ services:
- DATABASE_REFRESH=true
- ENABLE_COLLECTIONS_SEARCH_ROUTE=true
- ENABLE_CATALOGS_ROUTE=true
- ENABLE_FAST_VALIDATOR=true
- FAST_VALIDATOR_URL=http://stac-validator:8000/validate
- REDIS_ENABLE=true
- REDIS_HOST=redis
- REDIS_PORT=6379
Expand All @@ -34,8 +36,10 @@ services:
- ./scripts:/app/scripts
- ./esdata:/usr/share/elasticsearch/data
depends_on:
- elasticsearch
- redis
elasticsearch:
condition: service_started
redis:
condition: service_started
command:
bash -c "./scripts/wait-for-it-es.sh es-container:9200 && python -m stac_fastapi.elasticsearch.app"

Expand Down Expand Up @@ -65,6 +69,8 @@ services:
- STAC_FASTAPI_RATE_LIMIT=200/minute
- ENABLE_COLLECTIONS_SEARCH_ROUTE=true
- ENABLE_CATALOGS_ROUTE=true
- ENABLE_FAST_VALIDATOR=true
- FAST_VALIDATOR_URL=http://stac-validator:8000/validate
- REDIS_ENABLE=true
- REDIS_HOST=redis
- REDIS_PORT=6379
Expand All @@ -75,8 +81,10 @@ services:
- ./scripts:/app/scripts
- ./osdata:/usr/share/opensearch/data
depends_on:
- opensearch
- redis
# The OpenSearch app must wait on its own backend (it connects to
# os-container:9202), not on the Elasticsearch service.
opensearch:
  condition: service_started
redis:
  condition: service_started
command:
bash -c "./scripts/wait-for-it-es.sh os-container:9202 && python -m stac_fastapi.opensearch.app"

Expand Down Expand Up @@ -108,6 +116,13 @@ services:
ports:
- "9202:9202"

stac-validator:
container_name: stac-validator
image: ghcr.io/staclabs/stac-validator:latest
restart: always
ports:
- "8081:8000"

redis:
image: redis:7-alpine
hostname: redis
Expand Down
Loading