Skip to content

Commit b6edb4b

Browse files
committed
Add Kubernetes/Kind deployment support for info() function
Add Helm values and deployment scripts for running the demo on Kubernetes with info() function support. - values-info-function.yaml: Prometheus v3.11.1 with info() enabled, minimal resource attribute promotion, collector transform metric_statements for PostgreSQL service.name/service.instance.id and duplicate target_info prevention - values-kind.yaml: Kind-specific overrides (NodePort, memory limits) - kind-config.yaml: Kind cluster with port mapping - deploy-kind.sh: Creates Kind cluster and deploys the demo - deploy.sh: Deploys to an existing k8s cluster with custom Grafana dashboards as ConfigMaps Signed-off-by: Arve Knudsen <arve.knudsen@gmail.com>
1 parent 347cc36 commit b6edb4b

5 files changed

Lines changed: 374 additions & 0 deletions

File tree

kubernetes/deploy-kind.sh

Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,66 @@
1+
#!/bin/sh
# Copyright The OpenTelemetry Authors
# SPDX-License-Identifier: Apache-2.0

# Deploy OpenTelemetry Demo to a local Kind cluster
#
# This script creates a Kind cluster and delegates the actual deployment
# to deploy.sh with Kind-specific values.
#
# Prerequisites:
# - kind: https://kind.sigs.k8s.io/docs/user/quick-start/#installation
# - kubectl
# - helm
#
# Environment variables:
#   CLUSTER_NAME - Kind cluster name (default: otel-demo)
#   NAMESPACE    - Kubernetes namespace (default: otel-demo)
#   RELEASE_NAME - Helm release name (default: opentelemetry-demo)

set -e

CLUSTER_NAME="${CLUSTER_NAME:-otel-demo}"
# Exported so deploy.sh (invoked below) deploys into the same namespace/release.
export NAMESPACE="${NAMESPACE:-otel-demo}"
export RELEASE_NAME="${RELEASE_NAME:-opentelemetry-demo}"
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"

# Check prerequisites
command -v kind >/dev/null 2>&1 || { echo "Error: kind is not installed. See https://kind.sigs.k8s.io/docs/user/quick-start/#installation"; exit 1; }
command -v kubectl >/dev/null 2>&1 || { echo "Error: kubectl is not installed."; exit 1; }
command -v helm >/dev/null 2>&1 || { echo "Error: helm is not installed."; exit 1; }

echo "=== OpenTelemetry Demo on Kind ==="
echo "Cluster: $CLUSTER_NAME"
echo ""

# Create Kind cluster if it doesn't exist.
# -F -x: match the cluster name as a fixed string against the whole line, so a
# name containing regex metacharacters cannot cause a false match (the previous
# `grep -q "^${CLUSTER_NAME}$"` interpreted the name as a regular expression).
if ! kind get clusters 2>/dev/null | grep -Fxq -- "$CLUSTER_NAME"; then
  echo "Creating Kind cluster '$CLUSTER_NAME'..."
  kind create cluster --config "$SCRIPT_DIR/kind-config.yaml" --name "$CLUSTER_NAME"
  echo ""
else
  echo "Kind cluster '$CLUSTER_NAME' already exists."
  kubectl config use-context "kind-${CLUSTER_NAME}"
  echo ""
fi

# Deploy using the shared script with Kind-specific values
"$SCRIPT_DIR/deploy.sh" \
  -f "$SCRIPT_DIR/values-kind.yaml" \
  --timeout 10m

# Wait for pods (best effort: a timeout here is informational, not fatal)
echo ""
echo "Waiting for pods to be ready..."
kubectl wait --for=condition=ready pod -l app.kubernetes.io/instance="$RELEASE_NAME" \
  --namespace "$NAMESPACE" --timeout=5m 2>/dev/null || true

echo ""
echo "Access the demo:"
echo "  Frontend: http://localhost:8080 (via Kind NodePort)"
echo ""
echo "For Grafana, Prometheus, Jaeger use port-forward:"
echo "  kubectl port-forward svc/grafana 3000:80 -n $NAMESPACE"
echo "  kubectl port-forward svc/prometheus 9090:9090 -n $NAMESPACE"
echo "  kubectl port-forward svc/jaeger 16686:16686 -n $NAMESPACE"
echo ""
echo "View pods:"
echo "  kubectl get pods -n $NAMESPACE"
echo ""
echo "Delete cluster when done:"
echo "  kind delete cluster --name $CLUSTER_NAME"

kubernetes/deploy.sh

Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,88 @@
1+
#!/bin/sh
# Copyright The OpenTelemetry Authors
# SPDX-License-Identifier: Apache-2.0

# Deploy OpenTelemetry Demo to a Kubernetes cluster
#
# This script:
# 1. Installs/upgrades the Helm chart with info() function values
# 2. Deploys custom Grafana dashboards that use the info() function
#
# Usage:
#   kubernetes/deploy.sh                                  # Basic deploy
#   kubernetes/deploy.sh -f kubernetes/values-kind.yaml   # Extra values
#
# Extra arguments are passed directly to helm upgrade.
#
# Environment variables:
#   NAMESPACE    - Kubernetes namespace (default: otel-demo)
#   RELEASE_NAME - Helm release name (default: opentelemetry-demo)

set -e

NAMESPACE="${NAMESPACE:-otel-demo}"
RELEASE_NAME="${RELEASE_NAME:-opentelemetry-demo}"
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
REPO_ROOT="$(dirname "$SCRIPT_DIR")"

# deploy_dashboard NAME JSON_PATH
# Creates/updates a ConfigMap named NAME from the dashboard JSON at JSON_PATH
# (stored under the key NAME.json) and labels it grafana_dashboard=1 so the
# Grafana sidecar picks it up.
deploy_dashboard() {
  kubectl create configmap "$1" \
    --from-file="$1.json=$2" \
    --namespace "$NAMESPACE" \
    --dry-run=client -o yaml | kubectl apply -f -
  kubectl label configmap "$1" grafana_dashboard=1 --namespace "$NAMESPACE" --overwrite
}

echo "=== Deploying OpenTelemetry Demo ==="
echo "Namespace: $NAMESPACE"
echo "Release: $RELEASE_NAME"
echo ""

# Add Helm repo if not already added
echo "Adding Helm repository..."
helm repo add open-telemetry https://open-telemetry.github.io/opentelemetry-helm-charts 2>/dev/null || true
helm repo update

# Create namespace if it doesn't exist (server-side idempotent apply)
kubectl create namespace "$NAMESPACE" --dry-run=client -o yaml | kubectl apply -f -

# Install/upgrade the Helm chart; extra CLI args are forwarded verbatim.
echo ""
echo "Installing/upgrading Helm chart..."
helm upgrade --install "$RELEASE_NAME" open-telemetry/opentelemetry-demo \
  --namespace "$NAMESPACE" \
  -f "$SCRIPT_DIR/values-info-function.yaml" \
  "$@" \
  --wait

# Deploy custom dashboards as ConfigMaps.
# Delete conflicting dashboards from Helm chart that don't use info() function.
echo ""
echo "Deploying custom Grafana dashboards..."
echo " - Removing default Helm chart dashboards..."
for default_cm in grafana-dashboard-apm-dashboard grafana-dashboard-postgresql-dashboard; do
  kubectl delete configmap "$default_cm" --namespace "$NAMESPACE" 2>/dev/null || true
done

echo " - APM Dashboard"
deploy_dashboard apm-dashboard \
  "$REPO_ROOT/src/grafana/provisioning/dashboards/demo/apm-dashboard.json"

echo " - PostgreSQL Dashboard"
deploy_dashboard postgresql-dashboard \
  "$REPO_ROOT/src/grafana/provisioning/dashboards/demo/postgresql-dashboard.json"

# Restart Grafana to pick up the new dashboards.
# The deployment name depends on chart settings, so try both known names.
echo ""
echo "Restarting Grafana to load dashboards..."
if ! kubectl rollout restart deployment/grafana --namespace "$NAMESPACE" 2>/dev/null; then
  if ! kubectl rollout restart deployment/"$RELEASE_NAME"-grafana --namespace "$NAMESPACE" 2>/dev/null; then
    echo " (Could not restart Grafana - dashboards will load on next restart)"
  fi
fi

echo ""
echo "=== Deployment complete ==="
echo ""
echo "Access the demo:"
echo "  kubectl port-forward svc/frontend-proxy 8080:8080 -n $NAMESPACE"
echo "  Open http://localhost:8080"
echo ""
echo "Access Grafana:"
echo "  kubectl port-forward svc/grafana 3000:80 -n $NAMESPACE"
echo "  Open http://localhost:3000 (admin/admin)"

kubernetes/kind-config.yaml

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
# Copyright The OpenTelemetry Authors
# SPDX-License-Identifier: Apache-2.0

# Kind cluster configuration for OpenTelemetry Demo
# Creates a cluster with port mapping for the frontend proxy
#
# Usage:
#   kind create cluster --config kubernetes/kind-config.yaml --name otel-demo
#
kind: Cluster
apiVersion: kind.x-k8s.io/v1alpha4
nodes:
  - role: control-plane
    extraPortMappings:
      # Frontend proxy (main entry point) - exposed via NodePort
      # containerPort 30080 must match the frontend-proxy nodePort set in
      # values-kind.yaml; host traffic to localhost:8080 is forwarded to it.
      - containerPort: 30080
        hostPort: 8080
        protocol: TCP
Lines changed: 177 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,177 @@
1+
# Copyright The OpenTelemetry Authors
# SPDX-License-Identifier: Apache-2.0

# Helm values override for testing the experimental Prometheus info() function
# This configuration uses Prometheus v3.10.0+ with the info() function fix
# and removes workaround resource attribute promotions.
#
# Usage:
#   helm repo add open-telemetry https://open-telemetry.github.io/opentelemetry-helm-charts
#   helm repo update
#   helm install opentelemetry-demo open-telemetry/opentelemetry-demo \
#     --namespace otel-demo --create-namespace \
#     -f kubernetes/values-info-function.yaml
#
# Note: This requires Prometheus v3.10.0+ which includes the info() function fix.
# See https://github.com/prometheus/prometheus/pull/17817

# OTel Collector configuration overrides
opentelemetry-collector:
  config:
    processors:
      # Don't override host.name from the original service with the collector's hostname
      resourcedetection:
        detectors: [env, system]
        override: false
      # Set host.name from k8s.pod.name since the resourcedetection processor
      # would otherwise set it to the collector's hostname.
      # Also preserve the existing service.instance.id setting from the chart.
      resource:
        attributes:
          - key: service.instance.id
            from_attribute: k8s.pod.uid
            action: insert
          - key: host.name
            from_attribute: k8s.pod.name
            action: upsert
      # Add metric_statements to the transform processor for info() support:
      # 1. Drop attributes that k8sattributes adds inconsistently across resource
      #    scopes, which causes duplicate target_info entries with the same
      #    identifying labels but different non-identifying attributes
      # 2. Set service.name for PostgreSQL receiver metrics (which lack it)
      # 3. Construct unique service.instance.id per PostgreSQL resource scope
      #    to prevent duplicate target_info entries
      # 4. Set default service.instance.id for any service missing it
      transform:
        metric_statements:
          - context: resource
            statements:
              - delete_key(attributes, "container.id")
              - delete_key(attributes, "process.command")
              - delete_key(attributes, "process.pid")
              - delete_key(attributes, "process.runtime.description")
              - delete_key(attributes, "process.runtime.name")
              - delete_key(attributes, "process.runtime.version")
              - delete_key(attributes, "telemetry.sdk.language")
              - delete_key(attributes, "telemetry.sdk.name")
              - delete_key(attributes, "telemetry.sdk.version")
              - >-
                set(attributes["service.name"], "postgresql")
                where attributes["service.name"] == nil
                and attributes["postgresql.database.name"] != nil
              - >-
                set(attributes["service.instance.id"],
                Concat([attributes["service.name"], "/",
                attributes["postgresql.database.name"], "/",
                attributes["postgresql.table.name"], "/",
                attributes["postgresql.index.name"]], ""))
                where attributes["service.name"] == "postgresql"
                and attributes["postgresql.index.name"] != nil
              - >-
                set(attributes["service.instance.id"],
                Concat([attributes["service.name"], "/",
                attributes["postgresql.database.name"], "/",
                attributes["postgresql.table.name"]], ""))
                where attributes["service.name"] == "postgresql"
                and attributes["postgresql.table.name"] != nil
                and attributes["postgresql.index.name"] == nil
              - >-
                set(attributes["service.instance.id"],
                Concat([attributes["service.name"], "/",
                attributes["postgresql.database.name"]], ""))
                where attributes["service.name"] == "postgresql"
                and attributes["postgresql.database.name"] != nil
                and attributes["postgresql.table.name"] == nil
              - >-
                set(attributes["service.instance.id"],
                attributes["service.name"])
                where attributes["service.instance.id"] == nil
                and attributes["service.name"] != nil
    service:
      pipelines:
        metrics:
          processors: [k8sattributes, memory_limiter, resourcedetection, resource, transform, batch]

prometheus:
  server:
    # Use Prometheus v3.10.0+ which includes the info() function bug fix
    # (https://github.com/prometheus/prometheus/pull/17817)
    image:
      repository: quay.io/prometheus/prometheus
      tag: v3.11.1

    # Enable experimental PromQL functions including info()
    extraFlags:
      - "enable-feature=exemplar-storage"
      - "enable-feature=promql-experimental-functions"
      - "web.enable-otlp-receiver"

    # OTLP receiver configuration
    # Resource attributes are NOT promoted to labels on metrics.
    # Instead, use the experimental info() PromQL function to enrich metrics with
    # labels from target_info at query time. This reduces label cardinality while
    # still allowing access to resource attributes in queries.
    otlp:
      keep_identifying_resource_attributes: true
      promote_resource_attributes:
        # Kafka resource attributes produced by the OTel Collector Kafka receiver
        # https://github.com/open-telemetry/opentelemetry-collector-contrib/tree/main/receiver/kafkametricsreceiver
        - kafka.cluster.alias

        # See https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/v0.142.0/connector/spanmetricsconnector/README.md#known-limitation-the-single-writer-principle
        - collector.instance.id

        # host.name is needed for system/host metrics from the hostmetrics receiver,
        # which cannot use info() because they lack service.name to match target_info.
        - host.name

    # Allow out-of-order ingestion for metrics that may arrive late
    tsdb:
      out_of_order_time_window: 30m

# Grafana configuration with custom dashboards
grafana:
  # Enable the sidecar to load dashboards from ConfigMaps
  sidecar:
    dashboards:
      enabled: true
      # Label that ConfigMaps must have to be picked up
      label: grafana_dashboard
      # Search in all namespaces
      searchNamespace: ALL

  # Dashboard providers configuration
  dashboardProviders:
    dashboardproviders.yaml:
      apiVersion: 1
      providers:
        - name: 'custom'
          orgId: 1
          folder: 'Custom'
          type: file
          disableDeletion: false
          editable: true
          options:
            path: /var/lib/grafana/dashboards/custom

# Note: The APM and PostgreSQL dashboards with info() function queries
# need to be deployed as ConfigMaps. See the dashboards in:
# - src/grafana/provisioning/dashboards/demo/apm-dashboard.json
# - src/grafana/provisioning/dashboards/demo/postgresql-dashboard.json
#
# To deploy them, create ConfigMaps like:
#
#   kubectl create configmap apm-dashboard \
#     --from-file=apm-dashboard.json=src/grafana/provisioning/dashboards/demo/apm-dashboard.json \
#     -n otel-demo
#   kubectl label configmap apm-dashboard grafana_dashboard=1 -n otel-demo
#
#   kubectl create configmap postgresql-dashboard \
#     --from-file=postgresql-dashboard.json=src/grafana/provisioning/dashboards/demo/postgresql-dashboard.json \
#     -n otel-demo
#   kubectl label configmap postgresql-dashboard grafana_dashboard=1 -n otel-demo

kubernetes/values-kind.yaml

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
# Copyright The OpenTelemetry Authors
# SPDX-License-Identifier: Apache-2.0

# Additional Helm values for Kind deployment
# Use with: -f values-info-function.yaml -f values-kind.yaml
#
# This configures the frontend-proxy service as NodePort for Kind access
# and increases memory limits for services that need more than the defaults

components:
  frontend-proxy:
    service:
      type: NodePort
      # Must match the containerPort mapped to hostPort 8080 in kind-config.yaml
      nodePort: 30080
  # Increase memory limits for services that OOMKill with defaults
  product-catalog:
    resources:
      limits:
        memory: 100Mi
  flagd:
    resources:
      limits:
        memory: 500Mi
    # Disable flagd-ui sidecar - it OOMKills even with 1Gi limit
    sidecarContainers: []

0 commit comments

Comments
 (0)