-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathPROMETHEUS.YAML
More file actions
214 lines (187 loc) · 5.58 KB
/
PROMETHEUS.YAML
File metadata and controls
214 lines (187 loc) · 5.58 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
cat /etc/prometheus/prometheus.yml
root@master1:~# cat /etc/prometheus/prometheus.yml
# my global config
global:
  scrape_interval: 15s # Set the scrape interval to every 15 seconds. Default is every 1 minute.
  evaluation_interval: 15s # Evaluate rules every 15 seconds. The default is every 1 minute.
  # scrape_timeout is set to the global default (10s).

# Alertmanager configuration
alerting:
  alertmanagers:
    - static_configs:
        - targets: ["localhost:9093"]

# Load rules once and periodically evaluate them according to the global 'evaluation_interval'.
rule_files:
  - "alerts.yml"
  # - "second_rules.yml"

# A scrape configuration containing exactly one endpoint to scrape:
# Here it's Prometheus itself.
scrape_configs:
  # The job name is added as a label `job=<job_name>` to any timeseries scraped from this config.
  - job_name: "prometheus"
    # metrics_path defaults to '/metrics'
    # scheme defaults to 'http'.
    static_configs:
      - targets: ["127.0.0.1:9090"]
        # The label name is added as a label `label_name=<label_value>` to any timeseries scraped from this config.
        labels:
          app: "prometheus"

  # Node Exporter
  # - job_name: 'node_exporter'
  #   static_configs:
  #     - targets: ["localhost:9100"]
  #       labels:
  #         app: "node_exporter"
  - job_name: "node_exporter"
    static_configs:
      - targets:
          - "192.168.31.67:9100" # master1
          - "192.168.122.151:9100" # master2
          - "192.168.122.198:9100" # master3
          - "192.168.122.155:9100" # worker2
          - "192.168.122.232:9100" # worker1
        labels:
          app: "node_exporter"

  - job_name: "alertmanager"
    static_configs:
      - targets: ["localhost:9093"]

  - job_name: "jenkins"
    metrics_path: /prometheus
    static_configs:
      - targets: ["localhost:8080"]

  - job_name: "mlflow-metrics"
    static_configs:
      - targets: ["localhost:9105"]

  ###########################################################
  - job_name: "kube-state-metrics"
    metrics_path: /metrics
    static_configs:
      - targets:
          - "192.168.31.67:30080"

  ###########################################
  - job_name: "kubelet-worker2"
    scheme: https
    metrics_path: /metrics
    # Kubelet serves metrics over its self-signed serving cert; skip verification.
    # NOTE(review): prefer ca_file with the cluster CA over insecure_skip_verify — confirm feasibility.
    tls_config:
      insecure_skip_verify: true
    static_configs:
      - targets:
          - "192.168.31.67:10250"

  # - job_name: 'mlflow-server'
  #   metrics_path: /metrics
  #   static_configs:
  #     - targets: ['localhost:5000']
root@master1:~#
+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
root@master1:~# cat /etc/alertmanager/alertmanager.yml
global:
  smtp_smarthost: "smtp.gmail.com:587"
  smtp_from: "aanjireddy2024@gmail.com"
  smtp_auth_username: "aanjireddy2024@gmail.com"
  # NOTE(review): plaintext SMTP credential committed in config — rotate this app
  # password and load it via smtp_auth_password_file or a secret store instead.
  smtp_auth_password: "eaorjy"
  smtp_require_tls: true

# Top-level route: everything funnels to the single email receiver.
route:
  receiver: "email-alert"
  group_wait: 10s
  group_interval: 30s
  repeat_interval: 1h

receivers:
  - name: "email-alert"
    email_configs:
      - to: "aanjireddy2024@gmail.com"
        send_resolved: true
root@master1:~#
root@master1:~#
root@master1:~#
root@master1:~# cat /etc/kubernetes/manifests/etcd.yaml
# kubeadm static-pod manifest for the etcd member on master1.
apiVersion: v1
kind: Pod
metadata:
  annotations:
    kubeadm.kubernetes.io/etcd.advertise-client-urls: https://192.168.31.67:2379
  creationTimestamp: null
  labels:
    component: etcd
    tier: control-plane
  name: etcd
  namespace: kube-system
spec:
  containers:
    - command:
        - etcd
        - --advertise-client-urls=https://192.168.31.67:2379
        - --cert-file=/etc/kubernetes/pki/etcd/server.crt
        - --client-cert-auth=true
        - --data-dir=/var/lib/etcd
        - --experimental-initial-corrupt-check=true
        - --experimental-watch-progress-notify-interval=5s
        - --initial-advertise-peer-urls=https://192.168.31.67:2380
        - --initial-cluster=master1=https://192.168.31.67:2380
        - --key-file=/etc/kubernetes/pki/etcd/server.key
        - --listen-client-urls=https://127.0.0.1:2379,https://192.168.31.67:2379
        # Plain-HTTP metrics endpoint on localhost only (used by the probes below).
        - --listen-metrics-urls=http://127.0.0.1:2381
        - --listen-peer-urls=https://192.168.31.67:2380
        - --name=master1
        - --peer-cert-file=/etc/kubernetes/pki/etcd/peer.crt
        - --peer-client-cert-auth=true
        - --peer-key-file=/etc/kubernetes/pki/etcd/peer.key
        - --peer-trusted-ca-file=/etc/kubernetes/pki/etcd/ca.crt
        - --snapshot-count=10000
        - --trusted-ca-file=/etc/kubernetes/pki/etcd/ca.crt
      # NOTE(review): confirm this etcd tag exists in registry.k8s.io for your
      # Kubernetes version — it must match what kubeadm generated.
      image: registry.k8s.io/etcd:3.5.24-0
      imagePullPolicy: IfNotPresent
      livenessProbe:
        failureThreshold: 8
        httpGet:
          host: 127.0.0.1
          path: /livez
          port: 2381
          scheme: HTTP
        initialDelaySeconds: 10
        periodSeconds: 10
        timeoutSeconds: 15
      name: etcd
      readinessProbe:
        failureThreshold: 3
        httpGet:
          host: 127.0.0.1
          path: /readyz
          port: 2381
          scheme: HTTP
        periodSeconds: 1
        timeoutSeconds: 15
      resources:
        requests:
          cpu: 100m
          memory: 100Mi
      startupProbe:
        failureThreshold: 24
        httpGet:
          host: 127.0.0.1
          path: /readyz
          port: 2381
          scheme: HTTP
        initialDelaySeconds: 10
        periodSeconds: 10
        timeoutSeconds: 15
      volumeMounts:
        - mountPath: /var/lib/etcd
          name: etcd-data
        - mountPath: /etc/kubernetes/pki/etcd
          name: etcd-certs
  hostNetwork: true
  priority: 2000001000
  priorityClassName: system-node-critical
  securityContext:
    seccompProfile:
      type: RuntimeDefault
  volumes:
    - hostPath:
        path: /etc/kubernetes/pki/etcd
        type: DirectoryOrCreate
      name: etcd-certs
    - hostPath:
        path: /var/lib/etcd
        type: DirectoryOrCreate
      name: etcd-data
Backup Prometheus config