-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathPROMETHEUS.YAML
More file actions
214 lines (187 loc) · 5.58 KB
/
PROMETHEUS.YAML
File metadata and controls
214 lines (187 loc) · 5.58 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
cat /etc/prometheus/prometheus.yml
root@master1:~# cat /etc/prometheus/prometheus.yml
# my global config
global:
  scrape_interval: 15s # Set the scrape interval to every 15 seconds. Default is every 1 minute.
  evaluation_interval: 15s # Evaluate rules every 15 seconds. The default is every 1 minute.
  # scrape_timeout is set to the global default (10s).

# Alertmanager configuration
alerting:
  alertmanagers:
    - static_configs:
        - targets: ["localhost:9093"]

# Load rules once and periodically evaluate them according to the global 'evaluation_interval'.
rule_files:
  - "alerts.yml"
  # - "second_rules.yml"

# A scrape configuration containing exactly one endpoint to scrape:
# Here it's Prometheus itself.
scrape_configs:
  # The job name is added as a label `job=<job_name>` to any timeseries scraped from this config.
  - job_name: "prometheus"
    # metrics_path defaults to '/metrics'
    # scheme defaults to 'http'.
    static_configs:
      - targets: ["127.0.0.1:9090"]
        # The label name is added as a label `label_name=<label_value>` to any timeseries scraped from this config.
        labels:
          app: "prometheus"

  # Node Exporter
  # - job_name: 'node_exporter'
  #   static_configs:
  #     - targets: ["localhost:9100"]
  #       labels:
  #         app: "node_exporter"
  - job_name: "node_exporter"
    static_configs:
      - targets:
          - "192.168.31.67:9100" # master1
          - "192.168.122.151:9100" # master2
          - "192.168.122.198:9100" # master3
          - "192.168.122.155:9100" # worker2
          - "192.168.122.232:9100" # worker1
        labels:
          app: "node_exporter"

  - job_name: "alertmanager"
    static_configs:
      - targets: ["localhost:9093"]

  - job_name: "jenkins"
    metrics_path: /prometheus
    static_configs:
      - targets: ["localhost:8080"]

  - job_name: "mlflow-metrics"
    static_configs:
      - targets: ["localhost:9105"]

  ###########################################################
  - job_name: "kube-state-metrics"
    metrics_path: /metrics
    static_configs:
      - targets:
          - "192.168.31.67:30080"

  ###########################################
  - job_name: "kubelet-worker2"
    scheme: https
    metrics_path: /metrics
    # Kubelet serves metrics over its self-signed serving cert; skip verification.
    # NOTE(review): prefer ca_file with the cluster CA over insecure_skip_verify — confirm feasibility.
    tls_config:
      insecure_skip_verify: true
    static_configs:
      - targets:
          - "192.168.31.67:10250"

  # - job_name: 'mlflow-server'
  #   metrics_path: /metrics
  #   static_configs:
  #     - targets: ['localhost:5000']
root@master1:~#
+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
root@master1:~# cat /etc/alertmanager/alertmanager.yml
global:
  smtp_smarthost: "smtp.gmail.com:587"
  smtp_from: "aanjireddy2024@gmail.com"
  smtp_auth_username: "aanjireddy2024@gmail.com"
  # NOTE(review): plaintext SMTP credential committed in config — rotate this app
  # password and load it via smtp_auth_password_file or a secret store instead.
  smtp_auth_password: "eaorjy"
  smtp_require_tls: true

# Top-level route: everything funnels to the single email receiver.
route:
  receiver: "email-alert"
  group_wait: 10s
  group_interval: 30s
  repeat_interval: 1h

receivers:
  - name: "email-alert"
    email_configs:
      - to: "aanjireddy2024@gmail.com"
        send_resolved: true
root@master1:~#
root@master1:~#
root@master1:~#
root@master1:~# cat /etc/kubernetes/manifests/etcd.yaml
# kubeadm static-pod manifest for the etcd member on master1.
apiVersion: v1
kind: Pod
metadata:
  annotations:
    kubeadm.kubernetes.io/etcd.advertise-client-urls: https://192.168.31.67:2379
  creationTimestamp: null
  labels:
    component: etcd
    tier: control-plane
  name: etcd
  namespace: kube-system
spec:
  containers:
    - command:
        - etcd
        - --advertise-client-urls=https://192.168.31.67:2379
        - --cert-file=/etc/kubernetes/pki/etcd/server.crt
        - --client-cert-auth=true
        - --data-dir=/var/lib/etcd
        - --experimental-initial-corrupt-check=true
        - --experimental-watch-progress-notify-interval=5s
        - --initial-advertise-peer-urls=https://192.168.31.67:2380
        - --initial-cluster=master1=https://192.168.31.67:2380
        - --key-file=/etc/kubernetes/pki/etcd/server.key
        - --listen-client-urls=https://127.0.0.1:2379,https://192.168.31.67:2379
        # Plain-HTTP metrics endpoint on localhost only (used by the probes below).
        - --listen-metrics-urls=http://127.0.0.1:2381
        - --listen-peer-urls=https://192.168.31.67:2380
        - --name=master1
        - --peer-cert-file=/etc/kubernetes/pki/etcd/peer.crt
        - --peer-client-cert-auth=true
        - --peer-key-file=/etc/kubernetes/pki/etcd/peer.key
        - --peer-trusted-ca-file=/etc/kubernetes/pki/etcd/ca.crt
        - --snapshot-count=10000
        - --trusted-ca-file=/etc/kubernetes/pki/etcd/ca.crt
      # NOTE(review): confirm this etcd tag exists in registry.k8s.io for your
      # Kubernetes version — it must match what kubeadm generated.
      image: registry.k8s.io/etcd:3.5.24-0
      imagePullPolicy: IfNotPresent
      livenessProbe:
        failureThreshold: 8
        httpGet:
          host: 127.0.0.1
          path: /livez
          port: 2381
          scheme: HTTP
        initialDelaySeconds: 10
        periodSeconds: 10
        timeoutSeconds: 15
      name: etcd
      readinessProbe:
        failureThreshold: 3
        httpGet:
          host: 127.0.0.1
          path: /readyz
          port: 2381
          scheme: HTTP
        periodSeconds: 1
        timeoutSeconds: 15
      resources:
        requests:
          cpu: 100m
          memory: 100Mi
      startupProbe:
        failureThreshold: 24
        httpGet:
          host: 127.0.0.1
          path: /readyz
          port: 2381
          scheme: HTTP
        initialDelaySeconds: 10
        periodSeconds: 10
        timeoutSeconds: 15
      volumeMounts:
        - mountPath: /var/lib/etcd
          name: etcd-data
        - mountPath: /etc/kubernetes/pki/etcd
          name: etcd-certs
  hostNetwork: true
  priority: 2000001000
  priorityClassName: system-node-critical
  securityContext:
    seccompProfile:
      type: RuntimeDefault
  volumes:
    - hostPath:
        path: /etc/kubernetes/pki/etcd
        type: DirectoryOrCreate
      name: etcd-certs
    - hostPath:
        path: /var/lib/etcd
        type: DirectoryOrCreate
      name: etcd-data
Backup Prometheus config