在 Kubernetes 上部署 Prometheus
1. 创建 PV 和 PVC(下文所有资源都位于 kube-ops 命名空间,请先执行 kubectl create namespace kube-ops)
# Statically provisioned PersistentVolume backed by an NFS export.
# It offers 10Gi with ReadWriteOnce access under the name "prometheus".
apiVersion: v1
kind: PersistentVolume
metadata:
  name: prometheus
spec:
  # NFS server address and exported directory that back this volume.
  nfs:
    server: 10.0.0.10
    path: /nfsdata/
  capacity:
    storage: 10Gi
  accessModes:
    - ReadWriteOnce
  # Retain: the volume (and its data) is not deleted when the claim is released.
  persistentVolumeReclaimPolicy: Retain
---
# Claim for 10Gi of ReadWriteOnce storage in the kube-ops namespace.
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: prometheus
  namespace: kube-ops
spec:
  # An explicit empty class requests a PV that has no StorageClass. Without
  # it, a cluster that defines a default StorageClass would assign that class
  # to this claim, and the claim would never bind to a statically created,
  # class-less PersistentVolume.
  storageClassName: ""
  accessModes:
    - ReadWriteOnce
  resources:
    requests:
      storage: 10Gi
2. 配置 RBAC
# ServiceAccount named "prometheus" in the kube-ops namespace.
kind: ServiceAccount
apiVersion: v1
metadata:
  namespace: kube-ops
  name: prometheus
---
# Cluster-wide, read-only permissions grouped into three rules.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: prometheus
rules:
  # Core-group objects that may be read, listed and watched.
  - apiGroups: [""]
    resources: ["nodes", "services", "endpoints", "pods", "nodes/proxy"]
    verbs: ["get", "list", "watch"]
  # Core-group objects that may only be fetched individually (get).
  - apiGroups: [""]
    resources: ["configmaps", "nodes/metrics"]
    verbs: ["get"]
  # Non-resource URL path that may be fetched.
  - nonResourceURLs: ["/metrics"]
    verbs: ["get"]
---
# Binds the "prometheus" ClusterRole to the "prometheus" ServiceAccount
# in the kube-ops namespace.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: prometheus
roleRef:
  # rbac.authorization.k8s.io/v1 only accepts this API group for roleRef;
  # an empty string is rejected by API validation.
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: prometheus
subjects:
  - kind: ServiceAccount
    name: prometheus
    namespace: kube-ops
3. 创建 Service
# NodePort Service that selects pods labelled app=prometheus.
apiVersion: v1
kind: Service
metadata:
  name: prometheus
  namespace: kube-ops
  # Annotation values are quoted because annotations must be strings.
  annotations:
    prometheus.io/scrape: "true"
    prometheus.io/port: "9090"
spec:
  type: NodePort
  selector:
    app: prometheus
  ports:
    - name: http
      port: 9090
      # Refers to a container port by its name ("http") rather than by number.
      targetPort: http
      nodePort: 30002
4. 创建 ConfigMap
# ConfigMap carrying two files: rules.yml (alerting rules) and prometheus.yml
# (the main configuration, which loads /etc/prometheus/rules.yml).
# Both values are literal block scalars: every line inside them, including
# lines starting with '#', is part of the file content.
apiVersion: v1
kind: ConfigMap
metadata:
  name: prometheus-conf
  namespace: kube-ops
data:
  # Two alert rules: /rootfs filesystem usage above 80% and node memory
  # usage above 90%, each required to hold for 10s.
  rules.yml: |
    groups:
      - name: 系统硬件告警
        rules:
          - alert: NodeFilesystemUsage
            expr: (node_filesystem_size_bytes{mountpoint="/rootfs"} - node_filesystem_free_bytes{mountpoint="/rootfs"} ) / node_filesystem_size_bytes{mountpoint="/rootfs"} * 100 > 80
            for: 10s
            labels:
              filesystem: node
            annotations:
              summary: "{{ $labels.instance }}:磁盘使用量"
              description: "{{ $labels.instance }}: rootfs使用{{ $value }},大于总容量的80%"
          - alert: NodeMemoryUsage
            expr: (node_memory_MemTotal_bytes - (node_memory_MemFree_bytes + node_memory_Buffers_bytes + node_memory_Cached_bytes)) / node_memory_MemTotal_bytes * 100 > 90
            for: 10s
            labels:
              team: node
            annotations:
              summary: "{{ $labels.instance }}: node节点内存使用过高"
              description: "{{ $labels.instance }}: 内存使用大于90%,当前已用{{ $value }}%"
  # Scrape jobs: prometheus (static), node_exporter, kubernetes-kubelet,
  # kubernetes-cadvisor, kubernetes-api-services, kubernetes-service-endpoints.
  # NOTE(review): the kubernetes-service-endpoints job rewrites __address__
  # from the prometheus.io/port annotation twice; the second rule looks
  # redundant with the first - confirm before removing either.
  prometheus.yml: |
    global:
      scrape_interval: 15s
      evaluation_interval: 15s
    alerting:
      alertmanagers:
        - static_configs:
            - targets: ["alertmanager:9093"]
    rule_files:
      - "/etc/prometheus/rules.yml"
      # - "second.rules"
    scrape_configs:
      - job_name: prometheus
        static_configs:
          - targets: ['localhost:9090']
      - job_name: node_exporter
        kubernetes_sd_configs:
          - role: node
        relabel_configs:
          - source_labels: [__address__]
            regex: "(.*):10250"
            target_label: __address__
            replacement: "${1}:9100"
          - action: labelmap
            regex: "__meta_kubernetes_node_label_(.*)"
      - job_name: 'kubernetes-kubelet'
        scheme: https
        tls_config:
          ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
          insecure_skip_verify: true
        bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
        kubernetes_sd_configs:
          - role: node
        relabel_configs:
          - action: labelmap
            regex: __meta_kubernetes_node_label_(.+)
      - job_name: 'kubernetes-cadvisor'
        kubernetes_sd_configs:
          - role: node
        scheme: https
        tls_config:
          ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
          insecure_skip_verify: true
        bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
        relabel_configs:
          - action: labelmap
            regex: __meta_kubernetes_node_label_(.+)
          - action: replace
            source_labels: [__meta_kubernetes_node_name]
            regex: (.*)
            target_label: __metrics_path__
            replacement: /api/v1/nodes/${1}/proxy/metrics/cadvisor
          - action: replace
            source_labels: [__address__]
            target_label: __address__
            replacement: kubernetes.default.svc:443
      - job_name: 'kubernetes-api-services'
        scheme: https
        tls_config:
          ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
        bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
        kubernetes_sd_configs:
          - role: endpoints
        relabel_configs:
          - action: replace
            source_labels: [__address__]
            target_label: __address__
            replacement: kubernetes.default:443
          - action: keep
            source_labels: [__meta_kubernetes_namespace,__meta_kubernetes_endpoint_port_name,__meta_kubernetes_service_name]
            regex: default;https;kubernetes
          - action: labelmap
            regex: __meta_kubernetes_(.+)
      - job_name: 'kubernetes-service-endpoints'
        kubernetes_sd_configs:
          - role: endpoints
        relabel_configs:
          - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scrape]
            action: keep
            regex: true
          # 将label标签中的端口替换为annotations中指定的端口
          - action: replace
            source_labels: [__address__,__meta_kubernetes_service_annotation_prometheus_io_port]
            target_label: __address__
            regex: (.*?):(\d+);(\d+)
            replacement: ${1}:${3}
          # 动态获取scheme,确保http和https都可以进行采集
          - action: replace
            source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scheme]
            target_label: __scheme__
            regex: (https?)
          - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_path]
            action: replace
            target_label: __metrics_path__
            regex: (.+)
          - source_labels: [__address__, __meta_kubernetes_service_annotation_prometheus_io_port]
            action: replace
            target_label: __address__
            regex: ([^:]+)(?::\d+)?;(\d+)
            replacement: $1:$2
          - action: labelmap
            regex: __meta_kubernetes_service_label_(.+)
          - source_labels: [__meta_kubernetes_namespace]
            action: replace
            target_label: kubernetes_namespace
          - source_labels: [__meta_kubernetes_service_name]
            action: replace
            target_label: kubernetes_name
5. 部署 node_exporter(DaemonSet)
# DaemonSet for node-exporter. Pods use the host's PID, IPC and network
# namespaces and mount the host's /proc, /sys, /dev and / via hostPath.
#
# apps/v1 replaces extensions/v1beta1, which no longer serves DaemonSet as of
# Kubernetes 1.16. apps/v1 requires spec.selector, and it must match the pod
# template labels.
apiVersion: apps/v1
kind: DaemonSet
metadata:
  name: node-exporter
  namespace: kube-ops
  labels:
    name: node-exporter
spec:
  selector:
    matchLabels:
      name: node-exporter
  template:
    metadata:
      labels:
        name: node-exporter
    spec:
      hostPID: true
      hostIPC: true
      # With hostNetwork the container port below is opened on the node itself.
      hostNetwork: true
      containers:
        - name: node-exporter
          image: prom/node-exporter:v0.16.0
          ports:
            - containerPort: 9100
          resources:
            requests:
              # Same quantity as 0.15 CPU, written in millicores.
              cpu: 150m
          securityContext:
            privileged: true
          args:
            - --path.procfs
            - /host/proc
            - --path.sysfs
            - /host/sys
            - --collector.filesystem.ignored-mount-points
            # No shell is involved, so the value must not carry its own double
            # quotes: they would become literal characters of the regex and
            # it would never match a mount point.
            - '^/(sys|proc|dev|host|etc)($|/)'
          volumeMounts:
            - name: dev
              mountPath: /host/dev
            - name: proc
              mountPath: /host/proc
            - name: sys
              mountPath: /host/sys
            - name: rootfs
              mountPath: /rootfs
      # Tolerate the master NoSchedule taint so master nodes also get a pod.
      tolerations:
        - key: "node-role.kubernetes.io/master"
          operator: "Exists"
          effect: "NoSchedule"
      volumes:
        - name: proc
          hostPath:
            path: /proc
        - name: dev
          hostPath:
            path: /dev
        - name: sys
          hostPath:
            path: /sys
        - name: rootfs
          hostPath:
            path: /
6. 部署 Prometheus(Deployment)
# Deployment for the Prometheus server. It runs under the "prometheus"
# ServiceAccount, reads its configuration from the prometheus-conf ConfigMap
# and stores TSDB data on the "prometheus" PersistentVolumeClaim.
#
# apps/v1 replaces extensions/v1beta1, which no longer serves Deployment as of
# Kubernetes 1.16.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: prometheus
  namespace: kube-ops
  labels:
    app: prometheus
spec:
  selector:
    matchLabels:
      app: prometheus
  template:
    metadata:
      labels:
        app: prometheus
    spec:
      serviceAccountName: prometheus
      containers:
        - name: prometheus
          image: prom/prometheus:v2.15.2
          command:
            - "/bin/prometheus"
          args:
            - "--config.file=/etc/prometheus/prometheus.yml"
            - "--storage.tsdb.path=/prometheus"
            # --storage.tsdb.retention is deprecated; .time is its replacement.
            - "--storage.tsdb.retention.time=7d"
            # NOTE(review): the admin and lifecycle HTTP APIs are enabled here
            # and port 9090 is published through a NodePort Service - confirm
            # that network access to that port is restricted.
            - "--web.enable-admin-api"
            - "--web.enable-lifecycle"
          ports:
            # Named port; a Service can target it by the name "http".
            - name: http
              containerPort: 9090
              protocol: TCP
          volumeMounts:
            # Every key of the ConfigMap appears as a file in this directory.
            - name: config
              mountPath: "/etc/prometheus"
            # Only the "prometheus" sub-directory of the volume is mounted.
            - name: data
              subPath: prometheus
              mountPath: "/prometheus"
          resources:
            requests:
              # Same quantity as 0.05 CPU, written in millicores.
              cpu: 50m
              memory: 512Mi
            limits:
              cpu: 1
              memory: 2Gi
      volumes:
        - name: config
          configMap:
            name: prometheus-conf
        - name: data
          persistentVolumeClaim:
            claimName: prometheus
最后更新于 2020-08-26 02:57:47 并被添加「k8s prometheus」标签,已有 186 位童鞋阅读过。
本站使用「署名 4.0 国际」创作共享协议,可自由转载、引用,但需署名作者且注明文章出处
此处评论已关闭