# Captured with: [root@k8s-master1 promethes]# cat prometheus-rules.yaml
---
# ConfigMap carrying Prometheus alerting rules. Every key under `data` holds
# one rule-file document as a literal block scalar (`|`), so the nested YAML
# is stored verbatim as a string.
# NOTE(review): presumably consumed by a Prometheus deployment as rule files;
# confirm against the workload that references this ConfigMap.
apiVersion: v1
kind: ConfigMap
metadata:
  name: prometheus-rules
  namespace: kube-system
data:
  # general.rules: InstanceDown fires once `up == 0` has held for 1 minute.
  # The duration quoted in `description` mirrors `for`; keep the two in sync.
  # `{{ $labels.* }}` are template placeholders and must stay on one line
  # inside the quoted annotation strings.
  general.rules: |
    groups:
      - name: general.rules
        rules:
          - alert: InstanceDown
            expr: up == 0
            for: 1m
            labels:
              severity: error
            annotations:
              summary: "Instance {{ $labels.instance }} 停止工作"
              description: "{{ $labels.instance }} job {{ $labels.job }} 已经停止1分钟以上."
  # node.rules: NodeFilesystemUsage fires when available space divided by
  # filesystem size, times 100, has been <= 10 for 2 minutes on ext*/xfs
  # filesystems scraped by the "kubernetes-service-endpoints" job.
  # NOTE(review): `node_filesystem_avail` / `node_filesystem_size` look like
  # the metric names of older node_exporter releases (newer ones append
  # `_bytes`); verify against the exporter version actually deployed.
  node.rules: |
    groups:
      - name: node.rules
        rules:
          - alert: NodeFilesystemUsage
            expr: |
              node_filesystem_avail{fstype=~"ext.|xfs",job="kubernetes-service-endpoints"}
              / node_filesystem_size{fstype=~"ext.|xfs",job="kubernetes-service-endpoints"}
              * 100 <= 10
            for: 2m
            labels:
              severity: critical