From 30fd5c6022e76a01b2bac714bf7ba7ff633380b2 Mon Sep 17 00:00:00 2001 From: "Ankush.upadhyay" Date: Fri, 14 Feb 2025 19:00:15 +0530 Subject: [PATCH 1/2] Added thanos feature for prometheus HA --- README.md | 8 +- examples/complete/helm/prometheus.yaml | 21 +- examples/complete/helm/thanos.yaml | 93 ++ examples/complete/main.tf | 17 +- grafana/dashboards/analytics_nginx_logs.json | 2 +- grafana/dashboards/ingress_nginx.json | 2 +- grafana/dashboards/k8s_view_global.json | 90 +- grafana/dashboards/k8s_view_namespace.json | 2 +- grafana/dashboards/k8s_view_pods.json | 2 +- .../dashboards/karpenter_node_dashboard.json | 1189 +++++++++++++++++ grafana/dashboards/nodegroup.json | 393 +++++- helm/values/prometheus/thanos/values.yaml | 1135 ++++++++++++++++ helm/values/prometheus/values.yaml | 32 +- helm/values/thanos/thanos.yaml | 823 ++++++++++++ main.tf | 77 +- thanos.tf | 245 ++++ variables.tf | 24 +- 17 files changed, 4011 insertions(+), 144 deletions(-) create mode 100644 examples/complete/helm/thanos.yaml create mode 100644 grafana/dashboards/karpenter_node_dashboard.json create mode 100644 helm/values/prometheus/thanos/values.yaml create mode 100644 helm/values/thanos/thanos.yaml create mode 100644 thanos.tf diff --git a/README.md b/README.md index 53b2fe4..a2721ee 100644 --- a/README.md +++ b/README.md @@ -161,6 +161,7 @@ No requirements. | [loki\_scalable\_s3\_bucket](#module\_loki\_scalable\_s3\_bucket) | terraform-aws-modules/s3-bucket/aws | 4.1.2 | | [s3\_bucket\_mimir](#module\_s3\_bucket\_mimir) | terraform-aws-modules/s3-bucket/aws | 4.1.2 | | [s3\_bucket\_temp](#module\_s3\_bucket\_temp) | terraform-aws-modules/s3-bucket/aws | 4.1.2 | +| [s3\_bucket\_thanos](#module\_s3\_bucket\_thanos) | terraform-aws-modules/s3-bucket/aws | 4.1.2 | ## Resources @@ -192,6 +193,7 @@ No requirements. 
| [helm_release.stackdriver_exporter](https://registry.terraform.io/providers/hashicorp/helm/latest/docs/resources/release) | resource | | [helm_release.statsd_exporter](https://registry.terraform.io/providers/hashicorp/helm/latest/docs/resources/release) | resource | | [helm_release.tempo](https://registry.terraform.io/providers/hashicorp/helm/latest/docs/resources/release) | resource | +| [helm_release.thanos](https://registry.terraform.io/providers/hashicorp/helm/latest/docs/resources/release) | resource | | [kubernetes_config_map.argocd_dashboard](https://registry.terraform.io/providers/hashicorp/kubernetes/latest/docs/resources/config_map) | resource | | [kubernetes_config_map.aws_acm](https://registry.terraform.io/providers/hashicorp/kubernetes/latest/docs/resources/config_map) | resource | | [kubernetes_config_map.aws_alb](https://registry.terraform.io/providers/hashicorp/kubernetes/latest/docs/resources/config_map) | resource | @@ -252,8 +254,8 @@ No requirements. | [blackbox\_exporter\_version](#input\_blackbox\_exporter\_version) | Version of the Blackbox exporter to deploy. | `string` | `"8.17.0"` | no | | [cloudwatch\_enabled](#input\_cloudwatch\_enabled) | Whether or not to add CloudWatch as datasource and add some default dashboards for AWS in Grafana. | `bool` | `false` | no | | [cluster\_name](#input\_cluster\_name) | Specifies the name of the EKS cluster. | `string` | n/a | yes | -| [deployment\_config](#input\_deployment\_config) | Configuration options for the Prometheus, Alertmanager, Loki, and Grafana deployments, including the hostname, storage class name, dashboard refresh interval, and S3 bucket configuration for Mimir. | `any` |
{
"alb_acm_certificate_arn": "",
"blackbox_values_yaml": "",
"dashboard_refresh_interval": "",
"grafana_enabled": true,
"grafana_ingress_load_balancer": "nlb",
"grafana_mimir_values_yaml": "",
"hostname": "",
"ingress_class_name": "",
"loki_hostname": "",
"loki_internal_ingress_enabled": false,
"loki_scalable_config": {
"loki_scalable_values": "",
"loki_scalable_version": "6.6.5",
"s3_bucket_name": "",
"s3_bucket_region": "",
"versioning_enabled": ""
},
"loki_values_yaml": "",
"mimir_s3_bucket_config": {
"s3_bucket_name": "",
"s3_bucket_region": "",
"s3_object_expiration": "",
"versioning_enabled": ""
},
"otel_config": {
"otel_collector_enabled": false,
"otel_operator_enabled": false
},
"private_alb_enabled": "",
"prometheus_hostname": "",
"prometheus_internal_ingress_enabled": false,
"prometheus_values_yaml": "",
"promtail_config": {
"promtail_values": "",
"promtail_version": "6.16.3"
},
"storage_class_name": "gp2",
"tempo_config": {
"s3_bucket_name": "",
"s3_bucket_region": "",
"s3_object_expiration": "",
"versioning_enabled": false
},
"tempo_values_yaml": ""
}
| no | -| [exporter\_config](#input\_exporter\_config) | allows enabling/disabling various exporters for scraping metrics, including Consul, MongoDB, Redis, and StatsD. | `map(any)` |
{
"argocd": false,
"blackbox": true,
"conntrack": false,
"consul": false,
"couchdb": false,
"druid": false,
"elasticsearch": true,
"ethtool_exporter": true,
"istio": false,
"jenkins": false,
"json": false,
"kafka": false,
"mongodb": true,
"mysql": true,
"nats": false,
"nifi": false,
"pingdom": false,
"postgres": false,
"prometheustosd": false,
"push_gateway": false,
"rabbitmq": false,
"redis": true,
"snmp": false,
"stackdriver": false,
"statsd": true
}
| no | +| [deployment\_config](#input\_deployment\_config) | Configuration options for the Prometheus, Alertmanager, Loki, and Grafana deployments, including the hostname, storage class name, dashboard refresh interval, and S3 bucket configuration for Mimir. | `any` |
{
"alb_acm_certificate_arn": "",
"blackbox_values_yaml": "",
"dashboard_refresh_interval": "",
"grafana_enabled": true,
"grafana_ingress_load_balancer": "nlb",
"grafana_mimir_values_yaml": "",
"hostname": "",
"ingress_class_name": "",
"loki_hostname": "",
"loki_internal_ingress_enabled": false,
"loki_scalable_config": {
"loki_scalable_values": "",
"loki_scalable_version": "6.6.5",
"s3_bucket_name": "",
"s3_bucket_region": "",
"versioning_enabled": ""
},
"loki_values_yaml": "",
"mimir_s3_bucket_config": {
"s3_bucket_name": "",
"s3_bucket_region": "",
"s3_object_expiration": "",
"versioning_enabled": ""
},
"otel_config": {
"otel_collector_enabled": false,
"otel_operator_enabled": false
},
"private_alb_enabled": "",
"prometheus_hostname": "",
"prometheus_internal_ingress_enabled": false,
"prometheus_replicas": 1,
"prometheus_shards": 1,
"prometheus_values_yaml": "",
"promtail_config": {
"promtail_values": "",
"promtail_version": "6.16.3"
},
"storage_class_name": "gp3",
"tempo_config": {
"s3_bucket_name": "",
"s3_bucket_region": "",
"s3_object_expiration": "",
"versioning_enabled": false
},
"tempo_values_yaml": "",
"thanos_configs": {
"s3_bucket_name": "",
"s3_bucket_region": "",
"s3_object_expiration": "",
"versioning_enabled": ""
},
"thanos_values_yaml": ""
}
| no | +| [exporter\_config](#input\_exporter\_config) | allows enabling/disabling various exporters for scraping metrics, including Consul, MongoDB, Redis, and StatsD. | `map(any)` |
{
"argocd": false,
"blackbox": true,
"conntrack": false,
"consul": false,
"couchdb": false,
"druid": false,
"elasticsearch": true,
"ethtool_exporter": true,
"istio": false,
"jenkins": false,
"json": false,
"kafka": false,
"mongodb": true,
"mysql": true,
"nats": false,
"nifi": false,
"pingdom": false,
"postgres": false,
"prometheustosd": false,
"push_gateway": false,
"rabbitmq": false,
"redis": true,
"snmp": false,
"stackdriver": false,
"statsd": true
}
| no | | [grafana\_mimir\_enabled](#input\_grafana\_mimir\_enabled) | Specify whether or not to deploy the Grafana Mimir plugin. | `bool` | `false` | no | | [grafana\_mimir\_version](#input\_grafana\_mimir\_version) | Version of the Grafana Mimir plugin to deploy. | `string` | `"5.4.0"` | no | | [kube\_prometheus\_stack\_enabled](#input\_kube\_prometheus\_stack\_enabled) | Specify whether or not to deploy Grafana as part of the Prometheus and Alertmanager stack. | `bool` | `false` | no | @@ -263,6 +265,8 @@ No requirements. | [pgl\_namespace](#input\_pgl\_namespace) | Name of the Kubernetes namespace where the Grafana deployment will be deployed. | `string` | `"monitoring"` | no | | [prometheus\_chart\_version](#input\_prometheus\_chart\_version) | Version of the Prometheus chart to deploy. | `string` | `"61.1.0"` | no | | [tempo\_enabled](#input\_tempo\_enabled) | Enable Grafana Tempo | `bool` | `false` | no | +| [thanos\_chart\_version](#input\_thanos\_chart\_version) | helm chart version for thanos | `string` | `"15.10.2"` | no | +| [thanos\_enabled](#input\_thanos\_enabled) | Choose whether to enable Thanos HA setup for kube-prometheus stack | `bool` | `false` | no | ## Outputs diff --git a/examples/complete/helm/prometheus.yaml b/examples/complete/helm/prometheus.yaml index 1870193..c830f9d 100644 --- a/examples/complete/helm/prometheus.yaml +++ b/examples/complete/helm/prometheus.yaml @@ -198,7 +198,7 @@ prometheus: resources: requests: storage: 50Gi - retention: 30d + retention: 2d walCompression: true ## If true, the Operator won't process any Prometheus configuration changes ## @@ -226,3 +226,22 @@ prometheus: # prometheus-node-exporter: # rbac: # pspEnabled: true +thanosRuler: + thanosRulerSpec: + retention: 24h + resources: + requests: + memory: 200Mi + cpu: 100m + limits: + memory: 400Mi + cpu: 200m + affinity: + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: "Addons-Services" + 
operator: In + values: + - "true" diff --git a/examples/complete/helm/thanos.yaml b/examples/complete/helm/thanos.yaml new file mode 100644 index 0000000..6038496 --- /dev/null +++ b/examples/complete/helm/thanos.yaml @@ -0,0 +1,93 @@ +query: + containerPorts: + http: 10902 + grpc: 10901 + resources: + requests: + cpu: 50m + memory: 100Mi + limits: + cpu: 100m + memory: 200Mi + affinity: + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: "Addons-Services" + operator: In + values: + - "true" +queryFrontend: + enabled: true + resources: + requests: + cpu: 50m + memory: 100Mi + limits: + cpu: 100m + memory: 200Mi + + affinity: + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: "Addons-Services" + operator: In + values: + - "true" +bucketweb: + refresh: 30m + timeout: 5m + resources: + requests: + cpu: 50m + memory: 100Mi + limits: + cpu: 100m + memory: 200Mi + affinity: + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: "Addons-Services" + operator: In + values: + - "true" +compactor: + retentionResolutionRaw: 30d + retentionResolution5m: 90d + retentionResolution1h: 1y + consistencyDelay: 5m + affinity: + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: "Addons-Services" + operator: In + values: + - "true" + persistence: + size: 8Gi +storegateway: + resources: + requests: + cpu: 50m + memory: 100Mi + limits: + cpu: 100m + memory: 200Mi + affinity: + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: "Addons-Services" + operator: In + values: + - "true" + persistence: + size: 8Gi diff --git a/examples/complete/main.tf b/examples/complete/main.tf index c3b6930..06b9a73 100644 --- a/examples/complete/main.tf +++ b/examples/complete/main.tf @@ 
-16,10 +16,11 @@ module "pgl" { version = "3.0.3" cluster_name = "" kube_prometheus_stack_enabled = true - loki_enabled = false - loki_scalable_enabled = true - grafana_mimir_enabled = true + loki_enabled = true + loki_scalable_enabled = false + grafana_mimir_enabled = false cloudwatch_enabled = true + thanos_enabled = false tempo_enabled = false deployment_config = { hostname = "grafana.squareops.com" @@ -28,9 +29,13 @@ module "pgl" { loki_values_yaml = file("./helm/loki.yaml") blackbox_values_yaml = file("./helm/blackbox.yaml") grafana_mimir_values_yaml = file("./helm/mimir.yaml") + thanos_values_yaml = file("./helm/thanos.yaml") tempo_values_yaml = file("./helm/tempo.yaml") + prometheus_replicas = 1 + prometheus_shards = 1 dashboard_refresh_interval = "" grafana_enabled = true + grafana_ha_enabled = true prometheus_hostname = "prometheus.com" prometheus_internal_ingress_enabled = false grafana_ingress_load_balancer = "nlb" ##Choose your load balancer type (e.g., NLB or ALB). If using ALB, ensure you provide the ACM certificate ARN for SSL. 
@@ -39,6 +44,12 @@ module "pgl" { private_alb_enabled = false # Set to true, when wanted to deploy PGL on ALB internal loki_internal_ingress_enabled = false loki_hostname = "loki.com" + thanos_configs = { + s3_bucket_name = "${local.environment}-${local.name}-thanos-bucket" + versioning_enabled = "false" + s3_bucket_region = "${local.region}" + s3_object_expiration = 90 + } mimir_s3_bucket_config = { s3_bucket_name = "${local.environment}-${local.name}-mimir-bucket" versioning_enabled = "false" diff --git a/grafana/dashboards/analytics_nginx_logs.json b/grafana/dashboards/analytics_nginx_logs.json index 36aca4d..b283574 100644 --- a/grafana/dashboards/analytics_nginx_logs.json +++ b/grafana/dashboards/analytics_nginx_logs.json @@ -2019,4 +2019,4 @@ "uid": "fgc-nginx-web-analytics", "version": 3, "weekStart": "" - } \ No newline at end of file + } diff --git a/grafana/dashboards/ingress_nginx.json b/grafana/dashboards/ingress_nginx.json index 6cbb4cd..e74a583 100644 --- a/grafana/dashboards/ingress_nginx.json +++ b/grafana/dashboards/ingress_nginx.json @@ -2366,4 +2366,4 @@ "uid": "k8s-nginx-ingress-prometheus-ng", "version": 5, "weekStart": "" -} \ No newline at end of file +} diff --git a/grafana/dashboards/k8s_view_global.json b/grafana/dashboards/k8s_view_global.json index 37d248e..8a5478d 100644 --- a/grafana/dashboards/k8s_view_global.json +++ b/grafana/dashboards/k8s_view_global.json @@ -25,7 +25,7 @@ "editable": true, "fiscalYearStartMonth": 0, "graphTooltip": 1, - "id": 2859, + "id": 65, "links": [], "liveNow": false, "panels": [ @@ -66,7 +66,7 @@ "content": "
\n \n

$k8s_version

\n
", "mode": "html" }, - "pluginVersion": "11.1.0", + "pluginVersion": "11.0.0", "targets": [ { "datasource": { @@ -135,7 +135,7 @@ "sizing": "auto", "valueMode": "color" }, - "pluginVersion": "11.1.0", + "pluginVersion": "11.0.0", "targets": [ { "datasource": { @@ -284,7 +284,7 @@ "text": {}, "valueMode": "color" }, - "pluginVersion": "11.1.0", + "pluginVersion": "11.0.0", "targets": [ { "datasource": { @@ -430,7 +430,7 @@ "textMode": "value", "wideLayout": true }, - "pluginVersion": "11.1.0", + "pluginVersion": "11.0.0", "targets": [ { "datasource": { @@ -530,6 +530,7 @@ "sortDesc": true }, "tooltip": { + "maxHeight": 600, "mode": "multi", "sort": "none" } @@ -746,7 +747,7 @@ "textMode": "auto", "wideLayout": true }, - "pluginVersion": "11.1.0", + "pluginVersion": "11.0.0", "targets": [ { "datasource": { @@ -818,7 +819,7 @@ "textMode": "value", "wideLayout": true }, - "pluginVersion": "11.1.0", + "pluginVersion": "11.0.0", "targets": [ { "datasource": { @@ -887,7 +888,7 @@ "textMode": "auto", "wideLayout": true }, - "pluginVersion": "11.1.0", + "pluginVersion": "11.0.0", "targets": [ { "datasource": { @@ -953,7 +954,7 @@ "textMode": "auto", "wideLayout": true }, - "pluginVersion": "11.1.0", + "pluginVersion": "11.0.0", "targets": [ { "datasource": { @@ -1109,7 +1110,7 @@ "textMode": "auto", "wideLayout": true }, - "pluginVersion": "11.1.0", + "pluginVersion": "11.0.0", "targets": [ { "datasource": { @@ -1268,7 +1269,7 @@ "textMode": "value", "wideLayout": true }, - "pluginVersion": "11.1.0", + "pluginVersion": "11.0.0", "targets": [ { "datasource": { @@ -1385,6 +1386,7 @@ "showLegend": false }, "tooltip": { + "maxHeight": 600, "mode": "single", "sort": "none" } @@ -1519,6 +1521,7 @@ "showLegend": false }, "tooltip": { + "maxHeight": 600, "mode": "single", "sort": "none" } @@ -1620,8 +1623,7 @@ "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -1726,8 +1728,7 @@ "mode": "absolute", "steps": [ { - "color": 
"green", - "value": null + "color": "green" }, { "color": "red", @@ -1831,8 +1832,7 @@ "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -1949,8 +1949,7 @@ "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -2073,8 +2072,7 @@ "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -2182,8 +2180,7 @@ "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -2299,8 +2296,7 @@ "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -2415,8 +2411,7 @@ "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -2520,8 +2515,7 @@ "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -2625,8 +2619,7 @@ "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -2747,8 +2740,7 @@ "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -2886,8 +2878,7 @@ "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -3070,8 +3061,7 @@ "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -3181,8 +3171,7 @@ "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -3320,8 +3309,7 @@ "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -3460,8 +3448,7 @@ "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -3535,9 +3522,9 @@ "list": [ { "current": { - "selected": false, - "text": "Prometheus", - 
"value": "prometheus" + "selected": true, + "text": "default", + "value": "default" }, "hide": 0, "includeAll": false, @@ -3553,6 +3540,7 @@ }, { "current": { + "isNone": true, "selected": false, "text": "None", "value": "" @@ -3627,7 +3615,7 @@ }, { "current": { - "selected": false, + "selected": true, "text": [ "node-exporter" ], @@ -3658,9 +3646,10 @@ }, { "current": { + "isNone": true, "selected": false, - "text": "v1.30.2-eks-1552ad0", - "value": "v1.30.2-eks-1552ad0" + "text": "None", + "value": "" }, "datasource": { "type": "prometheus", @@ -3689,10 +3678,11 @@ "from": "now-1h", "to": "now" }, + "timeRangeUpdatedDuringEditOrView": false, "timepicker": {}, "timezone": "", "title": "Kubernetes / Views / Global", "uid": "k8s_views_global", - "version": 7, + "version": 1, "weekStart": "" } diff --git a/grafana/dashboards/k8s_view_namespace.json b/grafana/dashboards/k8s_view_namespace.json index 8986b25..c8f31a6 100644 --- a/grafana/dashboards/k8s_view_namespace.json +++ b/grafana/dashboards/k8s_view_namespace.json @@ -2304,4 +2304,4 @@ "uid": "k8s_views_ns", "version": 35, "weekStart": "" - } \ No newline at end of file + } diff --git a/grafana/dashboards/k8s_view_pods.json b/grafana/dashboards/k8s_view_pods.json index f12d928..4251b71 100644 --- a/grafana/dashboards/k8s_view_pods.json +++ b/grafana/dashboards/k8s_view_pods.json @@ -2696,4 +2696,4 @@ "uid": "k8s_views_pods", "version": 28, "weekStart": "" - } \ No newline at end of file + } diff --git a/grafana/dashboards/karpenter_node_dashboard.json b/grafana/dashboards/karpenter_node_dashboard.json new file mode 100644 index 0000000..6880d6b --- /dev/null +++ b/grafana/dashboards/karpenter_node_dashboard.json @@ -0,0 +1,1189 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": { + "type": "datasource", + "uid": "grafana" + }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "target": { + "limit": 100, + "matchAny": false, + 
"tags": [], + "type": "dashboard" + }, + "type": "dashboard" + } + ] + }, + "description": "Grafana dashboard to monitor nodes grouped by their corresponding Karpenter managed node group.", + "editable": true, + "fiscalYearStartMonth": 0, + "gnetId": 13548, + "graphTooltip": 0, + "id": 311, + "links": [], + "liveNow": false, + "panels": [ + { + "datasource": { + "uid": "$datasource" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 6, + "w": 12, + "x": 0, + "y": 0 + }, + "id": 26, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "9.0.4", + "targets": [ + { + "datasource": { + "uid": "$datasource" + }, + "expr": "sum(kube_node_info{node=~\"$node\"})", + "interval": "", + "legendFormat": "", + "refId": "A" + } + ], + "title": "Total Nodes", + "type": "timeseries" + }, + { + "datasource": { + "uid": "$datasource" + }, + "fieldConfig": { + "defaults": { + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] 
+ } + }, + "overrides": [] + }, + "gridPos": { + "h": 6, + "w": 4, + "x": 12, + "y": 0 + }, + "id": 20, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "center", + "orientation": "auto", + "percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": [ + "mean" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "11.1.0", + "targets": [ + { + "datasource": { + "uid": "$datasource" + }, + "expr": "sum(kube_pod_info{node=~\"$node\"})", + "interval": "", + "legendFormat": "", + "refId": "A" + } + ], + "title": "Pod Count - Total", + "type": "stat" + }, + { + "datasource": { + "uid": "$datasource" + }, + "description": "", + "fieldConfig": { + "defaults": { + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 6, + "w": 4, + "x": 16, + "y": 0 + }, + "id": 14, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": [ + "mean" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "11.1.0", + "targets": [ + { + "datasource": { + "uid": "$datasource" + }, + "expr": "sum(kubelet_running_pods{node=~\"$node\"})", + "interval": "", + "legendFormat": "", + "refId": "A" + } + ], + "title": "Pod Count - Running", + "type": "stat" + }, + { + "datasource": { + "uid": "$datasource" + }, + "fieldConfig": { + "defaults": { + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "red", + "value": null + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 6, + "w": 4, + "x": 20, + "y": 0 + }, + "id": 18, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + 
"orientation": "auto", + "percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": [ + "mean" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "11.1.0", + "targets": [ + { + "datasource": { + "uid": "$datasource" + }, + "expr": "sum(kube_pod_info{node=~\"$node\"}) - sum(kubelet_running_pods{node=~\"$node\"})", + "interval": "", + "legendFormat": "", + "refId": "A" + } + ], + "title": "Pod Count - Not Running", + "type": "stat" + }, + { + "datasource": { + "uid": "$datasource" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "percent" + }, + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 12, + "x": 0, + "y": 6 + }, + "id": 4, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "9.0.4", + "targets": [ + { + "datasource": { + "uid": "$datasource" + }, + "expr": "(1 - (sum(rate(node_cpu_seconds_total{instance=~\"$instance:9100\", mode=\"idle\"}[5m])) by (instance) / 
sum(rate(node_cpu_seconds_total{instance=~\"$instance:9100\"}[5m])) by (instance))) * 100", + "interval": "", + "legendFormat": "{{instance}}-cpu-utilization", + "refId": "A" + } + ], + "title": "CPU Utilization", + "type": "timeseries" + }, + { + "datasource": { + "uid": "$datasource" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "percent" + }, + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 12, + "x": 12, + "y": 6 + }, + "id": 2, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "9.0.4", + "targets": [ + { + "datasource": { + "uid": "$datasource" + }, + "expr": "(node_memory_MemTotal_bytes{instance=~\"$instance:9100\"} - node_memory_MemAvailable_bytes{instance=~\"$instance:9100\"}) / node_memory_MemTotal_bytes{instance=~\"$instance:9100\"} * 100", + "interval": "", + "legendFormat": "{{instance}}-memory-utilization", + "refId": "A" + } + ], + "title": "Memory Utilization", + "type": "timeseries" + }, + { + "datasource": { + "uid": "$datasource" + }, + "fieldConfig": { + "defaults": { + "color": { 
+ "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "percent" + }, + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 8, + "x": 0, + "y": 15 + }, + "id": 6, + "interval": "", + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "9.0.4", + "targets": [ + { + "datasource": { + "uid": "$datasource" + }, + "expr": "(sum (node_filesystem_size_bytes{instance=~\"$instance:9100\"}) by (instance) - sum (node_filesystem_free_bytes{instance=~\"$instance:9100\"}) by (instance)) / sum (node_filesystem_size_bytes{instance=~\"$instance:9100\"}) by (instance) * 100", + "interval": "", + "legendFormat": "{{instance}}-disk-utilization", + "refId": "B" + } + ], + "title": "Disk Utilization", + "type": "timeseries" + }, + { + "datasource": { + "uid": "$datasource" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode":
"none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "mbytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 8, + "x": 8, + "y": 15 + }, + "id": 10, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "9.0.4", + "targets": [ + { + "datasource": { + "uid": "$datasource" + }, + "expr": "sum(rate(node_disk_read_bytes_total{instance=~\"$instance:9100\"}[5m]) / (1024 * 1024)) by (instance)", + "interval": "", + "legendFormat": "{{instance}}-read-bytes", + "refId": "A" + }, + { + "datasource": { + "uid": "$datasource" + }, + "expr": "sum(rate(node_disk_written_bytes_total{instance=~\"$instance:9100\"}[5m]) / (1024 * 1024)) by (instance)", + "interval": "", + "legendFormat": "{{instance}}-write-bytes", + "refId": "B" + } + ], + "title": "Disk I/O", + "type": "timeseries" + }, + { + "datasource": { + "uid": "$datasource" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + 
"scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "mbytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 8, + "x": 16, + "y": 15 + }, + "id": 12, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "9.0.4", + "targets": [ + { + "datasource": { + "uid": "$datasource" + }, + "expr": "sum(rate(node_network_receive_bytes_total{instance=~\"$instance:9100\",device=~\"eth.*\"}[2m]) / (1024 * 1024)) by (instance)", + "instant": false, + "interval": "", + "legendFormat": "{{instance}}-network-in", + "refId": "A" + }, + { + "datasource": { + "uid": "$datasource" + }, + "expr": "sum(rate(node_network_transmit_bytes_total{instance=~\"$instance:9100\",device=~\"eth.*\"}[2m]) / (1024 * 1024)) by (instance)", + "interval": "", + "legendFormat": "{{instance}}-network-out", + "refId": "B" + } + ], + "title": "Network I/O", + "type": "timeseries" + }, + { + "aliasColors": {}, + "autoMigrateFrom": "graph", + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": { + "uid": "$datasource" + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 8, + "x": 0, + "y": 24 + }, + "hiddenSeries": false, + "id": 23, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "9.0.4", + "pointradius": 2, + "points": false, + "renderer": "flot", + 
"seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "datasource": { + "uid": "$datasource" + }, + "expr": "node_load1{instance=~\"$instance:9100\"}", + "interval": "", + "legendFormat": "{{instance}}", + "refId": "A" + } + ], + "thresholds": [], + "timeRegions": [], + "title": "Load Avg (1m)", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "timeseries", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "logBase": 1, + "show": true + }, + { + "format": "short", + "logBase": 1, + "show": true + } + ], + "yaxis": { + "align": false + } + }, + { + "aliasColors": {}, + "autoMigrateFrom": "graph", + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": { + "uid": "$datasource" + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 8, + "x": 8, + "y": 24 + }, + "hiddenSeries": false, + "id": 22, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "9.0.4", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "datasource": { + "uid": "$datasource" + }, + "expr": "node_load5{instance=~\"$instance:9100\"}", + "interval": "", + "legendFormat": "{{instance}}", + "refId": "A" + } + ], + "thresholds": [], + "timeRegions": [], + "title": "Load Avg (5m)", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "timeseries", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "logBase": 1, + "show": true + }, + { + "format": "short", + "logBase": 1, + "show": true + 
} + ], + "yaxis": { + "align": false + } + }, + { + "aliasColors": {}, + "autoMigrateFrom": "graph", + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": { + "uid": "$datasource" + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 8, + "x": 16, + "y": 24 + }, + "hiddenSeries": false, + "id": 24, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "9.0.4", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "datasource": { + "uid": "$datasource" + }, + "expr": "node_load15{instance=~\"$instance:9100\"}", + "interval": "", + "legendFormat": "{{instance}}", + "refId": "A" + } + ], + "thresholds": [], + "timeRegions": [], + "title": "Load Avg (15m)", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "timeseries", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "logBase": 1, + "show": true + }, + { + "format": "short", + "logBase": 1, + "show": true + } + ], + "yaxis": { + "align": false + } + } + ], + "refresh": "30s", + "schemaVersion": 39, + "tags": [], + "templating": { + "list": [ + { + "current": { + "selected": true, + "text": [ + "All" + ], + "value": [ + "$__all" + ] + }, + "datasource": { + "uid": "$datasource" + }, + "definition": "label_values(kube_node_labels,label_karpenter_sh_nodepool)", + "hide": 0, + "includeAll": true, + "label": "", + "multi": true, + "name": "nodegroup", + "options": [], + "query": { + "qryType": 1, + "query": "label_values(kube_node_labels,label_karpenter_sh_nodepool)", + "refId": "PrometheusVariableQueryEditor-VariableQuery" + }, + "refresh": 1, + 
"regex": "", + "skipUrlSync": false, + "sort": 0, + "tagValuesQuery": "", + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "current": { + "selected": true, + "text": [ + "All" + ], + "value": [ + "$__all" + ] + }, + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "definition": "label_values(kube_node_labels{label_karpenter_sh_nodepool=~\"$nodegroup\"},node)", + "hide": 0, + "includeAll": true, + "multi": true, + "name": "node", + "options": [], + "query": { + "qryType": 1, + "query": "label_values(kube_node_labels{label_karpenter_sh_nodepool=~\"$nodegroup\"},node)", + "refId": "PrometheusVariableQueryEditor-VariableQuery" + }, + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "tagValuesQuery": "", + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "current": { + "selected": false, + "text": "All", + "value": "$__all" + }, + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "definition": "label_values(kube_pod_info{node =~ \"$node\"}, host_ip)", + "hide": 2, + "includeAll": true, + "multi": true, + "name": "instance", + "options": [], + "query": { + "query": "label_values(kube_pod_info{node =~ \"$node\"}, host_ip)", + "refId": "Prometheus-instance-Variable-Query" + }, + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "tagValuesQuery": "", + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "current": { + "selected": false, + "text": "Prometheus", + "value": "prometheus" + }, + "hide": 0, + "includeAll": false, + "label": "datasource", + "multi": false, + "name": "datasource", + "options": [], + "query": "prometheus", + "queryValue": "", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "type": "datasource" + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ] + }, + "timezone": "", + "title": 
"Karpenter Node (Groups)", + "uid": "K8s-CR-Nodesgroups", + "version": 2, + "weekStart": "" + } diff --git a/grafana/dashboards/nodegroup.json b/grafana/dashboards/nodegroup.json index c0a2d16..8968489 100644 --- a/grafana/dashboards/nodegroup.json +++ b/grafana/dashboards/nodegroup.json @@ -26,7 +26,7 @@ "fiscalYearStartMonth": 0, "gnetId": 13548, "graphTooltip": 0, - "id": 2851, + "id": 67, "links": [], "liveNow": false, "panels": [ @@ -106,6 +106,7 @@ "showLegend": true }, "tooltip": { + "maxHeight": 600, "mode": "single", "sort": "none" } @@ -119,7 +120,7 @@ "editorMode": "code", "expr": "sum(kube_node_info{node=~\"$node\"})", "interval": "", - "legendFormat": "{{Node}}", + "legendFormat": "$node", "range": true, "refId": "A" } @@ -176,7 +177,7 @@ "textMode": "value_and_name", "wideLayout": false }, - "pluginVersion": "11.1.0", + "pluginVersion": "11.0.0", "targets": [ { "datasource": { @@ -185,7 +186,7 @@ }, "editorMode": "code", "exemplar": true, - "expr": "count by (label_eks_amazonaws_com_capacity_type) (kube_node_labels{label_eks_amazonaws_com_capacity_type!=\"\",label_eks_amazonaws_com_nodegroup=\"$nodegroup\"})", + "expr": "count by (label_eks_amazonaws_com_capacity_type) (kube_node_labels{label_eks_amazonaws_com_capacity_type!=\"\",label_eks_amazonaws_com_nodegroup=\"$nodegroup\", node=\"$node\"})", "interval": "", "legendFormat": "{{label_eks_amazonaws_com_capacity_type}}", "range": true, @@ -248,7 +249,7 @@ "textMode": "value_and_name", "wideLayout": false }, - "pluginVersion": "11.1.0", + "pluginVersion": "11.0.0", "targets": [ { "datasource": { @@ -257,7 +258,7 @@ }, "editorMode": "code", "exemplar": true, - "expr": "count by (label_beta_kubernetes_io_instance_type) (kube_node_labels{label_beta_kubernetes_io_instance_type!=\"\",label_eks_amazonaws_com_nodegroup=\"$nodegroup\",job=\"kube-state-metrics\"})", + "expr": "count by (label_beta_kubernetes_io_instance_type) 
(kube_node_labels{label_beta_kubernetes_io_instance_type!=\"\",label_eks_amazonaws_com_nodegroup=\"$nodegroup\",job=\"kube-state-metrics\", node=\"$node\"})", "interval": "", "legendFormat": "{{label_beta_kubernetes_io_instance_type}}", "range": true, @@ -312,7 +313,7 @@ "textMode": "auto", "wideLayout": true }, - "pluginVersion": "11.1.0", + "pluginVersion": "11.0.0", "targets": [ { "datasource": { @@ -375,7 +376,7 @@ "textMode": "auto", "wideLayout": true }, - "pluginVersion": "11.1.0", + "pluginVersion": "11.0.0", "targets": [ { "datasource": { @@ -445,7 +446,7 @@ "textMode": "auto", "wideLayout": true }, - "pluginVersion": "11.1.0", + "pluginVersion": "11.0.0", "targets": [ { "datasource": { @@ -523,7 +524,7 @@ "overrides": [] }, "gridPos": { - "h": 9, + "h": 7, "w": 12, "x": 0, "y": 6 @@ -537,6 +538,7 @@ "showLegend": true }, "tooltip": { + "maxHeight": 600, "mode": "multi", "sort": "none" } @@ -617,7 +619,7 @@ "overrides": [] }, "gridPos": { - "h": 9, + "h": 7, "w": 12, "x": 12, "y": 6 @@ -631,6 +633,7 @@ "showLegend": true }, "tooltip": { + "maxHeight": 600, "mode": "multi", "sort": "none" } @@ -650,6 +653,289 @@ "title": "Memory Utilization", "type": "timeseries" }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "continuous-GrYlRd" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 20, + "gradientMode": "scheme", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "smooth", + "lineWidth": 3, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": 
"absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 13 + }, + "id": 29, + "options": { + "legend": { + "calcs": [ + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true, + "sortBy": "Last *", + "sortDesc": true + }, + "tooltip": { + "maxHeight": 600, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "11.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "sum(kube_pod_container_resource_requests{resource=\"cpu\", node=\"$node\"})", + "instant": false, + "legendFormat": "Request", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "sum(kube_pod_container_resource_limits{resource=\"cpu\", node=\"$node\"})", + "hide": false, + "instant": false, + "legendFormat": "Limit", + "range": true, + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "sum(rate(container_cpu_usage_seconds_total{node=\"$node\", cpu=\"total\"}[$__rate_interval]))", + "hide": false, + "instant": false, + "legendFormat": "Usage", + "range": true, + "refId": "C" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "sum(machine_cpu_cores{node=\"$node\"})", + "hide": false, + "instant": false, + "legendFormat": "Total", + "range": true, + "refId": "D" + } + ], + "title": "CPU Info on Node (in m)", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "continuous-BlPu" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": 
"", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 20, + "gradientMode": "scheme", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "smooth", + "lineWidth": 3, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 13 + }, + "id": 30, + "options": { + "legend": { + "calcs": [ + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true, + "sortBy": "Last *", + "sortDesc": true + }, + "tooltip": { + "maxHeight": 600, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "11.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "sum(kube_pod_container_resource_requests{resource=\"memory\", node=\"$node\"})", + "instant": false, + "legendFormat": "Request", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "sum(kube_pod_container_resource_limits{resource=\"memory\", node=\"$node\"})", + "hide": false, + "instant": false, + "legendFormat": "Limit", + "range": true, + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "sum(rate(container_memory_usage_bytes{node=\"$node\"}[$__rate_interval]))", + "hide": false, + "instant": false, + "legendFormat": "Usage", + "range": true, + "refId": "C" + }, + { + "datasource": { + "type": "prometheus", + 
"uid": "${datasource}" + }, + "editorMode": "code", + "expr": "machine_memory_bytes{node=\"$node\"}", + "hide": false, + "instant": false, + "legendFormat": "Total", + "range": true, + "refId": "D" + } + ], + "title": "Memory Info on Node (in GB)", + "type": "timeseries" + }, { "datasource": { "uid": "$datasource" @@ -714,7 +1000,7 @@ "h": 9, "w": 8, "x": 0, - "y": 15 + "y": 20 }, "id": 6, "interval": "", @@ -726,6 +1012,7 @@ "showLegend": true }, "tooltip": { + "maxHeight": 600, "mode": "multi", "sort": "none" } @@ -809,7 +1096,7 @@ "h": 9, "w": 8, "x": 8, - "y": 15 + "y": 20 }, "id": 10, "options": { @@ -820,6 +1107,7 @@ "showLegend": true }, "tooltip": { + "maxHeight": 600, "mode": "multi", "sort": "none" } @@ -912,7 +1200,7 @@ "h": 9, "w": 8, "x": 16, - "y": 15 + "y": 20 }, "id": 12, "options": { @@ -923,6 +1211,7 @@ "showLegend": true }, "tooltip": { + "maxHeight": 600, "mode": "multi", "sort": "none" } @@ -998,8 +1287,7 @@ "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -1015,7 +1303,7 @@ "h": 8, "w": 8, "x": 0, - "y": 24 + "y": 29 }, "id": 23, "options": { @@ -1091,8 +1379,7 @@ "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -1108,7 +1395,7 @@ "h": 8, "w": 8, "x": 8, - "y": 24 + "y": 29 }, "id": 22, "options": { @@ -1184,8 +1471,7 @@ "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -1201,7 +1487,7 @@ "h": 8, "w": 8, "x": 16, - "y": 24 + "y": 29 }, "id": 24, "options": { @@ -1240,8 +1526,27 @@ { "current": { "selected": true, - "text": "prod-Task-Services-ng", - "value": "prod-Task-Services-ng" + "text": "default", + "value": "default" + }, + "hide": 0, + "includeAll": false, + "label": "datasource", + "multi": false, + "name": "datasource", + "options": [], + "query": "prometheus", + "queryValue": "", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "type": 
"datasource" + }, + { + "current": { + "selected": false, + "text": "Infra", + "value": "Infra" }, "datasource": { "uid": "$datasource" @@ -1268,13 +1573,9 @@ }, { "current": { - "selected": true, - "text": [ - "All" - ], - "value": [ - "$__all" - ] + "selected": false, + "text": "ip-10-20-39-69.us-east-2.compute.internal", + "value": "ip-10-20-39-69.us-east-2.compute.internal" }, "datasource": { "type": "prometheus", @@ -1282,8 +1583,8 @@ }, "definition": "label_values(kube_node_labels{label_eks_amazonaws_com_nodegroup =~ \"$nodegroup\"}, node)", "hide": 0, - "includeAll": true, - "multi": true, + "includeAll": false, + "multi": false, "name": "node", "options": [], "query": { @@ -1327,25 +1628,6 @@ "tagsQuery": "", "type": "query", "useTags": false - }, - { - "current": { - "selected": true, - "text": "Prometheus-prod-atmosly", - "value": "cdverpho22sqoa" - }, - "hide": 0, - "includeAll": false, - "label": "datasource", - "multi": false, - "name": "datasource", - "options": [], - "query": "prometheus", - "queryValue": "", - "refresh": 1, - "regex": "", - "skipUrlSync": false, - "type": "datasource" } ] }, @@ -1353,6 +1635,7 @@ "from": "now-5m", "to": "now" }, + "timeRangeUpdatedDuringEditOrView": false, "timepicker": { "refresh_intervals": [ "10s", @@ -1369,6 +1652,6 @@ "timezone": "", "title": "Kubernetes / Compute Resources / Node (Groups)", "uid": "K8s-CR-Nodesgroups", - "version": 7, + "version": 1, "weekStart": "" -} \ No newline at end of file +} diff --git a/helm/values/prometheus/thanos/values.yaml b/helm/values/prometheus/thanos/values.yaml new file mode 100644 index 0000000..48adf58 --- /dev/null +++ b/helm/values/prometheus/thanos/values.yaml @@ -0,0 +1,1135 @@ +nameOverride: "prometheus" +fullnameOverride: "prometheus" +## Create default rules for monitoring the cluster +## +defaultRules: + create: true + rules: + alertmanager: true + etcd: true + configReloaders: true + general: true + k8sContainerCpuUsageSecondsTotal: true + k8sContainerMemoryCache: 
true + k8sContainerMemoryRss: true + k8sContainerMemorySwap: true + k8sContainerResource: true + k8sContainerMemoryWorkingSetBytes: true + k8sPodOwner: true + kubeApiserverAvailability: true + kubeApiserverBurnrate: true + kubeApiserverHistogram: true + kubeApiserverSlos: true + kubeControllerManager: true + kubelet: true + kubeProxy: true + kubePrometheusGeneral: true + kubePrometheusNodeRecording: true + kubernetesApps: true + kubernetesResources: true + kubernetesStorage: true + kubernetesSystem: true + kubeSchedulerAlerting: true + kubeSchedulerRecording: true + kubeStateMetrics: true + network: true + node: true + nodeExporterAlerting: true + nodeExporterRecording: true + prometheus: true + prometheusOperator: true + windows: false + + ## Disabled PrometheusRule alerts + disabled: {} + # KubeAPIDown: true + # NodeRAIDDegraded: true + +windowsMonitoring: + ## Deploys the windows-exporter and Windows-specific dashboards and rules (job name must be 'windows-exporter') + enabled: false + +## Configuration for prometheus-windows-exporter +## ref: https://github.com/prometheus-community/helm-charts/tree/main/charts/prometheus-windows-exporter +## +prometheus-windows-exporter: + ## Enable ServiceMonitor and set Kubernetes label to use as a job label + ## + prometheus: + monitor: + enabled: true + jobLabel: jobLabel + + releaseLabel: true + + ## Set job label to 'windows-exporter' as required by the default Prometheus rules and Grafana dashboards + ## + podLabels: + jobLabel: windows-exporter + + ## Enable memory and container metrics as required by the default Prometheus rules and Grafana dashboards + ## + config: |- + collectors: + enabled: '[defaults],memory,container' + +## Component scraping etcd +## +kubeEtcd: + enabled: true + +## ref: https://prometheus.io/docs/alerting/alertmanager/ +## +alertmanager: + ## Deploy alertmanager + ## + enabled: true + ## Alertmanager configuration directives + ## ref: 
https://prometheus.io/docs/alerting/configuration/#configuration-file + ## https://prometheus.io/webtools/alerting/routing-tree-editor/ + ## + config: + global: + resolve_timeout: 5m + route: + group_wait: 30s + group_interval: 1m + repeat_interval: 12h + receiver: slack_others + routes: + - receiver: slack_critical + # continue: true + match: + severity: critical + - receiver: slack_warning + # continue: true + match: + severity: warning + # - receiver: email_alerts + # match_re: + # severity: critical|warning + receivers: + - name: slack_others + slack_configs: + - api_url: "https://hooks.slack.com/services/TB5FXBSUE/B041XD27KHV/A7Z4C8jUdEJhcqvxOvjiMC" + send_resolved: true + icon_url: https://avatars3.githubusercontent.com/u/3380462 + title: |- + [{{ .Status | toUpper }}{{ if eq .Status "firing" }}: {{ .Alerts.Firing | len }}{{ end }}] + text: |- + {{ range .Alerts }} + + *Alert Name :* {{ .Labels.alertname }} + + {{- if .Annotations.summary }} + *Alert Summary:* {{ .Annotations.summary }} + {{- end -}} + + {{- if .Annotations.description }} + *Alert Description:* {{ .Annotations.description }} + {{ else }} + *Alert Message:* {{ .Annotations.message }} + {{- end }} + + *Alert Details:* + {{ range .Labels.SortedPairs }} • *{{ .Name }}:* `{{ .Value }}` + {{ end }} + {{ end }} + + - name: slack_critical + slack_configs: + - api_url: "https://hooks.slack.com/services/TB5FXBSUE/B041XD27KHV/WcA7Z8jUdEJhcqvxOvjiMC" + send_resolved: true + icon_url: https://avatars3.githubusercontent.com/u/3380462 + title: |- + [{{ .Status | toUpper }}{{ if eq .Status "firing" }}: {{ .Alerts.Firing | len }}{{ end }}] + text: |- + {{ range .Alerts }} + + *Alert Name :* {{ .Labels.alertname }} + + {{- if .Annotations.summary }} + *Alert Summary:* {{ .Annotations.summary }} + {{- end -}} + + {{- if .Annotations.description }} + *Alert Description:* {{ .Annotations.description }} + {{ else }} + *Alert Message:* {{ .Annotations.message }} + {{- end }} + + *Alert Details:* + {{ range 
.Labels.SortedPairs }} • *{{ .Name }}:* `{{ .Value }}` + {{ end }} + {{ end }} + + - name: slack_warning + slack_configs: + - api_url: "https://hooks.slack.com/services/TB5FXBSUE/B041XD27KHV/WcA7Z4C8jEJhcqvxOvjiMC" + send_resolved: true + icon_url: https://avatars3.githubusercontent.com/u/3380462 + title: |- + [{{ .Status | toUpper }}{{ if eq .Status "firing" }}: {{ .Alerts.Firing | len }}{{ end }}] + text: |- + {{ range .Alerts }} + + *Alert Name :* {{ .Labels.alertname }} + + {{- if .Annotations.summary }} + *Alert Summary:* {{ .Annotations.summary }} + {{- end -}} + + {{- if .Annotations.description }} + *Alert Description:* {{ .Annotations.description }} + {{ else }} + *Alert Message:* {{ .Annotations.message }} + {{- end }} + + *Alert Details:* + {{ range .Labels.SortedPairs }} • *{{ .Name }}:* `{{ .Value }}` + {{ end }} + {{ end }} + + + alertmanagerSpec: + affinity: + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: "Addons-Services" + operator: In + values: + - "true" + +grafana: + enabled: ${grafana_enabled} + serviceAccount: + annotations: ${annotations} + priorityClassName: grafana-pod-critical + defaultDashboardsTimezone: browser + replicas: 1 + autoscaling: + enabled: ${grafana_ha_enabled} + minReplicas: 1 + maxReplicas: 5 + metrics: + - type: Resource + resource: + name: cpu + targetAverageUtilization: 85 + - type: Resource + resource: + name: memory + targetAverageUtilization: 85 + + sidecar: + dashboards: + enabled: true + label: grafana_dashboard + labelValue: "1" + # Allow discovery in all namespaces for dashboards + searchNamespace: ALL + annotations: ${annotations} + # Support for new table panels, when enabled grafana auto migrates the old table panels to newer table panels + enableNewTablePanelSyntax: true + folder: /tmp/dashboards + folderAnnotation: grafana_folder + provider: + allowUiUpdates: true + foldersFromFilesStructure: true + + resources: + limits: + cpu: 200m + 
memory: 400Mi + requests: + cpu: 50m + memory: 100Mi + + datasources: + enabled: true + defaultDatasourceEnabled: true + isDefaultDatasource: false + # datasources.yaml: + # apiVersion: 1 + # datasources: + # - name: prometheus + # type: prometheus + # url: http://prometheus-operator-kube-p-prometheus.monitoring.svc.cluster.local:9090 + # access: proxy + # - name: loki + # type: loki + # url: http://loki:3100 + # access: proxy + + additionalDataSources: + ${indent(3, loki_datasource_config)} + ${indent(3, cw_datasource_config)} + ${indent(3, tempo_datasource_config)} + ${indent(3, thanos_datasource_config)} + + persistence: + enabled: true + storageClassName: ${storage_class_name} + size: 20Gi + + adminPassword: ${grafana_admin_password} + + ingress: + enabled: ${ingress_enabled} + ingressClassName: ${ingress_ingressClassName} + annotations: ${ingress_annotations} + hosts: ${ingress_hosts} + tls: ${ingress_tls} + + serviceMonitor: + enabled: true + labels: + release: prometheus-operator + + grafana.ini: + max_idle_connections: 500 + dashboards: + min_refresh_interval: ${min_refresh_interval} + server: + enable_gzip: true + + resources: + limits: + cpu: 1000m + memory: 3Gi + requests: + cpu: 200m + memory: 400Mi + + affinity: + podAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + - labelSelector: + matchExpressions: + - key: app.kubernetes.io/name + operator: In + values: + - grafana + topologyKey: topology.kubernetes.io/zone + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: "Addons-Services" + operator: In + values: + - "true" + +kube-state-metrics: + metricLabelsAllowlist: + - pods=[*] + - nodes=[*] + affinity: + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: "Addons-Services" + operator: In + values: + - "true" + +kubeProxy: + enabled: false + +kubeApiServer: + enabled: false + +kubeControllerManager: + enabled: 
false + +kubeScheduler: + enabled: false + +prometheusOperator: + createCustomResource: false + enabled: true + + affinity: + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: "Addons-Services" + operator: In + values: + - "true" + +nodeExporter: + enabled: true + resources: + limits: + cpu: 200m + memory: 600Mi + requests: + cpu: 50m + memory: 100Mi + +prometheus: + enabled: true + prometheusSpec: + priorityClassName: system-node-critical + enableRemoteWriteReceiver: true + storageSpec: + volumeClaimTemplate: + spec: + storageClassName: ${storage_class_name} + resources: + requests: + storage: 30Gi + retention: 14d + walCompression: true + ## If true, the Operator won't process any Prometheus configuration changes + ## + paused: false + ## Number of replicas of each shard to deploy for a Prometheus deployment. + ## Number of replicas multiplied by shards is the total number of Pods created. + ## + replicas: ${prometheus_replicas} + ## EXPERIMENTAL: Number of shards to distribute targets onto. + ## Number of replicas multiplied by shards is the total number of Pods created. + ## Note that scaling down shards will not reshard data onto remaining instances, it must be manually moved. + ## Increasing shards will not reshard data either but it will continue to be available from the same instances. + ## To query globally use Thanos sidecar and Thanos querier or remote write data to a central location. + ## Sharding is done on the content of the `__address__` target meta-label. 
+ ## + shards: ${prometheus_shards} + resources: + limits: + cpu: 1200m + memory: 4Gi + requests: + cpu: 200m + memory: 800Mi + + # remoteWrite: + # - url: http://grafana-mimir-nginx.monitoring.svc:80/api/v1/push + + thanos: + objectStorageConfig: + secret: + type: S3 + config: + bucket: ${s3_bucket_name} + endpoint: ${s3_endpoint} + region: ${s3_bucket_region} + # access_key: "" + # secret_key: "" + + affinity: + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: "Addons-Services" + operator: In + values: + - "true" + additionalScrapeConfigs: [] + # - job_name: blackbox + # metrics_path: /probe + # params: + # module: [http_2xx] + # static_configs: + # # Add URLs as target parameter + # - targets: + # - https://www.google.com + # - https://stackoverflow.com + # - https://scala-lang.org + # - https://helm.sh + + # relabel_configs: + # - source_labels: [__address__] + # target_label: __param_target + # - source_labels: [__param_target] + # # Important! 
+ # target_label: target + # # Ensure blackbox-exporter is reachable from Prometheus + # - target_label: __address__ + # replacement: blackbox-exporter-prometheus-blackbox-exporter:9115 + maximumStartupDurationSeconds: 900 + ingress: + enabled: ${enable_prometheus_internal_ingress} + annotations: + kubernetes.io/ingress.class: "internal-nginx" + kubernetes.io/tls-acme: "false" + hosts: + - ${prometheus_hostname} + paths: + - / + +prometheus-node-exporter: + resources: + limits: + cpu: 200m + memory: 600Mi + requests: + cpu: 10m + memory: 50Mi + +additionalPrometheusRulesMap: + nodes: + groups: + - name: ethtool + rules: + - alert: conntrack_allowance_available + annotations: + description: conntrack_allowance_available is the number of tracked connections that can be established by the instance before hitting the Connections Tracked allowance of that instance type `{{$labels.instance }}` + summary: conntrack_allowance_available `{{ $labels.instance }}` + expr: node_net_ethtool{type="conntrack_allowance_available"} > 3000 + for: 30s + labels: + severity: critical + - alert: conntrack_allowance_exceeded + annotations: + description: conntrack_allowance_exceeded is the number of packets dropped because connection tracking exceeded the maximum for the instance and new connections could not be established `{{$labels.instance }}` + summary: conntrack_allowance_exceeded `{{ $labels.instance }}` + expr: node_net_ethtool{type="conntrack_allowance_exceeded"} > 0 + for: 30s + labels: + severity: critical + - alert: pps_allowance_exceeded + annotations: + description: pps_allowance_exceeded is the number of packets queued and/or dropped because the bidirectional PPS exceeded the maximum for the instance `{{$labels.instance }}` + summary: pps_allowance_exceeded `{{ $labels.instance }}` + expr: node_net_ethtool{type="pps_allowance_exceeded"} > 0 + for: 30s + labels: + severity: critical + - alert: bw_in_allowance_exceeded + annotations: + description: bw_in_allowance_exceeded is 
the number of packets queued and/or dropped because the inbound aggregate bandwidth exceeded the maximum for the instance `{{$labels.instance }}` + summary: bw_in_allowance_exceeded `{{ $labels.instance }}` + expr: node_net_ethtool{type="bw_in_allowance_exceeded"} > 0 + for: 30s + labels: + severity: critical + - alert: bw_out_allowance_exceeded + annotations: + description: bw_out_allowance_exceeded is the number of packets queued and/or dropped because the outbound aggregate bandwidth exceeded the maximum for the instance `{{$labels.instance }}` + summary: bw_out_allowance_exceeded `{{ $labels.instance }}` + expr: node_net_ethtool{type="bw_out_allowance_exceeded"} > 0 + for: 30s + labels: + severity: critical + - name: blackbox + rules: + - alert: BlackboxProbeFailed + expr: probe_success == 0 + for: 30s + labels: + severity: critical + annotations: + summary: Blackbox probe failed `{{ $labels.instance }}` + description: Probe failed on `{{ $labels.instance }}` + + - alert: BlackboxProbeHttpFailure + expr: probe_http_status_code <= 199 OR probe_http_status_code >= 400 + for: 30s + labels: + severity: critical + annotations: + summary: Blackbox probe HTTP failure `{{ $labels.instance }}` + description: HTTP status code is not in between 200-399 on `{{ $labels.instance }}` + + - alert: BlackboxSlowProbe + expr: avg_over_time(probe_duration_seconds[1m]) > 30 + for: 1m + labels: + severity: warning + annotations: + summary: Blackbox slow probe `{{ $labels.instance }}` + description: Blackbox probe took more than 30 seconds to complete on `{{ $labels.instance }}` + + - alert: BlackboxProbeSlowHttp + expr: avg_over_time(probe_http_duration_seconds[1m]) > 30 + for: 1m + labels: + severity: warning + annotations: + summary: Blackbox probe slow HTTP `{{ $labels.instance }}` + description: HTTP request took more than 30 seconds to complete on `{{ $labels.instance }}` + + - alert: BlackboxProbeSlowPing + expr: avg_over_time(probe_icmp_duration_seconds[1m]) > 30 + for: 1m + 
labels: + severity: warning + annotations: + summary: Blackbox probe slow ping `{{ $labels.instance }}` + description: Blackbox ping took more than 30 seconds to complete on `{{ $labels.instance }}` + + - alert: BlackboxSslCertificateWillExpireSoon + expr: probe_ssl_earliest_cert_expiry - time() < 86400 * 30 + for: 1m + labels: + severity: warning + annotations: + summary: Blackbox SSL certificate will expire soon `{{ $labels.instance }}` + description: SSL certificate expires in 30 days on `{{ $labels.instance }}` + - name: mysql + rules: + - alert: MysqlDown + expr: mysql_up == 0 + for: 1s + labels: + severity: critical + annotations: + summary: MySQL down (instance {{ $labels.instance }}) + description: MySQL instance is down on {{ $labels.instance }}\n VALUE = {{ $value }}\n LABELS= {{ $labels }} + - alert: MysqlTooManyConnections(>80%) + expr: avg by (instance) (mysql_global_status_threads_connected) / avg by (instance) (mysql_global_variables_max_connections) * 100 > 80 + for: 2m + labels: + severity: warning + annotations: + summary: MySQL too many connections (> 80%) (instance {{ $labels.instance }}) + description: More than 80% of MySQL connections are in use on {{ $labels.instance }}\n VALUE = {{ $value }}\n LABELS= {{ $labels }} + - alert: MysqlSlowQueries + expr: rate(mysql_global_status_slow_queries[5m]) > 0 + for: 2m + labels: + severity: warning + annotations: + summary: MySQL slow queries (instance {{ $labels.instance }}) + description: MySQL server mysql has some new slow query.\n VALUE = {{ $value }}\n LABELS= {{ $labels }} + - alert: MysqlInnodbLogWaits + expr: rate(mysql_global_status_innodb_log_waits[15m]) > 10 + for: 0m + labels: + severity: warning + annotations: + summary: MySQL InnoDB log waits (instance {{ $labels.instance }}) + description: MySQL innodb log writes stalling\n VALUE = {{ $value }}\n LABELS= {{ $labels }} + - alert: Mysql Cache Hit Rate + expr: rate(mysql_global_status_table_open_cache_hits[5m]) / 
(rate(mysql_global_status_table_open_cache_hits[5m]) + rate(mysql_global_status_table_open_cache_misses[5m])) < 0.8 + for: 1m + labels: + severity: warning + annotations: + summary: MySQL Cache Hit Rate is low (instance {{ $labels.instance }}) + description: MySQL Cache Hit Rate is low (instance {{ $labels.instance }}) + - alert: MysqlHighThreadsRunning + expr: avg by (instance) (mysql_global_status_threads_running) / avg by (instance) (mysql_global_variables_max_connections) * 100 > 60 + for: 2m + labels: + severity: warning + annotations: + summary: MySQL high threads running (instance {{ $labels.instance }}) + description: More than 60% of MySQL connections are in running state on {{ $labels.instance }}\n VALUE = {{ $value }}\n LABELS= {{ $labels }} + - alert: MysqlSlaveIoThreadNotRunning + expr: mysql_slave_status_master_server_id > 0 and ON (instance) mysql_slave_status_slave_io_running == 0 + for: 0m + labels: + severity: critical + annotations: + summary: MySQL Slave IO thread not running (instance {{ $labels.instance }}) + description: MySQL Slave IO thread not running on {{ $labels.instance }}\n VALUE = {{ $value }}\n LABELS= {{ $labels }} + - alert: MysqlRestarted + expr: mysql_global_status_uptime < 60 + for: 0m + labels: + severity: info + annotations: + summary: MySQL restarted (instance {{ $labels.instance }}) + description: MySQL has just been restarted, less than one minute ago on {{ $labels.instance }}.\n VALUE = {{ $value }}\n LABELS= {{ $labels }} + + - name: mongodb + rules: + - alert: MongoServerDown + expr: up{job="mongodb-metrics"} == 0 + for: 1s + labels: + severity: warning + annotations: + summary: Mongo server detected down by instance {{$labels.instance}} in {{$labels.namespace}} + - alert: HighLatency + expr: rate(mongodb_mongod_op_latencies_latency_total[5m]) / rate(mongodb_mongod_op_latencies_ops_total[5m]) > 35000 + for: 10m + labels: + severity: page + annotations: + summary: High latency in instance {{$labels.instance}} + - alert: 
HighTicketUtilization + expr: (mongodb_mongod_wiredtiger_concurrent_transactions_out_tickets / mongodb_mongod_wiredtiger_concurrent_transactions_total_tickets) > 0.75 + for: 10m + labels: + severity: page + annotations: + summary: Ticket usage over 75% in instance {{$labels.instance}} + - alert: MongodbTooManyConnections + expr: avg by(instance) (rate(mongodb_ss_connections{conn_type="current"}[1m])) / avg by(instance) (sum (mongodb_ss_connections) by (instance)) * 100 > 80 + for: 2m + labels: + severity: warning + annotations: + summary: MongoDB too many connections (instance {{ $labels.instance }}) + description: "Too many connections (> 80%)\n VALUE = {{ $value }}\n LABELS = {{ $labels }}" + - alert: MongodbReplicationLag + expr: avg(mongodb_mongod_replset_member_optime_date{state="PRIMARY"}) - avg(mongodb_mongod_replset_member_optime_date{state="SECONDARY"}) > 10 + for: 0m + labels: + severity: critical + annotations: + summary: MongoDB replication lag (instance {{ $labels.instance }}) + description: "Mongodb replication lag is more than 10s\n VALUE = {{ $value }}\n LABELS = {{ $labels }}" + - name: Redis + rules: + - alert: RedisDown + expr: redis_up == 0 + for: 0m + labels: + severity: critical + annotations: + summary: Redis down (instance {{ $labels.instance }}) + description: "Redis instance is down\n VALUE = {{ $value }}\n LABELS = {{ $labels }}" + - alert: RedisDisconnectedSlaves + expr: count without (instance, job) (redis_connected_slaves) - sum without (instance, job) (redis_connected_slaves) - 1 > 1 + for: 0m + labels: + severity: critical + annotations: + summary: Redis disconnected slaves (instance {{ $labels.instance }}) + description: "Redis not replicating for all slaves. 
Consider reviewing the redis replication status.\n VALUE = {{ $value }}\n LABELS = {{ $labels }}" + - alert: RedisTooManyConnections + expr: redis_connected_clients > 1750 + for: 2m + labels: + severity: warning + annotations: + summary: Redis too many connections (instance {{ $labels.instance }}) + description: "Redis instance has too many connections\n VALUE = {{ $value }}\n LABELS = {{ $labels }}" + - alert: RedisReplicationBroken + expr: delta(redis_connected_slaves[1m]) < 0 + for: 0m + labels: + severity: critical + annotations: + summary: Redis replication broken (instance {{ $labels.instance }}) + description: "Redis instance lost a slave\n VALUE = {{ $value }}\n LABELS = {{ $labels }}" + - alert: RedisHighResponseTime + expr: sum(rate(redis_commands_duration_seconds_total[5m])) / sum(rate(redis_commands_processed_total[5m])) > 0.250 + for: 10m + labels: + severity: page + annotations: + summary: Response time over 250ms in instance {{$labels.instance}} + - alert: RedisHighKeysEvictionRatio + expr: (sum(rate(redis_evicted_keys_total[5m])) / sum(redis_db_keys)) > 0.1 + for: 30m + labels: + severity: page + annotations: + summary: High keys eviction ratio in instance {{$labels.instance}} + + - name: Jenkins Alerts + rules: + - alert: JenkinsInstanceUnhealthy + expr: jenkins_health_check_score < 0.5 + for: 5m + labels: + severity: warning + annotations: + summary: Jenkins server is unhealthy + description: "Check jenkins health. 
Current health value is {{ $value }}" + - alert: JenkinsFailedJobs + expr: rate(jenkins_runs_failure_total[1h]) > 1 + for: 5m + labels: + severity: warning + annotations: + summary: Jenkins Jobs Failing + description: "Jenkins jobs are failing\n VALUE = {{ $value }}\n" + - name: Elastic Search Alerts + rules: + - alert: ElasticSearchClusterUnhealthy + expr: elasticsearch_cluster_health_status{color="red"}==1 + for: 1m + labels: + severity: warning + annotations: + summary: Elastic Search Cluster Is Unhealthy + description: "Elastic Search Cluster is in Unhealthy State. Current health value is RED" + - alert: ElasticSearchHighHeapMemoryUsage + expr: elasticsearch_jvm_memory_used_bytes{area="heap"} / elasticsearch_jvm_memory_max_bytes{area="heap"} > 0.7 + for: 10m + labels: + severity: warning + annotations: + summary: ElasticSearch node {{ $labels.name }} heap usage is high + description: The heap usage in {{ $labels.name }} is over 70% for 10m. + - alert: ElasticSearchLowHeapMemoryUsage + expr: elasticsearch_jvm_memory_used_bytes{area="heap"} / elasticsearch_jvm_memory_max_bytes{area="heap"} < 0.15 + for: 10m + labels: + severity: warning + annotations: + summary: ElasticSearch node {{ $labels.name }} heap usage is low + description: The heap usage in {{ $labels.name }} is less than 15% for 10m.
+ + - name: Rabbitmq + rules: + - alert: RabbitmqNodeDown + expr: sum(rabbitmq_build_info) < 2 + for: 0m + labels: + severity: critical + annotations: + summary: Rabbitmq node down (instance {{ $labels.instance }}) + description: "Less than 2 nodes running in RabbitMQ cluster\n VALUE = {{ $value }}\n LABELS = {{ $labels }}" + - alert: RabbitmqTooManyUnackMessages + expr: sum(rabbitmq_queue_messages_unacked) BY (QUEUE) > 1000 + for: 1m + labels: + severity: warning + annotations: + summary: Rabbitmq too many unack messages (instance {{ $labels.instance }}) + description: "Too many unacknowledged messages\n VALUE = {{ $value }}\n LABELS = {{ $labels }}" + - alert: RabbitmqTooManyConnections + expr: rabbitmq_connections > 1000 + for: 2m + labels: + severity: warning + annotations: + summary: Rabbitmq too many connections (instance {{ $labels.instance }}) + description: "The total connections of a node is too high\n VALUE = {{ $value }}\n LABELS = {{ $labels }}" + - alert: RabbitmqUnroutableMessages + expr: sum by(namespace, rabbitmq_cluster) (increase(rabbitmq_channel_messages_unroutable_dropped_total[5m]) * on(instance) group_left(rabbitmq_cluster) rabbitmq_identity_info) >= 1 + for: 2m + labels: + severity: warning + annotations: + summary: Rabbitmq unroutable messages (instance {{ $labels.instance }}) + description: "A queue has unroutable messages\n VALUE = {{ $value }}\n LABELS = {{ $labels }}" + - alert: RabbitmqFileDescriptorsNearLimit + expr: sum by(namespace, rabbitmq_cluster, pod, rabbitmq_node) (max_over_time(rabbitmq_process_open_fds[5m]) * on(instance) group_left(rabbitmq_cluster, rabbitmq_node, pod) rabbitmq_identity_info) / sum by(namespace, rabbitmq_cluster, pod, rabbitmq_node) (rabbitmq_process_max_tcp_sockets * on(instance) group_left(rabbitmq_cluster, rabbitmq_node, pod) rabbitmq_identity_info) > 0.8 + for: 10m + annotations: + description: | + `{{ $value | humanizePercentage }}` file descriptors of file + descriptor limit are used in RabbitMQ node
`{{ $labels.rabbitmq_node }}`, + pod `{{ $labels.pod }}`, RabbitMQ cluster `{{ $labels.rabbitmq_cluster }}`, + namespace `{{ $labels.namespace }}`. + summary: | + More than 80% of file descriptors are used on the RabbitMQ node. + When this value reaches 100%, new connections will not be accepted and disk write operations may fail. + Client libraries, peer nodes and CLI tools will not be able to connect when the node runs out of available file descriptors. + See https://www.rabbitmq.com/production-checklist.html#resource-limits-file-handle-limit. + labels: + rulesgroup: rabbitmq + severity: warning + + +#Thanos Configuration +thanosRuler: + enabled: true + + ## Annotations for ThanosRuler + ## + annotations: {} + + ## Service account for ThanosRuler to use. + ## ref: https://kubernetes.io/docs/tasks/configure-pod-container/configure-service-account/ + ## + serviceAccount: + create: true + name: "" + annotations: {} + + ## Configure pod disruption budgets for ThanosRuler + ## ref: https://kubernetes.io/docs/tasks/run-application/configure-pdb/#specifying-a-poddisruptionbudget + ## + podDisruptionBudget: + enabled: true + minAvailable: 1 + maxUnavailable: "" + + serviceMonitor: + ## If true, create a serviceMonitor for thanosRuler + ## + selfMonitor: true + + ## Scrape interval. If not set, the Prometheus default scrape interval is used. + ## + interval: "" + + ## Additional labels + ## + additionalLabels: {} + + ## SampleLimit defines per-scrape limit on number of scraped samples that will be accepted. + ## + sampleLimit: 0 + + ## TargetLimit defines a limit on the number of scraped targets that will be accepted. + ## + targetLimit: 0 + + ## Per-scrape limit on number of labels that will be accepted for a sample. Only valid in Prometheus versions 2.27.0 and newer. + ## + labelLimit: 0 + + ## Per-scrape limit on length of labels name that will be accepted for a sample. Only valid in Prometheus versions 2.27.0 and newer. 
+ ## + labelNameLengthLimit: 0 + + ## Per-scrape limit on length of labels value that will be accepted for a sample. Only valid in Prometheus versions 2.27.0 and newer. + ## + labelValueLengthLimit: 0 + + ## proxyUrl: URL of a proxy that should be used for scraping. + ## + proxyUrl: "" + + ## scheme: HTTP scheme to use for scraping. Can be used with `tlsConfig` for example if using istio mTLS. + scheme: "" + + ## tlsConfig: TLS configuration to use when scraping the endpoint. For example if using istio mTLS. + ## Of type: https://github.com/coreos/prometheus-operator/blob/main/Documentation/api.md#tlsconfig + tlsConfig: {} + + bearerTokenFile: + + ## MetricRelabelConfigs to apply to samples after scraping, but before ingestion. + ## ref: https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/api.md#relabelconfig + ## + metricRelabelings: [] + # - action: keep + # regex: 'kube_(daemonset|deployment|pod|namespace|node|statefulset).+' + # sourceLabels: [__name__] + + ## RelabelConfigs to apply to samples before scraping + ## ref: https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/api.md#relabelconfig + ## + relabelings: [] + # - sourceLabels: [__meta_kubernetes_pod_node_name] + # separator: ; + # regex: ^(.*)$ + # targetLabel: nodename + # replacement: $1 + # action: replace + + ## Additional Endpoints + ## + additionalEndpoints: [] + # - port: oauth-metrics + # path: /metrics + + ## Settings affecting thanosRulerSpec + ## ref: https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/api.md#thanosrulerspec + ## + thanosRulerSpec: + ## Standard object's metadata. More info: https://github.com/kubernetes/community/blob/master/contributors/devel/sig-architecture/api-conventions.md#metadata + ## Metadata Labels and Annotations get propagated to the ThanosRuler pods.
+ ## + podMetadata: {} + ruleNamespaceSelector: {} + + ## If true, a nil or {} value for thanosRuler.thanosRulerSpec.ruleSelector will cause the + ## prometheus resource to be created with selectors based on values in the helm deployment, + ## which will also match the PrometheusRule resources created + ## + ruleSelectorNilUsesHelmValues: true + + ## PrometheusRules to be selected for target discovery. + ## If {}, select all PrometheusRules + ## + ruleSelector: {} + ## Example which select all PrometheusRules resources + ## with label "prometheus" with values any of "example-rules" or "example-rules-2" + # ruleSelector: + # matchExpressions: + # - key: prometheus + # operator: In + # values: + # - example-rules + # - example-rules-2 + # + ## Example which select all PrometheusRules resources with label "role" set to "example-rules" + # ruleSelector: + # matchLabels: + # role: example-rules + + ## Define Log Format + # Use logfmt (default) or json logging + logFormat: logfmt + + ## Log level for ThanosRuler to be configured with. + ## + logLevel: info + + ## Size is the expected size of the thanosRuler cluster. The controller will eventually make the size of the + ## running cluster equal to the expected size. + replicas: 1 + + ## Time duration ThanosRuler shall retain data for. Default is '24h', and must match the regular expression + ## [0-9]+(ms|s|m|h) (milliseconds seconds minutes hours). + ## + retention: 24h + + ## Interval between consecutive evaluations. + ## + evaluationInterval: "" + + ## Storage is the definition of how storage will be used by the ThanosRuler instances. + ## ref: https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/user-guides/storage.md + ## + storage: {} + # volumeClaimTemplate: + # spec: + # storageClassName: gluster + # accessModes: ["ReadWriteOnce"] + # resources: + # requests: + # storage: 50Gi + # selector: {} + + ## AlertmanagerConfig define configuration for connecting to alertmanager. 
+ ## Only available with Thanos v0.10.0 and higher. Maps to the alertmanagers.config Thanos Ruler arg. + alertmanagersConfig: + # use existing secret, if configured, alertmanagersConfig.secret will not be used + existingSecret: {} + # name: "" + # key: "" + # will render alertmanagersConfig secret data and configure it to be used by Thanos Ruler custom resource, ignored when alertmanagersConfig.existingSecret is set + # https://thanos.io/tip/components/rule.md/#alertmanager + secret: {} + # alertmanagers: + # - api_version: v2 + # http_config: + # basic_auth: + # username: some_user + # password: some_pass + # static_configs: + # - alertmanager.thanos.io + # scheme: http + # timeout: 10s + + ## DEPRECATED. Define URLs to send alerts to Alertmanager. For Thanos v0.10.0 and higher, alertmanagersConfig should be used instead. + ## Note: this field will be ignored if alertmanagersConfig is specified. Maps to the alertmanagers.url Thanos Ruler arg. + # alertmanagersUrl: + + ## The external URL the Thanos Ruler instances will be available under. This is necessary to generate correct URLs. This is necessary if Thanos Ruler is not served from root of a DNS name. + ## + externalPrefix: + + ## If true, http://{{ template "kube-prometheus-stack.thanosRuler.name" . }}.{{ template "kube-prometheus-stack.namespace" . }}:{{ .Values.thanosRuler.service.port }} + ## will be used as value for externalPrefix + externalPrefixNilUsesHelmValues: true + + ## The route prefix ThanosRuler registers HTTP handlers for. This is useful, if using ExternalURL and a proxy is rewriting HTTP routes of a request, and the actual ExternalURL is still true, + ## but the server serves requests under a different route prefix. For example for use with kubectl proxy.
+ ## + routePrefix: / + + ## ObjectStorageConfig configures object storage in Thanos + objectStorageConfig: + # use existing secret, if configured, objectStorageConfig.secret will not be used + # existingSecret: + # name: "thanos-objstore" + # key: "objstore.yml" + # will render objectStorageConfig secret data and configure it to be used by Thanos Ruler custom resource, ignored when objectStorageConfig.existingSecret is set + # https://thanos.io/tip/thanos/storage.md/#s3 + secret: + type: S3 + config: + bucket: ${s3_bucket_name} + endpoint: ${s3_endpoint} + region: ${s3_bucket_region} + # access_key: "" + # secret_key: "" + + ## Labels by name to drop before sending to alertmanager + ## Maps to the --alert.label-drop flag of thanos ruler. + alertDropLabels: [] + + ## QueryEndpoints defines Thanos querier endpoints from which to query metrics. + ## Maps to the --query flag of thanos ruler. + queryEndpoints: [http://prometheus-prometheus.monitoring.svc.cluster.local:9090/] + + ## Define configuration for connecting to thanos query instances. If this is defined, the queryEndpoints field will be ignored. + ## Maps to the query.config CLI argument. Only available with thanos v0.11.0 and higher. + queryConfig: + # use existing secret, if configured, queryConfig.secret will not be used + existingSecret: {} + # name: "" + # key: "" + # render queryConfig secret data and configure it to be used by Thanos Ruler custom resource, ignored when queryConfig.existingSecret is set + # https://thanos.io/tip/components/rule.md/#query-api + secret: {} + # - http_config: + # basic_auth: + # username: some_user + # password: some_pass + # static_configs: + # - URL + # scheme: http + # timeout: 10s + + ## Labels configure the external label pairs to ThanosRuler. A default replica + ## label `thanos_ruler_replica` will be always added as a label with the value + ## of the pod's name and it will be dropped in the alerts. 
+ labels: {} + + ## If set to true all actions on the underlying managed objects are not going to be performed, except for delete actions. + ## + paused: false + + ## Allows setting additional arguments for the ThanosRuler container + ## ref: https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/api.md#thanosruler + ## + additionalArgs: [] + # - name: remote-write.config + # value: |- + # "remote_write": + # - "name": "receiver-0" + # "remote_timeout": "30s" + # "url": "http://thanos-receiver-0.thanos-receiver:8081/api/v1/receive" + + ## Define which Nodes the Pods are scheduled on. + ## ref: https://kubernetes.io/docs/user-guide/node-selection/ + ## + nodeSelector: {} + + ## Define resources requests and limits for single Pods. + ## ref: https://kubernetes.io/docs/user-guide/compute-resources/ + ## + resources: + requests: + memory: 200Mi + cpu: 100m + limits: + memory: 400Mi + cpu: 200m + + ## Pod anti-affinity can prevent the scheduler from placing Prometheus replicas on the same node. + ## The default value "soft" means that the scheduler should *prefer* to not schedule two replica pods onto the same node but no guarantee is provided. + ## The value "hard" means that the scheduler is *required* to not schedule two replica pods onto the same node. + ## The value "" will disable pod anti-affinity so that no anti-affinity rules will be configured. + ## + podAntiAffinity: "" + + ## If anti-affinity is enabled sets the topologyKey to use for anti-affinity. 
+ ## This can be changed to, for example, failure-domain.beta.kubernetes.io/zone + ## + podAntiAffinityTopologyKey: kubernetes.io/hostname + + ## Assign custom affinity rules to the thanosRuler instance + ## ref: https://kubernetes.io/docs/concepts/configuration/assign-pod-node/ + ## + affinity: + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: "Addons-Services" + operator: In + values: + - "true" diff --git a/helm/values/prometheus/values.yaml b/helm/values/prometheus/values.yaml index 8c2a628..95ccab7 100644 --- a/helm/values/prometheus/values.yaml +++ b/helm/values/prometheus/values.yaml @@ -1,3 +1,5 @@ +nameOverride: "prometheus" +fullnameOverride: "prometheus" ## Create default rules for monitoring the cluster ## defaultRules: @@ -202,28 +204,24 @@ alertmanager: grafana: enabled: ${grafana_enabled} - image: - repository: grafana/grafana - # Overrides the Grafana image tag whose default is the chart appVersion - tag: "11.1.0" serviceAccount: annotations: ${annotations} priorityClassName: grafana-pod-critical defaultDashboardsTimezone: browser replicas: 1 autoscaling: - enabled: false + enabled: ${grafana_ha_enabled} minReplicas: 1 - maxReplicas: 10 + maxReplicas: 5 metrics: - type: Resource resource: name: cpu - targetAverageUtilization: 60 + targetAverageUtilization: 85 - type: Resource resource: name: memory - targetAverageUtilization: 60 + targetAverageUtilization: 85 sidecar: dashboards: @@ -243,11 +241,11 @@ grafana: resources: limits: - cpu: 400m + cpu: 200m memory: 400Mi requests: cpu: 50m - memory: 50Mi + memory: 100Mi datasources: {} # datasources.yaml: @@ -387,6 +385,18 @@ prometheus: ## If true, the Operator won't process any Prometheus configuration changes ## paused: false + ## Number of replicas of each shard to deploy for a Prometheus deployment. + ## Number of replicas multiplied by shards is the total number of Pods created. 
+ ## + replicas: ${prometheus_replicas} + ## EXPERIMENTAL: Number of shards to distribute targets onto. + ## Number of replicas multiplied by shards is the total number of Pods created. + ## Note that scaling down shards will not reshard data onto remaining instances, it must be manually moved. + ## Increasing shards will not reshard data either but it will continue to be available from the same instances. + ## To query globally use Thanos sidecar and Thanos querier or remote write data to a central location. + ## Sharding is done on the content of the `__address__` target meta-label. + ## + shards: ${prometheus_shards} resources: limits: cpu: 1200m @@ -395,8 +405,6 @@ prometheus: cpu: 200m memory: 800Mi - remoteWrite: - - url: http://grafana-mimir-nginx.monitoring.svc:80/api/v1/push affinity: nodeAffinity: diff --git a/helm/values/thanos/thanos.yaml b/helm/values/thanos/thanos.yaml new file mode 100644 index 0000000..6b281f7 --- /dev/null +++ b/helm/values/thanos/thanos.yaml @@ -0,0 +1,823 @@ +global: + defaultStorageClass: ${storage_class_name} + storageClass: ${storage_class_name} + +existingObjstoreSecret: "prometheus-prometheus" +## @param existingObjstoreSecretItems Optional item list for specifying a custom Secret key. 
If so, path should be objstore.yml +## +existingObjstoreSecretItems: + - key: object-storage-configs.yaml # The key in your secret + path: objstore.yml + +query: + enabled: true + replicaLabel: [replica] + ## Dynamically configure store APIs using DNS discovery + ## @param query.dnsDiscovery.enabled Enable store APIs discovery via DNS + ## @param query.dnsDiscovery.sidecarsService Sidecars service name to discover them using DNS discovery + ## @param query.dnsDiscovery.sidecarsNamespace Sidecars namespace to discover them using DNS discovery + ## + dnsDiscovery: + enabled: true + sidecarsService: "prometheus-operated" + sidecarsNamespace: "monitoring" + + replicaCount: 1 + containerPorts: + http: 10902 + grpc: 10901 + resources: + requests: + cpu: 50m + memory: 100Mi + limits: + cpu: 100m + memory: 200Mi + affinity: + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: "Addons-Services" + operator: In + values: + - "true" + serviceAccount: + create: true + name: "" + annotations: {} + automountServiceAccountToken: false + ## RBAC configuration + ## + rbac: + ## @param query.rbac.create Create a ClusterRole and ClusterRoleBinding for the Thanos Query Service Account + ## + create: false + ## @param query.rbac.rules Custom RBAC rules to set + ## e.g: + ## rules: + ## - apiGroups: + ## - "" + ## resources: + ## - pods + ## verbs: + ## - get + ## - list + ## + rules: [] + autoscaling: + enabled: false + minReplicas: "" + maxReplicas: "" + targetCPU: "" + targetMemory: "" + targetPodMetrics: [] + ## Thanos Query Pod Disruption Budget configuration + ## ref: https://kubernetes.io/docs/tasks/run-application/configure-pdb + ## @param query.pdb.create Enable/disable a Pod Disruption Budget creation for Thanos Query + ## @param query.pdb.minAvailable Minimum number/percentage of pods that should remain scheduled + ## @param query.pdb.maxUnavailable Maximum number/percentage of pods that may be made unavailable + 
## + pdb: + create: true + minAvailable: 1 + maxUnavailable: "" + +## @section Thanos Query Frontend parameters +queryFrontend: + enabled: true + resources: + requests: + cpu: 50m + memory: 100Mi + limits: + cpu: 100m + memory: 200Mi + + affinity: + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: "Addons-Services" + operator: In + values: + - "true" + ## + autoscaling: + enabled: false + minReplicas: "" + maxReplicas: "" + targetCPU: "" + targetMemory: "" + ## Thanos Query Frontend Pod Disruption Budget configuration + ## ref: https://kubernetes.io/docs/tasks/run-application/configure-pdb + ## @param queryFrontend.pdb.create Enable/disable a Pod Disruption Budget creation for Thanos Query Frontend + ## @param queryFrontend.pdb.minAvailable Minimum number/percentage of pods that should remain scheduled + ## @param queryFrontend.pdb.maxUnavailable Maximum number/percentage of pods that may be made unavailable + ## + pdb: + create: true + minAvailable: 1 + maxUnavailable: "" + +## @section Thanos Bucket Web parameters +bucketweb: + ## @param bucketweb.enabled Enable/disable Thanos Bucket Web component + ## + enabled: true + ## @param bucketweb.refresh Refresh interval to download metadata from remote storage + ## + refresh: 30m + ## @param bucketweb.timeout Timeout to download metadata from remote storage + ## + timeout: 5m + ## @param bucketweb.extraEnvVars Extra environment variables for Thanos Bucket Web container + ## e.g: + ## extraEnvVars: + ## - name: FOO + ## value: "bar" + ## + replicaCount: 1 + ## @param bucketweb.revisionHistoryLimit The number of old history to retain to allow rollback + ## + revisionHistoryLimit: 10 + ## @param bucketweb.updateStrategy.type Update strategy type for Thanos Bucket Web replicas + ## + resources: + requests: + cpu: 50m + memory: 100Mi + limits: + cpu: 100m + memory: 200Mi + affinity: + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + 
nodeSelectorTerms: + - matchExpressions: + - key: "Addons-Services" + operator: In + values: + - "true" + autoscaling: + enabled: false + minReplicas: "" + maxReplicas: "" + targetCPU: "" + targetMemory: "" + ## Thanos Bucket Web Pod Disruption Budget configuration + ## ref: https://kubernetes.io/docs/tasks/run-application/configure-pdb + ## @param bucketweb.pdb.create Enable/disable a Pod Disruption Budget creation for Thanos Bucket Web + ## @param bucketweb.pdb.minAvailable Minimum number/percentage of pods that should remain scheduled + ## @param bucketweb.pdb.maxUnavailable Maximum number/percentage of pods that may be made unavailable + ## + pdb: + create: true + minAvailable: 1 + maxUnavailable: "" + ## Configure the ingress resource that allows you to access Thanos Bucketweb + ## ref: https://kubernetes.io/docs/concepts/services-networking/ingress/ + ## +compactor: + + enabled: true + + dataDir: /data + ## Resolution and Retention flags + ## @param compactor.retentionResolutionRaw Resolution and Retention flag + ## @param compactor.retentionResolution5m Resolution and Retention flag + ## @param compactor.retentionResolution1h Resolution and Retention flag + ## + retentionResolutionRaw: 30d + retentionResolution5m: 90d + retentionResolution1h: 1y + ## @param compactor.concurrency Number of goroutines to use when compacting groups. 
+ concurrency: 1 + ## @param compactor.consistencyDelay Minimum age of fresh (non-compacted) blocks before they are being processed + ## + consistencyDelay: 5m + revisionHistoryLimit: 10 + ## K8s CronJob configuration + ## ref: https://kubernetes.io/docs/tasks/job/automated-tasks-with-cron-jobs/ + ## @param compactor.cronJob.enabled Run compactor as a CronJob rather than a Deployment + ## @param compactor.cronJob.schedule The schedule in Cron format, see https://en.wikipedia.org/wiki/Cron + ## @param compactor.cronJob.timeZone The time zone name for the given schedule, see https://en.wikipedia.org/wiki/List_of_tz_database_time_zones + ## @param compactor.cronJob.concurrencyPolicy Specifies how to treat concurrent executions of a Job + ## @param compactor.cronJob.startingDeadlineSeconds Optional deadline in seconds for starting the job if it misses scheduled time for any reason + ## @param compactor.cronJob.suspend This flag tells the controller to suspend subsequent executions + ## @param compactor.cronJob.successfulJobsHistoryLimit The number of successful finished jobs to retain + ## @param compactor.cronJob.failedJobsHistoryLimit The number of failed finished jobs to retain + ## @param compactor.cronJob.backoffLimit The number of retries before marking this job failed + ## @param compactor.cronJob.ttlSecondsAfterFinished The maximum retention before removing the job + ## + cronJob: + enabled: false + schedule: "0 */6 * * *" + timeZone: "" + startingDeadlineSeconds: "" + concurrencyPolicy: Forbid + suspend: "" + successfulJobsHistoryLimit: "" + failedJobsHistoryLimit: "" + backoffLimit: "" + ttlSecondsAfterFinished: "" + affinity: + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: "Addons-Services" + operator: In + values: + - "true" + persistence: + ## @param compactor.persistence.enabled Enable data persistence using PVC(s) on Thanos Compactor pods + ## + enabled: true + ## @param compactor.persistence.ephemeral Use ephemeral volume for data persistence using PVC(s) on Thanos Compactor
pods + ## + ephemeral: false + ## @param compactor.persistence.defaultEmptyDir Defaults to emptyDir if persistence is disabled. + ## + defaultEmptyDir: true + ## @param compactor.persistence.storageClass Specify the `storageClass` used to provision the volume + ## If defined, storageClassName: + ## If set to "-", storageClassName: "", which disables dynamic provisioning + ## If undefined (the default) or set to null, no storageClassName spec is + ## set, choosing the default provisioner. + ## + storageClass: ${storage_class_name} + ## @param compactor.persistence.accessModes PVC Access Modes for data volume + ## + accessModes: + - ReadWriteOnce + ## @param compactor.persistence.size PVC Storage Request for data volume + ## + size: 8Gi + ## @param compactor.persistence.labels Labels for the PVC + ## + labels: {} + ## @param compactor.persistence.annotations Annotations for the PVC + ## + annotations: {} + ## @param compactor.persistence.existingClaim Name of an existing PVC to use + ## If defined, PVC must be created manually before volume will be bound + ## + existingClaim: "" +## @section Thanos Store Gateway parameters +storegateway: + enabled: true + replicaCount: 1 + ## @param storegateway.revisionHistoryLimit The number of old history to retain to allow rollback + ## + revisionHistoryLimit: 10 + resources: + requests: + cpu: 50m + memory: 100Mi + limits: + cpu: 100m + memory: 200Mi + + affinity: + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: "Addons-Services" + operator: In + values: + - "true" + persistence: + ## @param storegateway.persistence.enabled Enable data persistence using PVC(s) on Thanos Store Gateway pods + ## + enabled: true + ## @param storegateway.persistence.storageClass Specify the `storageClass` used to provision the volume + ## If defined, storageClassName: + ## If set to "-", storageClassName: "", which disables dynamic provisioning + ## If undefined (the default) or 
set to null, no storageClassName spec is + ## set, choosing the default provisioner. + ## + storageClass: ${storage_class_name} + ## @param storegateway.persistence.accessModes PVC Access Modes for data volume + ## + accessModes: + - ReadWriteOnce + ## @param storegateway.persistence.size PVC Storage Request for data volume + ## + size: 8Gi + ## @param storegateway.persistence.labels Labels for the PVC + ## + labels: {} + ## @param storegateway.persistence.annotations Annotations for the PVC + ## + annotations: {} + ## @param storegateway.persistence.existingClaim Name of an existing PVC to use + ## If defined, PVC must be created manually before volume will be bound + ## + existingClaim: "" + ## Persistent Volume Claim Retention Policy + ## ref: https://kubernetes.io/docs/concepts/workloads/controllers/statefulset/#persistentvolumeclaim-retention + ## + persistentVolumeClaimRetentionPolicy: + ## @param storegateway.persistentVolumeClaimRetentionPolicy.enabled Enable Persistent volume retention policy for Thanos Store Gateway Statefulset + ## + enabled: false + ## @param storegateway.persistentVolumeClaimRetentionPolicy.whenScaled Volume retention behavior when the replica count of the StatefulSet is reduced + ## + whenScaled: Retain + ## @param storegateway.persistentVolumeClaimRetentionPolicy.whenDeleted Volume retention behavior that applies when the StatefulSet is deleted + ## + whenDeleted: Retain + autoscaling: + enabled: false + minReplicas: "" + maxReplicas: "" + targetCPU: "" + targetMemory: "" + ## Thanos Store Gateway Pod Disruption Budget configuration + ## ref: https://kubernetes.io/docs/tasks/run-application/configure-pdb + ## @param storegateway.pdb.create Enable/disable a Pod Disruption Budget creation for Thanos Store Gateway + ## @param storegateway.pdb.minAvailable Minimum number/percentage of pods that should remain scheduled + ## @param storegateway.pdb.maxUnavailable Maximum number/percentage of pods that may be made unavailable + ## + pdb: + 
create: true + minAvailable: 1 + maxUnavailable: "" + ## Sharded parameters + ## @param storegateway.sharded.enabled Enable sharding for Thanos Store Gateway + ## @param storegateway.sharded.hashPartitioning.shards Setting hashPartitioning will create multiple store statefulsets based on the number of shards specified using the hashmod of the blocks + ## @param storegateway.sharded.hashPartitioning.extraRelabelingConfigs Setting extra relabel config + ## e.g: + ## extraRelabelingConfigs: + ## - action: keep + ## source_labels: ["region"] + ## regex: cn-zhangjiakou + ## @param storegateway.sharded.timePartitioning [array] Setting timePartitioning will create multiple store deployments based on the number of partitions + ## @param storegateway.sharded.service.clusterIPs Array of cluster IPs for each Store Gateway service. Length must be the same as the number of shards + ## e.g: + ## clusterIPs: + ## - X.X.X.X + ## - Y.Y.Y.Y + ## @param storegateway.sharded.service.loadBalancerIPs Array of load balancer IPs for each Store Gateway service. Length must be the same as the number of shards + ## e.g: + ## loadBalancerIPs: + ## - X.X.X.X + ## - Y.Y.Y.Y + ## @param storegateway.sharded.service.http.nodePorts Array of http node ports used for Store Gateway service. Length must be the same as the number of shards + ## e.g: + ## nodePorts: + ## - 30001 + ## - 30002 + ## @param storegateway.sharded.service.grpc.nodePorts Array of grpc node ports used for Store Gateway service.
Length must be the same as the number of shards + ## e.g: + ## nodePorts: + ## - 30011 + ## - 30012 + ## + sharded: + enabled: false + hashPartitioning: + shards: "" + extraRelabelingConfigs: [] + timePartitioning: + - min: "" + max: "" + service: + clusterIPs: [] + loadBalancerIPs: [] + http: + nodePorts: [] + grpc: + nodePorts: [] +## @section Thanos Ruler parameters +ruler: + enabled: false + replicaLabel: replica + queryURL: "" + ## @param ruler.alertmanagers Alert managers URLs array + ## NOTE: This is only used when ruler.alertmanagersConfig is not set + ## + alertmanagers: [] + ## @param ruler.alertmanagersConfig Alert managers configuration + ## NOTE: This is only used when ruler.alertmanagers is not set + ## ref: https://thanos.io/tip/components/rule.md/#alertmanager + ## e.g: + ## alertmanagersConfig: + ## alertmanagers: + ## - http_config: + ## basic_auth: + ## username: some_user + ## password: some_pass + ## static_configs: + ## - alertmanager.thanos.io + ## scheme: http + ## timeout: 10s + ## api_version: v2 + ## + alertmanagersConfig: "" + ## @param ruler.evalInterval The default evaluation interval to use + ## + evalInterval: 1m + ## @param ruler.clusterName Used to set the 'ruler_cluster' label + ## + clusterName: "" + ## @param ruler.ruleFile Change the location Thanos checks for rule files + ## + ruleFile: "/conf/rules/*.yml" + ## @param ruler.config Ruler configuration + ## Specify content for ruler.yml + + resources: + requests: + cpu: 50m + memory: 100Mi + limits: + cpu: 100m + memory: 200Mi + affinity: + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: "Addons-Services" + operator: In + values: + - "true" + persistence: + ## @param ruler.persistence.enabled Enable data persistence using PVC(s) on Thanos Ruler pods + ## + enabled: true + ## @param ruler.persistence.storageClass Specify the `storageClass` used to provision the volume + ## If defined, storageClassName: + ## If 
set to "-", storageClassName: "", which disables dynamic provisioning + ## If undefined (the default) or set to null, no storageClassName spec is + ## set, choosing the default provisioner. + ## + storageClass: ${storage_class_name} + ## @param ruler.persistence.accessModes PVC Access Modes for data volume + ## + accessModes: + - ReadWriteOnce + ## @param ruler.persistence.size PVC Storage Request for data volume + ## + size: 8Gi + ## @param ruler.persistence.annotations Annotations for the PVC + ## + annotations: {} + ## @param ruler.persistence.existingClaim Name of an existing PVC to use + ## If defined, PVC must be created manually before volume will be bound + ## + existingClaim: "" + ## Persistent Volume Claim Retention Policy + ## ref: https://kubernetes.io/docs/concepts/workloads/controllers/statefulset/#persistentvolumeclaim-retention + ## + persistentVolumeClaimRetentionPolicy: + ## @param ruler.persistentVolumeClaimRetentionPolicy.enabled Enable Persistent volume retention policy for Thanos Ruler Statefulset + ## + enabled: false + ## @param ruler.persistentVolumeClaimRetentionPolicy.whenScaled Volume retention behavior when the replica count of the StatefulSet is reduced + ## + whenScaled: Retain + ## @param ruler.persistentVolumeClaimRetentionPolicy.whenDeleted Volume retention behavior that applies when the StatefulSet is deleted + ## + whenDeleted: Retain + ## @param ruler.automountServiceAccountToken Enable/disable auto mounting of the service account token only for the sts + ## + autoscaling: + enabled: false + minReplicas: "" + maxReplicas: "" + targetCPU: "" + targetMemory: "" + ## Thanos Ruler Pod Disruption Budget configuration + ## ref: https://kubernetes.io/docs/tasks/run-application/configure-pdb + ## @param ruler.pdb.create Enable/disable a Pod Disruption Budget creation for Thanos Ruler + ## @param ruler.pdb.minAvailable Minimum number/percentage of pods that should remain scheduled + ## @param ruler.pdb.maxUnavailable Maximum 
number/percentage of pods that may be made unavailable + ## + pdb: + create: true + minAvailable: 1 + maxUnavailable: "" + +receive: + enabled: false + ## @param receive.mode Mode to run receiver in. Valid options are "standalone" or "dual-mode" + ## ref: https://github.com/thanos-io/thanos/blob/release-0.22/docs/proposals-accepted/202012-receive-split.md + ## Enables running the Thanos Receiver in dual mode. Setting this to "dual-mode" will create a deployment for + ## the stateless thanos distributor. + mode: standalone + tsdbRetention: 15d + replicationFactor: 1 + replicaLabel: replica + replicaCount: 1 + ## @param receive.revisionHistoryLimit The number of old history to retain to allow rollback + ## + revisionHistoryLimit: 10 + ## @param receive.updateStrategy.type Update strategy type for Thanos Receive replicas + ## + updateStrategy: + type: RollingUpdate + ## @param receive.podManagementPolicy + ## @param receive.podManagementPolicy Statefulset Pod management policy: OrderedReady (default) or Parallel + ## ref: https://kubernetes.io/docs/concepts/workloads/controllers/statefulset/#pod-management-policies + ## + resources: + requests: + cpu: 50m + memory: 100Mi + limits: + cpu: 100m + memory: 200Mi + affinity: + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: "Addons-Services" + operator: In + values: + - "true" + autoscaling: + enabled: false + minReplicas: "" + maxReplicas: "" + targetCPU: "" + targetMemory: "" + ## Thanos Receive Pod Disruption Budget configuration + ## ref: https://kubernetes.io/docs/tasks/run-application/configure-pdb + ## @param receive.pdb.create Enable/disable a Pod Disruption Budget creation for Thanos Receive + ## @param receive.pdb.minAvailable Minimum number/percentage of pods that should remain scheduled + ## @param receive.pdb.maxUnavailable Maximum number/percentage of pods that may be made unavailable + ## + pdb: + create: true + minAvailable: 1 + 
maxUnavailable: "" + ## Persistence parameters + ## + persistence: + ## @param receive.persistence.enabled Enable data persistence using PVC(s) on Thanos Receive pods + ## + enabled: true + ## @param receive.persistence.storageClass Specify the `storageClass` used to provision the volume + ## If defined, storageClassName: + ## If set to "-", storageClassName: "", which disables dynamic provisioning + ## If undefined (the default) or set to null, no storageClassName spec is + ## set, choosing the default provisioner. + ## + storageClass: ${storage_class_name} + ## @param receive.persistence.accessModes PVC Access Modes for data volume + ## + accessModes: + - ReadWriteOnce + ## @param receive.persistence.size PVC Storage Request for data volume + ## + size: 8Gi + ## @param receive.persistence.labels Labels for the PVC + ## + labels: {} + ## @param receive.persistence.annotations Annotations for the PVC + ## + annotations: {} + ## @param receive.persistence.existingClaim Name of an existing PVC to use + ## If defined, PVC must be created manually before volume will be bound + ## + existingClaim: "" + ## Persistent Volume Claim Retention Policy + ## ref: https://kubernetes.io/docs/concepts/workloads/controllers/statefulset/#persistentvolumeclaim-retention + ## + persistentVolumeClaimRetentionPolicy: + ## @param receive.persistentVolumeClaimRetentionPolicy.enabled Enable Persistent volume retention policy for Thanos Receive Statefulset + ## + enabled: false + ## @param receive.persistentVolumeClaimRetentionPolicy.whenScaled Volume retention behavior when the replica count of the StatefulSet is reduced + ## + whenScaled: Retain + ## @param receive.persistentVolumeClaimRetentionPolicy.whenDeleted Volume retention behavior that applies when the StatefulSet is deleted + ## + whenDeleted: Retain + ## Configure the ingress resource that allows you to access Thanos Receive + ## ref: https://kubernetes.io/docs/concepts/services-networking/ingress/ + ## + +receiveDistributor: + 
+ enabled: false + replicaLabel: replica + ## @param receiveDistributor.replicationFactor Thanos Receive Distributor replication-factor + ## + replicationFactor: 1 + replicaCount: 1 + revisionHistoryLimit: 10 + resources: + requests: + cpu: 50m + memory: 100Mi + limits: + cpu: 100m + memory: 200Mi + affinity: + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: "Addons-Services" + operator: In + values: + - "true" + autoscaling: + enabled: false + minReplicas: "" + maxReplicas: "" + targetCPU: "" + targetMemory: "" + pdb: + create: true + minAvailable: 1 + maxUnavailable: "" +## @section Metrics parameters + +## Prometheus metrics +## +metrics: + enabled: true + serviceMonitor: + enabled: true + namespace: "" + labels: {} + jobLabel: "" + ## @param metrics.serviceMonitor.interval How frequently to scrape metrics + ## e.g: + ## interval: 10s + ## + interval: "" + ## @param metrics.serviceMonitor.scrapeTimeout Timeout after which the scrape is ended + ## e.g: + ## scrapeTimeout: 10s + ## + scrapeTimeout: "" + ## @param metrics.serviceMonitor.metricRelabelings [array] Specify additional relabeling of metrics + ## + metricRelabelings: [] + ## @param metrics.serviceMonitor.relabelings [array] Specify general relabeling + ## + relabelings: + - action: replace + separator: / + sourceLabels: + - namespace + - pod + targetLabel: instance + ## @param metrics.serviceMonitor.selector Prometheus instance selector labels + ## ref: https://github.com/bitnami/charts/tree/main/bitnami/prometheus-operator#prometheus-configuration + ## + selector: {} + ## @param metrics.serviceMonitor.extraParameters Any extra parameter to be added to the endpoint configured in the ServiceMonitor + ## (e.g. tlsConfig for further customization of the HTTPS behavior) + ## Note that the 'scheme' is automatically set to 'https' when the 'https.enabled' flag is used in this chart. 
+ ## ref: https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/api.md#monitoring.coreos.com/v1.Endpoint + ## + extraParameters: {} + ## PrometheusRule CRD configuration + ## + prometheusRule: + ## @param metrics.prometheusRule.enabled If `true`, creates a Prometheus Operator PrometheusRule (also requires `metrics.enabled` to be `true`) + ## + enabled: false + ## Configure prometheus rules + ## + default: + ## @extra metrics.prometheusRule.default.absent_rules Enable absent_rules when metrics.prometheusRule.default.create is false (also requires `metrics.enabled` to be `true`) + ## @extra metrics.prometheusRule.default.compaction Enable compaction rules when metrics.prometheusRule.default.create is false (also requires `metrics.enabled` to be `true`) + ## @extra metrics.prometheusRule.default.query Enable query when metrics.prometheusRule.default.create is false (also requires `metrics.enabled` to be `true`) + ## @extra metrics.prometheusRule.default.receive Enable receive rules when metrics.prometheusRule.default.create is false (also requires `metrics.enabled` to be `true`) + ## @extra metrics.prometheusRule.default.replicate Enable replicate rules when metrics.prometheusRule.default.create is false (also requires `metrics.enabled` to be `true`) + ## @extra metrics.prometheusRule.default.ruler Enable ruler rules when metrics.prometheusRule.default.create is false (also requires `metrics.enabled` to be `true`) + ## @extra metrics.prometheusRule.default.sidecar Enable sidecar rules when metrics.prometheusRule.default.create is false (also requires `metrics.enabled` to be `true`) + ## @param metrics.prometheusRule.default.sidecarJobRegex Allows the customization of the thanos-sidecar job name to use in the sidecar prometheus alerts + sidecarJobRegex: ".*thanos-sidecar.*" + ## @extra metrics.prometheusRule.default.store_gateway Enable store_gateway rules when metrics.prometheusRule.default.create is false (also requires `metrics.enabled` 
to be `true`) + ## @param metrics.prometheusRule.default.create would create all default prometheus alerts + ## + create: false + ## @extra metrics.prometheusRule.default.disabled.ThanosCompactIsDown Disable ThanosCompactIsDown rule when metrics.prometheusRule.default.create or metrics.prometheusRule.default.absent_rules is true + ## @extra metrics.prometheusRule.default.disabled.ThanosQueryIsDown Disable ThanosQueryIsDown rule when metrics.prometheusRule.default.create or metrics.prometheusRule.default.absent_rules is true + ## @extra metrics.prometheusRule.default.disabled.ThanosReceiveIsDown Disable ThanosReceiveIsDown rule when metrics.prometheusRule.default.create or metrics.prometheusRule.default.absent_rules is true + ## @extra metrics.prometheusRule.default.disabled.ThanosRuleIsDown Disable ThanosRuleIsDown rule when metrics.prometheusRule.default.create or metrics.prometheusRule.default.absent_rules is true + ## @extra metrics.prometheusRule.default.disabled.ThanosSidecarIsDown Disable ThanosSidecarIsDown rule when metrics.prometheusRule.default.create or metrics.prometheusRule.default.absent_rules is true + ## @extra metrics.prometheusRule.default.disabled.ThanosStoreIsDown Disable ThanosStoreIsDown rule when metrics.prometheusRule.default.create or metrics.prometheusRule.default.absent_rules is true + ## @extra metrics.prometheusRule.default.disabled.ThanosCompactMultipleRunning Disable ThanosCompactMultipleRunning rule when metrics.prometheusRule.default.create or metrics.prometheusRule.default.compaction is true + ## @extra metrics.prometheusRule.default.disabled.ThanosCompactHalted Disable ThanosCompactHalted rule when metrics.prometheusRule.default.create or metrics.prometheusRule.default.compaction is true + ## @extra metrics.prometheusRule.default.disabled.ThanosCompactHighCompactionFailures Disable ThanosCompactHighCompactionFailures rule when metrics.prometheusRule.default.create or metrics.prometheusRule.default.compaction is true + ## @extra
metrics.prometheusRule.default.disabled.ThanosCompactBucketHighOperationFailures Disable ThanosCompactBucketHighOperationFailures rule when metrics.prometheusRule.default.create or metrics.prometheusRule.default.compaction is true + ## @extra metrics.prometheusRule.default.disabled.ThanosCompactHasNotRun Disable ThanosCompactHasNotRun rule when metrics.prometheusRule.default.create or metrics.prometheusRule.default.compaction is true + ## @extra metrics.prometheusRule.default.disabled.ThanosQueryHttpRequestQueryErrorRateHigh Disable ThanosQueryHttpRequestQueryErrorRateHigh rule when metrics.prometheusRule.default.create or metrics.prometheusRule.default.query is true + ## @extra metrics.prometheusRule.default.disabled.ThanosQueryHttpRequestQueryRangeErrorRateHigh Disable ThanosQueryHttpRequestQueryRangeErrorRateHigh rule when metrics.prometheusRule.default.create or metrics.prometheusRule.default.query is true + ## @extra metrics.prometheusRule.default.disabled.ThanosQueryGrpcServerErrorRate Disable ThanosQueryGrpcServerErrorRate rule when metrics.prometheusRule.default.create or metrics.prometheusRule.default.query is true + ## @extra metrics.prometheusRule.default.disabled.ThanosQueryGrpcClientErrorRate Disable ThanosQueryGrpcClientErrorRate rule when metrics.prometheusRule.default.create or metrics.prometheusRule.default.query is true + ## @extra metrics.prometheusRule.default.disabled.ThanosQueryHighDNSFailures Disable ThanosQueryHighDNSFailures rule when metrics.prometheusRule.default.create or metrics.prometheusRule.default.query is true + ## @extra metrics.prometheusRule.default.disabled.ThanosQueryInstantLatencyHigh Disable ThanosQueryInstantLatencyHigh rule when metrics.prometheusRule.default.create or metrics.prometheusRule.default.query is true + ## @extra metrics.prometheusRule.default.disabled.ThanosQueryRangeLatencyHigh Disable ThanosQueryRangeLatencyHigh rule when metrics.prometheusRule.default.create or metrics.prometheusRule.default.query is true + ##
@extra metrics.prometheusRule.default.disabled.ThanosQueryOverload Disable ThanosQueryOverload rule when metrics.prometheusRule.default.create or metrics.prometheusRule.default.query is true + ## @extra metrics.prometheusRule.default.disabled.ThanosReceiveHttpRequestErrorRateHigh Disable ThanosReceiveHttpRequestErrorRateHigh rule when metrics.prometheusRule.default.create or metrics.prometheusRule.default.receive is true + ## @extra metrics.prometheusRule.default.disabled.ThanosReceiveHttpRequestLatencyHigh Disable ThanosReceiveHttpRequestLatencyHigh rule when metrics.prometheusRule.default.create or metrics.prometheusRule.default.receive is true + ## @extra metrics.prometheusRule.default.disabled.ThanosReceiveHighReplicationFailures Disable ThanosReceiveHighReplicationFailures rule when metrics.prometheusRule.default.create or metrics.prometheusRule.default.receive is true + ## @extra metrics.prometheusRule.default.disabled.ThanosReceiveHighForwardRequestFailures Disable ThanosReceiveHighForwardRequestFailures rule when metrics.prometheusRule.default.create or metrics.prometheusRule.default.receive is true + ## @extra metrics.prometheusRule.default.disabled.ThanosReceiveHighHashringFileRefreshFailures Disable ThanosReceiveHighHashringFileRefreshFailures rule when metrics.prometheusRule.default.create or metrics.prometheusRule.default.receive is true + ## @extra metrics.prometheusRule.default.disabled.ThanosReceiveConfigReloadFailure Disable ThanosReceiveConfigReloadFailure rule when metrics.prometheusRule.default.create or metrics.prometheusRule.default.receive is true + ## @extra metrics.prometheusRule.default.disabled.ThanosReceiveNoUpload Disable ThanosReceiveNoUpload rule when metrics.prometheusRule.default.create or metrics.prometheusRule.default.receive is true + ## @extra metrics.prometheusRule.default.disabled.ThanosReceiveTrafficBelowThreshold Disable ThanosReceiveTrafficBelowThreshold rule when metrics.prometheusRule.default.create or 
metrics.prometheusRule.default.receive is true + ## @extra metrics.prometheusRule.default.disabled.ThanosBucketReplicateErrorRate Disable ThanosBucketReplicateErrorRate rule when metrics.prometheusRule.default.create or metrics.prometheusRule.default.receive is true + ## @extra metrics.prometheusRule.default.disabled.ThanosBucketReplicateRunLatency Disable ThanosBucketReplicateRunLatency rule when metrics.prometheusRule.default.create or metrics.prometheusRule.default.receive is true + ## @extra metrics.prometheusRule.default.disabled.ThanosRuleQueueIsDroppingAlerts Disable ThanosRuleQueueIsDroppingAlerts rule when metrics.prometheusRule.default.create or metrics.prometheusRule.default.ruler is true + ## @extra metrics.prometheusRule.default.disabled.ThanosRuleSenderIsFailingAlerts Disable ThanosRuleSenderIsFailingAlerts rule when metrics.prometheusRule.default.create or metrics.prometheusRule.default.ruler is true + ## @extra metrics.prometheusRule.default.disabled.ThanosRuleHighRuleEvaluationFailures Disable ThanosRuleHighRuleEvaluationFailures rule when metrics.prometheusRule.default.create or metrics.prometheusRule.default.ruler is true + ## @extra metrics.prometheusRule.default.disabled.ThanosRuleHighRuleEvaluationWarnings Disable ThanosRuleHighRuleEvaluationWarnings rule when metrics.prometheusRule.default.create or metrics.prometheusRule.default.ruler is true + ## @extra metrics.prometheusRule.default.disabled.ThanosRuleRuleEvaluationLatencyHigh Disable ThanosRuleRuleEvaluationLatencyHigh rule when metrics.prometheusRule.default.create or metrics.prometheusRule.default.ruler is true + ## @extra metrics.prometheusRule.default.disabled.ThanosRuleGrpcErrorRate Disable ThanosRuleGrpcErrorRate rule when metrics.prometheusRule.default.create or metrics.prometheusRule.default.ruler is true + ## @extra metrics.prometheusRule.default.disabled.ThanosRuleConfigReloadFailure Disable ThanosRuleConfigReloadFailure rule when metrics.prometheusRule.default.create or 
metrics.prometheusRule.default.ruler is true + ## @extra metrics.prometheusRule.default.disabled.ThanosRuleQueryHighDNSFailures Disable ThanosRuleQueryHighDNSFailures rule when metrics.prometheusRule.default.create or metrics.prometheusRule.default.ruler is true + ## @extra metrics.prometheusRule.default.disabled.ThanosRuleAlertmanagerHighDNSFailures Disable ThanosRuleAlertmanagerHighDNSFailures rule when metrics.prometheusRule.default.create or metrics.prometheusRule.default.ruler is true + ## @extra metrics.prometheusRule.default.disabled.ThanosRuleNoEvaluationFor10Intervals Disable ThanosRuleNoEvaluationFor10Intervals rule when metrics.prometheusRule.default.create or metrics.prometheusRule.default.ruler is true + ## @extra metrics.prometheusRule.default.disabled.ThanosNoRuleEvaluations Disable ThanosNoRuleEvaluations rule when metrics.prometheusRule.default.create or metrics.prometheusRule.default.ruler is true + ## @extra metrics.prometheusRule.default.disabled.ThanosSidecarBucketOperationsFailed Disable ThanosSidecarBucketOperationsFailed rule when metrics.prometheusRule.default.create or metrics.prometheusRule.default.sidecar is true + ## @extra metrics.prometheusRule.default.disabled.ThanosSidecarNoConnectionToStartedPrometheus Disable ThanosSidecarNoConnectionToStartedPrometheus rule when metrics.prometheusRule.default.create or metrics.prometheusRule.default.sidecar is true + ## @extra metrics.prometheusRule.default.disabled.ThanosStoreGrpcErrorRate Disable ThanosStoreGrpcErrorRate rule when metrics.prometheusRule.default.create or metrics.prometheusRule.default.store_gateway is true + ## @extra metrics.prometheusRule.default.disabled.ThanosStoreSeriesGateLatencyHigh Disable ThanosStoreSeriesGateLatencyHigh rule when metrics.prometheusRule.default.create or metrics.prometheusRule.default.store_gateway is true + ## @extra metrics.prometheusRule.default.disabled.ThanosStoreBucketHighOperationFailures Disable
ThanosStoreBucketHighOperationFailures rule when metrics.prometheusRule.default.create or metrics.prometheusRule.default.store_gateway is true + ## @extra metrics.prometheusRule.default.disabled.ThanosStoreObjstoreOperationLatencyHigh Disable ThanosStoreObjstoreOperationLatencyHigh rule when metrics.prometheusRule.default.create or metrics.prometheusRule.default.store_gateway is true + ## @param metrics.prometheusRule.default.disabled disable one specific prometheus alert rule + ## + disabled: {} + ## @param metrics.prometheusRule.runbookUrl Prefix for runbook URLs. Use this to override the first part of the runbookURLs that is common to all rules + ## + runbookUrl: "https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-" + ## @param metrics.prometheusRule.namespace Namespace in which the PrometheusRule CRD is created + ## + namespace: "" + ## @param metrics.prometheusRule.additionalLabels Additional labels for the prometheusRule + ## + additionalLabels: {} + ## @param metrics.prometheusRule.groups Prometheus Rule Groups for Thanos components + ## These are just example rules, please adapt them to your needs. + ## groups: + ## - name: Compactor + ## rules: + ## - alert: ThanosCompactMultipleRunning + ## annotations: + ## description: No more than one Thanos Compact instance should be running at once. There are {{`{{`}}$value{{`}}`}} instances running. + ## runbook_url: {{ .Values.metrics.prometheusRule.runbookUrl }}thanoscompactmultiplerunning + ## summary: Thanos Compact has multiple instances running. + ## expr: sum by (job) (up{job=~"{{ template "common.names.fullname" .
}}-compact.*"}) > 1 + ## for: 5m + ## labels: + ## severity: warning + groups: [] +## @section Volume Permissions parameters + +## 'volumePermissions' init container parameters +## Changes the owner and group of the persistent volume mount point to runAsUser:fsGroup values +## based on the *podSecurityContext/*containerSecurityContext parameters +## +volumePermissions: + ## @param volumePermissions.enabled Enable init container that changes the owner/group of the PV mount point to `runAsUser:fsGroup` + ## + enabled: false + ## @param volumePermissions.image.registry [default: REGISTRY_NAME] Init container volume-permissions image registry + ## @param volumePermissions.image.repository [default: REPOSITORY_NAME/os-shell] Init container volume-permissions image repository + ## @skip volumePermissions.image.tag Init container volume-permissions image tag + ## @param volumePermissions.image.digest Init container volume-permissions image digest in the way sha256:aa.... Please note this parameter, if set, will override the tag + ## @param volumePermissions.image.pullPolicy Init container volume-permissions image pull policy + ## @param volumePermissions.image.pullSecrets Specify docker-registry secret names as an array + ## + image: + registry: docker.io + repository: bitnami/os-shell + tag: 12-debian-12-r35 + digest: "" + ## ref: https://kubernetes.io/docs/concepts/containers/images/#pre-pulled-images + ## + pullPolicy: IfNotPresent + ## Optionally specify an array of imagePullSecrets. + ## Secrets must be manually created in the namespace. 
+ ## ref: https://kubernetes.io/docs/tasks/configure-pod-container/pull-image-private-registry/ + ## e.g: + ## pullSecrets: + ## - myRegistryKeySecretName + ## + pullSecrets: [] +## @section MinIO® chart parameters +## @extra minio For full list of MinIO® values configurations please refer [here](https://github.com/bitnami/charts/tree/main/bitnami/minio) +minio: + enabled: false + resources: + requests: + cpu: 50m + memory: 100Mi + limits: + cpu: 100m + memory: 200Mi diff --git a/main.tf b/main.tf index e8a2c51..52e6dfe 100644 --- a/main.tf +++ b/main.tf @@ -54,8 +54,22 @@ locals { version: 1 EOF + thanos_datasource_config = < Date: Thu, 20 Feb 2025 10:57:59 +0530 Subject: [PATCH 2/2] Updated thanos dashboards --- README.md | 1 + examples/complete/README.md | 2 +- examples/complete/main.tf | 2 +- grafana/dashboards/ingress_nginx.json | 34 +- grafana/dashboards/k8s_view_global.json | 7 +- grafana/dashboards/k8s_view_namespace.json | 4346 +++++++------- grafana/dashboards/k8s_view_nodes.json | 99 +- grafana/dashboards/k8s_view_pods.json | 5033 ++++++++--------- .../dashboards/karpenter_node_dashboard.json | 2203 ++++---- grafana/dashboards/nginx_api_host.json | 22 +- grafana/dashboards/nginx_ingress.json | 31 +- .../dashboards/nginx_request_handling.json | 26 +- grafana/dashboards/nodegroup.json | 14 +- grafana/dashboards/thanos-overview.json | 1982 +++++++ main.tf | 12 +- thanos.tf | 115 +- 16 files changed, 7927 insertions(+), 6002 deletions(-) create mode 100644 grafana/dashboards/thanos-overview.json diff --git a/README.md b/README.md index a2721ee..9687964 100644 --- a/README.md +++ b/README.md @@ -238,6 +238,7 @@ No requirements. 
| [kubernetes_config_map.postgres_dashboard](https://registry.terraform.io/providers/hashicorp/kubernetes/latest/docs/resources/config_map) | resource | | [kubernetes_config_map.rabbitmq_dashboard](https://registry.terraform.io/providers/hashicorp/kubernetes/latest/docs/resources/config_map) | resource | | [kubernetes_config_map.redis_dashboard](https://registry.terraform.io/providers/hashicorp/kubernetes/latest/docs/resources/config_map) | resource | +| [kubernetes_config_map.thanos-overview_dashboard](https://registry.terraform.io/providers/hashicorp/kubernetes/latest/docs/resources/config_map) | resource | | [kubernetes_namespace.monitoring](https://registry.terraform.io/providers/hashicorp/kubernetes/latest/docs/resources/namespace) | resource | | [kubernetes_priority_class.priority_class](https://registry.terraform.io/providers/hashicorp/kubernetes/latest/docs/resources/priority_class) | resource | | [null_resource.grafana_homepage](https://registry.terraform.io/providers/hashicorp/null/latest/docs/resources/resource) | resource | diff --git a/examples/complete/README.md b/examples/complete/README.md index a427bed..2c21949 100644 --- a/examples/complete/README.md +++ b/examples/complete/README.md @@ -21,7 +21,7 @@ No requirements. 
| Name | Source | Version | |------|--------|---------| -| [pgl](#module\_pgl) | squareops/grafana-stack/kubernetes | 3.0.3 | +| [pgl](#module\_pgl) | squareops/grafana-stack/kubernetes | 4.0.0 | ## Resources diff --git a/examples/complete/main.tf b/examples/complete/main.tf index 06b9a73..ebbb695 100644 --- a/examples/complete/main.tf +++ b/examples/complete/main.tf @@ -13,7 +13,7 @@ locals { module "pgl" { source = "squareops/grafana-stack/kubernetes" - version = "3.0.3" + version = "4.0.0" cluster_name = "" kube_prometheus_stack_enabled = true loki_enabled = true diff --git a/grafana/dashboards/ingress_nginx.json b/grafana/dashboards/ingress_nginx.json index e74a583..b258fa8 100644 --- a/grafana/dashboards/ingress_nginx.json +++ b/grafana/dashboards/ingress_nginx.json @@ -44,7 +44,7 @@ "fiscalYearStartMonth": 0, "gnetId": 14314, "graphTooltip": 0, - "id": 4246, + "id": 27, "links": [], "liveNow": false, "panels": [ @@ -139,7 +139,7 @@ "textMode": "auto", "wideLayout": true }, - "pluginVersion": "11.1.0", + "pluginVersion": "11.0.0", "targets": [ { "datasource": { @@ -222,7 +222,7 @@ "textMode": "auto", "wideLayout": true }, - "pluginVersion": "11.1.0", + "pluginVersion": "11.0.0", "targets": [ { "datasource": { @@ -305,7 +305,7 @@ "textMode": "auto", "wideLayout": true }, - "pluginVersion": "11.1.0", + "pluginVersion": "11.0.0", "targets": [ { "datasource": { @@ -388,7 +388,7 @@ "textMode": "auto", "wideLayout": true }, - "pluginVersion": "11.1.0", + "pluginVersion": "11.0.0", "targets": [ { "datasource": { @@ -474,7 +474,7 @@ "textMode": "auto", "wideLayout": true }, - "pluginVersion": "11.1.0", + "pluginVersion": "11.0.0", "targets": [ { "datasource": { @@ -557,7 +557,7 @@ "textMode": "auto", "wideLayout": true }, - "pluginVersion": "11.1.0", + "pluginVersion": "11.0.0", "targets": [ { "datasource": { @@ -641,7 +641,7 @@ "textMode": "auto", "wideLayout": true }, - "pluginVersion": "11.1.0", + "pluginVersion": "11.0.0", "targets": [ { "datasource": { @@ -725,7 
+725,7 @@ "textMode": "auto", "wideLayout": true }, - "pluginVersion": "11.1.0", + "pluginVersion": "11.0.0", "targets": [ { "datasource": { @@ -809,7 +809,7 @@ "textMode": "auto", "wideLayout": true }, - "pluginVersion": "11.1.0", + "pluginVersion": "11.0.0", "targets": [ { "datasource": { @@ -905,6 +905,7 @@ "showLegend": false }, "tooltip": { + "maxHeight": 600, "mode": "multi", "sort": "none" } @@ -1095,6 +1096,7 @@ "showLegend": true }, "tooltip": { + "maxHeight": 600, "mode": "multi", "sort": "none" } @@ -1207,6 +1209,7 @@ "showLegend": false }, "tooltip": { + "maxHeight": 600, "mode": "multi", "sort": "none" } @@ -1487,6 +1490,7 @@ "showLegend": true }, "tooltip": { + "maxHeight": 600, "mode": "multi", "sort": "desc" } @@ -1645,6 +1649,7 @@ }, "showValue": "never", "tooltip": { + "maxHeight": 600, "mode": "single", "showColorScale": false, "yHistogram": false @@ -1656,7 +1661,7 @@ "unit": "s" } }, - "pluginVersion": "11.1.0", + "pluginVersion": "11.0.0", "reverseYBuckets": false, "targets": [ { @@ -2316,8 +2321,8 @@ { "current": { "selected": true, - "text": "Prometheus", - "value": "prometheus" + "text": "default", + "value": "default" }, "hide": 0, "includeAll": false, @@ -2337,6 +2342,7 @@ "from": "now-3h", "to": "now" }, + "timeRangeUpdatedDuringEditOrView": false, "timepicker": { "refresh_intervals": [ "10s", @@ -2364,6 +2370,6 @@ "timezone": "browser", "title": "Kubernetes Nginx Ingress Prometheus NextGen", "uid": "k8s-nginx-ingress-prometheus-ng", - "version": 5, + "version": 1, "weekStart": "" } diff --git a/grafana/dashboards/k8s_view_global.json b/grafana/dashboards/k8s_view_global.json index 8a5478d..a88593d 100644 --- a/grafana/dashboards/k8s_view_global.json +++ b/grafana/dashboards/k8s_view_global.json @@ -25,7 +25,7 @@ "editable": true, "fiscalYearStartMonth": 0, "graphTooltip": 1, - "id": 65, + "id": 32, "links": [], "liveNow": false, "panels": [ @@ -3646,10 +3646,9 @@ }, { "current": { - "isNone": true, "selected": false, - "text": "None", - 
"value": "" + "text": "v1.31.5-eks-5d632ec", + "value": "v1.31.5-eks-5d632ec" }, "datasource": { "type": "prometheus", diff --git a/grafana/dashboards/k8s_view_namespace.json b/grafana/dashboards/k8s_view_namespace.json index c8f31a6..e61aaa3 100644 --- a/grafana/dashboards/k8s_view_namespace.json +++ b/grafana/dashboards/k8s_view_namespace.json @@ -1,2307 +1,2279 @@ { - "__inputs": [ + "annotations": { + "list": [ { - "name": "DS_PROMETHEUS", - "label": "Prometheus", - "description": "", - "type": "datasource", - "pluginId": "prometheus", - "pluginName": "Prometheus" + "builtIn": 1, + "datasource": { + "type": "datasource", + "uid": "grafana" + }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "target": { + "limit": 100, + "matchAny": false, + "tags": [], + "type": "dashboard" + }, + "type": "dashboard" } - ], - "__elements": {}, - "__requires": [ - { - "type": "panel", - "id": "gauge", - "name": "Gauge", - "version": "" + ] + }, + "description": "This is a modern 'Namespaces View' dashboard for your Kubernetes cluster(s). Made for kube-prometheus-stack and take advantage of the latest Grafana features. 
GitHub repository: https://github.com/dotdc/grafana-dashboards-kubernetes", + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 1, + "id": 35, + "links": [], + "liveNow": false, + "panels": [ + { + "collapsed": false, + "datasource": { + "type": "datasource", + "uid": "grafana" }, - { - "type": "grafana", - "id": "grafana", - "name": "Grafana", - "version": "10.3.1" + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 }, - { - "type": "datasource", - "id": "prometheus", - "name": "Prometheus", - "version": "1.0.0" + "id": 38, + "panels": [], + "title": "Overview", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" }, - { - "type": "panel", - "id": "stat", - "name": "Stat", - "version": "" + "fieldConfig": { + "defaults": { + "decimals": 2, + "mappings": [], + "max": 1, + "min": 0, + "thresholds": { + "mode": "percentage", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "orange", + "value": 50 + }, + { + "color": "red", + "value": 70 + } + ] + }, + "unit": "percentunit" + }, + "overrides": [] }, - { - "type": "panel", - "id": "timeseries", - "name": "Time series", - "version": "" - } - ], - "annotations": { - "list": [ + "gridPos": { + "h": 7, + "w": 6, + "x": 0, + "y": 1 + }, + "id": 46, + "options": { + "minVizHeight": 75, + "minVizWidth": 75, + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showThresholdLabels": false, + "showThresholdMarkers": true, + "sizing": "auto", + "text": {} + }, + "pluginVersion": "11.0.0", + "targets": [ { - "builtIn": 1, "datasource": { - "type": "datasource", - "uid": "grafana" - }, - "enable": true, - "hide": true, - "iconColor": "rgba(0, 211, 255, 1)", - "name": "Annotations & Alerts", - "target": { - "limit": 100, - "matchAny": false, - "tags": [], - "type": "dashboard" + "type": "prometheus", + "uid": "${datasource}" }, - "type": "dashboard" + "editorMode": "code", + 
"exemplar": false, + "expr": "sum(rate(container_cpu_usage_seconds_total{namespace=~\"$namespace\", image!=\"\", cluster=\"$cluster\"}[$__rate_interval])) / sum(machine_cpu_cores{cluster=\"$cluster\"})", + "instant": true, + "interval": "", + "legendFormat": "", + "range": false, + "refId": "A" } - ] + ], + "title": "Namespace(s) usage on total cluster CPU in %", + "type": "gauge" }, - "description": "This is a modern 'Namespaces View' dashboard for your Kubernetes cluster(s). Made for kube-prometheus-stack and take advantage of the latest Grafana features. GitHub repository: https://github.com/dotdc/grafana-dashboards-kubernetes", - "editable": true, - "fiscalYearStartMonth": 0, - "graphTooltip": 1, - "links": [], - "liveNow": false, - "panels": [ - { - "collapsed": false, - "datasource": { - "type": "datasource", - "uid": "grafana" - }, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 0 - }, - "id": 38, - "panels": [], - "title": "Overview", - "type": "row" + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" }, - { - "datasource": { - "type": "prometheus", - "uid": "${datasource}" + "fieldConfig": { + "defaults": { + "decimals": 2, + "mappings": [], + "max": 1, + "min": 0, + "thresholds": { + "mode": "percentage", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "orange", + "value": 50 + }, + { + "color": "red", + "value": 70 + } + ] + }, + "unit": "percentunit" }, - "fieldConfig": { - "defaults": { - "decimals": 2, - "mappings": [], - "max": 1, - "min": 0, - "thresholds": { - "mode": "percentage", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "orange", - "value": 50 - }, - { - "color": "red", - "value": 70 - } - ] - }, - "unit": "percentunit", - "unitScale": true - }, - "overrides": [] - }, - "gridPos": { - "h": 7, - "w": 6, - "x": 0, - "y": 1 - }, - "id": 46, - "options": { - "minVizHeight": 75, - "minVizWidth": 75, - "orientation": "auto", - "reduceOptions": { - "calcs": [ - 
"lastNotNull" - ], - "fields": "", - "values": false - }, - "showThresholdLabels": false, - "showThresholdMarkers": true, - "sizing": "auto", - "text": {} - }, - "pluginVersion": "10.3.1", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${datasource}" - }, - "editorMode": "code", - "exemplar": false, - "expr": "sum(rate(container_cpu_usage_seconds_total{namespace=~\"$namespace\", image!=\"\", cluster=\"$cluster\"}[$__rate_interval])) / sum(machine_cpu_cores{cluster=\"$cluster\"})", - "instant": true, - "interval": "", - "legendFormat": "", - "range": false, - "refId": "A" - } - ], - "title": "Namespace(s) usage on total cluster CPU in %", - "type": "gauge" + "overrides": [] }, - { - "datasource": { - "type": "prometheus", - "uid": "${datasource}" - }, - "fieldConfig": { - "defaults": { - "decimals": 2, - "mappings": [], - "max": 1, - "min": 0, - "thresholds": { - "mode": "percentage", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "orange", - "value": 50 - }, - { - "color": "red", - "value": 70 - } - ] - }, - "unit": "percentunit", - "unitScale": true - }, - "overrides": [] - }, - "gridPos": { - "h": 7, - "w": 6, - "x": 6, - "y": 1 - }, - "id": 48, - "options": { - "minVizHeight": 75, - "minVizWidth": 75, - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "showThresholdLabels": false, - "showThresholdMarkers": true, - "sizing": "auto", - "text": {} - }, - "pluginVersion": "10.3.1", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${datasource}" - }, - "exemplar": true, - "expr": "sum(container_memory_working_set_bytes{namespace=~\"$namespace\", image!=\"\", cluster=\"$cluster\"}) / sum(machine_memory_bytes{cluster=\"$cluster\"})", - "interval": "", - "legendFormat": "", - "refId": "A" - } - ], - "title": "Namespace(s) usage on total cluster RAM in %", - "type": "gauge" + "gridPos": { + "h": 7, + "w": 6, + "x": 6, + "y": 1 }, - 
{ - "datasource": { - "type": "prometheus", - "uid": "${datasource}" + "id": 48, + "options": { + "minVizHeight": 75, + "minVizWidth": 75, + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 25, - "gradientMode": "opacity", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "smooth", - "lineWidth": 2, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" + "showThresholdLabels": false, + "showThresholdMarkers": true, + "sizing": "auto", + "text": {} + }, + "pluginVersion": "11.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "exemplar": true, + "expr": "sum(container_memory_working_set_bytes{namespace=~\"$namespace\", image!=\"\", cluster=\"$cluster\"}) / sum(machine_memory_bytes{cluster=\"$cluster\"})", + "interval": "", + "legendFormat": "", + "refId": "A" + } + ], + "title": "Namespace(s) usage on total cluster RAM in %", + "type": "gauge" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 25, + "gradientMode": "opacity", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "smooth", + "lineWidth": 2, + "pointSize": 
5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null }, - "thresholdsStyle": { - "mode": "off" + { + "color": "red", + "value": 80 } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "short", - "unitScale": true - }, - "overrides": [] - }, - "gridPos": { - "h": 11, - "w": 12, - "x": 12, - "y": 1 - }, - "id": 32, - "options": { - "legend": { - "calcs": [ - "min", - "max", - "mean" - ], - "displayMode": "table", - "placement": "right", - "showLegend": true, - "sortBy": "Max", - "sortDesc": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "pluginVersion": "8.3.3", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${datasource}" - }, - "expr": "sum(kube_pod_info{namespace=~\"$namespace\", cluster=\"$cluster\"})", - "interval": "", - "legendFormat": "Running Pods", - "refId": "A" + ] }, - { - "datasource": { - "type": "prometheus", - "uid": "${datasource}" - }, - "expr": "sum(kube_service_info{namespace=~\"$namespace\", cluster=\"$cluster\"})", - "interval": "", - "legendFormat": "Services", - "refId": "B" + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 11, + "w": 12, + "x": 12, + "y": 1 + }, + "id": 32, + "options": { + "legend": { + "calcs": [ + "min", + "max", + "mean" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true, + "sortBy": "Max", + "sortDesc": true + }, + "tooltip": { + "maxHeight": 600, + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "8.3.3", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" }, - { - "datasource": { - "type": 
"prometheus", - "uid": "${datasource}" - }, - "expr": "sum(kube_ingress_info{namespace=~\"$namespace\", cluster=\"$cluster\"})", - "interval": "", - "legendFormat": "Ingresses", - "refId": "C" + "expr": "sum(kube_pod_info{namespace=~\"$namespace\", cluster=\"$cluster\"})", + "interval": "", + "legendFormat": "Running Pods", + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" }, - { - "datasource": { - "type": "prometheus", - "uid": "${datasource}" - }, - "expr": "sum(kube_deployment_labels{namespace=~\"$namespace\", cluster=\"$cluster\"})", - "interval": "", - "legendFormat": "Deployments", - "refId": "D" + "expr": "sum(kube_service_info{namespace=~\"$namespace\", cluster=\"$cluster\"})", + "interval": "", + "legendFormat": "Services", + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" }, - { - "datasource": { - "type": "prometheus", - "uid": "${datasource}" - }, - "expr": "sum(kube_statefulset_labels{namespace=~\"$namespace\", cluster=\"$cluster\"})", - "interval": "", - "legendFormat": "Statefulsets", - "refId": "E" + "expr": "sum(kube_ingress_info{namespace=~\"$namespace\", cluster=\"$cluster\"})", + "interval": "", + "legendFormat": "Ingresses", + "refId": "C" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" }, - { - "datasource": { - "type": "prometheus", - "uid": "${datasource}" - }, - "expr": "sum(kube_daemonset_labels{namespace=~\"$namespace\", cluster=\"$cluster\"})", - "interval": "", - "legendFormat": "Daemonsets", - "refId": "F" + "expr": "sum(kube_deployment_labels{namespace=~\"$namespace\", cluster=\"$cluster\"})", + "interval": "", + "legendFormat": "Deployments", + "refId": "D" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" }, - { - "datasource": { - "type": "prometheus", - "uid": "${datasource}" - }, - "expr": "sum(kube_persistentvolumeclaim_info{namespace=~\"$namespace\", cluster=\"$cluster\"})", - "interval": "", 
- "legendFormat": "Persistent Volume Claims", - "refId": "G" + "expr": "sum(kube_statefulset_labels{namespace=~\"$namespace\", cluster=\"$cluster\"})", + "interval": "", + "legendFormat": "Statefulsets", + "refId": "E" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" }, - { - "datasource": { - "type": "prometheus", - "uid": "${datasource}" - }, - "expr": "sum(kube_hpa_labels{namespace=~\"$namespace\", cluster=\"$cluster\"})", - "interval": "", - "legendFormat": "Horizontal Pod Autoscalers", - "refId": "H" + "expr": "sum(kube_daemonset_labels{namespace=~\"$namespace\", cluster=\"$cluster\"})", + "interval": "", + "legendFormat": "Daemonsets", + "refId": "F" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" }, - { - "datasource": { - "type": "prometheus", - "uid": "${datasource}" - }, - "expr": "sum(kube_configmap_info{namespace=~\"$namespace\", cluster=\"$cluster\"})", - "interval": "", - "legendFormat": "Configmaps", - "refId": "I" + "expr": "sum(kube_persistentvolumeclaim_info{namespace=~\"$namespace\", cluster=\"$cluster\"})", + "interval": "", + "legendFormat": "Persistent Volume Claims", + "refId": "G" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" }, - { - "datasource": { - "type": "prometheus", - "uid": "${datasource}" - }, - "expr": "sum(kube_secret_info{namespace=~\"$namespace\", cluster=\"$cluster\"})", - "interval": "", - "legendFormat": "Secrets", - "refId": "J" + "expr": "sum(kube_hpa_labels{namespace=~\"$namespace\", cluster=\"$cluster\"})", + "interval": "", + "legendFormat": "Horizontal Pod Autoscalers", + "refId": "H" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" }, - { - "datasource": { - "type": "prometheus", - "uid": "${datasource}" - }, - "expr": "sum(kube_networkpolicy_labels{namespace=~\"$namespace\", cluster=\"$cluster\"})", - "interval": "", - "legendFormat": "Network Policies", - "refId": "K" - } - ], - "title": "Kubernetes 
Resource Count", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${datasource}" + "expr": "sum(kube_configmap_info{namespace=~\"$namespace\", cluster=\"$cluster\"})", + "interval": "", + "legendFormat": "Configmaps", + "refId": "I" }, - "fieldConfig": { - "defaults": { - "mappings": [], - "noValue": "0", - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "rgb(255, 255, 255)", - "value": null - } - ] - }, - "unit": "none", - "unitScale": true - }, - "overrides": [] - }, - "gridPos": { - "h": 4, - "w": 6, - "x": 0, - "y": 8 - }, - "id": 62, - "options": { - "colorMode": "none", - "graphMode": "none", - "justifyMode": "center", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "showPercentChange": false, - "text": {}, - "textMode": "auto", - "wideLayout": true - }, - "pluginVersion": "10.3.1", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${datasource}" - }, - "editorMode": "code", - "exemplar": true, - "expr": "sum(rate(container_cpu_usage_seconds_total{namespace=~\"$namespace\", image!=\"\", cluster=\"$cluster\"}[$__rate_interval]))", - "interval": "", - "legendFormat": "Real", - "range": true, - "refId": "A" + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" }, - { - "datasource": { - "type": "prometheus", - "uid": "${datasource}" - }, - "editorMode": "code", - "expr": "sum(kube_pod_container_resource_requests{namespace=~\"$namespace\", resource=\"cpu\", cluster=\"$cluster\"})", - "hide": false, - "legendFormat": "Requests", - "range": true, - "refId": "B" + "expr": "sum(kube_secret_info{namespace=~\"$namespace\", cluster=\"$cluster\"})", + "interval": "", + "legendFormat": "Secrets", + "refId": "J" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" }, - { - "datasource": { - "type": "prometheus", - "uid": "${datasource}" - }, - "editorMode": "code", - "expr": 
"sum(kube_pod_container_resource_limits{namespace=~\"$namespace\", resource=\"cpu\", cluster=\"$cluster\"})", - "hide": false, - "legendFormat": "Limits", - "range": true, - "refId": "C" + "expr": "sum(kube_networkpolicy_labels{namespace=~\"$namespace\", cluster=\"$cluster\"})", + "interval": "", + "legendFormat": "Network Policies", + "refId": "K" + } + ], + "title": "Kubernetes Resource Count", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "fieldConfig": { + "defaults": { + "mappings": [], + "noValue": "0", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "rgb(255, 255, 255)", + "value": null + } + ] }, - { - "datasource": { - "type": "prometheus", - "uid": "${datasource}" - }, - "editorMode": "code", - "expr": "sum(machine_cpu_cores{cluster=\"$cluster\"})", - "hide": false, - "legendFormat": "Cluster Total", - "range": true, - "refId": "D" - } - ], - "title": "Namespace(s) CPU Usage in cores", - "type": "stat" + "unit": "none" + }, + "overrides": [] }, - { - "datasource": { - "type": "prometheus", - "uid": "${datasource}" + "gridPos": { + "h": 4, + "w": 6, + "x": 0, + "y": 8 + }, + "id": 62, + "options": { + "colorMode": "none", + "graphMode": "none", + "justifyMode": "center", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false }, - "fieldConfig": { - "defaults": { - "mappings": [], - "noValue": "0", - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "rgb(255, 255, 255)", - "value": null - } - ] - }, - "unit": "bytes", - "unitScale": true - }, - "overrides": [] - }, - "gridPos": { - "h": 4, - "w": 6, - "x": 6, - "y": 8 - }, - "id": 64, - "options": { - "colorMode": "none", - "graphMode": "none", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "showPercentChange": false, - "text": {}, - "textMode": "auto", - 
"wideLayout": true - }, - "pluginVersion": "10.3.1", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${datasource}" - }, - "editorMode": "code", - "exemplar": true, - "expr": "sum(container_memory_working_set_bytes{namespace=~\"$namespace\", image!=\"\", cluster=\"$cluster\"})", - "interval": "", - "legendFormat": "Real", - "range": true, - "refId": "A" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${datasource}" - }, - "editorMode": "code", - "expr": "sum(kube_pod_container_resource_requests{namespace=~\"$namespace\", resource=\"memory\", cluster=\"$cluster\"})", - "hide": false, - "legendFormat": "Requests", - "range": true, - "refId": "B" + "showPercentChange": false, + "text": {}, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "11.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" }, - { - "datasource": { - "type": "prometheus", - "uid": "${datasource}" - }, - "editorMode": "code", - "expr": "sum(kube_pod_container_resource_limits{namespace=~\"$namespace\", resource=\"memory\", cluster=\"$cluster\"})", - "hide": false, - "legendFormat": "Limits", - "range": true, - "refId": "C" + "editorMode": "code", + "exemplar": true, + "expr": "sum(rate(container_cpu_usage_seconds_total{namespace=~\"$namespace\", image!=\"\", cluster=\"$cluster\"}[$__rate_interval]))", + "interval": "", + "legendFormat": "Real", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" }, - { - "datasource": { - "type": "prometheus", - "uid": "${datasource}" - }, - "editorMode": "code", - "expr": "sum(machine_memory_bytes{cluster=\"$cluster\"})", - "hide": false, - "legendFormat": "Cluster Total", - "range": true, - "refId": "D" - } - ], - "title": "Namespace(s) RAM Usage in bytes", - "type": "stat" - }, - { - "collapsed": false, - "datasource": { - "type": "datasource", - "uid": "grafana" + "editorMode": "code", + "expr": 
"sum(kube_pod_container_resource_requests{namespace=~\"$namespace\", resource=\"cpu\", cluster=\"$cluster\"})", + "hide": false, + "legendFormat": "Requests", + "range": true, + "refId": "B" }, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 12 + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "sum(kube_pod_container_resource_limits{namespace=~\"$namespace\", resource=\"cpu\", cluster=\"$cluster\"})", + "hide": false, + "legendFormat": "Limits", + "range": true, + "refId": "C" }, - "id": 40, - "panels": [], - "title": "Resources", - "type": "row" + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "sum(machine_cpu_cores{cluster=\"$cluster\"})", + "hide": false, + "legendFormat": "Cluster Total", + "range": true, + "refId": "D" + } + ], + "title": "Namespace(s) CPU Usage in cores", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" }, - { - "datasource": { - "type": "prometheus", - "uid": "${datasource}" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "CPU CORES", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 25, - "gradientMode": "opacity", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "smooth", - "lineWidth": 2, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" + "fieldConfig": { + "defaults": { + "mappings": [], + "noValue": "0", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "rgb(255, 255, 255)", + "value": null } - }, - "mappings": [], - "thresholds": { - 
"mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "none", - "unitScale": true - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 13 - }, - "id": 29, - "options": { - "legend": { - "calcs": [], - "displayMode": "table", - "placement": "right", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } + ] + }, + "unit": "bytes" }, - "pluginVersion": "8.3.3", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${datasource}" - }, - "editorMode": "code", - "exemplar": true, - "expr": "sum(rate(container_cpu_usage_seconds_total{namespace=~\"$namespace\", image!=\"\", pod=~\"${created_by}.*\", cluster=\"$cluster\"}[$__rate_interval])) by (pod)", - "interval": "$resolution", - "legendFormat": "{{ pod }}", - "range": true, - "refId": "A" - } - ], - "title": "CPU usage by Pod", - "type": "timeseries" + "overrides": [] }, - { - "datasource": { - "type": "prometheus", - "uid": "${datasource}" + "gridPos": { + "h": 4, + "w": 6, + "x": 6, + "y": 8 + }, + "id": 64, + "options": { + "colorMode": "none", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 25, - "gradientMode": "opacity", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "smooth", - "lineWidth": 2, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": 
"off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "bytes", - "unitScale": true - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 13 - }, - "id": 30, - "options": { - "legend": { - "calcs": [], - "displayMode": "table", - "placement": "right", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "pluginVersion": "8.3.3", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${datasource}" - }, - "editorMode": "code", - "exemplar": true, - "expr": "sum(container_memory_working_set_bytes{namespace=~\"$namespace\", image!=\"\", pod=~\"${created_by}.*\", cluster=\"$cluster\"}) by (pod)", - "interval": "$resolution", - "legendFormat": "{{ pod }}", - "range": true, - "refId": "A" - } - ], - "title": "Memory usage by Pod", - "type": "timeseries" + "showPercentChange": false, + "text": {}, + "textMode": "auto", + "wideLayout": true }, - { - "datasource": { - "type": "prometheus", - "uid": "${datasource}" + "pluginVersion": "11.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "exemplar": true, + "expr": "sum(container_memory_working_set_bytes{namespace=~\"$namespace\", image!=\"\", cluster=\"$cluster\"})", + "interval": "", + "legendFormat": "Real", + "range": true, + "refId": "A" }, - "description": "", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "SECONDS", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 25, - "gradientMode": "opacity", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": 
"code", + "expr": "sum(kube_pod_container_resource_requests{namespace=~\"$namespace\", resource=\"memory\", cluster=\"$cluster\"})", + "hide": false, + "legendFormat": "Requests", + "range": true, + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "sum(kube_pod_container_resource_limits{namespace=~\"$namespace\", resource=\"memory\", cluster=\"$cluster\"})", + "hide": false, + "legendFormat": "Limits", + "range": true, + "refId": "C" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "sum(machine_memory_bytes{cluster=\"$cluster\"})", + "hide": false, + "legendFormat": "Cluster Total", + "range": true, + "refId": "D" + } + ], + "title": "Namespace(s) RAM Usage in bytes", + "type": "stat" + }, + { + "collapsed": false, + "datasource": { + "type": "datasource", + "uid": "grafana" + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 12 + }, + "id": 40, + "panels": [], + "title": "Resources", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "CPU CORES", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 25, + "gradientMode": "opacity", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "smooth", + "lineWidth": 2, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null }, - "insertNulls": false, - "lineInterpolation": 
"smooth", - "lineStyle": { - "fill": "solid" + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 13 + }, + "id": 29, + "options": { + "legend": { + "calcs": [], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "maxHeight": 600, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "8.3.3", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "exemplar": true, + "expr": "sum(rate(container_cpu_usage_seconds_total{namespace=~\"$namespace\", image!=\"\", pod=~\"${created_by}.*\", cluster=\"$cluster\"}[$__rate_interval])) by (pod)", + "interval": "$resolution", + "legendFormat": "{{ pod }}", + "range": true, + "refId": "A" + } + ], + "title": "CPU usage by Pod", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 25, + "gradientMode": "opacity", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "smooth", + "lineWidth": 2, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null }, - "lineWidth": 2, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + 
"x": 12, + "y": 13 + }, + "id": 30, + "options": { + "legend": { + "calcs": [], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "maxHeight": 600, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "8.3.3", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "exemplar": true, + "expr": "sum(container_memory_working_set_bytes{namespace=~\"$namespace\", image!=\"\", pod=~\"${created_by}.*\", cluster=\"$cluster\"}) by (pod)", + "interval": "$resolution", + "legendFormat": "{{ pod }}", + "range": true, + "refId": "A" + } + ], + "title": "Memory usage by Pod", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "SECONDS", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 25, + "gradientMode": "opacity", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "smooth", + "lineStyle": { + "fill": "solid" + }, + "lineWidth": 2, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "decimals": 2, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 21 + }, + "id": 68, + "options": { + "legend": { + "calcs": [ + "min", + "max", + "mean" + ], + 
"displayMode": "table", + "placement": "right", + "showLegend": true, + "sortBy": "Max", + "sortDesc": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "8.3.3", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "exemplar": true, + "expr": "sum(rate(container_cpu_cfs_throttled_seconds_total{namespace=~\"$namespace\", image!=\"\", pod=~\"${created_by}.*\", cluster=\"$cluster\"}[$__rate_interval])) by (pod) > 0", + "interval": "$resolution", + "legendFormat": "{{ pod }}", + "range": true, + "refId": "A" + } + ], + "title": "CPU Throttled seconds by pod", + "type": "timeseries" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 29 + }, + "id": 73, + "panels": [], + "title": "Kubernetes", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 25, + "gradientMode": "opacity", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "smooth", + "lineWidth": 2, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" }, - "thresholdsStyle": { - "mode": "off" + { + "color": "red", + "value": 80 } - }, - "decimals": 2, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "s" - }, - "overrides": [] - }, 
- "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 21 - }, - "id": 68, - "options": { - "legend": { - "calcs": [ - "min", - "max", - "mean" - ], - "displayMode": "table", - "placement": "right", - "showLegend": true, - "sortBy": "Max", - "sortDesc": true - }, - "tooltip": { - "mode": "multi", - "sort": "none" - } + ] + }, + "unit": "short" }, - "pluginVersion": "8.3.3", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${datasource}" - }, - "editorMode": "code", - "exemplar": true, - "expr": "sum(rate(container_cpu_cfs_throttled_seconds_total{namespace=~\"$namespace\", image!=\"\", pod=~\"${created_by}.*\", cluster=\"$cluster\"}[$__rate_interval])) by (pod) > 0", - "interval": "$resolution", - "legendFormat": "{{ pod }}", - "range": true, - "refId": "A" - } - ], - "title": "CPU Throttled seconds by pod", - "type": "timeseries" + "overrides": [] }, - { - "collapsed": false, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 29 - }, - "id": 73, - "panels": [], - "title": "Kubernetes", - "type": "row" + "gridPos": { + "h": 9, + "w": 12, + "x": 0, + "y": 30 }, - { - "datasource": { - "type": "prometheus", - "uid": "${datasource}" + "id": 70, + "options": { + "legend": { + "calcs": [ + "min", + "max", + "mean" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true, + "sortBy": "Max", + "sortDesc": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "8.3.3", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "exemplar": true, + "expr": "sum(kube_pod_status_qos_class{namespace=~\"$namespace\", cluster=\"$cluster\"}) by (qos_class)", + "interval": "", + "legendFormat": "{{ qos_class }} pods", + "range": true, + "refId": "A" }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - 
"axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 25, - "gradientMode": "opacity", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "smooth", - "lineWidth": 2, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "sum(kube_pod_info{namespace=~\"$namespace\", cluster=\"$cluster\"})", + "hide": false, + "legendFormat": "Total pods", + "range": true, + "refId": "B" + } + ], + "title": "Kubernetes Pods QoS classes", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 25, + "gradientMode": "opacity", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "smooth", + "lineWidth": 2, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" }, - "thresholdsStyle": { - "mode": "off" + { + "color": "red", + "value": 80 } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "short" - }, - "overrides": [] - }, - "gridPos": { - "h": 9, - "w": 12, - "x": 0, - "y": 30 - }, - "id": 70, - "options": { - 
"legend": { - "calcs": [ - "min", - "max", - "mean" - ], - "displayMode": "table", - "placement": "right", - "showLegend": true, - "sortBy": "Max", - "sortDesc": true - }, - "tooltip": { - "mode": "multi", - "sort": "none" - } + ] + }, + "unit": "short" }, - "pluginVersion": "8.3.3", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${datasource}" - }, - "editorMode": "code", - "exemplar": true, - "expr": "sum(kube_pod_status_qos_class{namespace=~\"$namespace\", cluster=\"$cluster\"}) by (qos_class)", - "interval": "", - "legendFormat": "{{ qos_class }} pods", - "range": true, - "refId": "A" + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 12, + "x": 12, + "y": 30 + }, + "id": 72, + "options": { + "legend": { + "calcs": [ + "min", + "max", + "mean" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true, + "sortBy": "Max", + "sortDesc": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "8.3.3", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" }, - { - "datasource": { - "type": "prometheus", - "uid": "${datasource}" - }, - "editorMode": "code", - "expr": "sum(kube_pod_info{namespace=~\"$namespace\", cluster=\"$cluster\"})", - "hide": false, - "legendFormat": "Total pods", - "range": true, - "refId": "B" - } - ], - "title": "Kubernetes Pods QoS classes", - "type": "timeseries" + "editorMode": "code", + "exemplar": true, + "expr": "sum(kube_pod_status_reason{cluster=\"$cluster\"}) by (reason)", + "interval": "", + "legendFormat": "{{ reason }}", + "range": true, + "refId": "A" + } + ], + "title": "Kubernetes Pods Status Reason", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" }, - { - "datasource": { - "type": "prometheus", - "uid": "${datasource}" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": 
false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 25, - "gradientMode": "opacity", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "smooth", - "lineWidth": 2, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" + "description": "No data is generally a good thing here.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "points", + "fillOpacity": 25, + "gradientMode": "opacity", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "smooth", + "lineWidth": 2, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" }, - "thresholdsStyle": { - "mode": "off" + { + "color": "red", + "value": 80 } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "short" - }, - "overrides": [] - }, - "gridPos": { - "h": 9, - "w": 12, - "x": 12, - "y": 30 - }, - "id": 72, - "options": { - "legend": { - "calcs": [ - "min", - "max", - "mean" - ], - "displayMode": "table", - "placement": "right", - "showLegend": true, - "sortBy": "Max", - "sortDesc": true - }, - "tooltip": { - "mode": "multi", - "sort": "none" - } + ] + }, + "unit": "short" }, - "pluginVersion": "8.3.3", - "targets": [ - 
{ - "datasource": { - "type": "prometheus", - "uid": "${datasource}" - }, - "editorMode": "code", - "exemplar": true, - "expr": "sum(kube_pod_status_reason{cluster=\"$cluster\"}) by (reason)", - "interval": "", - "legendFormat": "{{ reason }}", - "range": true, - "refId": "A" - } - ], - "title": "Kubernetes Pods Status Reason", - "type": "timeseries" + "overrides": [] }, - { - "datasource": { - "type": "prometheus", - "uid": "${datasource}" - }, - "description": "No data is generally a good thing here.", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "points", - "fillOpacity": 25, - "gradientMode": "opacity", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "smooth", - "lineWidth": 2, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" + "gridPos": { + "h": 9, + "w": 12, + "x": 0, + "y": 39 + }, + "id": 74, + "options": { + "legend": { + "calcs": [ + "min", + "max", + "mean" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true, + "sortBy": "Max", + "sortDesc": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "8.3.3", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "exemplar": true, + "expr": "sum(increase(container_oom_events_total{namespace=~\"${namespace}\", cluster=\"$cluster\"}[$__rate_interval])) by (namespace, pod) > 0", + "interval": "", + "legendFormat": "namespace: {{ namespace }} - pod: {{ pod }}", + "range": true, + "refId": "A" + } + ], + "title": "OOM Events by namespace, pod", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "description": "No data is generally a good thing 
here.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "points", + "fillOpacity": 25, + "gradientMode": "opacity", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "smooth", + "lineWidth": 2, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 12, + "x": 12, + "y": 39 + }, + "id": 75, + "options": { + "legend": { + "calcs": [ + "min", + "max", + "mean" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true, + "sortBy": "Max", + "sortDesc": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "8.3.3", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "exemplar": true, + "expr": "sum(increase(kube_pod_container_status_restarts_total{namespace=~\"${namespace}\", cluster=\"$cluster\"}[$__rate_interval])) by (namespace, pod) > 0", + "interval": "", + "legendFormat": "namespace: {{ namespace }} - pod: {{ pod }}", + "range": true, + "refId": "A" + } + ], + "title": "Container Restarts by namespace, pod", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + 
"custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 25, + "gradientMode": "opacity", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "smooth", + "lineWidth": 2, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "decimals": 0, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" }, - "thresholdsStyle": { - "mode": "off" + { + "color": "red", + "value": 80 } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "short" - }, - "overrides": [] - }, - "gridPos": { - "h": 9, - "w": 12, - "x": 0, - "y": 39 - }, - "id": 74, - "options": { - "legend": { - "calcs": [ - "min", - "max", - "mean" - ], - "displayMode": "table", - "placement": "right", - "showLegend": true, - "sortBy": "Max", - "sortDesc": true - }, - "tooltip": { - "mode": "multi", - "sort": "none" - } + ] + }, + "unit": "none" }, - "pluginVersion": "8.3.3", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${datasource}" - }, - "editorMode": "code", - "exemplar": true, - "expr": "sum(increase(container_oom_events_total{namespace=~\"${namespace}\", cluster=\"$cluster\"}[$__rate_interval])) by (namespace, pod) > 0", - "interval": "", - "legendFormat": "namespace: {{ namespace }} - pod: {{ pod }}", - "range": true, - "refId": "A" - } - ], - "title": "OOM Events by namespace, pod", - "type": "timeseries" + "overrides": [] }, - { - "datasource": { - "type": "prometheus", - "uid": "${datasource}" + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 48 + }, + "id": 
5, + "options": { + "legend": { + "calcs": [], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "8.3.3", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "sum(kube_pod_container_status_ready{namespace=~\"$namespace\", pod=~\"${created_by}.*\", cluster=\"$cluster\"})", + "interval": "", + "legendFormat": "Ready", + "range": true, + "refId": "A" }, - "description": "No data is generally a good thing here.", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "points", - "fillOpacity": 25, - "gradientMode": "opacity", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "smooth", - "lineWidth": 2, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "sum(kube_pod_container_status_running{namespace=~\"$namespace\", pod=~\"${created_by}.*\", cluster=\"$cluster\"})", + "interval": "", + "legendFormat": "Running", + "range": true, + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum(kube_pod_container_status_waiting{namespace=~\"$namespace\", cluster=\"$cluster\"})", + "interval": "", + "legendFormat": "Waiting", + "refId": "C" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum(kube_pod_container_status_restarts_total{namespace=~\"$namespace\", cluster=\"$cluster\"})", + "interval": "", + "legendFormat": 
"Restarts Total", + "refId": "D" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum(kube_pod_container_status_terminated{namespace=~\"$namespace\", cluster=\"$cluster\"})", + "interval": "", + "legendFormat": "Terminated", + "refId": "E" + } + ], + "title": "Nb of pods by state", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 25, + "gradientMode": "opacity", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "smooth", + "lineWidth": 2, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "decimals": 0, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" }, - "thresholdsStyle": { - "mode": "off" + { + "color": "red", + "value": 80 } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "short" - }, - "overrides": [] - }, - "gridPos": { - "h": 9, - "w": 12, - "x": 12, - "y": 39 - }, - "id": 75, - "options": { - "legend": { - "calcs": [ - "min", - "max", - "mean" - ], - "displayMode": "table", - "placement": "right", - "showLegend": true, - "sortBy": "Max", - "sortDesc": true - }, - "tooltip": { - "mode": "multi", - "sort": "none" - } + ] + }, + "unit": "none" }, - "pluginVersion": "8.3.3", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${datasource}" - }, - "editorMode": "code", - 
"exemplar": true, - "expr": "sum(increase(kube_pod_container_status_restarts_total{namespace=~\"${namespace}\", cluster=\"$cluster\"}[$__rate_interval])) by (namespace, pod) > 0", - "interval": "", - "legendFormat": "namespace: {{ namespace }} - pod: {{ pod }}", - "range": true, - "refId": "A" - } - ], - "title": "Container Restarts by namespace, pod", - "type": "timeseries" + "overrides": [] }, - { - "datasource": { - "type": "prometheus", - "uid": "${datasource}" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 25, - "gradientMode": "opacity", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "smooth", - "lineWidth": 2, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 48 + }, + "id": 2, + "options": { + "legend": { + "calcs": [], + "displayMode": "hidden", + "placement": "right", + "showLegend": false + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "8.3.3", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "sum(kube_pod_container_info{namespace=~\"$namespace\", pod=~\"${created_by}.*\", cluster=\"$cluster\"}) by (pod)", + "interval": "", + "legendFormat": "{{ pod }}", + "range": true, + "refId": "A" + } + ], + "title": "Nb of containers by pod", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + 
"axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 25, + "gradientMode": "opacity", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "smooth", + "lineWidth": 2, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" }, - "thresholdsStyle": { - "mode": "off" + { + "color": "red", + "value": 80 } - }, - "decimals": 0, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "none" - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 48 - }, - "id": 5, - "options": { - "legend": { - "calcs": [], - "displayMode": "table", - "placement": "right", - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } + ] + }, + "unit": "short" }, - "pluginVersion": "8.3.3", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${datasource}" - }, - "editorMode": "code", - "expr": "sum(kube_pod_container_status_ready{namespace=~\"$namespace\", pod=~\"${created_by}.*\", cluster=\"$cluster\"})", - "interval": "", - "legendFormat": "Ready", - "range": true, - "refId": "A" + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 56 + }, + "id": 7, + "options": { + "legend": { + "calcs": [], + "displayMode": "table", + "placement": "right", + "showLegend": false + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "8.3.3", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" }, - { - "datasource": { - "type": 
"prometheus", - "uid": "${datasource}" - }, - "editorMode": "code", - "expr": "sum(kube_pod_container_status_running{namespace=~\"$namespace\", pod=~\"${created_by}.*\", cluster=\"$cluster\"})", - "interval": "", - "legendFormat": "Running", - "range": true, - "refId": "B" + "editorMode": "code", + "exemplar": true, + "expr": "sum(kube_deployment_status_replicas_available{namespace=~\"$namespace\", cluster=\"$cluster\"}) by (deployment)", + "interval": "", + "legendFormat": "{{ deployment }}", + "range": true, + "refId": "A" + } + ], + "title": "Replicas available by deployment", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 25, + "gradientMode": "opacity", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "smooth", + "lineWidth": 2, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } }, - { - "datasource": { - "type": "prometheus", - "uid": "${datasource}" - }, - "expr": "sum(kube_pod_container_status_waiting{namespace=~\"$namespace\", cluster=\"$cluster\"})", - "interval": "", - "legendFormat": "Waiting", - "refId": "C" + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] }, - { - "datasource": { - "type": "prometheus", - "uid": "${datasource}" - }, - "expr": "sum(kube_pod_container_status_restarts_total{namespace=~\"$namespace\", cluster=\"$cluster\"})", - "interval": "", - "legendFormat": "Restarts Total", - 
"refId": "D" + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 56 + }, + "id": 8, + "options": { + "legend": { + "calcs": [], + "displayMode": "table", + "placement": "right", + "showLegend": false + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "8.3.3", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" }, - { - "datasource": { - "type": "prometheus", - "uid": "${datasource}" - }, - "expr": "sum(kube_pod_container_status_terminated{namespace=~\"$namespace\", cluster=\"$cluster\"})", - "interval": "", - "legendFormat": "Terminated", - "refId": "E" - } - ], - "title": "Nb of pods by state", - "type": "timeseries" + "editorMode": "code", + "expr": "sum(kube_deployment_status_replicas_unavailable{namespace=~\"$namespace\", pod=~\"${created_by}.*\", cluster=\"$cluster\"}) by (deployment)", + "interval": "", + "legendFormat": "{{ deployment }}", + "range": true, + "refId": "A" + } + ], + "title": "Replicas unavailable by deployment", + "type": "timeseries" + }, + { + "collapsed": false, + "datasource": { + "type": "datasource", + "uid": "grafana" }, - { - "datasource": { - "type": "prometheus", - "uid": "${datasource}" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 25, - "gradientMode": "opacity", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "smooth", - "lineWidth": 2, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 64 + }, + "id": 42, + "panels": [], + "title": 
"Kubernetes Storage", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 25, + "gradientMode": "opacity", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "smooth", + "lineWidth": 2, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "decimals": 2, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" }, - "thresholdsStyle": { - "mode": "off" + { + "color": "red", + "value": 80 } - }, - "decimals": 0, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "none" - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 48 - }, - "id": 2, - "options": { - "legend": { - "calcs": [], - "displayMode": "hidden", - "placement": "right", - "showLegend": false - }, - "tooltip": { - "mode": "multi", - "sort": "none" - } + ] + }, + "unit": "percentunit" }, - "pluginVersion": "8.3.3", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${datasource}" - }, - "editorMode": "code", - "expr": "sum(kube_pod_container_info{namespace=~\"$namespace\", pod=~\"${created_by}.*\", cluster=\"$cluster\"}) by (pod)", - "interval": "", - "legendFormat": "{{ pod }}", - "range": true, - "refId": "A" - } - ], - "title": "Nb of containers by pod", - "type": "timeseries" + "overrides": [] }, - { - "datasource": { - "type": "prometheus", - "uid": 
"${datasource}" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 25, - "gradientMode": "opacity", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "smooth", - "lineWidth": 2, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 65 + }, + "id": 65, + "options": { + "legend": { + "calcs": [], + "displayMode": "table", + "placement": "right", + "showLegend": false + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "8.3.3", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "exemplar": true, + "expr": "sum(kubelet_volume_stats_used_bytes{namespace=~\"$namespace\", cluster=\"$cluster\"}) by (persistentvolumeclaim) / sum(kubelet_volume_stats_capacity_bytes{namespace=~\"$namespace\", cluster=\"$cluster\"}) by (persistentvolumeclaim)", + "interval": "", + "legendFormat": "{{ persistentvolumeclaim }}", + "refId": "A" + } + ], + "title": "Persistent Volumes - Capacity and usage in %", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 25, + "gradientMode": "opacity", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "smooth", + 
"lineWidth": 2, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "decimals": 2, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" }, - "thresholdsStyle": { - "mode": "off" + { + "color": "red", + "value": 80 } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "short" - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 56 - }, - "id": 7, - "options": { - "legend": { - "calcs": [], - "displayMode": "table", - "placement": "right", - "showLegend": false - }, - "tooltip": { - "mode": "multi", - "sort": "none" - } + ] + }, + "unit": "bytes" }, - "pluginVersion": "8.3.3", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${datasource}" - }, - "editorMode": "code", - "exemplar": true, - "expr": "sum(kube_deployment_status_replicas_available{namespace=~\"$namespace\", cluster=\"$cluster\"}) by (deployment)", - "interval": "", - "legendFormat": "{{ deployment }}", - "range": true, - "refId": "A" - } - ], - "title": "Replicas available by deployment", - "type": "timeseries" + "overrides": [] }, - { - "datasource": { - "type": "prometheus", - "uid": "${datasource}" + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 65 + }, + "id": 66, + "options": { + "legend": { + "calcs": [], + "displayMode": "table", + "placement": "right", + "showLegend": false + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "8.3.3", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "exemplar": true, + "expr": "sum(kubelet_volume_stats_used_bytes{namespace=~\"$namespace\", cluster=\"$cluster\"}) by (persistentvolumeclaim)", + "interval": "", + 
"legendFormat": "{{ persistentvolumeclaim }} - Used", + "refId": "A" }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 25, - "gradientMode": "opacity", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "smooth", - "lineWidth": 2, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "exemplar": true, + "expr": "sum(kubelet_volume_stats_capacity_bytes{namespace=~\"$namespace\", cluster=\"$cluster\"}) by (persistentvolumeclaim)", + "hide": false, + "interval": "", + "legendFormat": "{{ persistentvolumeclaim }} - Capacity", + "refId": "B" + } + ], + "title": "Persistent Volumes - Capacity and usage in bytes", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 25, + "gradientMode": "opacity", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "smooth", + "lineWidth": 2, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "decimals": 2, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + 
"color": "green" }, - "thresholdsStyle": { - "mode": "off" + { + "color": "red", + "value": 80 } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "short" - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 56 - }, - "id": 8, - "options": { - "legend": { - "calcs": [], - "displayMode": "table", - "placement": "right", - "showLegend": false - }, - "tooltip": { - "mode": "multi", - "sort": "none" - } + ] + }, + "unit": "percentunit" }, - "pluginVersion": "8.3.3", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${datasource}" - }, - "editorMode": "code", - "expr": "sum(kube_deployment_status_replicas_unavailable{namespace=~\"$namespace\", pod=~\"${created_by}.*\", cluster=\"$cluster\"}) by (deployment)", - "interval": "", - "legendFormat": "{{ deployment }}", - "range": true, - "refId": "A" - } - ], - "title": "Replicas unavailable by deployment", - "type": "timeseries" + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 73 + }, + "id": 27, + "options": { + "legend": { + "calcs": [], + "displayMode": "table", + "placement": "right", + "showLegend": false + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } }, + "pluginVersion": "8.3.3", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "exemplar": true, + "expr": "1 - sum(kubelet_volume_stats_inodes_used{namespace=~\"$namespace\", cluster=\"$cluster\"}) by (persistentvolumeclaim) / sum(kubelet_volume_stats_inodes{namespace=~\"$namespace\", cluster=\"$cluster\"}) by (persistentvolumeclaim)", + "interval": "", + "legendFormat": "{{ persistentvolumeclaim }}", + "refId": "A" + } + ], + "title": "Persistent Volumes - Inodes", + "type": "timeseries" + } + ], + "refresh": "30s", + "schemaVersion": 39, + "tags": [ + "Kubernetes", + "Prometheus" + ], + "templating": { + "list": [ { - 
"collapsed": false, - "datasource": { - "type": "datasource", - "uid": "grafana" - }, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 64 - }, - "id": 42, - "panels": [], - "title": "Kubernetes Storage", - "type": "row" + "current": { + "selected": true, + "text": "default", + "value": "default" + }, + "hide": 0, + "includeAll": false, + "multi": false, + "name": "datasource", + "options": [], + "query": "prometheus", + "queryValue": "", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "type": "datasource" }, { + "current": { + "isNone": true, + "selected": false, + "text": "None", + "value": "" + }, "datasource": { "type": "prometheus", "uid": "${datasource}" }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 25, - "gradientMode": "opacity", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "smooth", - "lineWidth": 2, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "decimals": 2, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "percentunit" - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 65 - }, - "id": 65, - "options": { - "legend": { - "calcs": [], - "displayMode": "table", - "placement": "right", - "showLegend": false - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "pluginVersion": "8.3.3", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${datasource}" - }, - "exemplar": true, - "expr": 
"sum(kubelet_volume_stats_used_bytes{namespace=~\"$namespace\", cluster=\"$cluster\"}) by (persistentvolumeclaim) / sum(kubelet_volume_stats_capacity_bytes{namespace=~\"$namespace\", cluster=\"$cluster\"}) by (persistentvolumeclaim)", - "interval": "", - "legendFormat": "{{ persistentvolumeclaim }}", - "refId": "A" - } - ], - "title": "Persistent Volumes - Capacity and usage in %", - "type": "timeseries" + "definition": "label_values(kube_node_info,cluster)", + "hide": 0, + "includeAll": false, + "multi": false, + "name": "cluster", + "options": [], + "query": { + "qryType": 1, + "query": "label_values(kube_node_info,cluster)", + "refId": "PrometheusVariableQueryEditor-VariableQuery" + }, + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 1, + "type": "query" }, { + "current": { + "selected": false, + "text": "All", + "value": "$__all" + }, "datasource": { "type": "prometheus", "uid": "${datasource}" }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 25, - "gradientMode": "opacity", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "smooth", - "lineWidth": 2, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "decimals": 2, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "bytes" - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 65 - }, - "id": 66, - "options": { - "legend": { - "calcs": [], - "displayMode": "table", - "placement": "right", - 
"showLegend": false - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "pluginVersion": "8.3.3", - "targets": [ + "definition": "label_values(kube_pod_info{cluster=\"$cluster\"}, namespace)", + "hide": 0, + "includeAll": true, + "multi": true, + "name": "namespace", + "options": [], + "query": { + "query": "label_values(kube_pod_info{cluster=\"$cluster\"}, namespace)", + "refId": "StandardVariableQuery" + }, + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 1, + "tagValuesQuery": "", + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "current": { + "selected": false, + "text": "30s", + "value": "30s" + }, + "hide": 0, + "includeAll": false, + "multi": false, + "name": "resolution", + "options": [ { - "datasource": { - "type": "prometheus", - "uid": "${datasource}" - }, - "exemplar": true, - "expr": "sum(kubelet_volume_stats_used_bytes{namespace=~\"$namespace\", cluster=\"$cluster\"}) by (persistentvolumeclaim)", - "interval": "", - "legendFormat": "{{ persistentvolumeclaim }} - Used", - "refId": "A" + "selected": false, + "text": "1s", + "value": "1s" }, { - "datasource": { - "type": "prometheus", - "uid": "${datasource}" - }, - "exemplar": true, - "expr": "sum(kubelet_volume_stats_capacity_bytes{namespace=~\"$namespace\", cluster=\"$cluster\"}) by (persistentvolumeclaim)", - "hide": false, - "interval": "", - "legendFormat": "{{ persistentvolumeclaim }} - Capacity", - "refId": "B" + "selected": false, + "text": "15s", + "value": "15s" + }, + { + "selected": true, + "text": "30s", + "value": "30s" + }, + { + "selected": false, + "text": "1m", + "value": "1m" + }, + { + "selected": false, + "text": "3m", + "value": "3m" + }, + { + "selected": false, + "text": "5m", + "value": "5m" } ], - "title": "Persistent Volumes - Capacity and usage in bytes", - "type": "timeseries" + "query": "1s, 15s, 30s, 1m, 3m, 5m", + "queryValue": "", + "skipUrlSync": false, + "type": "custom" }, { + "current": { + "selected": false, + 
"text": "All", + "value": "$__all" + }, "datasource": { "type": "prometheus", "uid": "${datasource}" }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 25, - "gradientMode": "opacity", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "smooth", - "lineWidth": 2, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "decimals": 2, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "percentunit" - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 73 - }, - "id": 27, - "options": { - "legend": { - "calcs": [], - "displayMode": "table", - "placement": "right", - "showLegend": false - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "pluginVersion": "8.3.3", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${datasource}" - }, - "exemplar": true, - "expr": "1 - sum(kubelet_volume_stats_inodes_used{namespace=~\"$namespace\", cluster=\"$cluster\"}) by (persistentvolumeclaim) / sum(kubelet_volume_stats_inodes{namespace=~\"$namespace\", cluster=\"$cluster\"}) by (persistentvolumeclaim)", - "interval": "", - "legendFormat": "{{ persistentvolumeclaim }}", - "refId": "A" - } - ], - "title": "Persistent Volumes - Inodes", - "type": "timeseries" + "definition": "label_values(kube_pod_info{namespace=~\"$namespace\", cluster=\"$cluster\"},created_by_name)", + "description": "Can be used to filter on a specific deployment, statefulset or deamonset 
(only relevant panels).", + "hide": 0, + "includeAll": true, + "multi": true, + "name": "created_by", + "options": [], + "query": { + "query": "label_values(kube_pod_info{namespace=~\"$namespace\", cluster=\"$cluster\"},created_by_name)", + "refId": "PrometheusVariableQueryEditor-VariableQuery" + }, + "refresh": 2, + "regex": "", + "skipUrlSync": false, + "sort": 1, + "type": "query" } - ], - "refresh": "30s", - "schemaVersion": 39, - "tags": [ - "Kubernetes", - "Prometheus" - ], - "templating": { - "list": [ - { - "current": {}, - "hide": 0, - "includeAll": false, - "multi": false, - "name": "datasource", - "options": [], - "query": "prometheus", - "queryValue": "", - "refresh": 1, - "regex": "", - "skipUrlSync": false, - "type": "datasource" - }, - { - "current": {}, - "datasource": { - "type": "prometheus", - "uid": "${datasource}" - }, - "definition": "label_values(kube_node_info,cluster)", - "hide": 0, - "includeAll": false, - "multi": false, - "name": "cluster", - "options": [], - "query": { - "qryType": 1, - "query": "label_values(kube_node_info,cluster)", - "refId": "PrometheusVariableQueryEditor-VariableQuery" - }, - "refresh": 1, - "regex": "", - "skipUrlSync": false, - "sort": 1, - "type": "query" - }, - { - "current": {}, - "datasource": { - "type": "prometheus", - "uid": "${datasource}" - }, - "definition": "label_values(kube_pod_info{cluster=\"$cluster\"}, namespace)", - "hide": 0, - "includeAll": true, - "multi": true, - "name": "namespace", - "options": [], - "query": { - "query": "label_values(kube_pod_info{cluster=\"$cluster\"}, namespace)", - "refId": "StandardVariableQuery" - }, - "refresh": 1, - "regex": "", - "skipUrlSync": false, - "sort": 1, - "tagValuesQuery": "", - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "current": { - "selected": false, - "text": "30s", - "value": "30s" - }, - "hide": 0, - "includeAll": false, - "multi": false, - "name": "resolution", - "options": [ - { - "selected": false, - "text": "1s", - 
"value": "1s" - }, - { - "selected": false, - "text": "15s", - "value": "15s" - }, - { - "selected": true, - "text": "30s", - "value": "30s" - }, - { - "selected": false, - "text": "1m", - "value": "1m" - }, - { - "selected": false, - "text": "3m", - "value": "3m" - }, - { - "selected": false, - "text": "5m", - "value": "5m" - } - ], - "query": "1s, 15s, 30s, 1m, 3m, 5m", - "queryValue": "", - "skipUrlSync": false, - "type": "custom" - }, - { - "current": {}, - "datasource": { - "type": "prometheus", - "uid": "${datasource}" - }, - "definition": "label_values(kube_pod_info{namespace=~\"$namespace\", cluster=\"$cluster\"},created_by_name)", - "description": "Can be used to filter on a specific deployment, statefulset or deamonset (only relevant panels).", - "hide": 0, - "includeAll": true, - "multi": true, - "name": "created_by", - "options": [], - "query": { - "query": "label_values(kube_pod_info{namespace=~\"$namespace\", cluster=\"$cluster\"},created_by_name)", - "refId": "PrometheusVariableQueryEditor-VariableQuery" - }, - "refresh": 2, - "regex": "", - "skipUrlSync": false, - "sort": 1, - "type": "query" - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": {}, - "timezone": "", - "title": "Kubernetes / Views / Namespaces", - "uid": "k8s_views_ns", - "version": 35, - "weekStart": "" - } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timeRangeUpdatedDuringEditOrView": false, + "timepicker": {}, + "timezone": "", + "title": "Kubernetes / Views / Namespaces", + "uid": "k8s_views_ns", + "version": 1, + "weekStart": "" +} diff --git a/grafana/dashboards/k8s_view_nodes.json b/grafana/dashboards/k8s_view_nodes.json index 6d1afc7..404cca1 100644 --- a/grafana/dashboards/k8s_view_nodes.json +++ b/grafana/dashboards/k8s_view_nodes.json @@ -1,53 +1,4 @@ { - "__inputs": [ - { - "name": "datasource", - "label": "Prometheus", - "description": "", - "type": "datasource", - "pluginId": "prometheus", - "pluginName": "Prometheus" - } - 
], - "__elements": {}, - "__requires": [ - { - "type": "panel", - "id": "gauge", - "name": "Gauge", - "version": "" - }, - { - "type": "grafana", - "id": "grafana", - "name": "Grafana", - "version": "11.1.0" - }, - { - "type": "datasource", - "id": "prometheus", - "name": "Prometheus", - "version": "1.0.0" - }, - { - "type": "panel", - "id": "stat", - "name": "Stat", - "version": "" - }, - { - "type": "panel", - "id": "table", - "name": "Table", - "version": "" - }, - { - "type": "panel", - "id": "timeseries", - "name": "Time series", - "version": "" - } - ], "annotations": { "list": [ { @@ -74,7 +25,7 @@ "editable": true, "fiscalYearStartMonth": 0, "graphTooltip": 1, - "id": null, + "id": 33, "links": [], "liveNow": false, "panels": [ @@ -150,7 +101,7 @@ "sizing": "auto", "text": {} }, - "pluginVersion": "11.1.0", + "pluginVersion": "11.0.0", "targets": [ { "datasource": { @@ -223,7 +174,7 @@ "sizing": "auto", "text": {} }, - "pluginVersion": "11.1.0", + "pluginVersion": "11.0.0", "targets": [ { "datasource": { @@ -287,7 +238,7 @@ "textMode": "value_and_name", "wideLayout": true }, - "pluginVersion": "11.1.0", + "pluginVersion": "11.0.0", "targets": [ { "datasource": { @@ -441,7 +392,7 @@ "showHeader": true, "sortBy": [] }, - "pluginVersion": "11.1.0", + "pluginVersion": "11.0.0", "targets": [ { "datasource": { @@ -590,7 +541,7 @@ "textMode": "auto", "wideLayout": true }, - "pluginVersion": "11.1.0", + "pluginVersion": "11.0.0", "targets": [ { "datasource": { @@ -655,7 +606,7 @@ "textMode": "auto", "wideLayout": true }, - "pluginVersion": "11.1.0", + "pluginVersion": "11.0.0", "targets": [ { "datasource": { @@ -719,7 +670,7 @@ "textMode": "auto", "wideLayout": true }, - "pluginVersion": "11.1.0", + "pluginVersion": "11.0.0", "targets": [ { "datasource": { @@ -784,7 +735,7 @@ "textMode": "auto", "wideLayout": true }, - "pluginVersion": "11.1.0", + "pluginVersion": "11.0.0", "targets": [ { "datasource": { @@ -857,7 +808,7 @@ "textMode": "auto", "wideLayout": true }, 
- "pluginVersion": "11.1.0", + "pluginVersion": "11.0.0", "targets": [ { "datasource": { @@ -971,6 +922,7 @@ "showLegend": true }, "tooltip": { + "maxHeight": 600, "mode": "multi", "sort": "none" } @@ -1071,6 +1023,7 @@ "showLegend": true }, "tooltip": { + "maxHeight": 600, "mode": "multi", "sort": "desc" } @@ -3808,7 +3761,11 @@ "templating": { "list": [ { - "current": {}, + "current": { + "selected": true, + "text": "default", + "value": "default" + }, "hide": 0, "includeAll": false, "multi": false, @@ -3822,7 +3779,12 @@ "type": "datasource" }, { - "current": {}, + "current": { + "isNone": true, + "selected": false, + "text": "None", + "value": "" + }, "datasource": { "type": "prometheus", "uid": "${datasource}" @@ -3892,7 +3854,11 @@ "type": "custom" }, { - "current": {}, + "current": { + "selected": false, + "text": "ip-10-20-41-244.us-east-2.compute.internal", + "value": "ip-10-20-41-244.us-east-2.compute.internal" + }, "datasource": { "type": "prometheus", "uid": "${datasource}" @@ -3914,7 +3880,11 @@ "type": "query" }, { - "current": {}, + "current": { + "selected": false, + "text": "10.20.41.244:9100", + "value": "10.20.41.244:9100" + }, "datasource": { "type": "prometheus", "uid": "${datasource}" @@ -3941,10 +3911,11 @@ "from": "now-1h", "to": "now" }, + "timeRangeUpdatedDuringEditOrView": false, "timepicker": {}, "timezone": "", "title": "Kubernetes / Views / Nodes", "uid": "k8s_views_nodes", - "version": 3, + "version": 1, "weekStart": "" } diff --git a/grafana/dashboards/k8s_view_pods.json b/grafana/dashboards/k8s_view_pods.json index 4251b71..864ef4e 100644 --- a/grafana/dashboards/k8s_view_pods.json +++ b/grafana/dashboards/k8s_view_pods.json @@ -1,2699 +1,2662 @@ { - "__inputs": [ + "annotations": { + "list": [ { - "name": "DS_PROMETHEUS", - "label": "Prometheus", - "description": "", - "type": "datasource", - "pluginId": "prometheus", - "pluginName": "Prometheus" + "builtIn": 1, + "datasource": { + "type": "datasource", + "uid": "grafana" + }, + 
"enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "target": { + "limit": 100, + "matchAny": false, + "tags": [], + "type": "dashboard" + }, + "type": "dashboard" } - ], - "__elements": [], - "__requires": [ - { - "type": "grafana", - "id": "grafana", - "name": "Grafana", - "version": "8.3.4" - }, - { + ] + }, + "description": "This is a modern 'Pods View' dashboard for your Kubernetes cluster(s). Made for kube-prometheus-stack and take advantage of the latest Grafana features. GitHub repository: https://github.com/dotdc/grafana-dashboards-kubernetes", + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 1, + "id": 34, + "links": [], + "liveNow": false, + "panels": [ + { + "collapsed": false, + "datasource": { "type": "datasource", - "id": "prometheus", - "name": "Prometheus", - "version": "5.0.0" - }, - { - "type": "panel", - "id": "timeseries", - "name": "Time series", - "version": "" - }, - { - "type": "panel", - "id": "stat", - "name": "Stat", - "version": "" + "uid": "grafana" }, - { - "type": "panel", - "id": "gauge", - "name": "Gauge", - "version": "" + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 }, - { - "type": "panel", - "id": "table", - "name": "Table", - "version": "" - } - ], - "annotations": { - "list": [ + "id": 43, + "panels": [], + "targets": [ { - "builtIn": 1, "datasource": { "type": "datasource", "uid": "grafana" }, - "enable": true, - "hide": true, - "iconColor": "rgba(0, 211, 255, 1)", - "name": "Annotations & Alerts", - "target": { - "limit": 100, - "matchAny": false, - "tags": [], - "type": "dashboard" - }, - "type": "dashboard" + "refId": "A" } - ] + ], + "title": "Information", + "type": "row" }, - "description": "This is a modern 'Pods View' dashboard for your Kubernetes cluster(s). Made for kube-prometheus-stack and take advantage of the latest Grafana features. 
GitHub repository: https://github.com/dotdc/grafana-dashboards-kubernetes", - "editable": true, - "fiscalYearStartMonth": 0, - "graphTooltip": 1, - "links": [], - "liveNow": false, - "panels": [ - { - "collapsed": false, - "datasource": { - "type": "datasource", - "uid": "grafana" - }, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 0 - }, - "id": 43, - "panels": [], - "targets": [ - { - "datasource": { - "type": "datasource", - "uid": "grafana" - }, - "refId": "A" - } - ], - "title": "Information", - "type": "row" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${datasource}" - }, - "description": "Panel only works when a single pod is selected.", - "fieldConfig": { - "defaults": { - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "rgb(255, 255, 255)", - "value": null - } - ] - }, - "unit": "none" - }, - "overrides": [] - }, - "gridPos": { - "h": 2, - "w": 12, - "x": 0, - "y": 1 - }, - "id": 2, - "options": { - "colorMode": "none", - "graphMode": "none", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "mean" - ], - "fields": "", - "values": false - }, - "showPercentChange": false, - "textMode": "name", - "wideLayout": true - }, - "pluginVersion": "10.4.1", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${datasource}" - }, - "editorMode": "code", - "exemplar": false, - "expr": "kube_pod_info{namespace=\"$namespace\", pod=\"$pod\", cluster=\"$cluster\"}", - "instant": true, - "interval": "", - "legendFormat": "{{ created_by_kind }}: {{ created_by_name }}", - "refId": "A" - } - ], - "title": "Created by", - "type": "stat" + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" }, - { - "datasource": { - "type": "prometheus", - "uid": "${datasource}" - }, - "description": "Panel only works when a single pod is selected.", - "fieldConfig": { - "defaults": { - "links": [ + "description": "Panel only works when a single pod is selected.", + 
"fieldConfig": { + "defaults": { + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ { - "title": "", - "url": "/d/k8s_views_nodes/kubernetes-views-nodes?var-datasource=${datasource}&var-node=${__data.fields.node}" + "color": "rgb(255, 255, 255)", + "value": null } - ], - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "rgb(255, 255, 255)", - "value": null - } - ] - }, - "unit": "none" - }, - "overrides": [] - }, - "gridPos": { - "h": 2, - "w": 6, - "x": 12, - "y": 1 - }, - "id": 33, - "options": { - "colorMode": "none", - "graphMode": "none", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "mean" - ], - "fields": "", - "values": false - }, - "showPercentChange": false, - "textMode": "name", - "wideLayout": true - }, - "pluginVersion": "10.4.1", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${datasource}" - }, - "editorMode": "code", - "exemplar": false, - "expr": "kube_pod_info{namespace=\"$namespace\", pod=\"$pod\", cluster=\"$cluster\"}", - "instant": true, - "interval": "", - "legendFormat": "{{ node }}", - "refId": "A" - } - ], - "title": "Running on", - "type": "stat" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${datasource}" + ] + }, + "unit": "none" }, - "description": "Panel only works when a single pod is selected.", - "fieldConfig": { - "defaults": { - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "rgb(255, 255, 255)", - "value": null - } - ] - }, - "unit": "none" - }, - "overrides": [] - }, - "gridPos": { - "h": 2, - "w": 6, - "x": 18, - "y": 1 - }, - "id": 41, - "options": { - "colorMode": "none", - "graphMode": "none", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "mean" - ], - "fields": "", - "values": false - }, - "showPercentChange": false, - "textMode": "name", - "wideLayout": true - }, - "pluginVersion": "10.4.1", - "targets": [ - { - 
"datasource": { - "type": "prometheus", - "uid": "${datasource}" - }, - "editorMode": "code", - "exemplar": false, - "expr": "kube_pod_info{namespace=\"$namespace\", pod=\"$pod\", cluster=\"$cluster\"}", - "instant": true, - "interval": "", - "legendFormat": "{{ pod_ip }}", - "refId": "A" - } - ], - "title": "Pod IP", - "type": "stat" + "overrides": [] }, - { - "datasource": { - "type": "prometheus", - "uid": "${datasource}" - }, - "description": "Panel only works when a single pod is selected.", - "fieldConfig": { - "defaults": { - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "rgb(255, 255, 255)", - "value": null - } - ] - }, - "unit": "none" - }, - "overrides": [] - }, - "gridPos": { - "h": 2, - "w": 5, - "x": 0, - "y": 3 - }, - "id": 52, - "options": { - "colorMode": "none", - "graphMode": "none", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "mean" - ], - "fields": "", - "values": false - }, - "showPercentChange": false, - "textMode": "name", - "wideLayout": true - }, - "pluginVersion": "10.4.1", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${datasource}" - }, - "editorMode": "code", - "exemplar": false, - "expr": "kube_pod_info{namespace=\"$namespace\", pod=\"$pod\", priority_class!=\"\", cluster=\"$cluster\"}", - "format": "time_series", - "instant": true, - "interval": "", - "legendFormat": "{{ priority_class }}", - "range": false, - "refId": "A" - } - ], - "title": "Priority Class", - "type": "stat" + "gridPos": { + "h": 2, + "w": 12, + "x": 0, + "y": 1 }, - { - "datasource": { - "type": "prometheus", - "uid": "${datasource}" + "id": 2, + "options": { + "colorMode": "none", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "mean" + ], + "fields": "", + "values": false }, - "description": "Panel only works when a single pod is selected.", - "fieldConfig": { - "defaults": { - "color": { - "mode": 
"thresholds" - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - } - ] - }, - "unit": "none" + "showPercentChange": false, + "textMode": "name", + "wideLayout": true + }, + "pluginVersion": "11.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "Burstable" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "red", - "mode": "fixed" - } - } - ] - }, + "editorMode": "code", + "exemplar": false, + "expr": "kube_pod_info{namespace=\"$namespace\", pod=\"$pod\", cluster=\"$cluster\"}", + "instant": true, + "interval": "", + "legendFormat": "{{ created_by_kind }}: {{ created_by_name }}", + "refId": "A" + } + ], + "title": "Created by", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "description": "Panel only works when a single pod is selected.", + "fieldConfig": { + "defaults": { + "links": [ { - "matcher": { - "id": "byName", - "options": "BestEffort" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "orange", - "mode": "fixed" - } - } - ] + "title": "", + "url": "/d/k8s_views_nodes/kubernetes-views-nodes?var-datasource=${datasource}&var-node=${__data.fields.node}" } - ] + ], + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "rgb(255, 255, 255)", + "value": null + } + ] + }, + "unit": "none" }, - "gridPos": { - "h": 2, - "w": 7, - "x": 5, - "y": 3 - }, - "id": 53, - "options": { - "colorMode": "value", - "graphMode": "none", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [], - "fields": "", - "values": false - }, - "showPercentChange": false, - "textMode": "name", - "wideLayout": true - }, - "pluginVersion": "10.4.1", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${datasource}" - }, - "editorMode": "code", 
- "exemplar": false, - "expr": "kube_pod_status_qos_class{namespace=\"$namespace\", pod=\"$pod\", cluster=\"$cluster\"} > 0", - "instant": true, - "interval": "", - "legendFormat": "{{ qos_class }}", - "refId": "A" - } - ], - "title": "QOS Class", - "type": "stat" + "overrides": [] }, - { - "datasource": { - "type": "prometheus", - "uid": "${datasource}" - }, - "description": "Panel only works when a single pod is selected.", - "fieldConfig": { - "defaults": { - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "text", - "value": null - }, - { - "color": "red", - "value": 1 - } - ] - }, - "unit": "none" - }, - "overrides": [] - }, - "gridPos": { - "h": 2, - "w": 6, - "x": 12, - "y": 3 - }, - "id": 56, - "options": { - "colorMode": "value", - "graphMode": "none", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [], - "fields": "", - "values": false - }, - "showPercentChange": false, - "textMode": "name", - "wideLayout": true - }, - "pluginVersion": "10.4.1", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${datasource}" - }, - "editorMode": "code", - "exemplar": false, - "expr": "kube_pod_container_status_last_terminated_reason{namespace=\"$namespace\", pod=\"$pod\", cluster=\"$cluster\"}", - "instant": true, - "interval": "", - "legendFormat": "{{ reason }}", - "refId": "A" - } - ], - "title": "Last Terminated Reason", - "type": "stat" + "gridPos": { + "h": 2, + "w": 6, + "x": 12, + "y": 1 }, - { - "datasource": { - "type": "prometheus", - "uid": "${datasource}" + "id": 33, + "options": { + "colorMode": "none", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "mean" + ], + "fields": "", + "values": false }, - "description": "Panel only works when a single pod is selected.", - "fieldConfig": { - "defaults": { - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "text", - "value": null - 
}, - { - "color": "red", - "value": 1 - }, - { - "color": "#EAB839", - "value": 2 - } - ] - }, - "unit": "none" - }, - "overrides": [] - }, - "gridPos": { - "h": 2, - "w": 6, - "x": 18, - "y": 3 - }, - "id": 57, - "options": { - "colorMode": "value", - "graphMode": "none", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [], - "fields": "", - "values": true - }, - "showPercentChange": false, - "textMode": "value", - "wideLayout": true - }, - "pluginVersion": "10.4.1", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${datasource}" - }, - "editorMode": "code", - "exemplar": false, - "expr": "kube_pod_container_status_last_terminated_exitcode{namespace=\"$namespace\", pod=\"$pod\", cluster=\"$cluster\"}", - "instant": true, - "interval": "", - "legendFormat": "__auto", - "range": false, - "refId": "A" - } - ], - "title": "Last Terminated Exit Code", - "type": "stat" + "showPercentChange": false, + "textMode": "name", + "wideLayout": true }, - { - "collapsed": false, - "datasource": { - "type": "datasource", - "uid": "grafana" + "pluginVersion": "11.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "exemplar": false, + "expr": "kube_pod_info{namespace=\"$namespace\", pod=\"$pod\", cluster=\"$cluster\"}", + "instant": true, + "interval": "", + "legendFormat": "{{ node }}", + "refId": "A" + } + ], + "title": "Running on", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "description": "Panel only works when a single pod is selected.", + "fieldConfig": { + "defaults": { + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "rgb(255, 255, 255)", + "value": null + } + ] + }, + "unit": "none" }, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 5 + "overrides": [] + }, + "gridPos": { + "h": 2, + "w": 6, + "x": 18, + "y": 1 + }, + "id": 41, + "options": { + 
"colorMode": "none", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "mean" + ], + "fields": "", + "values": false }, - "id": 47, - "panels": [], - "targets": [ - { - "datasource": { - "type": "datasource", - "uid": "grafana" - }, - "refId": "A" - } - ], - "title": "Resources", - "type": "row" + "showPercentChange": false, + "textMode": "name", + "wideLayout": true }, - { - "datasource": { - "type": "prometheus", - "uid": "${datasource}" + "pluginVersion": "11.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "exemplar": false, + "expr": "kube_pod_info{namespace=\"$namespace\", pod=\"$pod\", cluster=\"$cluster\"}", + "instant": true, + "interval": "", + "legendFormat": "{{ pod_ip }}", + "refId": "A" + } + ], + "title": "Pod IP", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "description": "Panel only works when a single pod is selected.", + "fieldConfig": { + "defaults": { + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "rgb(255, 255, 255)", + "value": null + } + ] + }, + "unit": "none" }, - "fieldConfig": { - "defaults": { - "color": { - "fixedColor": "blue", - "mode": "fixed" - }, - "decimals": 2, - "mappings": [], - "max": 1, - "min": 0, - "thresholds": { - "mode": "percentage", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "#EAB839", - "value": 60 - }, - { - "color": "red", - "value": 75 - } - ] - }, - "unit": "percentunit" - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 3, - "x": 0, - "y": 6 - }, - "id": 39, - "options": { - "minVizHeight": 75, - "minVizWidth": 75, - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "last" - ], - "fields": "", - "values": false - }, - "showThresholdLabels": false, - "showThresholdMarkers": true, - "sizing": "auto" - }, - "pluginVersion": "10.4.1", - 
"targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${datasource}" - }, - "editorMode": "code", - "exemplar": false, - "expr": "sum(rate(container_cpu_usage_seconds_total{namespace=\"$namespace\", pod=~\"$pod\", image!=\"\", cluster=\"$cluster\"}[$__rate_interval])) / sum(kube_pod_container_resource_requests{namespace=\"$namespace\", pod=~\"$pod\", resource=\"cpu\", job=~\"$job\", cluster=\"$cluster\"})", - "instant": true, - "interval": "$resolution", - "legendFormat": "Requests", - "refId": "A" - } - ], - "title": "Total pod CPU Requests usage", - "type": "gauge" + "overrides": [] }, - { - "datasource": { - "type": "prometheus", - "uid": "${datasource}" + "gridPos": { + "h": 2, + "w": 5, + "x": 0, + "y": 3 + }, + "id": 52, + "options": { + "colorMode": "none", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "mean" + ], + "fields": "", + "values": false }, - "fieldConfig": { - "defaults": { - "decimals": 2, - "mappings": [], - "max": 1, - "min": 0, - "thresholds": { - "mode": "percentage", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "#EAB839", - "value": 60 - }, - { - "color": "red", - "value": 75 - } - ] - }, - "unit": "percentunit" - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 3, - "x": 3, - "y": 6 - }, - "id": 48, - "options": { - "minVizHeight": 75, - "minVizWidth": 75, - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "last" - ], - "fields": "", - "values": false - }, - "showThresholdLabels": false, - "showThresholdMarkers": true, - "sizing": "auto" - }, - "pluginVersion": "10.4.1", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${datasource}" - }, - "editorMode": "code", - "exemplar": false, - "expr": "sum(rate(container_cpu_usage_seconds_total{namespace=\"$namespace\", pod=~\"$pod\", image!=\"\", cluster=\"$cluster\"}[$__rate_interval])) / sum(kube_pod_container_resource_limits{namespace=\"$namespace\", 
pod=~\"$pod\", resource=\"cpu\", job=~\"$job\", cluster=\"$cluster\"})", - "instant": true, - "interval": "$resolution", - "legendFormat": "Limits", - "refId": "A" - } - ], - "title": "Total pod CPU Limits usage", - "type": "gauge" + "showPercentChange": false, + "textMode": "name", + "wideLayout": true }, - { - "datasource": { - "type": "prometheus", - "uid": "${datasource}" + "pluginVersion": "11.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "exemplar": false, + "expr": "kube_pod_info{namespace=\"$namespace\", pod=\"$pod\", priority_class!=\"\", cluster=\"$cluster\"}", + "format": "time_series", + "instant": true, + "interval": "", + "legendFormat": "{{ priority_class }}", + "range": false, + "refId": "A" + } + ], + "title": "Priority Class", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "description": "Panel only works when a single pod is selected.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "none" }, - "fieldConfig": { - "defaults": { - "color": { - "fixedColor": "blue", - "mode": "fixed" - }, - "decimals": 2, - "mappings": [], - "max": 1, - "min": 0, - "thresholds": { - "mode": "percentage", - "steps": [ - { - "color": "blue", - "value": null - }, - { - "color": "#EAB839", - "value": 80 - }, - { - "color": "red", - "value": 99 - } - ] - }, - "unit": "percentunit" - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 3, - "x": 6, - "y": 6 - }, - "id": 40, - "options": { - "minVizHeight": 75, - "minVizWidth": 75, - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "last" - ], - "fields": "", - "values": false - }, - "showThresholdLabels": false, - "showThresholdMarkers": true, - "sizing": "auto" - }, - "pluginVersion": "10.4.1", - "targets": [ + 
"overrides": [ { - "datasource": { - "type": "prometheus", - "uid": "${datasource}" + "matcher": { + "id": "byName", + "options": "Burstable" }, - "editorMode": "code", - "exemplar": false, - "expr": "sum(container_memory_working_set_bytes{namespace=\"$namespace\", pod=~\"$pod\", image!=\"\", cluster=\"$cluster\"}) / sum(kube_pod_container_resource_requests{namespace=\"$namespace\", pod=~\"$pod\", resource=\"memory\", job=~\"$job\", cluster=\"$cluster\"})", - "instant": true, - "interval": "$resolution", - "legendFormat": "Requests", - "refId": "A" - } - ], - "title": "Total pod RAM Requests usage", - "type": "gauge" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${datasource}" - }, - "fieldConfig": { - "defaults": { - "decimals": 2, - "mappings": [], - "max": 1, - "min": 0, - "thresholds": { - "mode": "percentage", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "#EAB839", - "value": 60 - }, - { - "color": "red", - "value": 75 + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "red", + "mode": "fixed" } - ] - }, - "unit": "percentunit" - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 3, - "x": 9, - "y": 6 - }, - "id": 49, - "options": { - "minVizHeight": 75, - "minVizWidth": 75, - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "last" - ], - "fields": "", - "values": false - }, - "showThresholdLabels": false, - "showThresholdMarkers": true, - "sizing": "auto" - }, - "pluginVersion": "10.4.1", - "targets": [ + } + ] + }, { - "datasource": { - "type": "prometheus", - "uid": "${datasource}" + "matcher": { + "id": "byName", + "options": "BestEffort" }, - "editorMode": "code", - "exemplar": false, - "expr": "sum(container_memory_working_set_bytes{namespace=\"$namespace\", pod=~\"$pod\", image!=\"\", cluster=\"$cluster\"}) / sum(kube_pod_container_resource_limits{namespace=\"$namespace\", pod=~\"$pod\", resource=\"memory\", job=~\"$job\", cluster=\"$cluster\"}) ", - "instant": true, - 
"interval": "$resolution", - "legendFormat": "Limits", - "refId": "B" + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "orange", + "mode": "fixed" + } + } + ] } - ], - "title": "Total pod RAM Limits usage", - "type": "gauge" + ] }, - { - "datasource": { - "type": "prometheus", - "uid": "${datasource}" - }, - "fieldConfig": { - "defaults": { - "custom": { - "align": "auto", - "cellOptions": { - "type": "auto" - }, - "filterable": false, - "inspect": false, - "minWidth": 100 - }, - "decimals": 4, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "blue", - "value": null - } - ] - }, - "unit": "none" + "gridPos": { + "h": 2, + "w": 7, + "x": 5, + "y": 3 + }, + "id": 53, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "name", + "wideLayout": true + }, + "pluginVersion": "11.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "Memory Requests" + "editorMode": "code", + "exemplar": false, + "expr": "kube_pod_status_qos_class{namespace=\"$namespace\", pod=\"$pod\", cluster=\"$cluster\"} > 0", + "instant": true, + "interval": "", + "legendFormat": "{{ qos_class }}", + "refId": "A" + } + ], + "title": "QOS Class", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "description": "Panel only works when a single pod is selected.", + "fieldConfig": { + "defaults": { + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "text", + "value": null }, - "properties": [ - { - "id": "unit", - "value": "bytes" - }, - { - "id": "decimals", - "value": 2 - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Memory Limits" + { + "color": "red", + "value": 1 + 
} + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 2, + "w": 6, + "x": 12, + "y": 3 + }, + "id": 56, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "name", + "wideLayout": true + }, + "pluginVersion": "11.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "exemplar": false, + "expr": "kube_pod_container_status_last_terminated_reason{namespace=\"$namespace\", pod=\"$pod\", cluster=\"$cluster\"}", + "instant": true, + "interval": "", + "legendFormat": "{{ reason }}", + "refId": "A" + } + ], + "title": "Last Terminated Reason", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "description": "Panel only works when a single pod is selected.", + "fieldConfig": { + "defaults": { + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "text", + "value": null }, - "properties": [ - { - "id": "unit", - "value": "bytes" - }, - { - "id": "decimals", - "value": 2 - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Memory Used" + { + "color": "red", + "value": 1 }, - "properties": [ - { - "id": "unit", - "value": "bytes" - }, - { - "id": "decimals", - "value": 2 - } - ] - } - ] + { + "color": "#EAB839", + "value": 2 + } + ] + }, + "unit": "none" }, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 6 - }, - "id": 38, - "options": { - "cellHeight": "sm", - "footer": { - "countRows": false, - "fields": "", - "reducer": [ - "sum" - ], - "show": false - }, - "showHeader": true, - "sortBy": [] - }, - "pluginVersion": "10.4.1", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${datasource}" - }, - "editorMode": "code", - "exemplar": false, - "expr": 
"sum(kube_pod_container_resource_requests{namespace=\"$namespace\", pod=~\"$pod\", resource=\"cpu\", job=~\"$job\", cluster=\"$cluster\"}) by (container)", - "format": "table", - "instant": true, - "interval": "", - "intervalFactor": 1, - "legendFormat": "", - "refId": "A" + "overrides": [] + }, + "gridPos": { + "h": 2, + "w": 6, + "x": 18, + "y": 3 + }, + "id": 57, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [], + "fields": "", + "values": true + }, + "showPercentChange": false, + "textMode": "value", + "wideLayout": true + }, + "pluginVersion": "11.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" }, - { - "datasource": { - "type": "prometheus", - "uid": "${datasource}" - }, - "editorMode": "code", - "exemplar": false, - "expr": "sum(kube_pod_container_resource_limits{namespace=\"$namespace\", pod=~\"$pod\", resource=\"cpu\", job=~\"$job\", cluster=\"$cluster\"}) by (container)", - "format": "table", - "instant": true, - "interval": "", - "intervalFactor": 1, - "legendFormat": "", - "refId": "B" + "editorMode": "code", + "exemplar": false, + "expr": "kube_pod_container_status_last_terminated_exitcode{namespace=\"$namespace\", pod=\"$pod\", cluster=\"$cluster\"}", + "instant": true, + "interval": "", + "legendFormat": "__auto", + "range": false, + "refId": "A" + } + ], + "title": "Last Terminated Exit Code", + "type": "stat" + }, + { + "collapsed": false, + "datasource": { + "type": "datasource", + "uid": "grafana" + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 5 + }, + "id": 47, + "panels": [], + "targets": [ + { + "datasource": { + "type": "datasource", + "uid": "grafana" }, - { - "datasource": { - "type": "prometheus", - "uid": "${datasource}" - }, - "editorMode": "code", - "exemplar": false, - "expr": "sum(kube_pod_container_resource_requests{namespace=\"$namespace\", pod=~\"$pod\", resource=\"memory\", 
job=~\"$job\", cluster=\"$cluster\"}) by (container)", - "format": "table", - "instant": true, - "interval": "", - "legendFormat": "", - "refId": "C" + "refId": "A" + } + ], + "title": "Resources", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "fieldConfig": { + "defaults": { + "color": { + "fixedColor": "blue", + "mode": "fixed" + }, + "decimals": 2, + "mappings": [], + "max": 1, + "min": 0, + "thresholds": { + "mode": "percentage", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "#EAB839", + "value": 60 + }, + { + "color": "red", + "value": 75 + } + ] }, - { - "datasource": { - "type": "prometheus", - "uid": "${datasource}" - }, - "editorMode": "code", - "exemplar": false, - "expr": "sum(kube_pod_container_resource_limits{namespace=\"$namespace\", pod=~\"$pod\", resource=\"memory\", job=~\"$job\", cluster=\"$cluster\"}) by (container)", - "format": "table", - "instant": true, - "interval": "", - "legendFormat": "", - "refId": "D" + "unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 3, + "x": 0, + "y": 6 + }, + "id": 39, + "options": { + "minVizHeight": 75, + "minVizWidth": 75, + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "showThresholdLabels": false, + "showThresholdMarkers": true, + "sizing": "auto" + }, + "pluginVersion": "11.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" }, - { - "datasource": { - "type": "prometheus", - "uid": "${datasource}" - }, - "editorMode": "code", - "exemplar": false, - "expr": "sum(rate(container_cpu_usage_seconds_total{namespace=\"$namespace\", pod=~\"$pod\", image!=\"\", container!=\"\", cluster=\"$cluster\"}[$__rate_interval])) by (container)", - "format": "table", - "hide": false, - "instant": true, - "legendFormat": "__auto", - "range": false, - "refId": "E" + "editorMode": "code", + "exemplar": false, + 
"expr": "sum(rate(container_cpu_usage_seconds_total{namespace=\"$namespace\", pod=~\"$pod\", image!=\"\", cluster=\"$cluster\"}[$__rate_interval])) / sum(kube_pod_container_resource_requests{namespace=\"$namespace\", pod=~\"$pod\", resource=\"cpu\", job=~\"$job\", cluster=\"$cluster\"})", + "instant": true, + "interval": "$resolution", + "legendFormat": "Requests", + "refId": "A" + } + ], + "title": "Total pod CPU Requests usage", + "type": "gauge" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "fieldConfig": { + "defaults": { + "decimals": 2, + "mappings": [], + "max": 1, + "min": 0, + "thresholds": { + "mode": "percentage", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "#EAB839", + "value": 60 + }, + { + "color": "red", + "value": 75 + } + ] }, - { - "datasource": { - "type": "prometheus", - "uid": "${datasource}" - }, - "editorMode": "code", - "exemplar": false, - "expr": "sum(container_memory_working_set_bytes{namespace=\"$namespace\", pod=~\"$pod\", image!=\"\", container!=\"\", cluster=\"$cluster\"}) by (container)", - "format": "table", - "hide": false, - "instant": true, - "range": false, - "refId": "F" - } - ], - "title": "Resources by container", - "transformations": [ - { - "id": "seriesToColumns", - "options": { - "byField": "container" - } + "unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 3, + "x": 3, + "y": 6 + }, + "id": 48, + "options": { + "minVizHeight": 75, + "minVizWidth": 75, + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "showThresholdLabels": false, + "showThresholdMarkers": true, + "sizing": "auto" + }, + "pluginVersion": "11.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" }, - { - "id": "organize", - "options": { - "excludeByName": { - "Time": true, - "Time 1": true, - "Time 2": true, - "Time 4": true, - "__name__": true, - "__name__ 
1": true, - "__name__ 2": true, - "__name__ 3": true, - "__name__ 4": true, - "container": false, - "endpoint": true, - "endpoint 2": true, - "endpoint 3": true, - "endpoint 4": true, - "instance": true, - "instance 2": true, - "instance 3": true, - "instance 4": true, - "job": true, - "job 2": true, - "job 3": true, - "job 4": true, - "namespace": true, - "namespace 2": true, - "namespace 3": true, - "namespace 4": true, - "node": true, - "node 2": true, - "node 3": true, - "node 4": true, - "pod": true, - "pod 2": true, - "pod 3": true, - "pod 4": true, - "resource 1": true, - "resource 2": true, - "resource 3": true, - "resource 4": true, - "service": true, - "service 2": true, - "service 3": true, - "service 4": true, - "uid 1": true, - "uid 2": true, - "uid 3": true, - "uid 4": true, - "unit 1": true, - "unit 2": true, - "unit 3": true, - "unit 4": true + "editorMode": "code", + "exemplar": false, + "expr": "sum(rate(container_cpu_usage_seconds_total{namespace=\"$namespace\", pod=~\"$pod\", image!=\"\", cluster=\"$cluster\"}[$__rate_interval])) / sum(kube_pod_container_resource_limits{namespace=\"$namespace\", pod=~\"$pod\", resource=\"cpu\", job=~\"$job\", cluster=\"$cluster\"})", + "instant": true, + "interval": "$resolution", + "legendFormat": "Limits", + "refId": "A" + } + ], + "title": "Total pod CPU Limits usage", + "type": "gauge" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "fieldConfig": { + "defaults": { + "color": { + "fixedColor": "blue", + "mode": "fixed" + }, + "decimals": 2, + "mappings": [], + "max": 1, + "min": 0, + "thresholds": { + "mode": "percentage", + "steps": [ + { + "color": "blue", + "value": null }, - "indexByName": { - "Time 1": 7, - "Time 2": 8, - "Time 3": 9, - "Time 4": 10, - "Time 5": 11, - "Time 6": 12, - "Value #A": 2, - "Value #B": 3, - "Value #C": 5, - "Value #D": 6, - "Value #E": 1, - "Value #F": 4, - "container": 0 + { + "color": "#EAB839", + "value": 80 }, - "renameByName": { - "Value 
#A": "CPU Requests", - "Value #B": "CPU Limits", - "Value #C": "Memory Requests", - "Value #D": "Memory Limits", - "Value #E": "CPU Used", - "Value #F": "Memory Used", - "container": "Container" + { + "color": "red", + "value": 99 } - } - } - ], - "type": "table" + ] + }, + "unit": "percentunit" + }, + "overrides": [] }, - { - "datasource": { - "type": "prometheus", - "uid": "${datasource}" + "gridPos": { + "h": 8, + "w": 3, + "x": 6, + "y": 6 + }, + "id": 40, + "options": { + "minVizHeight": 75, + "minVizWidth": 75, + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "Percent", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 25, - "gradientMode": "opacity", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "smooth", - "lineWidth": 2, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" + "showThresholdLabels": false, + "showThresholdMarkers": true, + "sizing": "auto" + }, + "pluginVersion": "11.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum(container_memory_working_set_bytes{namespace=\"$namespace\", pod=~\"$pod\", image!=\"\", cluster=\"$cluster\"}) / sum(kube_pod_container_resource_requests{namespace=\"$namespace\", pod=~\"$pod\", resource=\"memory\", job=~\"$job\", cluster=\"$cluster\"})", + "instant": true, + "interval": "$resolution", + "legendFormat": "Requests", + "refId": "A" + } + ], + "title": "Total pod RAM Requests usage", + "type": "gauge" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "fieldConfig": { + "defaults": { + "decimals": 2, + "mappings": 
[], + "max": 1, + "min": 0, + "thresholds": { + "mode": "percentage", + "steps": [ + { + "color": "green", + "value": null }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" + { + "color": "#EAB839", + "value": 60 }, - "thresholdsStyle": { - "mode": "area" + { + "color": "red", + "value": 75 } - }, - "mappings": [], - "max": 1, - "min": 0, - "thresholds": { - "mode": "percentage", - "steps": [ - { - "color": "red", - "value": null - }, - { - "color": "yellow", - "value": 20 - }, - { - "color": "green", - "value": 30 - }, - { - "color": "yellow", - "value": 70 - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "percentunit" - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 14 - }, - "id": 50, - "options": { - "legend": { - "calcs": [], - "displayMode": "table", - "placement": "right", - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } + ] + }, + "unit": "percentunit" }, - "pluginVersion": "8.3.3", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${datasource}" - }, - "editorMode": "code", - "exemplar": true, - "expr": "sum(rate(container_cpu_usage_seconds_total{namespace=\"$namespace\", pod=~\"$pod\", image!=\"\", cluster=\"$cluster\"}[$__rate_interval])) by (container) / sum(kube_pod_container_resource_requests{namespace=\"$namespace\", pod=~\"$pod\", resource=\"cpu\", job=~\"$job\", cluster=\"$cluster\"}) by (container)", - "interval": "$resolution", - "legendFormat": "{{ container }} REQUESTS", - "range": true, - "refId": "A" + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 3, + "x": 9, + "y": 6 + }, + "id": 49, + "options": { + "minVizHeight": 75, + "minVizWidth": 75, + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "showThresholdLabels": false, + "showThresholdMarkers": true, + "sizing": "auto" + }, + "pluginVersion": "11.0.0", + "targets": [ + { + 
"datasource": { + "type": "prometheus", + "uid": "${datasource}" }, - { - "datasource": { - "type": "prometheus", - "uid": "${datasource}" - }, - "editorMode": "code", - "expr": "sum(rate(container_cpu_usage_seconds_total{namespace=\"$namespace\", pod=~\"$pod\", image!=\"\", cluster=\"$cluster\"}[$__rate_interval])) by (container) / sum(kube_pod_container_resource_limits{namespace=\"$namespace\", pod=~\"$pod\", resource=\"cpu\", job=~\"$job\", cluster=\"$cluster\"}) by (container)", - "hide": false, - "legendFormat": "{{ container }} LIMITS", - "range": true, - "refId": "B" - } - ], - "title": "CPU Usage / Requests & Limits by container", - "type": "timeseries" + "editorMode": "code", + "exemplar": false, + "expr": "sum(container_memory_working_set_bytes{namespace=\"$namespace\", pod=~\"$pod\", image!=\"\", cluster=\"$cluster\"}) / sum(kube_pod_container_resource_limits{namespace=\"$namespace\", pod=~\"$pod\", resource=\"memory\", job=~\"$job\", cluster=\"$cluster\"}) ", + "instant": true, + "interval": "$resolution", + "legendFormat": "Limits", + "refId": "B" + } + ], + "title": "Total pod RAM Limits usage", + "type": "gauge" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" }, - { - "datasource": { - "type": "prometheus", - "uid": "${datasource}" + "fieldConfig": { + "defaults": { + "custom": { + "align": "auto", + "cellOptions": { + "type": "auto" + }, + "filterable": false, + "inspect": false, + "minWidth": 100 + }, + "decimals": 4, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "blue", + "value": null + } + ] + }, + "unit": "none" }, - "fieldConfig": { - "defaults": { - "color": { - "fixedColor": "blue", - "mode": "thresholds" + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Memory Requests" }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "Percent", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": 
"line", - "fillOpacity": 25, - "gradientMode": "opacity", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "smooth", - "lineStyle": { - "fill": "solid" - }, - "lineWidth": 2, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" + "properties": [ + { + "id": "unit", + "value": "bytes" }, - "thresholdsStyle": { - "mode": "area" - } - }, - "mappings": [], - "max": 1, - "min": 0, - "thresholds": { - "mode": "percentage", - "steps": [ - { - "color": "red", - "value": null - }, - { - "color": "yellow", - "value": 20 - }, - { - "color": "green", - "value": 30 - }, - { - "color": "#EAB839", - "value": 70 - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "percentunit" - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 14 - }, - "id": 30, - "options": { - "legend": { - "calcs": [], - "displayMode": "table", - "placement": "right", - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "pluginVersion": "8.3.3", - "targets": [ + { + "id": "decimals", + "value": 2 + } + ] + }, { - "datasource": { - "type": "prometheus", - "uid": "${datasource}" + "matcher": { + "id": "byName", + "options": "Memory Limits" }, - "editorMode": "code", - "exemplar": true, - "expr": "sum(container_memory_working_set_bytes{namespace=\"$namespace\", pod=~\"$pod\", image!=\"\", cluster=\"$cluster\"}) by (container) / sum(kube_pod_container_resource_requests{namespace=\"$namespace\", pod=~\"$pod\", resource=\"memory\", job=~\"$job\", cluster=\"$cluster\"}) by (container)", - "interval": "", - "legendFormat": "{{ container }} REQUESTS", - "range": true, - "refId": "A" + "properties": [ + { + "id": "unit", + "value": "bytes" + }, + { + "id": "decimals", + "value": 2 + } + ] }, { - "datasource": { - "type": "prometheus", - "uid": "${datasource}" + 
"matcher": { + "id": "byName", + "options": "Memory Used" }, - "editorMode": "code", - "expr": "sum(container_memory_working_set_bytes{namespace=\"$namespace\", pod=~\"$pod\", image!=\"\", cluster=\"$cluster\"}) by (container) / sum(kube_pod_container_resource_limits{namespace=\"$namespace\", pod=~\"$pod\", resource=\"memory\", job=~\"$job\", cluster=\"$cluster\"}) by (container)", - "hide": false, - "legendFormat": "{{ container }} LIMITS", - "range": true, - "refId": "B" + "properties": [ + { + "id": "unit", + "value": "bytes" + }, + { + "id": "decimals", + "value": 2 + } + ] } - ], - "title": "Memory Usage / Requests & Limits by container", - "type": "timeseries" + ] }, - { - "datasource": { - "type": "prometheus", - "uid": "${datasource}" + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 6 + }, + "id": 38, + "options": { + "cellHeight": "sm", + "footer": { + "countRows": false, + "fields": "", + "reducer": [ + "sum" + ], + "show": false }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "CPU Cores", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 25, - "gradientMode": "opacity", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "smooth", - "lineWidth": 2, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "decimals": 4, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "none" + "showHeader": true, + "sortBy": [] + }, + "pluginVersion": "11.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": 
"${datasource}" }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "limit" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#F2495C", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - } - ] - } - ] + "editorMode": "code", + "exemplar": false, + "expr": "sum(kube_pod_container_resource_requests{namespace=\"$namespace\", pod=~\"$pod\", resource=\"cpu\", job=~\"$job\", cluster=\"$cluster\"}) by (container)", + "format": "table", + "instant": true, + "interval": "", + "intervalFactor": 1, + "legendFormat": "", + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum(kube_pod_container_resource_limits{namespace=\"$namespace\", pod=~\"$pod\", resource=\"cpu\", job=~\"$job\", cluster=\"$cluster\"}) by (container)", + "format": "table", + "instant": true, + "interval": "", + "intervalFactor": 1, + "legendFormat": "", + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum(kube_pod_container_resource_requests{namespace=\"$namespace\", pod=~\"$pod\", resource=\"memory\", job=~\"$job\", cluster=\"$cluster\"}) by (container)", + "format": "table", + "instant": true, + "interval": "", + "legendFormat": "", + "refId": "C" }, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 22 - }, - "id": 29, - "options": { - "legend": { - "calcs": [ - "min", - "max", - "mean" - ], - "displayMode": "table", - "placement": "right", - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum(kube_pod_container_resource_limits{namespace=\"$namespace\", pod=~\"$pod\", resource=\"memory\", job=~\"$job\", cluster=\"$cluster\"}) by (container)", + "format": "table", + "instant": true, 
+ "interval": "", + "legendFormat": "", + "refId": "D" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum(rate(container_cpu_usage_seconds_total{namespace=\"$namespace\", pod=~\"$pod\", image!=\"\", container!=\"\", cluster=\"$cluster\"}[$__rate_interval])) by (container)", + "format": "table", + "hide": false, + "instant": true, + "legendFormat": "__auto", + "range": false, + "refId": "E" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum(container_memory_working_set_bytes{namespace=\"$namespace\", pod=~\"$pod\", image!=\"\", container!=\"\", cluster=\"$cluster\"}) by (container)", + "format": "table", + "hide": false, + "instant": true, + "range": false, + "refId": "F" + } + ], + "title": "Resources by container", + "transformations": [ + { + "id": "seriesToColumns", + "options": { + "byField": "container" } }, - "pluginVersion": "8.3.3", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${datasource}" - }, - "editorMode": "code", - "exemplar": true, - "expr": "sum(rate(container_cpu_usage_seconds_total{namespace=\"$namespace\", pod=~\"$pod\", image!=\"\", container!=\"\", cluster=\"$cluster\"}[$__rate_interval])) by (container)", - "interval": "$resolution", - "legendFormat": "{{ container }}", - "range": true, - "refId": "A" + { + "id": "organize", + "options": { + "excludeByName": { + "Time": true, + "Time 1": true, + "Time 2": true, + "Time 4": true, + "__name__": true, + "__name__ 1": true, + "__name__ 2": true, + "__name__ 3": true, + "__name__ 4": true, + "container": false, + "endpoint": true, + "endpoint 2": true, + "endpoint 3": true, + "endpoint 4": true, + "instance": true, + "instance 2": true, + "instance 3": true, + "instance 4": true, + "job": true, + "job 2": true, + "job 3": true, + "job 4": true, + "namespace": true, + "namespace 2": true, + 
"namespace 3": true, + "namespace 4": true, + "node": true, + "node 2": true, + "node 3": true, + "node 4": true, + "pod": true, + "pod 2": true, + "pod 3": true, + "pod 4": true, + "resource 1": true, + "resource 2": true, + "resource 3": true, + "resource 4": true, + "service": true, + "service 2": true, + "service 3": true, + "service 4": true, + "uid 1": true, + "uid 2": true, + "uid 3": true, + "uid 4": true, + "unit 1": true, + "unit 2": true, + "unit 3": true, + "unit 4": true + }, + "indexByName": { + "Time 1": 7, + "Time 2": 8, + "Time 3": 9, + "Time 4": 10, + "Time 5": 11, + "Time 6": 12, + "Value #A": 2, + "Value #B": 3, + "Value #C": 5, + "Value #D": 6, + "Value #E": 1, + "Value #F": 4, + "container": 0 + }, + "renameByName": { + "Value #A": "CPU Requests", + "Value #B": "CPU Limits", + "Value #C": "Memory Requests", + "Value #D": "Memory Limits", + "Value #E": "CPU Used", + "Value #F": "Memory Used", + "container": "Container" + } } - ], - "title": "CPU Usage by container", - "type": "timeseries" + } + ], + "type": "table" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" }, - { - "datasource": { - "type": "prometheus", - "uid": "${datasource}" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "Bytes", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 25, - "gradientMode": "opacity", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "Percent", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 25, + "gradientMode": "opacity", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + 
"insertNulls": false, + "lineInterpolation": "smooth", + "lineWidth": 2, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "area" + } + }, + "mappings": [], + "max": 1, + "min": 0, + "thresholds": { + "mode": "percentage", + "steps": [ + { + "color": "red", + "value": null }, - "insertNulls": false, - "lineInterpolation": "smooth", - "lineStyle": { - "fill": "solid" + { + "color": "yellow", + "value": 20 }, - "lineWidth": 2, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" + { + "color": "green", + "value": 30 }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" + { + "color": "yellow", + "value": 70 }, - "thresholdsStyle": { - "mode": "off" + { + "color": "red", + "value": 80 } - }, - "decimals": 2, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "bytes" - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 22 - }, - "id": 51, - "options": { - "legend": { - "calcs": [ - "min", - "max", - "mean" - ], - "displayMode": "table", - "placement": "right", - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } + ] + }, + "unit": "percentunit" }, - "pluginVersion": "8.3.3", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${datasource}" - }, - "editorMode": "code", - "exemplar": true, - "expr": "sum(container_memory_working_set_bytes{namespace=\"$namespace\", pod=~\"$pod\", image!=\"\", container!=\"\", cluster=\"$cluster\"}) by (container)", - "interval": "", - "legendFormat": "{{ container }}", - "range": true, - "refId": "A" - } - ], - "title": "Memory Usage by container", - "type": "timeseries" + "overrides": [] }, - { - "datasource": { - "type": "prometheus", - 
"uid": "${datasource}" + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 14 + }, + "id": 50, + "options": { + "legend": { + "calcs": [], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "maxHeight": 600, + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "8.3.3", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "exemplar": true, + "expr": "sum(rate(container_cpu_usage_seconds_total{namespace=\"$namespace\", pod=~\"$pod\", image!=\"\", cluster=\"$cluster\"}[$__rate_interval])) by (container) / sum(kube_pod_container_resource_requests{namespace=\"$namespace\", pod=~\"$pod\", resource=\"cpu\", job=~\"$job\", cluster=\"$cluster\"}) by (container)", + "interval": "$resolution", + "legendFormat": "{{ container }} REQUESTS", + "range": true, + "refId": "A" }, - "description": "", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "SECONDS", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 25, - "gradientMode": "opacity", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "sum(rate(container_cpu_usage_seconds_total{namespace=\"$namespace\", pod=~\"$pod\", image!=\"\", cluster=\"$cluster\"}[$__rate_interval])) by (container) / sum(kube_pod_container_resource_limits{namespace=\"$namespace\", pod=~\"$pod\", resource=\"cpu\", job=~\"$job\", cluster=\"$cluster\"}) by (container)", + "hide": false, + "legendFormat": "{{ container }} LIMITS", + "range": true, + "refId": "B" + } + ], + "title": "CPU Usage / Requests & Limits by container", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + 
"fieldConfig": { + "defaults": { + "color": { + "fixedColor": "blue", + "mode": "thresholds" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "Percent", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 25, + "gradientMode": "opacity", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "smooth", + "lineStyle": { + "fill": "solid" + }, + "lineWidth": 2, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "area" + } + }, + "mappings": [], + "max": 1, + "min": 0, + "thresholds": { + "mode": "percentage", + "steps": [ + { + "color": "red", + "value": null }, - "insertNulls": false, - "lineInterpolation": "smooth", - "lineStyle": { - "fill": "solid" + { + "color": "yellow", + "value": 20 }, - "lineWidth": 2, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" + { + "color": "green", + "value": 30 }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" + { + "color": "#EAB839", + "value": 70 }, - "thresholdsStyle": { - "mode": "off" + { + "color": "red", + "value": 80 } - }, - "decimals": 2, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "s" - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 30 - }, - "id": 59, - "options": { - "legend": { - "calcs": [ - "min", - "max", - "mean" - ], - "displayMode": "table", - "placement": "right", - "showLegend": true, - "sortBy": "Max", - "sortDesc": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } + ] + }, + "unit": "percentunit" }, - "pluginVersion": "8.3.3", - "targets": [ - { - 
"datasource": { - "type": "prometheus", - "uid": "${datasource}" - }, - "editorMode": "code", - "exemplar": true, - "expr": "sum(rate(container_cpu_cfs_throttled_seconds_total{namespace=~\"$namespace\", pod=~\"$pod\", image!=\"\", container!=\"\", cluster=\"$cluster\"}[$__rate_interval])) by (container)", - "interval": "$resolution", - "legendFormat": "{{ container }}", - "range": true, - "refId": "A" - } - ], - "title": "CPU Throttled seconds by container", - "type": "timeseries" + "overrides": [] }, - { - "collapsed": false, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 38 - }, - "id": 62, - "panels": [], - "title": "Kubernetes", - "type": "row" + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 14 }, - { - "datasource": { - "type": "prometheus", - "uid": "${datasource}" + "id": 30, + "options": { + "legend": { + "calcs": [], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "maxHeight": 600, + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "8.3.3", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "exemplar": true, + "expr": "sum(container_memory_working_set_bytes{namespace=\"$namespace\", pod=~\"$pod\", image!=\"\", cluster=\"$cluster\"}) by (container) / sum(kube_pod_container_resource_requests{namespace=\"$namespace\", pod=~\"$pod\", resource=\"memory\", job=~\"$job\", cluster=\"$cluster\"}) by (container)", + "interval": "", + "legendFormat": "{{ container }} REQUESTS", + "range": true, + "refId": "A" }, - "fieldConfig": { - "defaults": { - "color": { - "fixedColor": "blue", - "mode": "thresholds" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "Percent", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 25, - "gradientMode": "opacity", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - 
"insertNulls": false, - "lineInterpolation": "smooth", - "lineStyle": { - "fill": "solid" - }, - "lineWidth": 2, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "sum(container_memory_working_set_bytes{namespace=\"$namespace\", pod=~\"$pod\", image!=\"\", cluster=\"$cluster\"}) by (container) / sum(kube_pod_container_resource_limits{namespace=\"$namespace\", pod=~\"$pod\", resource=\"memory\", job=~\"$job\", cluster=\"$cluster\"}) by (container)", + "hide": false, + "legendFormat": "{{ container }} LIMITS", + "range": true, + "refId": "B" + } + ], + "title": "Memory Usage / Requests & Limits by container", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "CPU Cores", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 25, + "gradientMode": "opacity", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "smooth", + "lineWidth": 2, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "decimals": 4, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" }, - "thresholdsStyle": { - "mode": "area" + { + "color": "red", + "value": 80 } - }, - "mappings": [], - "max": 1, - "min": 0, - "thresholds": { - "mode": "percentage", - "steps": [ - { - "color": "red", - "value": null - }, - { - "color": "yellow", - 
"value": 20 - }, - { - "color": "green", - "value": 30 - }, - { - "color": "#EAB839", - "value": 70 - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "none" - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 39 - }, - "id": 60, - "options": { - "legend": { - "calcs": [], - "displayMode": "table", - "placement": "right", - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } + ] + }, + "unit": "none" }, - "pluginVersion": "8.3.3", - "targets": [ + "overrides": [ { - "datasource": { - "type": "prometheus", - "uid": "${datasource}" + "matcher": { + "id": "byName", + "options": "limit" }, - "editorMode": "code", - "exemplar": true, - "expr": "sum(increase(container_oom_events_total{namespace=\"${namespace}\", pod=\"${pod}\", container!=\"\", cluster=\"$cluster\"}[$__rate_interval])) by (container)", - "interval": "", - "legendFormat": "{{ container }}", - "range": true, - "refId": "A" + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#F2495C", + "mode": "fixed" + } + }, + { + "id": "custom.fillOpacity", + "value": 0 + } + ] } - ], - "title": "OOM Events by container", - "type": "timeseries" + ] }, - { - "datasource": { - "type": "prometheus", - "uid": "${datasource}" - }, - "fieldConfig": { - "defaults": { - "color": { - "fixedColor": "blue", - "mode": "thresholds" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "Percent", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 25, - "gradientMode": "opacity", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "smooth", - "lineStyle": { - "fill": "solid" - }, - "lineWidth": 2, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" + "gridPos": { + "h": 8, + 
"w": 12, + "x": 0, + "y": 22 + }, + "id": 29, + "options": { + "legend": { + "calcs": [ + "min", + "max", + "mean" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "maxHeight": 600, + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "8.3.3", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "exemplar": true, + "expr": "sum(rate(container_cpu_usage_seconds_total{namespace=\"$namespace\", pod=~\"$pod\", image!=\"\", container!=\"\", cluster=\"$cluster\"}[$__rate_interval])) by (container)", + "interval": "$resolution", + "legendFormat": "{{ container }}", + "range": true, + "refId": "A" + } + ], + "title": "CPU Usage by container", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "Bytes", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 25, + "gradientMode": "opacity", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "smooth", + "lineStyle": { + "fill": "solid" + }, + "lineWidth": 2, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "decimals": 2, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" }, - "thresholdsStyle": { - "mode": "area" + { + "color": "red", + "value": 80 } - }, - "mappings": [], - "max": 1, - "min": 0, - "thresholds": { - "mode": "percentage", - "steps": [ - { - "color": "red", - "value": null - }, - { - "color": "yellow", - "value": 20 - }, - { - "color": 
"green", - "value": 30 - }, - { - "color": "#EAB839", - "value": 70 - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "none" - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 39 - }, - "id": 61, - "options": { - "legend": { - "calcs": [], - "displayMode": "table", - "placement": "right", - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } + ] + }, + "unit": "bytes" }, - "pluginVersion": "8.3.3", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${datasource}" - }, - "editorMode": "code", - "exemplar": true, - "expr": "sum(increase(kube_pod_container_status_restarts_total{namespace=~\"${namespace}\", pod=\"${pod}\", container!=\"\", job=~\"$job\", cluster=\"$cluster\"}[$__rate_interval])) by (container)", - "interval": "", - "legendFormat": "{{ container }}", - "range": true, - "refId": "A" - } - ], - "title": "Container Restarts by container", - "type": "timeseries" + "overrides": [] }, - { - "collapsed": false, - "datasource": { - "type": "datasource", - "uid": "grafana" - }, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 47 - }, - "id": 45, - "panels": [], - "targets": [ - { - "datasource": { - "type": "datasource", - "uid": "grafana" - }, - "refId": "A" - } - ], - "title": "Network", - "type": "row" + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 22 }, - { - "datasource": { - "type": "prometheus", - "uid": "${datasource}" + "id": 51, + "options": { + "legend": { + "calcs": [ + "min", + "max", + "mean" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "maxHeight": 600, + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "8.3.3", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "exemplar": true, + "expr": "sum(container_memory_working_set_bytes{namespace=\"$namespace\", pod=~\"$pod\", image!=\"\", container!=\"\", 
cluster=\"$cluster\"}) by (container)", + "interval": "", + "legendFormat": "{{ container }}", + "range": true, + "refId": "A" + } + ], + "title": "Memory Usage by container", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "SECONDS", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 25, + "gradientMode": "opacity", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "smooth", + "lineStyle": { + "fill": "solid" + }, + "lineWidth": 2, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "decimals": 2, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "s" }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 25, - "gradientMode": "opacity", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 30 + }, + "id": 59, + "options": { + "legend": { + "calcs": [ + "min", + "max", + "mean" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true, + "sortBy": "Max", + "sortDesc": true + }, + "tooltip": { + "maxHeight": 600, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "8.3.3", + 
"targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "exemplar": true, + "expr": "sum(rate(container_cpu_cfs_throttled_seconds_total{namespace=~\"$namespace\", pod=~\"$pod\", image!=\"\", container!=\"\", cluster=\"$cluster\"}[$__rate_interval])) by (container)", + "interval": "$resolution", + "legendFormat": "{{ container }}", + "range": true, + "refId": "A" + } + ], + "title": "CPU Throttled seconds by container", + "type": "timeseries" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 38 + }, + "id": 62, + "panels": [], + "title": "Kubernetes", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "fieldConfig": { + "defaults": { + "color": { + "fixedColor": "blue", + "mode": "thresholds" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "Percent", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 25, + "gradientMode": "opacity", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "smooth", + "lineStyle": { + "fill": "solid" + }, + "lineWidth": 2, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "area" + } + }, + "mappings": [], + "max": 1, + "min": 0, + "thresholds": { + "mode": "percentage", + "steps": [ + { + "color": "red" + }, + { + "color": "yellow", + "value": 20 }, - "insertNulls": false, - "lineInterpolation": "smooth", - "lineWidth": 2, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" + { + "color": "green", + "value": 30 }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" + { + "color": "#EAB839", + "value": 70 }, - 
"thresholdsStyle": { - "mode": "off" + { + "color": "red", + "value": 80 } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "bytes" - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 48 - }, - "id": 31, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "none" - } + ] + }, + "unit": "none" }, - "pluginVersion": "8.3.3", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${datasource}" - }, - "exemplar": true, - "expr": "sum(rate(container_network_receive_bytes_total{namespace=\"$namespace\", pod=~\"$pod\", cluster=\"$cluster\"}[$__rate_interval]))", - "interval": "$resolution", - "legendFormat": "Received", - "refId": "A" + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 39 + }, + "id": 60, + "options": { + "legend": { + "calcs": [], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "maxHeight": 600, + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "8.3.3", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" }, - { - "datasource": { - "type": "prometheus", - "uid": "${datasource}" - }, - "exemplar": true, - "expr": "- sum(rate(container_network_transmit_bytes_total{namespace=\"$namespace\", pod=~\"$pod\", cluster=\"$cluster\"}[$__rate_interval]))", - "interval": "$resolution", - "legendFormat": "Transmitted", - "refId": "B" - } - ], - "title": "Network - Bandwidth", - "type": "timeseries" + "editorMode": "code", + "exemplar": true, + "expr": "sum(increase(container_oom_events_total{namespace=\"${namespace}\", pod=\"${pod}\", container!=\"\", cluster=\"$cluster\"}[$__rate_interval])) by (container)", + "interval": "", + "legendFormat": "{{ container }}", + 
"range": true, + "refId": "A" + } + ], + "title": "OOM Events by container", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" }, - { - "datasource": { - "type": "prometheus", - "uid": "${datasource}" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 25, - "gradientMode": "opacity", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false + "fieldConfig": { + "defaults": { + "color": { + "fixedColor": "blue", + "mode": "thresholds" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "Percent", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 25, + "gradientMode": "opacity", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "smooth", + "lineStyle": { + "fill": "solid" + }, + "lineWidth": 2, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "area" + } + }, + "mappings": [], + "max": 1, + "min": 0, + "thresholds": { + "mode": "percentage", + "steps": [ + { + "color": "red" }, - "insertNulls": false, - "lineInterpolation": "smooth", - "lineWidth": 2, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" + { + "color": "yellow", + "value": 20 + }, + { + "color": "green", + "value": 30 }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" + { + "color": "#EAB839", + "value": 70 }, - "thresholdsStyle": { - "mode": "off" + { + "color": "red", + "value": 80 } - }, - "mappings": [], - "thresholds": { - 
"mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "pps" - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 48 - }, - "id": 34, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "none" - } + ] + }, + "unit": "none" }, - "pluginVersion": "8.3.3", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${datasource}" - }, - "exemplar": true, - "expr": "sum(rate(container_network_receive_packets_total{namespace=\"$namespace\", pod=~\"$pod\", cluster=\"$cluster\"}[$__rate_interval]))", - "interval": "$resolution", - "legendFormat": "Received", - "refId": "A" + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 39 + }, + "id": 61, + "options": { + "legend": { + "calcs": [], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "maxHeight": 600, + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "8.3.3", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" }, - { - "datasource": { - "type": "prometheus", - "uid": "${datasource}" - }, - "exemplar": true, - "expr": "- sum(rate(container_network_transmit_packets_total{namespace=\"$namespace\", pod=~\"$pod\", cluster=\"$cluster\"}[$__rate_interval]))", - "interval": "$resolution", - "legendFormat": "Transmitted", - "refId": "B" - } - ], - "title": "Network - Packets Rate", - "type": "timeseries" + "editorMode": "code", + "exemplar": true, + "expr": "sum(increase(kube_pod_container_status_restarts_total{namespace=~\"${namespace}\", pod=\"${pod}\", container!=\"\", job=~\"$job\", cluster=\"$cluster\"}[$__rate_interval])) by (container)", + "interval": "", + "legendFormat": "{{ container }}", + "range": true, + "refId": "A" + } + ], + "title": "Container Restarts by container", + 
"type": "timeseries" + }, + { + "collapsed": false, + "datasource": { + "type": "datasource", + "uid": "grafana" }, - { - "datasource": { - "type": "prometheus", - "uid": "${datasource}" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 25, - "gradientMode": "opacity", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "smooth", - "lineWidth": 2, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 47 + }, + "id": 45, + "panels": [], + "targets": [ + { + "datasource": { + "type": "datasource", + "uid": "grafana" + }, + "refId": "A" + } + ], + "title": "Network", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 25, + "gradientMode": "opacity", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "smooth", + "lineWidth": 2, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" }, - "thresholdsStyle": { - "mode": "off" + { + "color": "red", + "value": 80 } - 
}, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "pps" - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 56 - }, - "id": 36, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "8.3.3", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${datasource}" - }, - "exemplar": true, - "expr": "sum(rate(container_network_receive_packets_dropped_total{namespace=\"$namespace\", pod=~\"$pod\", cluster=\"$cluster\"}[$__rate_interval]))", - "interval": "$resolution", - "legendFormat": "Received", - "refId": "A" + ] }, - { - "datasource": { - "type": "prometheus", - "uid": "${datasource}" - }, - "exemplar": true, - "expr": "- sum(rate(container_network_transmit_packets_dropped_total{namespace=\"$namespace\", pod=~\"$pod\", cluster=\"$cluster\"}[$__rate_interval]))", - "interval": "$resolution", - "legendFormat": "Transmitted", - "refId": "B" - } - ], - "title": "Network - Packets Dropped", - "type": "timeseries" + "unit": "bytes" + }, + "overrides": [] }, - { - "datasource": { - "type": "prometheus", - "uid": "${datasource}" + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 48 + }, + "id": 31, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "maxHeight": 600, + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "8.3.3", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "exemplar": true, + "expr": "sum(rate(container_network_receive_bytes_total{namespace=\"$namespace\", pod=~\"$pod\", cluster=\"$cluster\"}[$__rate_interval]))", + "interval": "$resolution", + "legendFormat": "Received", + "refId": "A" }, - 
"fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 25, - "gradientMode": "opacity", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "smooth", - "lineWidth": 2, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "exemplar": true, + "expr": "- sum(rate(container_network_transmit_bytes_total{namespace=\"$namespace\", pod=~\"$pod\", cluster=\"$cluster\"}[$__rate_interval]))", + "interval": "$resolution", + "legendFormat": "Transmitted", + "refId": "B" + } + ], + "title": "Network - Bandwidth", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 25, + "gradientMode": "opacity", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "smooth", + "lineWidth": 2, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" }, - "thresholdsStyle": { - "mode": "off" + { + "color": "red", + "value": 80 } - }, - "mappings": [], - "thresholds": { - 
"mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "pps" - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 56 - }, - "id": 37, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "8.3.3", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${datasource}" - }, - "exemplar": true, - "expr": "sum(rate(container_network_receive_errors_total{namespace=\"$namespace\", pod=~\"$pod\", cluster=\"$cluster\"}[$__rate_interval]))", - "interval": "$resolution", - "legendFormat": "Received", - "refId": "A" + ] }, - { - "datasource": { - "type": "prometheus", - "uid": "${datasource}" - }, - "exemplar": true, - "expr": "- sum(rate(container_network_transmit_errors_total{namespace=\"$namespace\", pod=~\"$pod\", cluster=\"$cluster\"}[$__rate_interval]))", - "interval": "$resolution", - "legendFormat": "Transmitted", - "refId": "B" - } - ], - "title": "Network - Errors", - "type": "timeseries" - } - ], - "refresh": "30s", - "schemaVersion": 39, - "tags": [ - "Kubernetes", - "Prometheus" - ], - "templating": { - "list": [ - { - "current": { - "selected": false, - "text": "", - "value": "" - }, - "hide": 0, - "includeAll": false, - "multi": false, - "name": "datasource", - "options": [], - "query": "prometheus", - "queryValue": "", - "refresh": 1, - "regex": "", - "skipUrlSync": false, - "type": "datasource" + "unit": "pps" }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 48 + }, + "id": 34, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "maxHeight": 600, + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "8.3.3", + "targets": [ { - "current": { - "isNone": true, - 
"selected": false, - "text": "None", - "value": "" - }, "datasource": { "type": "prometheus", "uid": "${datasource}" }, - "definition": "label_values(kube_node_info,cluster)", - "hide": 0, - "includeAll": false, - "multi": false, - "name": "cluster", - "options": [], - "query": { - "qryType": 1, - "query": "label_values(kube_node_info,cluster)", - "refId": "PrometheusVariableQueryEditor-VariableQuery" - }, - "refresh": 1, - "regex": "", - "skipUrlSync": false, - "sort": 1, - "type": "query" + "exemplar": true, + "expr": "sum(rate(container_network_receive_packets_total{namespace=\"$namespace\", pod=~\"$pod\", cluster=\"$cluster\"}[$__rate_interval]))", + "interval": "$resolution", + "legendFormat": "Received", + "refId": "A" }, { - "current": { - "selected": false, - "text": "monitoring", - "value": "monitoring" - }, "datasource": { "type": "prometheus", "uid": "${datasource}" }, - "definition": "label_values(kube_pod_info{cluster=\"$cluster\"}, namespace)", - "hide": 0, - "includeAll": false, - "multi": false, - "name": "namespace", - "options": [], - "query": { - "query": "label_values(kube_pod_info{cluster=\"$cluster\"}, namespace)", - "refId": "Prometheus-namespace-Variable-Query" - }, - "refresh": 1, - "regex": "", - "skipUrlSync": false, - "sort": 1, - "tagValuesQuery": "", - "tagsQuery": "", - "type": "query", - "useTags": false + "exemplar": true, + "expr": "- sum(rate(container_network_transmit_packets_total{namespace=\"$namespace\", pod=~\"$pod\", cluster=\"$cluster\"}[$__rate_interval]))", + "interval": "$resolution", + "legendFormat": "Transmitted", + "refId": "B" + } + ], + "title": "Network - Packets Rate", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + 
"drawStyle": "line", + "fillOpacity": 25, + "gradientMode": "opacity", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "smooth", + "lineWidth": 2, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "pps" }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 56 + }, + "id": 36, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "maxHeight": 600, + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "8.3.3", + "targets": [ { - "current": { - "selected": false, - "text": "", - "value": "" - }, "datasource": { "type": "prometheus", "uid": "${datasource}" }, - "definition": "label_values(kube_pod_info{namespace=\"$namespace\", cluster=\"$cluster\"}, pod)", - "hide": 0, - "includeAll": true, - "multi": true, - "name": "pod", - "options": [], - "query": { - "query": "label_values(kube_pod_info{namespace=\"$namespace\", cluster=\"$cluster\"}, pod)", - "refId": "Prometheus-pod-Variable-Query" - }, - "refresh": 2, - "regex": "", - "skipUrlSync": false, - "sort": 1, - "tagValuesQuery": "", - "tagsQuery": "", - "type": "query", - "useTags": false + "exemplar": true, + "expr": "sum(rate(container_network_receive_packets_dropped_total{namespace=\"$namespace\", pod=~\"$pod\", cluster=\"$cluster\"}[$__rate_interval]))", + "interval": "$resolution", + "legendFormat": "Received", + "refId": "A" }, { - "current": { - "selected": false, - "text": "30s", - "value": "30s" + "datasource": { + "type": "prometheus", + "uid": "${datasource}" }, - "hide": 0, - "includeAll": false, - 
"multi": false, - "name": "resolution", - "options": [ - { - "selected": false, - "text": "1s", - "value": "1s" - }, - { - "selected": false, - "text": "15s", - "value": "15s" - }, - { - "selected": true, - "text": "30s", - "value": "30s" - }, - { - "selected": false, - "text": "1m", - "value": "1m" - }, - { - "selected": false, - "text": "3m", - "value": "3m" - }, - { - "selected": false, - "text": "5m", - "value": "5m" + "exemplar": true, + "expr": "- sum(rate(container_network_transmit_packets_dropped_total{namespace=\"$namespace\", pod=~\"$pod\", cluster=\"$cluster\"}[$__rate_interval]))", + "interval": "$resolution", + "legendFormat": "Transmitted", + "refId": "B" + } + ], + "title": "Network - Packets Dropped", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 25, + "gradientMode": "opacity", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "smooth", + "lineWidth": 2, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" } - ], - "query": "1s, 15s, 30s, 1m, 3m, 5m", - "queryValue": "", - "skipUrlSync": false, - "type": "custom" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "pps" }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 56 + }, + "id": 37, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true 
+ }, + "tooltip": { + "maxHeight": 600, + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "8.3.3", + "targets": [ { - "current": { - "selected": false, - "text": "kube-state-metrics", - "value": "kube-state-metrics" + "datasource": { + "type": "prometheus", + "uid": "${datasource}" }, + "exemplar": true, + "expr": "sum(rate(container_network_receive_errors_total{namespace=\"$namespace\", pod=~\"$pod\", cluster=\"$cluster\"}[$__rate_interval]))", + "interval": "$resolution", + "legendFormat": "Received", + "refId": "A" + }, + { "datasource": { "type": "prometheus", "uid": "${datasource}" }, - "definition": "label_values(kube_pod_info{namespace=\"$namespace\", cluster=\"$cluster\"},job)", - "hide": 0, - "includeAll": false, - "multi": true, - "name": "job", - "options": [], - "query": { - "qryType": 1, - "query": "label_values(kube_pod_info{namespace=\"$namespace\", cluster=\"$cluster\"},job)", - "refId": "PrometheusVariableQueryEditor-VariableQuery" - }, - "refresh": 1, - "regex": "", - "skipUrlSync": false, - "sort": 1, - "type": "query" + "exemplar": true, + "expr": "- sum(rate(container_network_transmit_errors_total{namespace=\"$namespace\", pod=~\"$pod\", cluster=\"$cluster\"}[$__rate_interval]))", + "interval": "$resolution", + "legendFormat": "Transmitted", + "refId": "B" } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": {}, - "timezone": "", - "title": "Kubernetes / Views / Pods", - "uid": "k8s_views_pods", - "version": 28, - "weekStart": "" - } + ], + "title": "Network - Errors", + "type": "timeseries" + } + ], + "refresh": "30s", + "schemaVersion": 39, + "tags": [ + "Kubernetes", + "Prometheus" + ], + "templating": { + "list": [ + { + "current": { + "selected": true, + "text": "default", + "value": "default" + }, + "hide": 0, + "includeAll": false, + "multi": false, + "name": "datasource", + "options": [], + "query": "prometheus", + "queryValue": "", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "type": 
"datasource" + }, + { + "current": { + "isNone": true, + "selected": false, + "text": "None", + "value": "" + }, + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "definition": "label_values(kube_node_info,cluster)", + "hide": 0, + "includeAll": false, + "multi": false, + "name": "cluster", + "options": [], + "query": { + "qryType": 1, + "query": "label_values(kube_node_info,cluster)", + "refId": "PrometheusVariableQueryEditor-VariableQuery" + }, + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 1, + "type": "query" + }, + { + "current": { + "selected": false, + "text": "monitoring", + "value": "monitoring" + }, + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "definition": "label_values(kube_pod_info{cluster=\"$cluster\"}, namespace)", + "hide": 0, + "includeAll": false, + "multi": false, + "name": "namespace", + "options": [], + "query": { + "query": "label_values(kube_pod_info{cluster=\"$cluster\"}, namespace)", + "refId": "Prometheus-namespace-Variable-Query" + }, + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 1, + "tagValuesQuery": "", + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "current": { + "selected": true, + "text": [ + "All" + ], + "value": [ + "$__all" + ] + }, + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "definition": "label_values(kube_pod_info{namespace=\"$namespace\", cluster=\"$cluster\"}, pod)", + "hide": 0, + "includeAll": true, + "multi": true, + "name": "pod", + "options": [], + "query": { + "query": "label_values(kube_pod_info{namespace=\"$namespace\", cluster=\"$cluster\"}, pod)", + "refId": "Prometheus-pod-Variable-Query" + }, + "refresh": 2, + "regex": "", + "skipUrlSync": false, + "sort": 1, + "tagValuesQuery": "", + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "current": { + "selected": false, + "text": "30s", + "value": "30s" + }, + "hide": 0, + "includeAll": false, + "multi": false, 
+ "name": "resolution", + "options": [ + { + "selected": false, + "text": "1s", + "value": "1s" + }, + { + "selected": false, + "text": "15s", + "value": "15s" + }, + { + "selected": true, + "text": "30s", + "value": "30s" + }, + { + "selected": false, + "text": "1m", + "value": "1m" + }, + { + "selected": false, + "text": "3m", + "value": "3m" + }, + { + "selected": false, + "text": "5m", + "value": "5m" + } + ], + "query": "1s, 15s, 30s, 1m, 3m, 5m", + "queryValue": "", + "skipUrlSync": false, + "type": "custom" + }, + { + "current": { + "selected": true, + "text": [ + "kube-state-metrics" + ], + "value": [ + "kube-state-metrics" + ] + }, + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "definition": "label_values(kube_pod_info{namespace=\"$namespace\", cluster=\"$cluster\"},job)", + "hide": 0, + "includeAll": false, + "multi": true, + "name": "job", + "options": [], + "query": { + "qryType": 1, + "query": "label_values(kube_pod_info{namespace=\"$namespace\", cluster=\"$cluster\"},job)", + "refId": "PrometheusVariableQueryEditor-VariableQuery" + }, + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 1, + "type": "query" + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timeRangeUpdatedDuringEditOrView": false, + "timepicker": {}, + "timezone": "", + "title": "Kubernetes / Views / Pods", + "uid": "k8s_views_pods", + "version": 1, + "weekStart": "" +} diff --git a/grafana/dashboards/karpenter_node_dashboard.json b/grafana/dashboards/karpenter_node_dashboard.json index 6880d6b..432a223 100644 --- a/grafana/dashboards/karpenter_node_dashboard.json +++ b/grafana/dashboards/karpenter_node_dashboard.json @@ -1,1189 +1,1200 @@ { - "annotations": { - "list": [ - { - "builtIn": 1, - "datasource": { - "type": "datasource", - "uid": "grafana" - }, - "enable": true, - "hide": true, - "iconColor": "rgba(0, 211, 255, 1)", - "name": "Annotations & Alerts", - "target": { - "limit": 100, - "matchAny": false, - "tags": [], - 
"type": "dashboard" - }, - "type": "dashboard" - } - ] - }, - "description": "Grafana dashboard to monitor nodes grouped by their corresponding Karpenter managed node group.", - "editable": true, - "fiscalYearStartMonth": 0, - "gnetId": 13548, - "graphTooltip": 0, - "id": 311, - "links": [], - "liveNow": false, - "panels": [ + "annotations": { + "list": [ { + "builtIn": 1, "datasource": { - "uid": "$datasource" + "type": "datasource", + "uid": "grafana" + }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "target": { + "limit": 100, + "matchAny": false, + "tags": [], + "type": "dashboard" }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" + "type": "dashboard" + } + ] + }, + "description": "Grafana dashboard to monitor nodes grouped by their corresponding Karpenter managed node group.", + "editable": true, + "fiscalYearStartMonth": 0, + "gnetId": 13548, + "graphTooltip": 0, + "id": 32, + "links": [], + "liveNow": false, + "panels": [ + { + "datasource": { + "uid": "$datasource" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": 
"A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" }, - "unit": "short" + "thresholdsStyle": { + "mode": "off" + } }, - "overrides": [] - }, - "gridPos": { - "h": 6, - "w": 12, - "x": 0, - "y": 0 - }, - "id": 26, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] }, - "tooltip": { - "mode": "multi", - "sort": "none" - } + "unit": "short" }, - "pluginVersion": "9.0.4", - "targets": [ - { - "datasource": { - "uid": "$datasource" - }, - "expr": "sum(kube_node_info{node=~\"$node\"})", - "interval": "", - "legendFormat": "", - "refId": "A" - } - ], - "title": "Total Nodes", - "type": "timeseries" + "overrides": [] }, - { - "datasource": { - "uid": "$datasource" - }, - "fieldConfig": { - "defaults": { - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - } - ] - } - }, - "overrides": [] - }, - "gridPos": { - "h": 6, - "w": 4, - "x": 12, - "y": 0 + "gridPos": { + "h": 6, + "w": 12, + "x": 0, + "y": 0 + }, + "id": 26, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true }, - "id": 20, - "options": { - "colorMode": "value", - "graphMode": "none", - "justifyMode": "center", - "orientation": "auto", - "percentChangeColorMode": "standard", - "reduceOptions": { - "calcs": [ - "mean" - ], - "fields": "", - "values": false + 
"tooltip": { + "maxHeight": 600, + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "9.0.4", + "targets": [ + { + "datasource": { + "uid": "$datasource" }, - "showPercentChange": false, - "textMode": "auto", - "wideLayout": true - }, - "pluginVersion": "11.1.0", - "targets": [ - { - "datasource": { - "uid": "$datasource" - }, - "expr": "sum(kube_pod_info{node=~\"$node\"})", - "interval": "", - "legendFormat": "", - "refId": "A" + "expr": "sum(kube_node_info{node=~\"$node\"})", + "interval": "", + "legendFormat": "", + "refId": "A" + } + ], + "title": "Total Nodes", + "type": "timeseries" + }, + { + "datasource": { + "uid": "$datasource" + }, + "fieldConfig": { + "defaults": { + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] } - ], - "title": "Pod Count - Total", - "type": "stat" + }, + "overrides": [] }, - { - "datasource": { - "uid": "$datasource" + "gridPos": { + "h": 6, + "w": 4, + "x": 12, + "y": 0 + }, + "id": 20, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "center", + "orientation": "auto", + "percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": [ + "mean" + ], + "fields": "", + "values": false }, - "description": "", - "fieldConfig": { - "defaults": { - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - } - ] - } + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "11.0.0", + "targets": [ + { + "datasource": { + "uid": "$datasource" }, - "overrides": [] + "expr": "sum(kube_pod_info{node=~\"$node\"})", + "interval": "", + "legendFormat": "", + "refId": "A" + } + ], + "title": "Pod Count - Total", + "type": "stat" + }, + { + "datasource": { + "uid": "$datasource" + }, + "description": "", + "fieldConfig": { + "defaults": { + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + 
"value": null + } + ] + } }, - "gridPos": { - "h": 6, - "w": 4, - "x": 16, - "y": 0 + "overrides": [] + }, + "gridPos": { + "h": 6, + "w": 4, + "x": 16, + "y": 0 + }, + "id": 14, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": [ + "mean" + ], + "fields": "", + "values": false }, - "id": 14, - "options": { - "colorMode": "value", - "graphMode": "none", - "justifyMode": "auto", - "orientation": "auto", - "percentChangeColorMode": "standard", - "reduceOptions": { - "calcs": [ - "mean" - ], - "fields": "", - "values": false + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "11.0.0", + "targets": [ + { + "datasource": { + "uid": "$datasource" }, - "showPercentChange": false, - "textMode": "auto", - "wideLayout": true - }, - "pluginVersion": "11.1.0", - "targets": [ - { - "datasource": { - "uid": "$datasource" - }, - "expr": "sum(kubelet_running_pods{node=~\"$node\"})", - "interval": "", - "legendFormat": "", - "refId": "A" + "expr": "sum(kubelet_running_pods{node=~\"$node\"})", + "interval": "", + "legendFormat": "", + "refId": "A" + } + ], + "title": "Pod Count - Running", + "type": "stat" + }, + { + "datasource": { + "uid": "$datasource" + }, + "fieldConfig": { + "defaults": { + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "red", + "value": null + } + ] } - ], - "title": "Pod Count - Running", - "type": "stat" + }, + "overrides": [] }, - { - "datasource": { - "uid": "$datasource" + "gridPos": { + "h": 6, + "w": 4, + "x": 20, + "y": 0 + }, + "id": 18, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": [ + "mean" + ], + "fields": "", + "values": false }, - "fieldConfig": { - "defaults": { - "mappings": [], - "thresholds": 
{ - "mode": "absolute", - "steps": [ - { - "color": "red", - "value": null - } - ] - } + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "11.0.0", + "targets": [ + { + "datasource": { + "uid": "$datasource" }, - "overrides": [] - }, - "gridPos": { - "h": 6, - "w": 4, - "x": 20, - "y": 0 - }, - "id": 18, - "options": { - "colorMode": "value", - "graphMode": "none", - "justifyMode": "auto", - "orientation": "auto", - "percentChangeColorMode": "standard", - "reduceOptions": { - "calcs": [ - "mean" - ], - "fields": "", - "values": false + "expr": "sum(kube_pod_info{node=~\"$node\"}) - sum(kubelet_running_pods{node=~\"$node\"})", + "interval": "", + "legendFormat": "", + "refId": "A" + } + ], + "title": "Pod Count - Not Running", + "type": "stat" + }, + { + "datasource": { + "uid": "$datasource" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" }, - "showPercentChange": false, - "textMode": "auto", - "wideLayout": true - }, - "pluginVersion": "11.1.0", - "targets": [ - { - "datasource": { - "uid": "$datasource" + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false }, - "expr": "sum(kube_pod_info{node=~\"$node\"}) - sum(kubelet_running_pods{node=~\"$node\"})", - "interval": "", - "legendFormat": "", - "refId": "A" - } - ], - "title": "Pod Count - Not Running", - "type": "stat" - }, - { - "datasource": { - "uid": "$datasource" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - 
"axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null }, - "thresholdsStyle": { - "mode": "off" + { + "color": "red", + "value": 80 } - }, - "links": [], - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "percent" + ] }, - "overrides": [] + "unit": "percent" }, - "gridPos": { - "h": 9, - "w": 12, - "x": 0, - "y": 6 + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 12, + "x": 0, + "y": 6 + }, + "id": 4, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true }, - "id": 4, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true + "tooltip": { + "maxHeight": 600, + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "9.0.4", + "targets": [ + { + "datasource": { + "uid": "$datasource" }, - "tooltip": { - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "9.0.4", - "targets": [ - { - "datasource": { - "uid": "$datasource" - }, - "expr": "(1 - (sum(rate(node_cpu_seconds_total{instance=~\"$instance:9100\", mode=\"idle\"}[5m])) by (instance) / sum(rate(node_cpu_seconds_total{instance=~\"$instance:9100\"}[5m])) by (instance))) * 100", - "interval": "", 
- "legendFormat": "{{instance}}-cpu-utilization", - "refId": "A" - } - ], - "title": "CPU Utilization", - "type": "timeseries" + "expr": "(1 - (sum(rate(node_cpu_seconds_total{instance=~\"$instance:9100\", mode=\"idle\"}[5m])) by (instance) / sum(rate(node_cpu_seconds_total{instance=~\"$instance:9100\"}[5m])) by (instance))) * 100", + "interval": "", + "legendFormat": "{{instance}}-cpu-utilization", + "refId": "A" + } + ], + "title": "CPU Utilization", + "type": "timeseries" + }, + { + "datasource": { + "uid": "$datasource" }, - { - "datasource": { - "uid": "$datasource" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" }, - "links": [], - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "showPoints": "never", + "spanNulls": 
false, + "stacking": { + "group": "A", + "mode": "none" }, - "unit": "percent" - }, - "overrides": [] - }, - "gridPos": { - "h": 9, - "w": 12, - "x": 12, - "y": 6 - }, - "id": 2, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true + "thresholdsStyle": { + "mode": "off" + } }, - "tooltip": { - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "9.0.4", - "targets": [ - { - "datasource": { - "uid": "$datasource" - }, - "expr": "(node_memory_MemTotal_bytes{instance=~\"$instance:9100\"} - node_memory_MemAvailable_bytes{instance=~\"$instance:9100\"}) / node_memory_MemTotal_bytes{instance=~\"$instance:9100\"} * 100", - "interval": "", - "legendFormat": "{{instance}}-memory-utilization", - "refId": "A" - } - ], - "title": "Memory Utilization", - "type": "timeseries" - }, - { - "datasource": { - "uid": "$datasource" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" + "links": [], + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" + { + "color": "red", + "value": 80 } - }, - "links": [], - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "percent" + ] }, - "overrides": [] + "unit": "percent" }, - "gridPos": 
{ - "h": 9, - "w": 8, - "x": 0, - "y": 15 + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 12, + "x": 12, + "y": 6 + }, + "id": 2, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true }, - "id": 6, - "interval": "", - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true + "tooltip": { + "maxHeight": 600, + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "9.0.4", + "targets": [ + { + "datasource": { + "uid": "$datasource" }, - "tooltip": { - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "9.0.4", - "targets": [ - { - "datasource": { - "uid": "$datasource" - }, - "expr": "(sum (node_filesystem_size_bytes{instance=~\"$instance:9100\"}) by (instance) - sum (node_filesystem_free_bytes{instance=~\"$instance:9100\"}) by (instance)) / sum (node_filesystem_size_bytes{instance=~\"$instance:9100\"}) by (instance) * 100", - "interval": "", - "legendFormat": "{{kubernetes_node}}-disk-utilization", - "refId": "B" - } - ], - "title": "Disk Utilization", - "type": "timeseries" + "expr": "(node_memory_MemTotal_bytes{instance=~\"$instance:9100\"} - node_memory_MemAvailable_bytes{instance=~\"$instance:9100\"}) / node_memory_MemTotal_bytes{instance=~\"$instance:9100\"} * 100", + "interval": "", + "legendFormat": "{{instance}}-memory-utilization", + "refId": "A" + } + ], + "title": "Memory Utilization", + "type": "timeseries" + }, + { + "datasource": { + "uid": "$datasource" }, - { - "datasource": { - "uid": "$datasource" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + 
"legend": false, + "tooltip": false, + "viz": false }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" }, - "links": [], - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" }, - "unit": "mbytes" + "thresholdsStyle": { + "mode": "off" + } }, - "overrides": [] - }, - "gridPos": { - "h": 9, - "w": 8, - "x": 8, - "y": 15 - }, - "id": 10, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true + "links": [], + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] }, - "tooltip": { - "mode": "multi", - "sort": "none" - } + "unit": "percent" }, - "pluginVersion": "9.0.4", - "targets": [ - { - "datasource": { - "uid": "$datasource" - }, - "expr": "sum(rate(node_disk_read_bytes_total{instance=~\"$instance:9100\"}[5m]) / (1024 * 1024)) by (instance)", - "interval": "", - "legendFormat": "{{instance}}-read-bytes", - "refId": "A" - }, - { - "datasource": { - "uid": "$datasource" - }, - "expr": 
"sum(rate(node_disk_written_bytes_total{instance=~\"$instance:9100\"}[5m]) / (1024 * 1024)) by (instance)", - "interval": "", - "legendFormat": "{{instance}}-write-bytes", - "refId": "B" - } - ], - "title": "Disk I/O", - "type": "timeseries" + "overrides": [] }, - { - "datasource": { - "uid": "$datasource" + "gridPos": { + "h": 9, + "w": 8, + "x": 0, + "y": 15 + }, + "id": 6, + "interval": "", + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" + "tooltip": { + "maxHeight": 600, + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "9.0.4", + "targets": [ + { + "datasource": { + "uid": "$datasource" + }, + "expr": "(sum (node_filesystem_size_bytes{instance=~\"$instance:9100\"}) by (instance) - sum (node_filesystem_free_bytes{instance=~\"$instance:9100\"}) by (instance)) / sum (node_filesystem_size_bytes{instance=~\"$instance:9100\"}) by (instance) * 100", + "interval": "", + "legendFormat": "{{kubernetes_node}}-disk-utilization", + "refId": "B" + } + ], + "title": "Disk Utilization", + "type": "timeseries" + }, + { + "datasource": { + "uid": "$datasource" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 
1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" }, - "links": [], - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" }, - "unit": "mbytes" + "thresholdsStyle": { + "mode": "off" + } }, - "overrides": [] - }, - "gridPos": { - "h": 9, - "w": 8, - "x": 16, - "y": 15 - }, - "id": 12, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true + "links": [], + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] }, - "tooltip": { - "mode": "multi", - "sort": "none" - } + "unit": "mbytes" }, - "pluginVersion": "9.0.4", - "targets": [ - { - "datasource": { - "uid": "$datasource" - }, - "expr": "sum(rate(node_network_receive_bytes_total{instance=~\"$instance:9100\",device=~\"eth.*\"}[2m]) / (1024 * 1024)) by (instance)", - "instant": false, - "interval": "", - "legendFormat": "{{instance}}-network-in", - "refId": "A" - }, - { - "datasource": { - "uid": "$datasource" - }, - "expr": "sum(rate(node_network_transmit_bytes_total{instance=~\"$instance:9100\",device=~\"eth.*\"}[2m]) / (1024 * 1024)) by (instance)", - "interval": "", - "legendFormat": "{{instance}}-network-out", - "refId": "B" - } - ], - "title": "Network I/O", - "type": "timeseries" + "overrides": [] }, - { - "aliasColors": {}, - "autoMigrateFrom": "graph", - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": { - "uid": "$datasource" - }, 
- "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 8, - "w": 8, - "x": 0, - "y": 24 - }, - "hiddenSeries": false, - "id": 23, + "gridPos": { + "h": 9, + "w": 8, + "x": 8, + "y": 15 + }, + "id": 10, + "options": { "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "nullPointMode": "null", - "options": { - "alertThreshold": true + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true }, - "percentage": false, - "pluginVersion": "9.0.4", - "pointradius": 2, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "datasource": { - "uid": "$datasource" - }, - "expr": "node_load1{instance=~\"$instance:9100\"}", - "interval": "", - "legendFormat": "{{instance}}", - "refId": "A" - } - ], - "thresholds": [], - "timeRegions": [], - "title": "Load Avg (1m)", "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "timeseries", - "xaxis": { - "mode": "time", - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "short", - "logBase": 1, - "show": true - }, - { - "format": "short", - "logBase": 1, - "show": true - } - ], - "yaxis": { - "align": false + "maxHeight": 600, + "mode": "multi", + "sort": "none" } }, - { - "aliasColors": {}, - "autoMigrateFrom": "graph", - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": { - "uid": "$datasource" - }, - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 8, - "w": 8, - "x": 8, - "y": 24 - }, - "hiddenSeries": false, - "id": 22, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "nullPointMode": "null", - "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "9.0.4", - 
"pointradius": 2, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "datasource": { - "uid": "$datasource" - }, - "expr": "node_load5{instance=~\"$instance:9100\"}", - "interval": "", - "legendFormat": "{{instance}}", - "refId": "A" - } - ], - "thresholds": [], - "timeRegions": [], - "title": "Load Avg (5m)", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "timeseries", - "xaxis": { - "mode": "time", - "show": true, - "values": [] + "pluginVersion": "9.0.4", + "targets": [ + { + "datasource": { + "uid": "$datasource" + }, + "expr": "sum(rate(node_disk_read_bytes_total{instance=~\"$instance:9100\"}[5m]) / (1024 * 1024)) by (instance)", + "interval": "", + "legendFormat": "{{instance}}-read-bytes", + "refId": "A" }, - "yaxes": [ - { - "format": "short", - "logBase": 1, - "show": true + { + "datasource": { + "uid": "$datasource" }, - { - "format": "short", - "logBase": 1, - "show": true - } - ], - "yaxis": { - "align": false + "expr": "sum(rate(node_disk_written_bytes_total{instance=~\"$instance:9100\"}[5m]) / (1024 * 1024)) by (instance)", + "interval": "", + "legendFormat": "{{instance}}-write-bytes", + "refId": "B" } + ], + "title": "Disk I/O", + "type": "timeseries" + }, + { + "datasource": { + "uid": "$datasource" }, - { - "aliasColors": {}, - "autoMigrateFrom": "graph", - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": { - "uid": "$datasource" - }, - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 8, - "w": 8, - "x": 16, - "y": 24 + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + 
"viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "mbytes" }, - "hiddenSeries": false, - "id": 24, + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 8, + "x": 16, + "y": 15 + }, + "id": 12, + "options": { "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "nullPointMode": "null", - "options": { - "alertThreshold": true + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true }, - "percentage": false, - "pluginVersion": "9.0.4", - "pointradius": 2, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "datasource": { - "uid": "$datasource" - }, - "expr": "node_load15{instance=~\"$instance:9100\"}", - "interval": "", - "legendFormat": "{{instance}}", - "refId": "A" - } - ], - "thresholds": [], - "timeRegions": [], - "title": "Load Avg (15m)", "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "timeseries", - "xaxis": { - "mode": "time", - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "short", - "logBase": 1, - "show": true - }, - { - "format": "short", - "logBase": 1, - "show": true - } - ], - "yaxis": { - "align": false + "maxHeight": 600, + "mode": "multi", + "sort": "none" } - } - ], - "refresh": "30s", - "schemaVersion": 39, - "tags": [], - "templating": { - "list": [ + }, + "pluginVersion": "9.0.4", + "targets": [ { - 
"current": { - "selected": true, - "text": [ - "All" - ], - "value": [ - "$__all" - ] - }, "datasource": { "uid": "$datasource" }, - "definition": "label_values(kube_node_labels,label_karpenter_sh_nodepool)", - "hide": 0, - "includeAll": true, - "label": "", - "multi": true, - "name": "nodegroup", - "options": [], - "query": { - "qryType": 1, - "query": "label_values(kube_node_labels,label_karpenter_sh_nodepool)", - "refId": "PrometheusVariableQueryEditor-VariableQuery" - }, - "refresh": 1, - "regex": "", - "skipUrlSync": false, - "sort": 0, - "tagValuesQuery": "", - "tagsQuery": "", - "type": "query", - "useTags": false + "expr": "sum(rate(node_network_receive_bytes_total{instance=~\"$instance:9100\",device=~\"eth.*\"}[2m]) / (1024 * 1024)) by (instance)", + "instant": false, + "interval": "", + "legendFormat": "{{instance}}-network-in", + "refId": "A" }, { - "current": { - "selected": true, - "text": [ - "All" - ], - "value": [ - "$__all" - ] - }, "datasource": { - "type": "prometheus", "uid": "$datasource" }, - "definition": "label_values(kube_node_labels{label_karpenter_sh_nodepool=~\"$nodegroup\"},node)", - "hide": 0, - "includeAll": true, - "multi": true, - "name": "node", - "options": [], - "query": { - "qryType": 1, - "query": "label_values(kube_node_labels{label_karpenter_sh_nodepool=~\"$nodegroup\"},node)", - "refId": "PrometheusVariableQueryEditor-VariableQuery" + "expr": "sum(rate(node_network_transmit_bytes_total{instance=~\"$instance:9100\",device=~\"eth.*\"}[2m]) / (1024 * 1024)) by (instance)", + "interval": "", + "legendFormat": "{{instance}}-network-out", + "refId": "B" + } + ], + "title": "Network I/O", + "type": "timeseries" + }, + { + "aliasColors": {}, + "autoMigrateFrom": "graph", + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": { + "uid": "$datasource" + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 8, + "x": 0, + "y": 24 + }, + "hiddenSeries": false, + "id": 23, + "legend": { + "avg": false, + 
"current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "9.0.4", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "datasource": { + "uid": "$datasource" }, - "refresh": 1, - "regex": "", - "skipUrlSync": false, - "sort": 0, - "tagValuesQuery": "", - "tagsQuery": "", - "type": "query", - "useTags": false + "expr": "node_load1{instance=~\"$instance:9100\"}", + "interval": "", + "legendFormat": "{{instance}}", + "refId": "A" + } + ], + "thresholds": [], + "timeRegions": [], + "title": "Load Avg (1m)", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "timeseries", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "logBase": 1, + "show": true }, { - "current": { - "selected": false, - "text": "All", - "value": "$__all" - }, + "format": "short", + "logBase": 1, + "show": true + } + ], + "yaxis": { + "align": false + } + }, + { + "aliasColors": {}, + "autoMigrateFrom": "graph", + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": { + "uid": "$datasource" + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 8, + "x": 8, + "y": 24 + }, + "hiddenSeries": false, + "id": 22, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "9.0.4", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + 
{ "datasource": { - "type": "prometheus", "uid": "$datasource" }, - "definition": "label_values(kube_pod_info{node =~ \"$node\"}, host_ip)", - "hide": 2, - "includeAll": true, - "multi": true, - "name": "instance", - "options": [], - "query": { - "query": "label_values(kube_pod_info{node =~ \"$node\"}, host_ip)", - "refId": "Prometheus-instance-Variable-Query" - }, - "refresh": 1, - "regex": "", - "skipUrlSync": false, - "sort": 0, - "tagValuesQuery": "", - "tagsQuery": "", - "type": "query", - "useTags": false + "expr": "node_load5{instance=~\"$instance:9100\"}", + "interval": "", + "legendFormat": "{{instance}}", + "refId": "A" + } + ], + "thresholds": [], + "timeRegions": [], + "title": "Load Avg (5m)", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "timeseries", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "logBase": 1, + "show": true }, { - "current": { - "selected": false, - "text": "Prometheus", - "value": "prometheus" - }, - "hide": 0, - "includeAll": false, - "label": "datasource", - "multi": false, - "name": "datasource", - "options": [], - "query": "prometheus", - "queryValue": "", - "refresh": 1, - "regex": "", - "skipUrlSync": false, - "type": "datasource" + "format": "short", + "logBase": 1, + "show": true } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ] + ], + "yaxis": { + "align": false + } }, - "timezone": "", - "title": "Karpenter Node (Groups)", - "uid": "K8s-CR-Nodesgroups", - "version": 2, - "weekStart": "" - } + { + "aliasColors": {}, + "autoMigrateFrom": "graph", + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": { + "uid": "$datasource" + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 8, + "x": 16, + "y": 24 + }, + "hiddenSeries": false, + "id": 24, + "legend": { 
+ "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "9.0.4", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "datasource": { + "uid": "$datasource" + }, + "expr": "node_load15{instance=~\"$instance:9100\"}", + "interval": "", + "legendFormat": "{{instance}}", + "refId": "A" + } + ], + "thresholds": [], + "timeRegions": [], + "title": "Load Avg (15m)", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "timeseries", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "logBase": 1, + "show": true + }, + { + "format": "short", + "logBase": 1, + "show": true + } + ], + "yaxis": { + "align": false + } + } + ], + "refresh": "30s", + "schemaVersion": 39, + "tags": [ + "kubernetes", + "prometheus", + "karpenter" + ], + "templating": { + "list": [ + { + "current": { + "selected": true, + "text": [ + "All" + ], + "value": [ + "$__all" + ] + }, + "datasource": { + "uid": "$datasource" + }, + "definition": "label_values(kube_node_labels,label_karpenter_sh_nodepool)", + "hide": 0, + "includeAll": true, + "label": "", + "multi": true, + "name": "nodegroup", + "options": [], + "query": { + "qryType": 1, + "query": "label_values(kube_node_labels,label_karpenter_sh_nodepool)", + "refId": "PrometheusVariableQueryEditor-VariableQuery" + }, + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "tagValuesQuery": "", + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "current": { + "selected": true, + "text": [ + "All" + ], + "value": [ + "$__all" + ] + }, + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, 
+ "definition": "label_values(kube_node_labels{label_karpenter_sh_nodepool=~\"$nodegroup\"},node)", + "hide": 0, + "includeAll": true, + "multi": true, + "name": "node", + "options": [], + "query": { + "qryType": 1, + "query": "label_values(kube_node_labels{label_karpenter_sh_nodepool=~\"$nodegroup\"},node)", + "refId": "PrometheusVariableQueryEditor-VariableQuery" + }, + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "tagValuesQuery": "", + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "current": { + "selected": false, + "text": "All", + "value": "$__all" + }, + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "definition": "label_values(kube_pod_info{node =~ \"$node\"}, host_ip)", + "hide": 2, + "includeAll": true, + "multi": true, + "name": "instance", + "options": [], + "query": { + "query": "label_values(kube_pod_info{node =~ \"$node\"}, host_ip)", + "refId": "Prometheus-instance-Variable-Query" + }, + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "tagValuesQuery": "", + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "current": { + "selected": false, + "text": "default", + "value": "default" + }, + "hide": 0, + "includeAll": false, + "label": "datasource", + "multi": false, + "name": "datasource", + "options": [], + "query": "prometheus", + "queryValue": "", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "type": "datasource" + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timeRangeUpdatedDuringEditOrView": false, + "timepicker": { + "refresh_intervals": [ + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ] + }, + "timezone": "", + "title": "Karpenter Node (Groups)", + "uid": "K8s-karpenter-Nodes", + "version": 1, + "weekStart": "" +} diff --git a/grafana/dashboards/nginx_api_host.json b/grafana/dashboards/nginx_api_host.json index 144f275..174219a 100644 --- a/grafana/dashboards/nginx_api_host.json +++ 
b/grafana/dashboards/nginx_api_host.json @@ -20,7 +20,7 @@ "fiscalYearStartMonth": 0, "gnetId": 10443, "graphTooltip": 0, - "id": 3562, + "id": 28, "links": [], "panels": [ { @@ -169,6 +169,7 @@ "showLegend": false }, "tooltip": { + "maxHeight": 600, "mode": "multi", "sort": "desc" } @@ -338,6 +339,7 @@ "showLegend": false }, "tooltip": { + "maxHeight": 600, "mode": "multi", "sort": "desc" } @@ -475,6 +477,7 @@ "showLegend": false }, "tooltip": { + "maxHeight": 600, "mode": "multi", "sort": "desc" } @@ -614,6 +617,7 @@ "showLegend": false }, "tooltip": { + "maxHeight": 600, "mode": "multi", "sort": "desc" } @@ -725,6 +729,7 @@ "showLegend": false }, "tooltip": { + "maxHeight": 600, "mode": "multi", "sort": "desc" } @@ -836,6 +841,7 @@ "showLegend": false }, "tooltip": { + "maxHeight": 600, "mode": "multi", "sort": "desc" } @@ -965,6 +971,7 @@ "showLegend": true }, "tooltip": { + "maxHeight": 600, "mode": "multi", "sort": "none" } @@ -1177,7 +1184,7 @@ }, "showHeader": true }, - "pluginVersion": "11.1.0", + "pluginVersion": "11.0.0", "targets": [ { "datasource": { @@ -1362,7 +1369,7 @@ }, "showHeader": true }, - "pluginVersion": "11.1.0", + "pluginVersion": "11.0.0", "targets": [ { "datasource": { @@ -1547,7 +1554,7 @@ }, "showHeader": true }, - "pluginVersion": "11.1.0", + "pluginVersion": "11.0.0", "targets": [ { "datasource": { @@ -1644,8 +1651,8 @@ { "current": { "selected": true, - "text": "Prometheus-nonprod-atmosly", - "value": "PB8A82C5223A7CC33" + "text": "default", + "value": "default" }, "hide": 0, "includeAll": false, @@ -1666,6 +1673,7 @@ "from": "now-5m", "to": "now" }, + "timeRangeUpdatedDuringEditOrView": false, "timepicker": { "refresh_intervals": [ "5s", @@ -1694,6 +1702,6 @@ "timezone": "browser", "title": "NGINX / API / Host", "uid": "NginxApiHost", - "version": 6, + "version": 1, "weekStart": "" } diff --git a/grafana/dashboards/nginx_ingress.json b/grafana/dashboards/nginx_ingress.json index d089b95..b8f89a8 100644 --- 
a/grafana/dashboards/nginx_ingress.json +++ b/grafana/dashboards/nginx_ingress.json @@ -36,7 +36,7 @@ "editable": true, "fiscalYearStartMonth": 0, "graphTooltip": 0, - "id": 3559, + "id": 30, "links": [], "panels": [ { @@ -97,7 +97,7 @@ "textMode": "auto", "wideLayout": true }, - "pluginVersion": "11.1.0", + "pluginVersion": "11.0.0", "targets": [ { "datasource": { @@ -178,7 +178,7 @@ "textMode": "auto", "wideLayout": true }, - "pluginVersion": "11.1.0", + "pluginVersion": "11.0.0", "targets": [ { "datasource": { @@ -264,7 +264,7 @@ "textMode": "auto", "wideLayout": true }, - "pluginVersion": "11.1.0", + "pluginVersion": "11.0.0", "targets": [ { "datasource": { @@ -346,7 +346,7 @@ "textMode": "auto", "wideLayout": true }, - "pluginVersion": "11.1.0", + "pluginVersion": "11.0.0", "targets": [ { "datasource": { @@ -429,7 +429,7 @@ "textMode": "auto", "wideLayout": true }, - "pluginVersion": "11.1.0", + "pluginVersion": "11.0.0", "targets": [ { "datasource": { @@ -545,6 +545,7 @@ "showLegend": true }, "tooltip": { + "maxHeight": 600, "mode": "multi", "sort": "none" } @@ -715,6 +716,7 @@ "showLegend": true }, "tooltip": { + "maxHeight": 600, "mode": "multi", "sort": "asc" } @@ -819,6 +821,7 @@ "width": 200 }, "tooltip": { + "maxHeight": 600, "mode": "multi", "sort": "none" } @@ -984,6 +987,7 @@ "width": 200 }, "tooltip": { + "maxHeight": 600, "mode": "multi", "sort": "desc" } @@ -1083,6 +1087,7 @@ "showLegend": false }, "tooltip": { + "maxHeight": 600, "mode": "multi", "sort": "desc" } @@ -1327,7 +1332,7 @@ }, "showHeader": true }, - "pluginVersion": "11.1.0", + "pluginVersion": "11.0.0", "repeatDirection": "h", "targets": [ { @@ -1454,8 +1459,7 @@ "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -1804,9 +1808,9 @@ "list": [ { "current": { - "selected": false, - "text": "Prometheus-prod-atmosly", - "value": "cdverpho22sqoa" + "selected": true, + "text": "default", + "value": "default" }, "hide": 0, 
"includeAll": false, @@ -1967,6 +1971,7 @@ "from": "now-1h", "to": "now" }, + "timeRangeUpdatedDuringEditOrView": false, "timepicker": { "refresh_intervals": [ "5s", @@ -1995,6 +2000,6 @@ "timezone": "browser", "title": "NGINX Ingress controller", "uid": "nginxingresscontroller", - "version": 4, + "version": 1, "weekStart": "" } diff --git a/grafana/dashboards/nginx_request_handling.json b/grafana/dashboards/nginx_request_handling.json index 072b2ec..aef28a2 100644 --- a/grafana/dashboards/nginx_request_handling.json +++ b/grafana/dashboards/nginx_request_handling.json @@ -26,7 +26,7 @@ "fiscalYearStartMonth": 0, "gnetId": 9614, "graphTooltip": 1, - "id": 3560, + "id": 29, "links": [], "liveNow": false, "panels": [ @@ -110,6 +110,7 @@ "showLegend": true }, "tooltip": { + "maxHeight": 600, "mode": "multi", "sort": "desc" } @@ -230,6 +231,7 @@ "showLegend": true }, "tooltip": { + "maxHeight": 600, "mode": "multi", "sort": "desc" } @@ -353,6 +355,7 @@ "sortDesc": true }, "tooltip": { + "maxHeight": 600, "mode": "multi", "sort": "desc" } @@ -455,6 +458,7 @@ "sortDesc": true }, "tooltip": { + "maxHeight": 600, "mode": "multi", "sort": "desc" } @@ -556,6 +560,7 @@ "showLegend": true }, "tooltip": { + "maxHeight": 600, "mode": "multi", "sort": "desc" } @@ -661,6 +666,7 @@ "showLegend": true }, "tooltip": { + "maxHeight": 600, "mode": "multi", "sort": "desc" } @@ -733,8 +739,7 @@ "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -837,8 +842,7 @@ "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -951,8 +955,7 @@ "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -1016,8 +1019,8 @@ { "current": { "selected": true, - "text": "Prometheus-prod-atmosly", - "value": "cdverpho22sqoa" + "text": "default", + "value": "default" }, "hide": 0, "includeAll": false, @@ -1035,7 +1038,7 @@ { "allValue": 
".*", "current": { - "selected": true, + "selected": false, "text": "All", "value": "$__all" }, @@ -1069,6 +1072,7 @@ "from": "now-5m", "to": "now" }, + "timeRangeUpdatedDuringEditOrView": false, "timepicker": { "refresh_intervals": [ "5s", @@ -1097,6 +1101,6 @@ "timezone": "browser", "title": "Request Handling Performance", "uid": "Nginxrequesthandling", - "version": 6, + "version": 1, "weekStart": "" } diff --git a/grafana/dashboards/nodegroup.json b/grafana/dashboards/nodegroup.json index 8968489..6d36b3d 100644 --- a/grafana/dashboards/nodegroup.json +++ b/grafana/dashboards/nodegroup.json @@ -26,7 +26,7 @@ "fiscalYearStartMonth": 0, "gnetId": 13548, "graphTooltip": 0, - "id": 67, + "id": 34, "links": [], "liveNow": false, "panels": [ @@ -1520,12 +1520,16 @@ ], "refresh": "", "schemaVersion": 39, - "tags": [], + "tags": [ + "kubernetes", + "prometheus", + "nodegroup" + ], "templating": { "list": [ { "current": { - "selected": true, + "selected": false, "text": "default", "value": "default" }, @@ -1574,8 +1578,8 @@ { "current": { "selected": false, - "text": "ip-10-20-39-69.us-east-2.compute.internal", - "value": "ip-10-20-39-69.us-east-2.compute.internal" + "text": "ip-10-20-41-244.us-east-2.compute.internal", + "value": "ip-10-20-41-244.us-east-2.compute.internal" }, "datasource": { "type": "prometheus", diff --git a/grafana/dashboards/thanos-overview.json b/grafana/dashboards/thanos-overview.json new file mode 100644 index 0000000..a5b7b78 --- /dev/null +++ b/grafana/dashboards/thanos-overview.json @@ -0,0 +1,1982 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": { + "type": "datasource", + "uid": "grafana" + }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "target": { + "limit": 100, + "matchAny": false, + "tags": [], + "type": "dashboard" + }, + "type": "dashboard" + } + ] + }, + "description": "This dashboard provides a resource usage overview of the Thanos components 
Querier, Query-Frontend, Compactor and Store.\r\n\r\nIt includes CPU and Memory requests, limits and usage. In addition, the last termination reason is plotted and the cache hit ratio is available.", + "editable": true, + "fiscalYearStartMonth": 0, + "gnetId": 12937, + "graphTooltip": 0, + "id": 55, + "links": [], + "liveNow": false, + "panels": [ + { + "datasource": { + "type": "prometheus", + "uid": "PC96415006F908B67" + }, + "gridPos": { + "h": 5, + "w": 3, + "x": 0, + "y": 0 + }, + "id": 24, + "options": { + "code": { + "language": "plaintext", + "showLineNumbers": false, + "showMiniMap": false + }, + "content": "![logo](https://repository-images.githubusercontent.com/109162639/97e49180-661b-11e9-9882-fdc44b74debd)", + "mode": "markdown" + }, + "pluginVersion": "11.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PC96415006F908B67" + }, + "refId": "A" + } + ], + "type": "text" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PC96415006F908B67" + }, + "fieldConfig": { + "defaults": { + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 21, + "x": 3, + "y": 0 + }, + "id": 10, + "options": { + "colorMode": "background", + "graphMode": "none", + "justifyMode": "center", + "orientation": "vertical", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "text": {}, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "11.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PC96415006F908B67" + }, + "editorMode": "code", + "exemplar": true, + "expr": "count\n(\n kube_pod_container_info{container=~\"thanos.*|query.*|storegateway.*|compactor.*|bucketweb.*\"}\n) by (container)\n", + "instant": true, + "interval": "", + "legendFormat": "{{container}}", + "refId": "A" + } + ], + "title":
"Number of Pods", + "type": "stat" + }, + { + "collapsed": false, + "datasource": { + "type": "prometheus", + "uid": "PC96415006F908B67" + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 5 + }, + "id": 8, + "panels": [], + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PC96415006F908B67" + }, + "refId": "A" + } + ], + "title": "Thanos / Query", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PC96415006F908B67" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "Percent", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 25, + "gradientMode": "opacity", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 2, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "area" + } + }, + "mappings": [], + "thresholds": { + "mode": "percentage", + "steps": [ + { + "color": "blue", + "value": null + }, + { + "color": "green", + "value": 20 + }, + { + "color": "yellow", + "value": 30 + }, + { + "color": "orange", + "value": 70 + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "percentunit" + }, + "overrides": [ + { + "matcher": { + "id": "byRegexp", + "options": "/limit/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "red", + "mode": "fixed" + } + }, + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "custom.lineWidth", + "value": 1 + }, + { + "id": "custom.lineStyle", + "value": { + "fill": "solid" + } + } + ] + } + ] + }, + "gridPos": { + "h": 8, + "w": 10, + "x": 0, + "y": 6 + }, + "id": 14, + "interval": "1m", + "options": { + "legend": { + "calcs": 
[], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "maxHeight": 600, + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "8.3.3", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PC96415006F908B67" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum by(container) (irate(container_cpu_usage_seconds_total{namespace=\"monitoring\", container=~\"query.*\", container!~\"query-frontend.*\"}[$__interval])) / sum by(container) (kube_pod_container_resource_requests{namespace=\"monitoring\", container=~\"query.*\", container!~\"query-frontend.*\", resource=\"cpu\"})", + "hide": false, + "interval": "", + "legendFormat": "{{container}} REQUESTS", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PC96415006F908B67" + }, + "disableTextWrap": false, + "editorMode": "code", + "expr": "sum by(container) (irate(container_cpu_usage_seconds_total{namespace=\"monitoring\", container=~\"query.*\", container!~\"query-frontend.*\"}[$__interval])) / sum by(container) (kube_pod_container_resource_limits{namespace=\"monitoring\", container=~\"query.*\", container!~\"query-frontend.*\", resource=\"cpu\"})", + "fullMetaSearch": false, + "includeNullMetadata": true, + "interval": "", + "legendFormat": "{{container}} LIMITS", + "range": true, + "refId": "B", + "useBackend": false + } + ], + "title": "CPU Usage / Requests & Limits by container", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PC96415006F908B67" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "Percent", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 25, + "gradientMode": "opacity", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + 
"insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 2, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "area" + } + }, + "mappings": [], + "thresholds": { + "mode": "percentage", + "steps": [ + { + "color": "blue", + "value": null + }, + { + "color": "green", + "value": 20 + }, + { + "color": "yellow", + "value": 30 + }, + { + "color": "orange", + "value": 70 + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 10, + "x": 10, + "y": 6 + }, + "id": 40, + "interval": "1m", + "options": { + "legend": { + "calcs": [], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "maxHeight": 600, + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "8.3.3", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PC96415006F908B67" + }, + "editorMode": "code", + "exemplar": true, + "expr": "sum by(container) (container_memory_working_set_bytes{namespace=\"monitoring\", container=~\"query.*\", container!~\"query-frontend.*\"}) / sum by(container) (kube_pod_container_resource_requests{namespace=\"monitoring\", container=~\"query.*\", container!~\"query-frontend.*\", resource=\"memory\"})", + "hide": false, + "interval": "", + "legendFormat": "{{ container }} REQUESTS", + "range": true, + "refId": "Requests" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PC96415006F908B67" + }, + "disableTextWrap": false, + "editorMode": "code", + "exemplar": true, + "expr": "sum by(container) (container_memory_working_set_bytes{namespace=\"monitoring\", container=~\"query.*\", container!~\"query-frontend.*\"}) / sum by(container) (kube_pod_container_resource_limits{namespace=\"monitoring\", container=~\"query.*\", container!~\"query-frontend.*\", 
resource=\"memory\"})", + "fullMetaSearch": false, + "hide": false, + "includeNullMetadata": true, + "instant": false, + "interval": "", + "legendFormat": "{{ container }} LIMITS", + "range": true, + "refId": "Limits", + "useBackend": false + } + ], + "title": "Memory Usage / Requests & Limits by container", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PC96415006F908B67" + }, + "fieldConfig": { + "defaults": { + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 4, + "x": 20, + "y": 6 + }, + "id": 41, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "mean" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "text": {}, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "11.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PC96415006F908B67" + }, + "editorMode": "code", + "expr": "kube_pod_container_status_last_terminated_reason{pod!~\"thanos-query-frontend.*\", pod=~\"thanos-query-.*\"} != 0", + "interval": "", + "legendFormat": "{{reason}} - {{pod}}", + "range": true, + "refId": "A" + } + ], + "title": "Last Terminated Reason", + "type": "stat" + }, + { + "collapsed": false, + "datasource": { + "type": "prometheus", + "uid": "PC96415006F908B67" + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 14 + }, + "id": 33, + "panels": [], + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PC96415006F908B67" + }, + "refId": "A" + } + ], + "title": "Thanos / Query Frontend", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PC96415006F908B67" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + 
"axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "Percent", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 25, + "gradientMode": "opacity", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 2, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "area" + } + }, + "mappings": [], + "thresholds": { + "mode": "percentage", + "steps": [ + { + "color": "blue", + "value": null + }, + { + "color": "green", + "value": 20 + }, + { + "color": "yellow", + "value": 30 + }, + { + "color": "orange", + "value": 70 + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "percentunit" + }, + "overrides": [ + { + "matcher": { + "id": "byRegexp", + "options": "/limit/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "red", + "mode": "fixed" + } + }, + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "custom.lineWidth", + "value": 1 + }, + { + "id": "custom.lineStyle", + "value": { + "fill": "solid" + } + } + ] + } + ] + }, + "gridPos": { + "h": 8, + "w": 10, + "x": 0, + "y": 15 + }, + "id": 43, + "interval": "1m", + "options": { + "legend": { + "calcs": [], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "maxHeight": 600, + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "8.3.3", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PC96415006F908B67" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum by(container) (irate(container_cpu_usage_seconds_total{namespace=\"monitoring\", container=~\"query-frontend.*\"}[$__interval])) / sum by(container) (kube_pod_container_resource_requests{namespace=\"monitoring\", 
container=~\"query-frontend.*\", resource=\"cpu\"})", + "hide": false, + "interval": "", + "legendFormat": "{{container}} REQUESTS", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PC96415006F908B67" + }, + "disableTextWrap": false, + "editorMode": "code", + "expr": "sum by(container) (irate(container_cpu_usage_seconds_total{namespace=\"monitoring\", container=~\"query-frontend.*\"}[$__interval])) / sum by(container) (kube_pod_container_resource_limits{namespace=\"monitoring\", container=~\"query-frontend.*\", resource=\"cpu\"})", + "fullMetaSearch": false, + "includeNullMetadata": true, + "interval": "", + "legendFormat": "{{container}} LIMITS", + "range": true, + "refId": "B", + "useBackend": false + } + ], + "title": "CPU Usage / Requests & Limits by container", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PC96415006F908B67" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "Percent", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 25, + "gradientMode": "opacity", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 2, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "area" + } + }, + "mappings": [], + "thresholds": { + "mode": "percentage", + "steps": [ + { + "color": "blue", + "value": null + }, + { + "color": "green", + "value": 20 + }, + { + "color": "yellow", + "value": 30 + }, + { + "color": "orange", + "value": 70 + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": 
{ + "h": 8, + "w": 10, + "x": 10, + "y": 15 + }, + "id": 42, + "interval": "1m", + "options": { + "legend": { + "calcs": [], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "maxHeight": 600, + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "8.3.3", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PC96415006F908B67" + }, + "editorMode": "code", + "exemplar": true, + "expr": "sum by(container) (container_memory_working_set_bytes{namespace=\"monitoring\", container=~\"query-frontend.*\"}) / sum by(container) (kube_pod_container_resource_requests{namespace=\"monitoring\", container=~\"query-frontend.*\", resource=\"memory\"})", + "hide": false, + "interval": "", + "legendFormat": "{{ container }} REQUESTS", + "range": true, + "refId": "Requests" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PC96415006F908B67" + }, + "disableTextWrap": false, + "editorMode": "code", + "exemplar": true, + "expr": "sum by(container) (container_memory_working_set_bytes{namespace=\"monitoring\", container=~\"query-frontend.*\"}) / sum by(container) (kube_pod_container_resource_limits{namespace=\"monitoring\", container=~\"query-frontend.*\", resource=\"memory\"})", + "fullMetaSearch": false, + "hide": false, + "includeNullMetadata": true, + "instant": false, + "interval": "", + "legendFormat": "{{ container }} LIMITS", + "range": true, + "refId": "Limits", + "useBackend": false + } + ], + "title": "Memory Usage / Requests & Limits by container", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PC96415006F908B67" + }, + "fieldConfig": { + "defaults": { + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 4, + "x": 20, + "y": 15 + }, + "id": 31, + "options": { + "colorMode": "value", + 
"graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "mean" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "text": {}, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "11.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PC96415006F908B67" + }, + "editorMode": "code", + "expr": "kube_pod_container_status_last_terminated_reason{pod=~\"thanos-query-frontend.*\"} != 0", + "interval": "", + "legendFormat": "{{reason}} - {{pod}}", + "range": true, + "refId": "A" + } + ], + "title": "Last Terminated Reason", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PC96415006F908B67" + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 23 + }, + "id": 7, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PC96415006F908B67" + }, + "refId": "A" + } + ], + "title": "Thanos / Compact", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PC96415006F908B67" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "Percent", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 25, + "gradientMode": "opacity", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 2, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "area" + } + }, + "mappings": [], + "thresholds": { + "mode": "percentage", + "steps": [ + { + "color": "blue", + "value": null + }, + { + "color": "green", + "value": 20 + }, + { + "color": "yellow", + "value": 30 + }, + { + "color": "orange", + "value": 70 + }, + 
{ + "color": "red", + "value": 80 + } + ] + }, + "unit": "percentunit" + }, + "overrides": [ + { + "matcher": { + "id": "byRegexp", + "options": "/limit/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "red", + "mode": "fixed" + } + }, + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "custom.lineWidth", + "value": 1 + }, + { + "id": "custom.lineStyle", + "value": { + "fill": "solid" + } + } + ] + } + ] + }, + "gridPos": { + "h": 8, + "w": 10, + "x": 0, + "y": 24 + }, + "id": 44, + "interval": "1m", + "options": { + "legend": { + "calcs": [], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "maxHeight": 600, + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "8.3.3", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PC96415006F908B67" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum by(container) (irate(container_cpu_usage_seconds_total{namespace=\"monitoring\", container=~\"compactor.*\"}[$__interval])) / sum by(container) (kube_pod_container_resource_requests{namespace=\"monitoring\", container=~\"compactor.*\", resource=\"cpu\"})", + "hide": false, + "interval": "", + "legendFormat": "{{container}} REQUESTS", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PC96415006F908B67" + }, + "disableTextWrap": false, + "editorMode": "code", + "expr": "sum by(container) (irate(container_cpu_usage_seconds_total{namespace=\"monitoring\", container=~\"compactor.*\"}[$__interval])) / sum by(container) (kube_pod_container_resource_limits{namespace=\"monitoring\", container=~\"compactor.*\", resource=\"cpu\"})", + "fullMetaSearch": false, + "includeNullMetadata": true, + "interval": "", + "legendFormat": "{{container}} LIMITS", + "range": true, + "refId": "B", + "useBackend": false + } + ], + "title": "CPU Usage / Requests & Limits by container", + "type": "timeseries" + }, + { + "datasource": { + 
"type": "prometheus", + "uid": "PC96415006F908B67" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "Percent", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 25, + "gradientMode": "opacity", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 2, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "area" + } + }, + "mappings": [], + "thresholds": { + "mode": "percentage", + "steps": [ + { + "color": "blue", + "value": null + }, + { + "color": "green", + "value": 20 + }, + { + "color": "yellow", + "value": 30 + }, + { + "color": "orange", + "value": 70 + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 10, + "x": 10, + "y": 24 + }, + "id": 45, + "interval": "1m", + "options": { + "legend": { + "calcs": [], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "maxHeight": 600, + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "8.3.3", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PC96415006F908B67" + }, + "editorMode": "code", + "exemplar": true, + "expr": "sum by(container) (container_memory_working_set_bytes{namespace=\"monitoring\", container=~\"compactor.*\"}) / sum by(container) (kube_pod_container_resource_requests{namespace=\"monitoring\", container=~\"compactor.*\", resource=\"memory\"})", + "hide": false, + "interval": "", + "legendFormat": "{{ container }} REQUESTS", + "range": true, + "refId": "Requests" + }, + { + "datasource": { + "type": 
"prometheus", + "uid": "PC96415006F908B67" + }, + "disableTextWrap": false, + "editorMode": "code", + "exemplar": true, + "expr": "sum by(container) (container_memory_working_set_bytes{namespace=\"monitoring\", container=~\"compactor.*\"}) / sum by(container) (kube_pod_container_resource_limits{namespace=\"monitoring\", container=~\"compactor.*\", resource=\"memory\"})", + "fullMetaSearch": false, + "hide": false, + "includeNullMetadata": true, + "instant": false, + "interval": "", + "legendFormat": "{{ container }} LIMITS", + "range": true, + "refId": "Limits", + "useBackend": false + } + ], + "title": "Memory Usage / Requests & Limits by container", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PC96415006F908B67" + }, + "fieldConfig": { + "defaults": { + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 4, + "x": 20, + "y": 24 + }, + "id": 48, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "mean" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "text": {}, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "11.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PC96415006F908B67" + }, + "editorMode": "code", + "expr": "kube_pod_container_status_last_terminated_reason{pod=~\"thanos-compactor.*\"} != 0", + "interval": "", + "legendFormat": "{{reason}} - {{pod}}", + "range": true, + "refId": "A" + } + ], + "title": "Last Terminated Reason", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PC96415006F908B67" + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 32 + }, + "id": 6, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": 
"PC96415006F908B67" + }, + "refId": "A" + } + ], + "title": "Thanos / Store", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PC96415006F908B67" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "Percent", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 25, + "gradientMode": "opacity", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 2, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "area" + } + }, + "mappings": [], + "thresholds": { + "mode": "percentage", + "steps": [ + { + "color": "blue", + "value": null + }, + { + "color": "green", + "value": 20 + }, + { + "color": "yellow", + "value": 30 + }, + { + "color": "orange", + "value": 70 + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "percentunit" + }, + "overrides": [ + { + "matcher": { + "id": "byRegexp", + "options": "/limit/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "red", + "mode": "fixed" + } + }, + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "custom.lineWidth", + "value": 1 + }, + { + "id": "custom.lineStyle", + "value": { + "fill": "solid" + } + } + ] + } + ] + }, + "gridPos": { + "h": 8, + "w": 10, + "x": 0, + "y": 33 + }, + "id": 46, + "interval": "1m", + "options": { + "legend": { + "calcs": [], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "maxHeight": 600, + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "8.3.3", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PC96415006F908B67" + }, + "editorMode": 
"code", + "exemplar": false, + "expr": "sum by(container) (irate(container_cpu_usage_seconds_total{namespace=\"monitoring\", container=~\"storegateway.*\"}[$__interval])) / sum by(container) (kube_pod_container_resource_requests{namespace=\"monitoring\", container=~\"storegateway.*\", resource=\"cpu\"})", + "hide": false, + "interval": "", + "legendFormat": "{{container}} REQUESTS", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PC96415006F908B67" + }, + "disableTextWrap": false, + "editorMode": "code", + "expr": "sum by(container) (irate(container_cpu_usage_seconds_total{namespace=\"monitoring\", container=~\"storegateway.*\"}[$__interval])) / sum by(container) (kube_pod_container_resource_limits{namespace=\"monitoring\", container=~\"storegateway.*\", resource=\"cpu\"})", + "fullMetaSearch": false, + "includeNullMetadata": true, + "interval": "", + "legendFormat": "{{container}} LIMITS", + "range": true, + "refId": "B", + "useBackend": false + } + ], + "title": "CPU Usage / Requests & Limits by container", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PC96415006F908B67" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "Percent", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 25, + "gradientMode": "opacity", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 2, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "area" + } + }, + "mappings": [], + "thresholds": { + "mode": "percentage", + "steps": [ + { + "color": "blue", + "value": null + 
}, + { + "color": "green", + "value": 20 + }, + { + "color": "yellow", + "value": 30 + }, + { + "color": "orange", + "value": 70 + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 10, + "x": 10, + "y": 33 + }, + "id": 47, + "interval": "1m", + "options": { + "legend": { + "calcs": [], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "maxHeight": 600, + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "8.3.3", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PC96415006F908B67" + }, + "editorMode": "code", + "exemplar": true, + "expr": "sum by(container) (container_memory_working_set_bytes{namespace=\"monitoring\", container=~\"storegateway.*\"}) / sum by(container) (kube_pod_container_resource_requests{namespace=\"monitoring\", container=~\"storegateway.*\", resource=\"memory\"})", + "hide": false, + "interval": "", + "legendFormat": "{{ container }} REQUESTS", + "range": true, + "refId": "Requests" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PC96415006F908B67" + }, + "disableTextWrap": false, + "editorMode": "code", + "exemplar": true, + "expr": "sum by(container) (container_memory_working_set_bytes{namespace=\"monitoring\", container=~\"storegateway.*\"}) / sum by(container) (kube_pod_container_resource_limits{namespace=\"monitoring\", container=~\"storegateway.*\", resource=\"memory\"})", + "fullMetaSearch": false, + "hide": false, + "includeNullMetadata": true, + "instant": false, + "interval": "", + "legendFormat": "{{ container }} LIMITS", + "range": true, + "refId": "Limits", + "useBackend": false + } + ], + "title": "Memory Usage / Requests & Limits by container", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PC96415006F908B67" + }, + "fieldConfig": { + "defaults": { + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { 
+ "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 4, + "x": 20, + "y": 33 + }, + "id": 49, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "mean" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "text": {}, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "11.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PC96415006F908B67" + }, + "editorMode": "code", + "expr": "kube_pod_container_status_last_terminated_reason{pod=~\"thanos-storegateway.*\"} != 0", + "interval": "", + "legendFormat": "{{reason}} - {{pod}}", + "range": true, + "refId": "A" + } + ], + "title": "Last Terminated Reason", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PC96415006F908B67" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 0, + "y": 41 + }, + "id": 37, + "options": { + "legend": { + "calcs": 
[], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "maxHeight": 600, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "8.3.3", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PC96415006F908B67" + }, + "editorMode": "code", + "expr": "sum(\n rate(\n thanos_store_index_cache_hits_total[$__rate_interval]\n )\n)\n/\nsum(\n rate(\n thanos_store_index_cache_requests_total[$__rate_interval]\n ) \n)", + "interval": "", + "legendFormat": "Hit Ratio", + "range": true, + "refId": "A" + } + ], + "title": "Cache Hit Ratio", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PC96415006F908B67" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 7, + "x": 8, + "y": 41 + }, + "id": 35, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "maxHeight": 600, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "8.3.3", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PC96415006F908B67" + }, + 
"expr": "rate(thanos_store_index_cache_hits_total[5m])", + "interval": "", + "legendFormat": "{{item_type}} - {{kubernetes_pod_name}}", + "refId": "A" + } + ], + "title": "Cache Hits", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PC96415006F908B67" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 9, + "x": 15, + "y": 41 + }, + "id": 39, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "maxHeight": 600, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "8.3.3", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PC96415006F908B67" + }, + "expr": "rate(thanos_bucket_store_cached_series_fetch_duration_seconds_count[$__rate_interval])", + "interval": "", + "legendFormat": "{{kubernetes_pod_name}}", + "refId": "A" + } + ], + "title": "bucket_store_cached_series_fetch_duration_seconds_count", + "type": "timeseries" + } + ], + "refresh": "1m", + "schemaVersion": 39, + "tags": [], + "templating": { + "list": [] + }, + "time": { + "from": "now-15m", + "to": 
"now" + }, + "timeRangeUpdatedDuringEditOrView": false, + "timepicker": { + "refresh_intervals": [ + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ] + }, + "timezone": "", + "title": "Thanos / Overview", + "uid": "xhXVFwJ7f", + "version": 3, + "weekStart": "" +} diff --git a/main.tf b/main.tf index 52e6dfe..410098a 100644 --- a/main.tf +++ b/main.tf @@ -1152,12 +1152,12 @@ resource "kubernetes_config_map" "nodegroup_dashboard" { } data = { - "nodegroup-dashboard.json" = "${file("${path.module}/grafana/dashboards/nodegroup.json")}", - "cluster-dashboard.json" = "${file("${path.module}/grafana/dashboards/k8s_view_global.json")}", - "namespace-dashboard.json" = "${file("${path.module}/grafana/dashboards/k8s_view_namespace.json")}", - "node-dashboard.json" = "${file("${path.module}/grafana/dashboards/k8s_view_nodes.json")}", - "pods-dashboard.json" = "${file("${path.module}/grafana/dashboards/k8s_view_pods.json")}" - # "karpenter_node_dashboard.json" = "${file("${path.module}/grafana/dashboards/karpenter_node_dashboard.json")}" + "nodegroup-dashboard.json" = "${file("${path.module}/grafana/dashboards/nodegroup.json")}", + "cluster-dashboard.json" = "${file("${path.module}/grafana/dashboards/k8s_view_global.json")}", + "namespace-dashboard.json" = "${file("${path.module}/grafana/dashboards/k8s_view_namespace.json")}", + "node-dashboard.json" = "${file("${path.module}/grafana/dashboards/k8s_view_nodes.json")}", + "pods-dashboard.json" = "${file("${path.module}/grafana/dashboards/k8s_view_pods.json")}", + "karpenter_node_dashboard.json" = "${file("${path.module}/grafana/dashboards/karpenter_node_dashboard.json")}" } } diff --git a/thanos.tf b/thanos.tf index 33b5115..99b71d4 100644 --- a/thanos.tf +++ b/thanos.tf @@ -52,37 +52,36 @@ resource "helm_release" "thanos" { ] } -# resource "kubernetes_config_map" "mimir-overview_dashboard" { -# count = var.thanos_enabled && var.deployment_config.grafana_enabled ? 
1 : 0 -# depends_on = [ -# helm_release.grafana_mimir -# ] -# metadata { -# name = "prometheus-operator-kube-p-mimir-overview-dashboard" -# namespace = var.pgl_namespace -# labels = { -# "grafana_dashboard" : "1" -# "app" : "kube-prometheus-stack-grafana" -# "chart" : "kube-prometheus-stack-61.1.0" -# "release" : "prometheus-operator" -# } -# annotations = { -# "grafana_folder" : "Mimir" -# } -# } - -# data = { -# "mimir-overview-dashboard.json" = "${file("${path.module}/grafana/dashboards/mimir-overview.json")}" -# } -# } +resource "kubernetes_config_map" "thanos-overview_dashboard" { + count = var.thanos_enabled && var.deployment_config.grafana_enabled ? 1 : 0 + depends_on = [ + helm_release.thanos + ] + metadata { + name = "prometheus-operator-kube-p-thanos-overview-dashboard" + namespace = var.pgl_namespace + labels = { + "grafana_dashboard" : "1" + "app" : "kube-prometheus-stack-grafana" + "chart" : "kube-prometheus-stack-61.1.0" + "release" : "prometheus-operator" + } + annotations = { + "grafana_folder" : "thanos" + } + } + data = { + "thanos-overview-dashboard.json" = "${file("${path.module}/grafana/dashboards/thanos-overview.json")}" + } +} -# resource "kubernetes_config_map" "mimir-compactor_dashboard" { +# resource "kubernetes_config_map" "thanos-compactor_dashboard" { # count = var.thanos_enabled && var.deployment_config.grafana_enabled ? 
1 : 0 # depends_on = [ -# helm_release.grafana_mimir +# helm_release.thanos # ] # metadata { -# name = "prometheus-operator-kube-p-mimir-compactor-dashboard" +# name = "prometheus-operator-kube-p-thanos-compactor-dashboard" # namespace = var.pgl_namespace # labels = { # "grafana_dashboard" : "1" @@ -91,22 +90,22 @@ resource "helm_release" "thanos" { # "release" : "prometheus-operator" # } # annotations = { -# "grafana_folder" : "Mimir" +# "grafana_folder" : "thanos" # } # } # data = { -# "mimir-compactor-dashboard.json" = "${file("${path.module}/grafana/dashboards/mimir-compactor.json")}" +# "thanos-compactor-dashboard.json" = "${file("${path.module}/grafana/dashboards/mimir-compactor.json")}" # } # } -# resource "kubernetes_config_map" "mimir-object-store_dashboard" { +# resource "kubernetes_config_map" "thanos-object-store_dashboard" { # count = var.thanos_enabled && var.deployment_config.grafana_enabled ? 1 : 0 # depends_on = [ -# helm_release.grafana_mimir +# helm_release.thanos # ] # metadata { -# name = "prometheus-operator-kube-p-mimir-object-store-dashboard" +# name = "prometheus-operator-kube-p-thanos-object-store-dashboard" # namespace = var.pgl_namespace # labels = { # "grafana_dashboard" : "1" @@ -115,22 +114,22 @@ resource "helm_release" "thanos" { # "release" : "prometheus-operator" # } # annotations = { -# "grafana_folder" : "Mimir" +# "grafana_folder" : "thanos" # } # } # data = { -# "mimir-object-store-dashboard.json" = "${file("${path.module}/grafana/dashboards/mimir-object-store.json")}" +# "thanos-object-store-dashboard.json" = "${file("${path.module}/grafana/dashboards/mimir-object-store.json")}" # } # } -# resource "kubernetes_config_map" "mimir-queries_dashboard" { +# resource "kubernetes_config_map" "thanos-queries_dashboard" { # count = var.thanos_enabled && var.deployment_config.grafana_enabled ? 
1 : 0 # depends_on = [ -# helm_release.grafana_mimir +# helm_release.thanos # ] # metadata { -# name = "prometheus-operator-kube-p-mimir-queries-dashboard" +# name = "prometheus-operator-kube-p-thanos-queries-dashboard" # namespace = var.pgl_namespace # labels = { # "grafana_dashboard" : "1" @@ -139,22 +138,22 @@ resource "helm_release" "thanos" { # "release" : "prometheus-operator" # } # annotations = { -# "grafana_folder" : "Mimir" +# "grafana_folder" : "thanos" # } # } # data = { -# "mimir-queries-dashboard.json" = "${file("${path.module}/grafana/dashboards/mimir-queries.json")}" +# "thanos-queries-dashboard.json" = "${file("${path.module}/grafana/dashboards/mimir-queries.json")}" # } # } -# resource "kubernetes_config_map" "mimir-writes-resources_dashboard" { +# resource "kubernetes_config_map" "thanos-writes-resources_dashboard" { # count = var.thanos_enabled && var.deployment_config.grafana_enabled ? 1 : 0 # depends_on = [ -# helm_release.grafana_mimir +# helm_release.thanos # ] # metadata { -# name = "prometheus-operator-kube-p-mimir-writes-resources-dashboard" +# name = "prometheus-operator-kube-p-thanos-writes-resources-dashboard" # namespace = var.pgl_namespace # labels = { # "grafana_dashboard" : "1" @@ -163,22 +162,22 @@ resource "helm_release" "thanos" { # "release" : "prometheus-operator" # } # annotations = { -# "grafana_folder" : "Mimir" +# "grafana_folder" : "thanos" # } # } # data = { -# "mimir-writes-resources-dashboard.json" = "${file("${path.module}/grafana/dashboards/mimir-writes-resources.json")}" +# "thanos-writes-resources-dashboard.json" = "${file("${path.module}/grafana/dashboards/mimir-writes-resources.json")}" # } # } -# resource "kubernetes_config_map" "mimir-writes_dashboard" { +# resource "kubernetes_config_map" "thanos-writes_dashboard" { # count = var.thanos_enabled && var.deployment_config.grafana_enabled ? 
1 : 0 # depends_on = [ -# helm_release.grafana_mimir +# helm_release.thanos # ] # metadata { -# name = "prometheus-operator-kube-p-mimir-writes-dashboard" +# name = "prometheus-operator-kube-p-thanos-writes-dashboard" # namespace = var.pgl_namespace # labels = { # "grafana_dashboard" : "1" @@ -187,22 +186,22 @@ resource "helm_release" "thanos" { # "release" : "prometheus-operator" # } # annotations = { -# "grafana_folder" : "Mimir" +# "grafana_folder" : "thanos" # } # } # data = { -# "mimir-writes-dashboard.json" = "${file("${path.module}/grafana/dashboards/mimir-writes.json")}" +# "thanos-writes-dashboard.json" = "${file("${path.module}/grafana/dashboards/mimir-writes.json")}" # } # } -# resource "kubernetes_config_map" "mimir-reads_dashboard" { +# resource "kubernetes_config_map" "thanos-reads_dashboard" { # count = var.thanos_enabled && var.deployment_config.grafana_enabled ? 1 : 0 # depends_on = [ -# helm_release.grafana_mimir +# helm_release.thanos # ] # metadata { -# name = "prometheus-operator-kube-p-mimir-reads-dashboard" +# name = "prometheus-operator-kube-p-thanos-reads-dashboard" # namespace = var.pgl_namespace # labels = { # "grafana_dashboard" : "1" @@ -211,22 +210,22 @@ resource "helm_release" "thanos" { # "release" : "prometheus-operator" # } # annotations = { -# "grafana_folder" : "Mimir" +# "grafana_folder" : "thanos" # } # } # data = { -# "mimir-reads-dashboard.json" = "${file("${path.module}/grafana/dashboards/mimir-reads.json")}" +# "thanos-reads-dashboard.json" = "${file("${path.module}/grafana/dashboards/mimir-reads.json")}" # } # } -# resource "kubernetes_config_map" "mimir-reads-resources_dashboard" { +# resource "kubernetes_config_map" "thanos-reads-resources_dashboard" { # count = var.thanos_enabled && var.deployment_config.grafana_enabled ? 
1 : 0 # depends_on = [ -# helm_release.grafana_mimir +# helm_release.thanos # ] # metadata { -# name = "prometheus-operator-kube-p-mimir-reads-resources-dashboard" +# name = "prometheus-operator-kube-p-thanos-reads-resources-dashboard" # namespace = var.pgl_namespace # labels = { # "grafana_dashboard" : "1" @@ -235,11 +234,11 @@ resource "helm_release" "thanos" { # "release" : "prometheus-operator" # } # annotations = { -# "grafana_folder" : "Mimir" +# "grafana_folder" : "thanos" # } # } # data = { -# "mimir-reads-resources-dashboard.json" = "${file("${path.module}/grafana/dashboards/mimir-reads-resources.json")}" +# "thanos-reads-resources-dashboard.json" = "${file("${path.module}/grafana/dashboards/mimir-reads-resources.json")}" # } # }