PromQL Expression Reference

The PromQL expressions in this doc can be used to configure alerts.

For more information about querying the Prometheus time series database, refer to the official Prometheus documentation.

Cluster Metrics

Cluster CPU Utilization

Catalog: Expression
- Detail: `1 - (avg(irate(node_cpu_seconds_total{mode="idle"}[5m])) by (instance))`
- Summary: `1 - (avg(irate(node_cpu_seconds_total{mode="idle"}[5m])))`

Cluster Load Average

Catalog: Expression
- Detail:
  - load1: `sum(node_load1) by (instance) / count(node_cpu_seconds_total{mode="system"}) by (instance)`
  - load5: `sum(node_load5) by (instance) / count(node_cpu_seconds_total{mode="system"}) by (instance)`
  - load15: `sum(node_load15) by (instance) / count(node_cpu_seconds_total{mode="system"}) by (instance)`
- Summary:
  - load1: `sum(node_load1) by (instance) / count(node_cpu_seconds_total{mode="system"})`
  - load5: `sum(node_load5) by (instance) / count(node_cpu_seconds_total{mode="system"})`
  - load15: `sum(node_load15) by (instance) / count(node_cpu_seconds_total{mode="system"})`

Cluster Memory Utilization

Catalog: Expression
- Detail: `1 - sum(node_memory_MemAvailable_bytes) by (instance) / sum(node_memory_MemTotal_bytes) by (instance)`
- Summary: `1 - sum(node_memory_MemAvailable_bytes) / sum(node_memory_MemTotal_bytes)`

Cluster Disk Utilization

Catalog: Expression
- Detail: `(sum(node_filesystem_size_bytes{device!="rootfs"}) by (instance) - sum(node_filesystem_free_bytes{device!="rootfs"}) by (instance)) / sum(node_filesystem_size_bytes{device!="rootfs"}) by (instance)`
- Summary: `(sum(node_filesystem_size_bytes{device!="rootfs"}) - sum(node_filesystem_free_bytes{device!="rootfs"})) / sum(node_filesystem_size_bytes{device!="rootfs"})`

Cluster Disk I/O

Catalog: Expression
- Detail:
  - read: `sum(rate(node_disk_read_bytes_total[5m])) by (instance)`
  - written: `sum(rate(node_disk_written_bytes_total[5m])) by (instance)`
- Summary:
  - read: `sum(rate(node_disk_read_bytes_total[5m]))`
  - written: `sum(rate(node_disk_written_bytes_total[5m]))`

Cluster Network Packets

Catalog: Expression
- Detail:
  - receive-dropped: `sum(rate(node_network_receive_drop_total{device!~"lo|veth.*|docker.*|flannel.*|cali.*|cbr.*"}[5m])) by (instance)`
  - receive-errs: `sum(rate(node_network_receive_errs_total{device!~"lo|veth.*|docker.*|flannel.*|cali.*|cbr.*"}[5m])) by (instance)`
  - receive-packets: `sum(rate(node_network_receive_packets_total{device!~"lo|veth.*|docker.*|flannel.*|cali.*|cbr.*"}[5m])) by (instance)`
  - transmit-dropped: `sum(rate(node_network_transmit_drop_total{device!~"lo|veth.*|docker.*|flannel.*|cali.*|cbr.*"}[5m])) by (instance)`
  - transmit-errs: `sum(rate(node_network_transmit_errs_total{device!~"lo|veth.*|docker.*|flannel.*|cali.*|cbr.*"}[5m])) by (instance)`
  - transmit-packets: `sum(rate(node_network_transmit_packets_total{device!~"lo|veth.*|docker.*|flannel.*|cali.*|cbr.*"}[5m])) by (instance)`
- Summary:
  - receive-dropped: `sum(rate(node_network_receive_drop_total{device!~"lo|veth.*|docker.*|flannel.*|cali.*|cbr.*"}[5m]))`
  - receive-errs: `sum(rate(node_network_receive_errs_total{device!~"lo|veth.*|docker.*|flannel.*|cali.*|cbr.*"}[5m]))`
  - receive-packets: `sum(rate(node_network_receive_packets_total{device!~"lo|veth.*|docker.*|flannel.*|cali.*|cbr.*"}[5m]))`
  - transmit-dropped: `sum(rate(node_network_transmit_drop_total{device!~"lo|veth.*|docker.*|flannel.*|cali.*|cbr.*"}[5m]))`
  - transmit-errs: `sum(rate(node_network_transmit_errs_total{device!~"lo|veth.*|docker.*|flannel.*|cali.*|cbr.*"}[5m]))`
  - transmit-packets: `sum(rate(node_network_transmit_packets_total{device!~"lo|veth.*|docker.*|flannel.*|cali.*|cbr.*"}[5m]))`

Cluster Network I/O

Catalog: Expression
- Detail:
  - receive: `sum(rate(node_network_receive_bytes_total{device!~"lo|veth.*|docker.*|flannel.*|cali.*|cbr.*"}[5m])) by (instance)`
  - transmit: `sum(rate(node_network_transmit_bytes_total{device!~"lo|veth.*|docker.*|flannel.*|cali.*|cbr.*"}[5m])) by (instance)`
- Summary:
  - receive: `sum(rate(node_network_receive_bytes_total{device!~"lo|veth.*|docker.*|flannel.*|cali.*|cbr.*"}[5m]))`
  - transmit: `sum(rate(node_network_transmit_bytes_total{device!~"lo|veth.*|docker.*|flannel.*|cali.*|cbr.*"}[5m]))`

Node Metrics

Node CPU Utilization

Catalog: Expression
- Detail: `avg(irate(node_cpu_seconds_total{mode!="idle", instance=~"$instance"}[5m])) by (mode)`
- Summary: `1 - (avg(irate(node_cpu_seconds_total{mode="idle", instance=~"$instance"}[5m])))`

Node Load Average

Catalog: Expression
- Detail:
  - load1: `sum(node_load1{instance=~"$instance"}) / count(node_cpu_seconds_total{mode="system",instance=~"$instance"})`
  - load5: `sum(node_load5{instance=~"$instance"}) / count(node_cpu_seconds_total{mode="system",instance=~"$instance"})`
  - load15: `sum(node_load15{instance=~"$instance"}) / count(node_cpu_seconds_total{mode="system",instance=~"$instance"})`
- Summary:
  - load1: `sum(node_load1{instance=~"$instance"}) / count(node_cpu_seconds_total{mode="system",instance=~"$instance"})`
  - load5: `sum(node_load5{instance=~"$instance"}) / count(node_cpu_seconds_total{mode="system",instance=~"$instance"})`
  - load15: `sum(node_load15{instance=~"$instance"}) / count(node_cpu_seconds_total{mode="system",instance=~"$instance"})`

Node Memory Utilization

Catalog: Expression
- Detail: `1 - sum(node_memory_MemAvailable_bytes{instance=~"$instance"}) / sum(node_memory_MemTotal_bytes{instance=~"$instance"})`
- Summary: `1 - sum(node_memory_MemAvailable_bytes{instance=~"$instance"}) / sum(node_memory_MemTotal_bytes{instance=~"$instance"})`

Node Disk Utilization

Catalog: Expression
- Detail: `(sum(node_filesystem_size_bytes{device!="rootfs",instance=~"$instance"}) by (device) - sum(node_filesystem_free_bytes{device!="rootfs",instance=~"$instance"}) by (device)) / sum(node_filesystem_size_bytes{device!="rootfs",instance=~"$instance"}) by (device)`
- Summary: `(sum(node_filesystem_size_bytes{device!="rootfs",instance=~"$instance"}) - sum(node_filesystem_free_bytes{device!="rootfs",instance=~"$instance"})) / sum(node_filesystem_size_bytes{device!="rootfs",instance=~"$instance"})`

Node Disk I/O

Catalog: Expression
- Detail:
  - read: `sum(rate(node_disk_read_bytes_total{instance=~"$instance"}[5m]))`
  - written: `sum(rate(node_disk_written_bytes_total{instance=~"$instance"}[5m]))`
- Summary:
  - read: `sum(rate(node_disk_read_bytes_total{instance=~"$instance"}[5m]))`
  - written: `sum(rate(node_disk_written_bytes_total{instance=~"$instance"}[5m]))`

Node Network Packets

Catalog: Expression
- Detail:
  - receive-dropped: `sum(rate(node_network_receive_drop_total{device!~"lo|veth.*|docker.*|flannel.*|cali.*|cbr.*",instance=~"$instance"}[5m])) by (device)`
  - receive-errs: `sum(rate(node_network_receive_errs_total{device!~"lo|veth.*|docker.*|flannel.*|cali.*|cbr.*",instance=~"$instance"}[5m])) by (device)`
  - receive-packets: `sum(rate(node_network_receive_packets_total{device!~"lo|veth.*|docker.*|flannel.*|cali.*|cbr.*",instance=~"$instance"}[5m])) by (device)`
  - transmit-dropped: `sum(rate(node_network_transmit_drop_total{device!~"lo|veth.*|docker.*|flannel.*|cali.*|cbr.*",instance=~"$instance"}[5m])) by (device)`
  - transmit-errs: `sum(rate(node_network_transmit_errs_total{device!~"lo|veth.*|docker.*|flannel.*|cali.*|cbr.*",instance=~"$instance"}[5m])) by (device)`
  - transmit-packets: `sum(rate(node_network_transmit_packets_total{device!~"lo|veth.*|docker.*|flannel.*|cali.*|cbr.*",instance=~"$instance"}[5m])) by (device)`
- Summary:
  - receive-dropped: `sum(rate(node_network_receive_drop_total{device!~"lo|veth.*|docker.*|flannel.*|cali.*|cbr.*",instance=~"$instance"}[5m]))`
  - receive-errs: `sum(rate(node_network_receive_errs_total{device!~"lo|veth.*|docker.*|flannel.*|cali.*|cbr.*",instance=~"$instance"}[5m]))`
  - receive-packets: `sum(rate(node_network_receive_packets_total{device!~"lo|veth.*|docker.*|flannel.*|cali.*|cbr.*",instance=~"$instance"}[5m]))`
  - transmit-dropped: `sum(rate(node_network_transmit_drop_total{device!~"lo|veth.*|docker.*|flannel.*|cali.*|cbr.*",instance=~"$instance"}[5m]))`
  - transmit-errs: `sum(rate(node_network_transmit_errs_total{device!~"lo|veth.*|docker.*|flannel.*|cali.*|cbr.*",instance=~"$instance"}[5m]))`
  - transmit-packets: `sum(rate(node_network_transmit_packets_total{device!~"lo|veth.*|docker.*|flannel.*|cali.*|cbr.*",instance=~"$instance"}[5m]))`

Node Network I/O

Catalog: Expression
- Detail:
  - receive: `sum(rate(node_network_receive_bytes_total{device!~"lo|veth.*|docker.*|flannel.*|cali.*|cbr.*",instance=~"$instance"}[5m])) by (device)`
  - transmit: `sum(rate(node_network_transmit_bytes_total{device!~"lo|veth.*|docker.*|flannel.*|cali.*|cbr.*",instance=~"$instance"}[5m])) by (device)`
- Summary:
  - receive: `sum(rate(node_network_receive_bytes_total{device!~"lo|veth.*|docker.*|flannel.*|cali.*|cbr.*",instance=~"$instance"}[5m]))`
  - transmit: `sum(rate(node_network_transmit_bytes_total{device!~"lo|veth.*|docker.*|flannel.*|cali.*|cbr.*",instance=~"$instance"}[5m]))`

Etcd Metrics

Etcd Has a Leader

max(etcd_server_has_leader)

Number of Times the Leader Changes

max(etcd_server_leader_changes_seen_total)

Number of Failed Proposals

sum(etcd_server_proposals_failed_total)

GRPC Client Traffic

Catalog: Expression
- Detail:
  - in: `sum(rate(etcd_network_client_grpc_received_bytes_total[5m])) by (instance)`
  - out: `sum(rate(etcd_network_client_grpc_sent_bytes_total[5m])) by (instance)`
- Summary:
  - in: `sum(rate(etcd_network_client_grpc_received_bytes_total[5m]))`
  - out: `sum(rate(etcd_network_client_grpc_sent_bytes_total[5m]))`

Peer Traffic

Catalog: Expression
- Detail:
  - in: `sum(rate(etcd_network_peer_received_bytes_total[5m])) by (instance)`
  - out: `sum(rate(etcd_network_peer_sent_bytes_total[5m])) by (instance)`
- Summary:
  - in: `sum(rate(etcd_network_peer_received_bytes_total[5m]))`
  - out: `sum(rate(etcd_network_peer_sent_bytes_total[5m]))`

DB Size

Catalog: Expression
- Detail: `sum(etcd_debugging_mvcc_db_total_size_in_bytes) by (instance)`
- Summary: `sum(etcd_debugging_mvcc_db_total_size_in_bytes)`

Active Streams

Catalog: Expression
- Detail:
  - lease-watch: `sum(grpc_server_started_total{grpc_service="etcdserverpb.Lease",grpc_type="bidi_stream"}) by (instance) - sum(grpc_server_handled_total{grpc_service="etcdserverpb.Lease",grpc_type="bidi_stream"}) by (instance)`
  - watch: `sum(grpc_server_started_total{grpc_service="etcdserverpb.Watch",grpc_type="bidi_stream"}) by (instance) - sum(grpc_server_handled_total{grpc_service="etcdserverpb.Watch",grpc_type="bidi_stream"}) by (instance)`
- Summary:
  - lease-watch: `sum(grpc_server_started_total{grpc_service="etcdserverpb.Lease",grpc_type="bidi_stream"}) - sum(grpc_server_handled_total{grpc_service="etcdserverpb.Lease",grpc_type="bidi_stream"})`
  - watch: `sum(grpc_server_started_total{grpc_service="etcdserverpb.Watch",grpc_type="bidi_stream"}) - sum(grpc_server_handled_total{grpc_service="etcdserverpb.Watch",grpc_type="bidi_stream"})`

Raft Proposals

Catalog: Expression
- Detail:
  - applied: `sum(increase(etcd_server_proposals_applied_total[5m])) by (instance)`
  - committed: `sum(increase(etcd_server_proposals_committed_total[5m])) by (instance)`
  - pending: `sum(increase(etcd_server_proposals_pending[5m])) by (instance)`
  - failed: `sum(increase(etcd_server_proposals_failed_total[5m])) by (instance)`
- Summary:
  - applied: `sum(increase(etcd_server_proposals_applied_total[5m]))`
  - committed: `sum(increase(etcd_server_proposals_committed_total[5m]))`
  - pending: `sum(increase(etcd_server_proposals_pending[5m]))`
  - failed: `sum(increase(etcd_server_proposals_failed_total[5m]))`

RPC Rate

Catalog: Expression
- Detail:
  - total: `sum(rate(grpc_server_started_total{grpc_type="unary"}[5m])) by (instance)`
  - fail: `sum(rate(grpc_server_handled_total{grpc_type="unary",grpc_code!="OK"}[5m])) by (instance)`
- Summary:
  - total: `sum(rate(grpc_server_started_total{grpc_type="unary"}[5m]))`
  - fail: `sum(rate(grpc_server_handled_total{grpc_type="unary",grpc_code!="OK"}[5m]))`

Disk Operations

Catalog: Expression
- Detail:
  - commit-called-by-backend: `sum(rate(etcd_disk_backend_commit_duration_seconds_sum[1m])) by (instance)`
  - fsync-called-by-wal: `sum(rate(etcd_disk_wal_fsync_duration_seconds_sum[1m])) by (instance)`
- Summary:
  - commit-called-by-backend: `sum(rate(etcd_disk_backend_commit_duration_seconds_sum[1m]))`
  - fsync-called-by-wal: `sum(rate(etcd_disk_wal_fsync_duration_seconds_sum[1m]))`

Disk Sync Duration

Catalog: Expression
- Detail:
  - wal: `histogram_quantile(0.99, sum(rate(etcd_disk_wal_fsync_duration_seconds_bucket[5m])) by (instance, le))`
  - db: `histogram_quantile(0.99, sum(rate(etcd_disk_backend_commit_duration_seconds_bucket[5m])) by (instance, le))`
- Summary:
  - wal: `sum(histogram_quantile(0.99, sum(rate(etcd_disk_wal_fsync_duration_seconds_bucket[5m])) by (instance, le)))`
  - db: `sum(histogram_quantile(0.99, sum(rate(etcd_disk_backend_commit_duration_seconds_bucket[5m])) by (instance, le)))`

Kubernetes Components Metrics

API Server Request Latency

Catalog: Expression
- Detail: `avg(apiserver_request_latencies_sum / apiserver_request_latencies_count) by (instance, verb) / 1e+06`
- Summary: `avg(apiserver_request_latencies_sum / apiserver_request_latencies_count) by (instance) / 1e+06`

API Server Request Rate

Catalog: Expression
- Detail: `sum(rate(apiserver_request_count[5m])) by (instance, code)`
- Summary: `sum(rate(apiserver_request_count[5m])) by (instance)`

Scheduling Failed Pods

Catalog: Expression
- Detail: `sum(kube_pod_status_scheduled{condition="false"})`
- Summary: `sum(kube_pod_status_scheduled{condition="false"})`

Controller Manager Queue Depth

Catalog: Expression
- Detail:
  - volumes: `sum(volumes_depth) by (instance)`
  - deployment: `sum(deployment_depth) by (instance)`
  - replicaset: `sum(replicaset_depth) by (instance)`
  - service: `sum(service_depth) by (instance)`
  - serviceaccount: `sum(serviceaccount_depth) by (instance)`
  - endpoint: `sum(endpoint_depth) by (instance)`
  - daemonset: `sum(daemonset_depth) by (instance)`
  - statefulset: `sum(statefulset_depth) by (instance)`
  - replicationmanager: `sum(replicationmanager_depth) by (instance)`
- Summary:
  - volumes: `sum(volumes_depth)`
  - deployment: `sum(deployment_depth)`
  - replicaset: `sum(replicaset_depth)`
  - service: `sum(service_depth)`
  - serviceaccount: `sum(serviceaccount_depth)`
  - endpoint: `sum(endpoint_depth)`
  - daemonset: `sum(daemonset_depth)`
  - statefulset: `sum(statefulset_depth)`
  - replicationmanager: `sum(replicationmanager_depth)`

Scheduler E2E Scheduling Latency

Catalog: Expression
- Detail: `histogram_quantile(0.99, sum(scheduler_e2e_scheduling_latency_microseconds_bucket) by (le, instance)) / 1e+06`
- Summary: `sum(histogram_quantile(0.99, sum(scheduler_e2e_scheduling_latency_microseconds_bucket) by (le, instance)) / 1e+06)`

Scheduler Preemption Attempts

Catalog: Expression
- Detail: `sum(rate(scheduler_total_preemption_attempts[5m])) by (instance)`
- Summary: `sum(rate(scheduler_total_preemption_attempts[5m]))`

Ingress Controller Connections

Catalog: Expression
- Detail:
  - reading: `sum(nginx_ingress_controller_nginx_process_connections{state="reading"}) by (instance)`
  - waiting: `sum(nginx_ingress_controller_nginx_process_connections{state="waiting"}) by (instance)`
  - writing: `sum(nginx_ingress_controller_nginx_process_connections{state="writing"}) by (instance)`
  - accepted: `sum(ceil(increase(nginx_ingress_controller_nginx_process_connections_total{state="accepted"}[5m]))) by (instance)`
  - active: `sum(ceil(increase(nginx_ingress_controller_nginx_process_connections_total{state="active"}[5m]))) by (instance)`
  - handled: `sum(ceil(increase(nginx_ingress_controller_nginx_process_connections_total{state="handled"}[5m]))) by (instance)`
- Summary:
  - reading: `sum(nginx_ingress_controller_nginx_process_connections{state="reading"})`
  - waiting: `sum(nginx_ingress_controller_nginx_process_connections{state="waiting"})`
  - writing: `sum(nginx_ingress_controller_nginx_process_connections{state="writing"})`
  - accepted: `sum(ceil(increase(nginx_ingress_controller_nginx_process_connections_total{state="accepted"}[5m])))`
  - active: `sum(ceil(increase(nginx_ingress_controller_nginx_process_connections_total{state="active"}[5m])))`
  - handled: `sum(ceil(increase(nginx_ingress_controller_nginx_process_connections_total{state="handled"}[5m])))`

Ingress Controller Request Process Time

Catalog: Expression
- Detail: `topk(10, histogram_quantile(0.95, sum by (le, host, path)(rate(nginx_ingress_controller_request_duration_seconds_bucket{host!=""}[5m]))))`
- Summary: `topk(10, histogram_quantile(0.95, sum by (le, host)(rate(nginx_ingress_controller_request_duration_seconds_bucket{host!=""}[5m]))))`

Rancher Logging Metrics

Fluentd Buffer Queue Rate

Catalog: Expression
- Detail: `sum(rate(fluentd_output_status_buffer_queue_length[5m])) by (instance)`
- Summary: `sum(rate(fluentd_output_status_buffer_queue_length[5m]))`

Fluentd Input Rate

Catalog: Expression
- Detail: `sum(rate(fluentd_input_status_num_records_total[5m])) by (instance)`
- Summary: `sum(rate(fluentd_input_status_num_records_total[5m]))`

Fluentd Output Errors Rate

Catalog: Expression
- Detail: `sum(rate(fluentd_output_status_num_errors[5m])) by (type)`
- Summary: `sum(rate(fluentd_output_status_num_errors[5m]))`

Fluentd Output Rate

Catalog: Expression
- Detail: `sum(rate(fluentd_output_status_num_records_total[5m])) by (instance)`
- Summary: `sum(rate(fluentd_output_status_num_records_total[5m]))`

Workload Metrics

Workload CPU Utilization

Catalog: Expression
- Detail:
  - cfs throttled seconds: `sum(rate(container_cpu_cfs_throttled_seconds_total{namespace="$namespace",pod_name=~"$podName",container_name!=""}[5m])) by (pod_name)`
  - user seconds: `sum(rate(container_cpu_user_seconds_total{namespace="$namespace",pod_name=~"$podName",container_name!=""}[5m])) by (pod_name)`
  - system seconds: `sum(rate(container_cpu_system_seconds_total{namespace="$namespace",pod_name=~"$podName",container_name!=""}[5m])) by (pod_name)`
  - usage seconds: `sum(rate(container_cpu_usage_seconds_total{namespace="$namespace",pod_name=~"$podName",container_name!=""}[5m])) by (pod_name)`
- Summary:
  - cfs throttled seconds: `sum(rate(container_cpu_cfs_throttled_seconds_total{namespace="$namespace",pod_name=~"$podName",container_name!=""}[5m]))`
  - user seconds: `sum(rate(container_cpu_user_seconds_total{namespace="$namespace",pod_name=~"$podName",container_name!=""}[5m]))`
  - system seconds: `sum(rate(container_cpu_system_seconds_total{namespace="$namespace",pod_name=~"$podName",container_name!=""}[5m]))`
  - usage seconds: `sum(rate(container_cpu_usage_seconds_total{namespace="$namespace",pod_name=~"$podName",container_name!=""}[5m]))`

Workload Memory Utilization

Catalog: Expression
- Detail: `sum(container_memory_working_set_bytes{namespace="$namespace",pod_name=~"$podName", container_name!=""}) by (pod_name)`
- Summary: `sum(container_memory_working_set_bytes{namespace="$namespace",pod_name=~"$podName", container_name!=""})`

Workload Network Packets

Catalog: Expression
- Detail:
  - receive-packets: `sum(rate(container_network_receive_packets_total{namespace="$namespace",pod_name=~"$podName",container_name!=""}[5m])) by (pod_name)`
  - receive-dropped: `sum(rate(container_network_receive_packets_dropped_total{namespace="$namespace",pod_name=~"$podName",container_name!=""}[5m])) by (pod_name)`
  - receive-errors: `sum(rate(container_network_receive_errors_total{namespace="$namespace",pod_name=~"$podName",container_name!=""}[5m])) by (pod_name)`
  - transmit-packets: `sum(rate(container_network_transmit_packets_total{namespace="$namespace",pod_name=~"$podName",container_name!=""}[5m])) by (pod_name)`
  - transmit-dropped: `sum(rate(container_network_transmit_packets_dropped_total{namespace="$namespace",pod_name=~"$podName",container_name!=""}[5m])) by (pod_name)`
  - transmit-errors: `sum(rate(container_network_transmit_errors_total{namespace="$namespace",pod_name=~"$podName",container_name!=""}[5m])) by (pod_name)`
- Summary:
  - receive-packets: `sum(rate(container_network_receive_packets_total{namespace="$namespace",pod_name=~"$podName",container_name!=""}[5m]))`
  - receive-dropped: `sum(rate(container_network_receive_packets_dropped_total{namespace="$namespace",pod_name=~"$podName",container_name!=""}[5m]))`
  - receive-errors: `sum(rate(container_network_receive_errors_total{namespace="$namespace",pod_name=~"$podName",container_name!=""}[5m]))`
  - transmit-packets: `sum(rate(container_network_transmit_packets_total{namespace="$namespace",pod_name=~"$podName",container_name!=""}[5m]))`
  - transmit-dropped: `sum(rate(container_network_transmit_packets_dropped_total{namespace="$namespace",pod_name=~"$podName",container_name!=""}[5m]))`
  - transmit-errors: `sum(rate(container_network_transmit_errors_total{namespace="$namespace",pod_name=~"$podName",container_name!=""}[5m]))`

Workload Network I/O

Catalog: Expression
- Detail:
  - receive: `sum(rate(container_network_receive_bytes_total{namespace="$namespace",pod_name=~"$podName",container_name!=""}[5m])) by (pod_name)`
  - transmit: `sum(rate(container_network_transmit_bytes_total{namespace="$namespace",pod_name=~"$podName",container_name!=""}[5m])) by (pod_name)`
- Summary:
  - receive: `sum(rate(container_network_receive_bytes_total{namespace="$namespace",pod_name=~"$podName",container_name!=""}[5m]))`
  - transmit: `sum(rate(container_network_transmit_bytes_total{namespace="$namespace",pod_name=~"$podName",container_name!=""}[5m]))`

Workload Disk I/O

Catalog: Expression
- Detail:
  - read: `sum(rate(container_fs_reads_bytes_total{namespace="$namespace",pod_name=~"$podName",container_name!=""}[5m])) by (pod_name)`
  - write: `sum(rate(container_fs_writes_bytes_total{namespace="$namespace",pod_name=~"$podName",container_name!=""}[5m])) by (pod_name)`
- Summary:
  - read: `sum(rate(container_fs_reads_bytes_total{namespace="$namespace",pod_name=~"$podName",container_name!=""}[5m]))`
  - write: `sum(rate(container_fs_writes_bytes_total{namespace="$namespace",pod_name=~"$podName",container_name!=""}[5m]))`

Pod Metrics

Pod CPU Utilization

Catalog: Expression
- Detail:
  - cfs throttled seconds: `sum(rate(container_cpu_cfs_throttled_seconds_total{container_name!="POD",namespace="$namespace",pod_name="$podName", container_name!=""}[5m])) by (container_name)`
  - usage seconds: `sum(rate(container_cpu_usage_seconds_total{container_name!="POD",namespace="$namespace",pod_name="$podName", container_name!=""}[5m])) by (container_name)`
  - system seconds: `sum(rate(container_cpu_system_seconds_total{container_name!="POD",namespace="$namespace",pod_name="$podName", container_name!=""}[5m])) by (container_name)`
  - user seconds: `sum(rate(container_cpu_user_seconds_total{container_name!="POD",namespace="$namespace",pod_name="$podName", container_name!=""}[5m])) by (container_name)`
- Summary:
  - cfs throttled seconds: `sum(rate(container_cpu_cfs_throttled_seconds_total{container_name!="POD",namespace="$namespace",pod_name="$podName", container_name!=""}[5m]))`
  - usage seconds: `sum(rate(container_cpu_usage_seconds_total{container_name!="POD",namespace="$namespace",pod_name="$podName", container_name!=""}[5m]))`
  - system seconds: `sum(rate(container_cpu_system_seconds_total{container_name!="POD",namespace="$namespace",pod_name="$podName", container_name!=""}[5m]))`
  - user seconds: `sum(rate(container_cpu_user_seconds_total{container_name!="POD",namespace="$namespace",pod_name="$podName", container_name!=""}[5m]))`

Pod Memory Utilization

Catalog: Expression
- Detail: `sum(container_memory_working_set_bytes{container_name!="POD",namespace="$namespace",pod_name="$podName",container_name!=""}) by (container_name)`
- Summary: `sum(container_memory_working_set_bytes{container_name!="POD",namespace="$namespace",pod_name="$podName",container_name!=""})`

Pod Network Packets

Catalog: Expression
- Detail:
  - receive-packets: `sum(rate(container_network_receive_packets_total{namespace="$namespace",pod_name="$podName",container_name!=""}[5m]))`
  - receive-dropped: `sum(rate(container_network_receive_packets_dropped_total{namespace="$namespace",pod_name="$podName",container_name!=""}[5m]))`
  - receive-errors: `sum(rate(container_network_receive_errors_total{namespace="$namespace",pod_name="$podName",container_name!=""}[5m]))`
  - transmit-packets: `sum(rate(container_network_transmit_packets_total{namespace="$namespace",pod_name="$podName",container_name!=""}[5m]))`
  - transmit-dropped: `sum(rate(container_network_transmit_packets_dropped_total{namespace="$namespace",pod_name="$podName",container_name!=""}[5m]))`
  - transmit-errors: `sum(rate(container_network_transmit_errors_total{namespace="$namespace",pod_name="$podName",container_name!=""}[5m]))`
- Summary:
  - receive-packets: `sum(rate(container_network_receive_packets_total{namespace="$namespace",pod_name="$podName",container_name!=""}[5m]))`
  - receive-dropped: `sum(rate(container_network_receive_packets_dropped_total{namespace="$namespace",pod_name="$podName",container_name!=""}[5m]))`
  - receive-errors: `sum(rate(container_network_receive_errors_total{namespace="$namespace",pod_name="$podName",container_name!=""}[5m]))`
  - transmit-packets: `sum(rate(container_network_transmit_packets_total{namespace="$namespace",pod_name="$podName",container_name!=""}[5m]))`
  - transmit-dropped: `sum(rate(container_network_transmit_packets_dropped_total{namespace="$namespace",pod_name="$podName",container_name!=""}[5m]))`
  - transmit-errors: `sum(rate(container_network_transmit_errors_total{namespace="$namespace",pod_name="$podName",container_name!=""}[5m]))`

Pod Network I/O

Catalog: Expression
- Detail:
  - receive: `sum(rate(container_network_receive_bytes_total{namespace="$namespace",pod_name="$podName",container_name!=""}[5m]))`
  - transmit: `sum(rate(container_network_transmit_bytes_total{namespace="$namespace",pod_name="$podName",container_name!=""}[5m]))`
- Summary:
  - receive: `sum(rate(container_network_receive_bytes_total{namespace="$namespace",pod_name="$podName",container_name!=""}[5m]))`
  - transmit: `sum(rate(container_network_transmit_bytes_total{namespace="$namespace",pod_name="$podName",container_name!=""}[5m]))`

Pod Disk I/O

Catalog: Expression
- Detail:
  - read: `sum(rate(container_fs_reads_bytes_total{namespace="$namespace",pod_name="$podName",container_name!=""}[5m])) by (container_name)`
  - write: `sum(rate(container_fs_writes_bytes_total{namespace="$namespace",pod_name="$podName",container_name!=""}[5m])) by (container_name)`
- Summary:
  - read: `sum(rate(container_fs_reads_bytes_total{namespace="$namespace",pod_name="$podName",container_name!=""}[5m]))`
  - write: `sum(rate(container_fs_writes_bytes_total{namespace="$namespace",pod_name="$podName",container_name!=""}[5m]))`

Container Metrics

Container CPU Utilization

Catalog: Expression
- cfs throttled seconds: `sum(rate(container_cpu_cfs_throttled_seconds_total{namespace="$namespace",pod_name="$podName",container_name="$containerName"}[5m]))`
- usage seconds: `sum(rate(container_cpu_usage_seconds_total{namespace="$namespace",pod_name="$podName",container_name="$containerName"}[5m]))`
- system seconds: `sum(rate(container_cpu_system_seconds_total{namespace="$namespace",pod_name="$podName",container_name="$containerName"}[5m]))`
- user seconds: `sum(rate(container_cpu_user_seconds_total{namespace="$namespace",pod_name="$podName",container_name="$containerName"}[5m]))`

Container Memory Utilization

sum(container_memory_working_set_bytes{namespace="$namespace",pod_name="$podName",container_name="$containerName"})

Container Disk I/O

Catalog: Expression
- read: `sum(rate(container_fs_reads_bytes_total{namespace="$namespace",pod_name="$podName",container_name="$containerName"}[5m]))`
- write: `sum(rate(container_fs_writes_bytes_total{namespace="$namespace",pod_name="$podName",container_name="$containerName"}[5m]))`