监控MySQL

使用Telegraf监控MySQL也较为简单,会采集很多指标,但是具体应该关注哪些指标,是个难点,本章会提供一些例子,欢迎大家补充最佳实践。

Telegraf的配置为了便于管理,可以拆成多个文件,放到统一目录中,使用 --config-directory 参数指定具体的目录,举例:

  1. # telegraf 启动命令:
  2. ./usr/bin/telegraf --config ./etc/telegraf/telegraf.conf --config-directory ./etc/telegraf.d
  3. # 后面如果修改了telegraf.conf或者修改了telegraf.d下的配置,可以通过如下方式让Telegraf重新加载配置
  4. kill -HUP `pidof telegraf`

我们创建一个mysql.conf的配置放到telegraf.d下,内容如下:

  1. [[inputs.mysql]]
  2. servers = ["root:1234@tcp(localhost:3306)/?tls=false"]
  3. metric_version = 2
  4. gather_global_variables = true
  5. interval_slow = "1m"
  6. tagexclude = ["innodb_version"]

mysql采集插件的具体使用方式,参考 这里 的文档。下面笔者整理了监控大盘和告警规则,供大家参考:

大盘JSON:

  1. [
  2. {
  3. "name": "MySQL关键指标 - by Telegraf",
  4. "tags": "MySQL",
  5. "configs": "{\"var\":[{\"name\":\"ident\",\"selected\":\"10-255-0-34\",\"definition\":\"label_values(mysql_uptime, ident)\"},{\"name\":\"server\",\"definition\":\"label_values(mysql_uptime{ident=\\\"$ident\\\"}, server)\",\"selected\":\"localhost_3306\"}]}",
  6. "chart_groups": [
  7. {
  8. "name": "MySQL关键指标",
  9. "weight": 0,
  10. "charts": [
  11. {
  12. "configs": "{\"name\":\"Connections\",\"QL\":[{\"PromQL\":\"rate(mysql_aborted_connects{ident=\\\"$ident\\\", server=\\\"$server\\\"}[5m])\",\"Legend\":\"aborted_connections - {{ident}} - {{server}}\"},{\"PromQL\":\"mysql_variables_max_connections{ident=\\\"$ident\\\", server=\\\"$server\\\"}\",\"Legend\":\"max_connections - {{ident}} - {{server}}\"},{\"PromQL\":\"mysql_threads_connected{ident=\\\"$ident\\\", server=\\\"$server\\\"}\",\"Legend\":\"threads_connected - {{ident}} - {{server}}\"}],\"legend\":false,\"highLevelConfig\":{\"shared\":true,\"sharedSortDirection\":\"desc\",\"precision\":\"short\",\"formatUnit\":1000},\"version\":1,\"layout\":{\"h\":2,\"w\":8,\"x\":0,\"y\":0,\"i\":\"0\"}}",
  13. "weight": 0
  14. },
  15. {
  16. "configs": "{\"name\":\"Slow Queries\",\"QL\":[{\"PromQL\":\"rate(mysql_slow_queries{ident=\\\"$ident\\\", server=\\\"$server\\\"}[5m])\",\"Legend\":\"slow_queries - {{ident}} - {{server}}\"}],\"legend\":false,\"highLevelConfig\":{\"shared\":true,\"sharedSortDirection\":\"desc\",\"precision\":\"short\",\"formatUnit\":1000},\"version\":1,\"layout\":{\"h\":2,\"w\":8,\"x\":8,\"y\":0,\"i\":\"1\"}}",
  17. "weight": 0
  18. },
  19. {
  20. "configs": "{\"name\":\"Open Files\",\"QL\":[{\"PromQL\":\"mysql_variables_open_files_limit{ident=\\\"$ident\\\", server=\\\"$server\\\"}\",\"Legend\":\"open_files_limit - {{ident}} - {{server}}\"},{\"PromQL\":\"mysql_variables_innodb_open_files{ident=\\\"$ident\\\", server=\\\"$server\\\"}\",\"Legend\":\"open_files_used - {{ident}} - {{server}}\"}],\"legend\":false,\"highLevelConfig\":{\"shared\":true,\"sharedSortDirection\":\"desc\",\"precision\":\"short\",\"formatUnit\":1000},\"version\":1,\"layout\":{\"h\":2,\"w\":8,\"x\":16,\"y\":0,\"i\":\"2\"}}",
  21. "weight": 0
  22. },
  23. {
  24. "configs": "{\"name\":\"Queries per second\",\"QL\":[{\"PromQL\":\"rate(mysql_queries{ident=\\\"$ident\\\", server=\\\"$server\\\"}[1m])\",\"Legend\":\"mysql_queries - {{ident}} - {{server}}\"}],\"legend\":false,\"highLevelConfig\":{\"shared\":true,\"sharedSortDirection\":\"desc\",\"precision\":\"short\",\"formatUnit\":1000},\"version\":1,\"layout\":{\"h\":2,\"w\":8,\"x\":0,\"y\":2,\"i\":\"3\"}}",
  25. "weight": 0
  26. },
  27. {
  28. "configs": "{\"name\":\"Writes per second\",\"QL\":[{\"PromQL\":\"rate(mysql_com_insert{ident=\\\"$ident\\\", server=\\\"$server\\\"}[1m])\",\"Legend\":\"command_insert - {{ident}} - {{server}}\"},{\"PromQL\":\"rate(mysql_com_update{ident=\\\"$ident\\\", server=\\\"$server\\\"}[1m])\",\"Legend\":\"command_update - {{ident}} - {{server}}\"},{\"PromQL\":\"rate(mysql_com_delete{ident=\\\"$ident\\\", server=\\\"$server\\\"}[1m])\",\"Legend\":\"command_delete - {{ident}} - {{server}}\"}],\"legend\":false,\"highLevelConfig\":{\"shared\":true,\"sharedSortDirection\":\"desc\",\"precision\":\"short\",\"formatUnit\":1000},\"version\":1,\"layout\":{\"h\":2,\"w\":8,\"x\":16,\"y\":2,\"i\":\"4\"}}",
  29. "weight": 0
  30. },
  31. {
  32. "configs": "{\"name\":\"Threads\",\"QL\":[{\"PromQL\":\"mysql_threads_running{ident=\\\"$ident\\\", server=\\\"$server\\\"}\",\"Legend\":\"threads_running - {{ident}} - {{server}}\"},{\"PromQL\":\"mysql_threads_connected{ident=\\\"$ident\\\", server=\\\"$server\\\"}\",\"Legend\":\"threads_connected - {{ident}} - {{server}}\"}],\"legend\":false,\"highLevelConfig\":{\"shared\":true,\"sharedSortDirection\":\"desc\",\"precision\":\"short\",\"formatUnit\":1000},\"version\":1,\"layout\":{\"h\":2,\"w\":8,\"x\":16,\"y\":4,\"i\":\"5\"}}",
  33. "weight": 0
  34. },
  35. {
  36. "configs": "{\"name\":\"Data Reads/Writes per second\",\"QL\":[{\"PromQL\":\"rate(mysql_innodb_data_reads{ident=\\\"$ident\\\", server=\\\"$server\\\"}[5m])\",\"Legend\":\"mysql_innodb_data_reads - {{ident}} - {{server}}\"},{\"PromQL\":\"rate(mysql_innodb_data_writes{ident=\\\"$ident\\\", server=\\\"$server\\\"}[5m])\",\"Legend\":\"mysql_innodb_data_writes - {{ident}} - {{server}}\"}],\"legend\":false,\"highLevelConfig\":{\"shared\":true,\"sharedSortDirection\":\"desc\",\"precision\":\"short\",\"formatUnit\":1000},\"version\":1,\"layout\":{\"h\":2,\"w\":8,\"x\":0,\"y\":4,\"i\":\"6\"}}",
  37. "weight": 0
  38. },
  39. {
  40. "configs": "{\"name\":\"InnoDB Buffer Pool Size\",\"QL\":[{\"PromQL\":\"mysql_variables_innodb_buffer_pool_size{ident=\\\"$ident\\\", server=\\\"$server\\\"}\"}],\"legend\":false,\"highLevelConfig\":{\"shared\":true,\"sharedSortDirection\":\"desc\",\"precision\":\"short\",\"formatUnit\":1000},\"version\":1,\"layout\":{\"h\":2,\"w\":8,\"x\":8,\"y\":4,\"i\":\"7\"}}",
  41. "weight": 0
  42. },
  43. {
  44. "configs": "{\"name\":\"InnoDB Buffer Pool Pages\",\"QL\":[{\"PromQL\":\"mysql_innodb_buffer_pool_pages_free{ident=\\\"$ident\\\", server=\\\"$server\\\"}\"},{\"PromQL\":\"mysql_innodb_buffer_pool_pages_data{ident=\\\"$ident\\\", server=\\\"$server\\\"}\"},{\"PromQL\":\"mysql_innodb_buffer_pool_pages_total{ident=\\\"$ident\\\", server=\\\"$server\\\"}\"}],\"legend\":false,\"highLevelConfig\":{\"shared\":true,\"sharedSortDirection\":\"desc\",\"precision\":\"short\",\"formatUnit\":1000},\"version\":1,\"layout\":{\"h\":2,\"w\":11,\"x\":0,\"y\":6,\"i\":\"8\"}}",
  45. "weight": 0
  46. },
  47. {
  48. "configs": "{\"name\":\"TPS\",\"QL\":[{\"PromQL\":\"rate(mysql_com_commit{ident=\\\"$ident\\\", server=\\\"$server\\\"}[1m]) + rate(mysql_com_rollback{ident=\\\"$ident\\\", server=\\\"$server\\\"}[1m])\",\"Legend\":\"tps - {{ident}} - {{server}}\"}],\"legend\":false,\"highLevelConfig\":{\"shared\":true,\"sharedSortDirection\":\"desc\",\"precision\":\"short\",\"formatUnit\":1000},\"version\":1,\"layout\":{\"h\":2,\"w\":8,\"x\":8,\"y\":2,\"i\":\"9\"}}",
  49. "weight": 0
  50. },
  51. {
  52. "configs": "{\"name\":\"InnoDB Buffer Pool Hit Rate\",\"QL\":[{\"PromQL\":\"100 - increase(mysql_innodb_buffer_pool_reads{ident=\\\"$ident\\\", server=\\\"$server\\\"}[5m]) / increase(mysql_innodb_buffer_pool_read_requests{ident=\\\"$ident\\\", server=\\\"$server\\\"}[5m]) * 100\",\"Legend\":\"rate - {{ident}} - {{server}}\"}],\"legend\":false,\"highLevelConfig\":{\"shared\":true,\"sharedSortDirection\":\"desc\",\"precision\":\"short\",\"formatUnit\":1000},\"version\":1,\"layout\":{\"h\":2,\"w\":13,\"x\":11,\"y\":6,\"i\":\"10\"}}",
  53. "weight": 0
  54. }
  55. ]
  56. }
  57. ]
  58. }
  59. ]

告警规则JSON:

  1. [
  2. {
  3. "name": "MySQL InnoDB buffer pool 命中率太低",
  4. "note": "",
  5. "severity": 2,
  6. "disabled": 0,
  7. "prom_for_duration": 0,
  8. "prom_ql": "100 - increase(mysql_innodb_buffer_pool_reads[5m]) / increase(mysql_innodb_buffer_pool_read_requests[5m]) * 100 < 99",
  9. "prom_eval_interval": 15,
  10. "enable_stime": "00:00",
  11. "enable_etime": "23:59",
  12. "enable_days_of_week": [
  13. "1",
  14. "2",
  15. "3",
  16. "4",
  17. "5",
  18. "6",
  19. "0"
  20. ],
  21. "notify_recovered": 1,
  22. "notify_channels": [
  23. "email",
  24. "dingtalk",
  25. "wecom"
  26. ],
  27. "notify_repeat_step": 60,
  28. "callbacks": [],
  29. "runbook_url": "",
  30. "append_tags": []
  31. },
  32. {
  33. "name": "MySQL出现慢查询",
  34. "note": "",
  35. "severity": 3,
  36. "disabled": 0,
  37. "prom_for_duration": 0,
  38. "prom_ql": "rate(mysql_slow_queries[5m]) > 0",
  39. "prom_eval_interval": 15,
  40. "enable_stime": "00:00",
  41. "enable_etime": "23:59",
  42. "enable_days_of_week": [
  43. "1",
  44. "2",
  45. "3",
  46. "4",
  47. "5",
  48. "6",
  49. "0"
  50. ],
  51. "notify_recovered": 1,
  52. "notify_channels": [
  53. "email",
  54. "dingtalk",
  55. "wecom"
  56. ],
  57. "notify_repeat_step": 60,
  58. "callbacks": [],
  59. "runbook_url": "",
  60. "append_tags": []
  61. },
  62. {
  63. "name": "MySQL出现连接失败的情况",
  64. "note": "或许需要调整max_connections",
  65. "severity": 1,
  66. "disabled": 0,
  67. "prom_for_duration": 0,
  68. "prom_ql": "rate(mysql_aborted_connects[5m]) > 0",
  69. "prom_eval_interval": 15,
  70. "enable_stime": "00:00",
  71. "enable_etime": "23:59",
  72. "enable_days_of_week": [
  73. "1",
  74. "2",
  75. "3",
  76. "4",
  77. "5",
  78. "6",
  79. "0"
  80. ],
  81. "notify_recovered": 1,
  82. "notify_channels": [
  83. "email",
  84. "dingtalk",
  85. "wecom"
  86. ],
  87. "notify_repeat_step": 60,
  88. "callbacks": [],
  89. "runbook_url": "",
  90. "append_tags": []
  91. },
  92. {
  93. "name": "MySQL句柄快用完了",
  94. "note": "",
  95. "severity": 2,
  96. "disabled": 0,
  97. "prom_for_duration": 0,
  98. "prom_ql": "100 * mysql_variables_innodb_open_files / mysql_variables_open_files_limit > 90",
  99. "prom_eval_interval": 15,
  100. "enable_stime": "00:00",
  101. "enable_etime": "23:59",
  102. "enable_days_of_week": [
  103. "1",
  104. "2",
  105. "3",
  106. "4",
  107. "5",
  108. "6",
  109. "0"
  110. ],
  111. "notify_recovered": 1,
  112. "notify_channels": [
  113. "email",
  114. "dingtalk",
  115. "wecom"
  116. ],
  117. "notify_repeat_step": 60,
  118. "callbacks": [],
  119. "runbook_url": "",
  120. "append_tags": []
  121. },
  122. {
  123. "name": "MySQL近期有重启",
  124. "note": "",
  125. "severity": 3,
  126. "disabled": 0,
  127. "prom_for_duration": 0,
  128. "prom_ql": "mysql_uptime < 1800",
  129. "prom_eval_interval": 15,
  130. "enable_stime": "00:00",
  131. "enable_etime": "23:59",
  132. "enable_days_of_week": [
  133. "1",
  134. "2",
  135. "3",
  136. "4",
  137. "5",
  138. "6",
  139. "0"
  140. ],
  141. "notify_recovered": 1,
  142. "notify_channels": [
  143. "email",
  144. "dingtalk",
  145. "wecom"
  146. ],
  147. "notify_repeat_step": 60,
  148. "callbacks": [],
  149. "runbook_url": "",
  150. "append_tags": []
  151. },
  152. {
  153. "name": "MySQL连接数快用完了",
  154. "note": "或许需要调整max_connections",
  155. "severity": 2,
  156. "disabled": 0,
  157. "prom_for_duration": 0,
  158. "prom_ql": "100 * mysql_threads_connected / mysql_variables_max_connections > 90",
  159. "prom_eval_interval": 15,
  160. "enable_stime": "00:00",
  161. "enable_etime": "23:59",
  162. "enable_days_of_week": [
  163. "1",
  164. "2",
  165. "3",
  166. "4",
  167. "5",
  168. "6",
  169. "0"
  170. ],
  171. "notify_recovered": 1,
  172. "notify_channels": [
  173. "email",
  174. "dingtalk",
  175. "wecom"
  176. ],
  177. "notify_repeat_step": 60,
  178. "callbacks": [],
  179. "runbook_url": "",
  180. "append_tags": []
  181. }
  182. ]