grafana-agent内置了elasticsearch_exporter,可以采集Elasticsearch的运行指标。

目前grafana-agent不支持配置多个elasticsearch的地址,只能配置一个ElasticSearch地址对其进行metrics的采集。

我们强烈推荐您使用独立的账号运行grafana-agent,并做好访问elasticsearch实例的最小化授权,避免过度授权带来的安全隐患,更多可以参考official documentation

配置并启用elasticsearch_exporter

  1. elasticsearch_exporter:
  2. enabled: true
  3. address: "http://localhost:9200"

采集的关键指标列表

  1. # Estimated size in bytes of breaker
  2. # 断路器预估内存大小
  3. # Gauge
  4. elasticsearch_breakers_estimated_size_bytes
  5. # Limit size in bytes for breaker
  6. # 断路器设置内存限制
  7. # Gauge
  8. elasticsearch_breakers_limit_size_bytes
  9. # tripped for breaker
  10. # 断路器累计阻断次数
  11. # Counter
  12. elasticsearch_breakers_tripped
  13. # The number of primary shards in your cluster. This is an aggregate total across all indices
  14. # 集群主分片数量
  15. # Gauge
  16. elasticsearch_cluster_health_active_primary_shards
  17. # Aggregate total of all shards across all indices, which includes replica shards.
  18. # 集群分片总数
  19. # Gauge
  20. elasticsearch_cluster_health_active_shards
  21. # Shards delayed to reduce reallocation overhead
  22. # 暂缓重分配的分片数
  23. # Gauge
  24. elasticsearch_cluster_health_delayed_unassigned_shards
  25. # Count of shards that are being freshly created
  26. # 创建中的分片数
  27. # Gauge
  28. elasticsearch_cluster_health_initializing_shards
  29. # Number of data nodes in the cluster
  30. # 数据节点数
  31. # Gauge
  32. elasticsearch_cluster_health_number_of_data_nodes
  33. # Number of nodes in the cluster
  34. # 节点总数
  35. # Gauge
  36. elasticsearch_cluster_health_number_of_nodes
  37. # Cluster level changes which have not yet been executed
  38. # 等待执行的集群变更总数
  39. # Gauge
  40. elasticsearch_cluster_health_number_of_pending_tasks
  41. # The number of shards that are currently moving from one node to another node
  42. # 迁移中的分片数
  43. # Gauge
  44. elasticsearch_cluster_health_relocating_shards
  45. # Whether all primary and replica shards are allocated
  46. # 集群健康度
  47. # Gauge
  48. elasticsearch_cluster_health_status
  49. # The number of shards that exist in the cluster state, but cannot be found in the cluster itself
  50. # 集群未分配的分片数
  51. # Gauge
  52. elasticsearch_cluster_health_unassigned_shards
  53. # Available space on block device in bytes
  54. # 可用磁盘容量(byte)
  55. # Gauge
  56. elasticsearch_filesystem_data_available_bytes
  57. # Size of block device in bytes
  58. # 磁盘容量(byte)
  59. # Gauge
  60. elasticsearch_filesystem_data_size_bytes
  61. # Count of documents on this node
  62. # 节点文档总数
  63. # Gauge
  64. elasticsearch_indices_docs
  65. # Count of deleted documents on this node
  66. # 节点删除文档数
  67. # Gauge
  68. elasticsearch_indices_docs_deleted
  69. # Count of documents with only primary shards on all nodes
  70. # 所有节点主分片文档总数
  71. # Gauge
  72. elasticsearch_indices_docs_primary
  73. # Evictions from field data
  74. # field data cache 内存剔除次数
  75. # Counter
  76. elasticsearch_indices_fielddata_evictions
  77. # Field data cache memory usage in bytes
  78. # field data cache 内存占用(byte)
  79. # Gauge
  80. elasticsearch_indices_fielddata_memory_size_bytes
  81. # Evictions from filter cache
  82. # filter cache 内存剔除次数
  83. # Counter
  84. elasticsearch_indices_filter_cache_evictions
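  85. # Filter cache memory usage in bytes
  86. # filter cache 内存占用(byte)
  87. # Gauge
  88. elasticsearch_indices_filter_cache_memory_size_bytes
  # Cumulative flush time in seconds
  # flush操作耗时累计(秒)
  # Counter
  elasticsearch_indices_flush_time_seconds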
  89. # Total flushes
  90. # flush操作次数累计
  91. # Counter
  92. elasticsearch_indices_flush_total
  93. # Total time get exists in seconds
  94. # get成功操作耗时累计(秒)
  95. # Counter
  96. elasticsearch_indices_get_exists_time_seconds
  97. # Total get exists operations
  98. # get成功操作次数累计
  99. # Counter
  100. elasticsearch_indices_get_exists_total
  101. # Total time of get missing in seconds
  102. # get失败操作耗时累计(秒)
  103. # Counter
  104. elasticsearch_indices_get_missing_time_seconds
  105. # Total get missing
  106. # get失败操作次数累计
  107. # Counter
  108. elasticsearch_indices_get_missing_total
  109. # Total get time in seconds
  110. # get操作耗时累计(秒)
  111. # Counter
  112. elasticsearch_indices_get_time_seconds
  113. # Total get
  114. # get操作次数累计
  115. # Counter
  116. elasticsearch_indices_get_total
  117. # Total time indexing delete in seconds
  118. # 索引删除累计耗时(秒)
  119. # Counter
  120. elasticsearch_indices_indexing_delete_time_seconds_total
  121. # Total indexing deletes
  122. # 索引删除操作次数累计
  123. # Counter
  124. elasticsearch_indices_indexing_delete_total
  125. # Cumulative index time in seconds
  126. # index操作累计耗时(秒)
  127. # Counter
  128. elasticsearch_indices_indexing_index_time_seconds_total
  129. # Total index calls
  130. # index操作数量累计
  131. # Counter
  132. elasticsearch_indices_indexing_index_total
  133. # Cumulative docs merged
  134. # merge文档数量累计
  135. # Counter
  136. elasticsearch_indices_merges_docs_total
  137. # Total merges
  138. # merge操作数量累计
  139. # Counter
  140. elasticsearch_indices_merges_total
  141. # Total merge size in bytes
  142. # merge操作数据大小累计(byte)
  143. # Counter
  144. elasticsearch_indices_merges_total_size_bytes_total
  145. # Total time spent merging in seconds
  146. # merge操作累计耗时(秒)
  147. # Counter
  148. elasticsearch_indices_merges_total_time_seconds_total
  149. # Evictions from query cache
  150. # query cache 内存剔除次数
  151. # Counter
  152. elasticsearch_indices_query_cache_evictions
  153. # Query cache memory usage in bytes
  154. # query cache 内存占用(byte)
  155. # Gauge
  156. elasticsearch_indices_query_cache_memory_size_bytes
  157. # Total time spent refreshing in seconds
  158. # refresh操作耗时累计(秒)
  159. # Counter
  160. elasticsearch_indices_refresh_time_seconds_total
  161. # Total refreshes
  162. # refresh操作次数累计
  163. # Counter
  164. elasticsearch_indices_refresh_total
  165. # Total search fetch time in seconds
  166. # fetch操作耗时累计(秒)
  167. # Counter
  168. elasticsearch_indices_search_fetch_time_seconds
  169. # Total number of fetches
  170. # fetch操作次数累计
  171. # Counter
  172. elasticsearch_indices_search_fetch_total
  173. # Total search query time in seconds
  174. # query操作耗时累计(秒)
  175. # Counter
  176. elasticsearch_indices_search_query_time_seconds
  177. # Total number of queries
  178. # query操作次数累计
  179. # Counter
  180. elasticsearch_indices_search_query_total
  181. # Segments with only primary shards on all nodes
  182. # 所有节点主分片segment总数
  183. # Gauge
  184. elasticsearch_indices_segment_count_primary
  185. # Segments with all shards on all nodes
  186. # 所有节点所有分片segment总数
  187. # Gauge
  188. elasticsearch_indices_segment_count_total
  189. # Doc values with only primary shards on all nodes in bytes
  190. # 主分片doc value内存占用(byte)
  191. # Gauge
  192. elasticsearch_indices_segment_doc_values_memory_bytes_primary
  193. # Doc values with all shards on all nodes in bytes
  194. # 所有分片doc value内存占用(byte)
  195. # Gauge
  196. elasticsearch_indices_segment_doc_values_memory_bytes_total
  197. # Size of fields with only primary shards on all nodes in bytes
  198. # 主分片field内存占用(byte)
  199. # Gauge
  200. elasticsearch_indices_segment_fields_memory_bytes_primary
  201. # Size of fields with all shards on all nodes in bytes
  202. # 所有分片field内存占用(byte)
  203. # Gauge
  204. elasticsearch_indices_segment_fields_memory_bytes_total
  205. # Size of fixed bit with only primary shards on all nodes in bytes
  206. # 主分片fixed bit set内存占用(byte)
  207. # Gauge
  208. elasticsearch_indices_segment_fixed_bit_set_memory_bytes_primary
  209. # Size of fixed bit with all shards on all nodes in bytes
  210. # 所有分片fixed bit set内存占用(byte)
  211. # Gauge
  212. elasticsearch_indices_segment_fixed_bit_set_memory_bytes_total
  213. # Index writer with only primary shards on all nodes in bytes
  214. # 主分片index writer内存占用(byte)
  215. # Gauge
  216. elasticsearch_indices_segment_index_writer_memory_bytes_primary
  217. # Index writer with all shards on all nodes in bytes
  218. # 所有分片index writer内存占用(byte)
  219. # Gauge
  220. elasticsearch_indices_segment_index_writer_memory_bytes_total
  221. # Size of segments with only primary shards on all nodes in bytes
  222. # 主分片segment内存占用(byte)
  223. # Gauge
  224. elasticsearch_indices_segment_memory_bytes_primary
  225. # Size of segments with all shards on all nodes in bytes
  226. # 所有分片segment内存占用(byte)
  227. # Gauge
  228. elasticsearch_indices_segment_memory_bytes_total
  229. # Size of norms with only primary shards on all nodes in bytes
  230. # 主分片normalization factor内存占用(byte)
  231. # Gauge
  232. elasticsearch_indices_segment_norms_memory_bytes_primary
  233. # Size of norms with all shards on all nodes in bytes
  234. # 所有分片normalization factor内存占用(byte)
  235. # Gauge
  236. elasticsearch_indices_segment_norms_memory_bytes_total
  237. # Size of points with only primary shards on all nodes in bytes
  238. # 主分片point内存占用(byte)
  239. # Gauge
  240. elasticsearch_indices_segment_points_memory_bytes_primary
  241. # Size of points with all shards on all nodes in bytes
  242. # 所有分片point内存占用(byte)
  243. # Gauge
  244. elasticsearch_indices_segment_points_memory_bytes_total
  245. # Size of terms with only primary shards on all nodes in bytes
  246. # 主分片term内存占用(byte)
  247. # Gauge
  248. elasticsearch_indices_segment_terms_memory_primary
  249. # Number of terms with all shards on all nodes in bytes
  250. # 所有分片term内存占用(byte)
  251. # Gauge
  252. elasticsearch_indices_segment_terms_memory_total
  253. # Size of version map with only primary shards on all nodes in bytes
  254. # 主分片version map内存占用(byte)
  255. # Gauge
  256. elasticsearch_indices_segment_version_map_memory_bytes_primary
  257. # Size of version map with all shards on all nodes in bytes
  258. # 所有分片version map内存占用(byte)
  259. # Gauge
  260. elasticsearch_indices_segment_version_map_memory_bytes_total
  261. # Count of index segments
  262. # segment个数
  263. # Gauge
  264. elasticsearch_indices_segments_count
  265. # Current memory size of segments in bytes
  266. # segment内存占用(byte)
  267. # Gauge
  268. elasticsearch_indices_segments_memory_bytes
  269. # Current size of stored index data in bytes with only primary shards on all nodes
  270. # 主分片索引容量(byte)
  271. # Gauge
  272. elasticsearch_indices_store_size_bytes_primary
  273. # Current size of stored index data in bytes with all shards on all nodes
  274. # 所有分片索引容量(byte)
  275. # Gauge
  276. elasticsearch_indices_store_size_bytes_total
  277. # Throttle time for index store in seconds
  278. # 索引存储限制耗时(秒)
  279. # Counter
  280. elasticsearch_indices_store_throttle_time_seconds_total
  281. # Total translog operations
  282. # translog操作数累计
  283. # Counter
  284. elasticsearch_indices_translog_operations
  285. # Total translog size in bytes
  286. # translog大小累计(byte)
  287. # Counter
  288. elasticsearch_indices_translog_size_in_bytes
  289. # Count of JVM GC runs
  290. # GC运行次数累计
  291. # Counter
  292. elasticsearch_jvm_gc_collection_seconds_count
  293. # GC run time in seconds
  294. # GC运行耗时累计(秒)
  295. # Counter
  296. elasticsearch_jvm_gc_collection_seconds_sum
  297. # JVM memory currently committed by area
  298. # JVM申请内存大小(byte)
  299. # Gauge
  300. elasticsearch_jvm_memory_committed_bytes
  301. # JVM memory max
  302. # JVM内存限制大小(byte)
  303. # Gauge
  304. elasticsearch_jvm_memory_max_bytes
  305. # JVM memory peak used by pool
  306. # JVM内存峰值大小(byte)
  307. # Counter
  308. elasticsearch_jvm_memory_pool_peak_used_bytes
  309. # JVM memory currently used by area
  310. # JVM内存占用大小(byte)
  311. # Gauge
  312. elasticsearch_jvm_memory_used_bytes
  313. # Shortterm load average
  314. # 系统负载(1分钟)
  315. # Gauge
  316. elasticsearch_os_load1
  317. # Midterm load average
  318. # 系统负载(5分钟)
  319. # Gauge
  320. elasticsearch_os_load5
  321. # Longterm load average
  322. # 系统负载(15分钟)
  323. # Gauge
  324. elasticsearch_os_load15
  325. # Percent CPU used by process
  326. # 进程CPU占用率
  327. # Gauge
  328. elasticsearch_process_cpu_percent
  329. # Open file descriptors
  330. # 进程打开文件数
  331. # Gauge
  332. elasticsearch_process_open_files_count
  333. # Thread Pool threads active
  334. # 活跃线程总数
  335. # Gauge
  336. elasticsearch_thread_pool_active_count
  337. # Thread Pool operations completed
  338. # 线程池complete次数
  339. # Counter
  340. elasticsearch_thread_pool_completed_count
  341. # Thread Pool operations rejected
  342. # 线程池reject次数
  343. # Counter
  344. elasticsearch_thread_pool_rejected_count
  345. # Total number of bytes received
  346. # 网络收流量(byte)
  347. # Counter
  348. elasticsearch_transport_rx_size_bytes_total
  349. # Total number of bytes sent
  350. # 网络发流量(byte)
  351. # Counter
  352. elasticsearch_transport_tx_size_bytes_total

完整的配置项说明

  1. # Enables the elasticsearch_exporter integration, allowing the Agent to automatically
  2. # collect system metrics from the configured ElasticSearch server address
  3. [enabled: <boolean> | default = false]
  4. # Sets an explicit value for the instance label when the integration is
  5. # self-scraped. Overrides inferred values.
  6. #
  7. # The default value for this integration is inferred from the hostname portion
  8. # of address.
  9. [instance: <string>]
  10. # Automatically collect metrics from this integration. If disabled,
  11. # the elasticsearch_exporter integration will be run but not scraped and thus not
  12. # remote-written. Metrics for the integration will be exposed at
  13. # /integrations/elasticsearch_exporter/metrics and can be scraped by an external
  14. # process.
  15. [scrape_integration: <boolean> | default = <integrations_config.scrape_integrations>]
  16. # How often should the metrics be collected? Defaults to
  17. # prometheus.global.scrape_interval.
  18. [scrape_interval: <duration> | default = <global_config.scrape_interval>]
  19. # The timeout before considering the scrape a failure. Defaults to
  20. # prometheus.global.scrape_timeout.
  21. [scrape_timeout: <duration> | default = <global_config.scrape_timeout>]
  22. # Allows for relabeling labels on the target.
  23. relabel_configs:
  24. [- <relabel_config> ... ]
  25. # Relabel metrics coming from the integration, allowing to drop series
  26. # from the integration that you don't care about.
  27. metric_relabel_configs:
  28. [ - <relabel_config> ... ]
  29. # How frequent to truncate the WAL for this integration.
  30. [wal_truncate_frequency: <duration> | default = "60m"]
  31. # Monitor the exporter itself and include those metrics in the results.
  32. [include_exporter_metrics: <bool> | default = false]
  33. #
  34. # Exporter-specific configuration options
  35. #
  36. # HTTP API address of an Elasticsearch node.
  37. [ address: <string> | default = "http://localhost:9200" ]
  38. # Timeout for trying to get stats from Elasticsearch.
  39. [ timeout: <duration> | default = "5s" ]
  40. # Export stats for all nodes in the cluster. If used, this flag will override the flag `node`.
  41. [ all: <boolean> ]
  42. # Node's name of which metrics should be exposed.
  43. [ node: <string> ]
  44. # Export stats for indices in the cluster.
  45. [ indices: <boolean> ]
  46. # Export stats for settings of all indices of the cluster.
  47. [ indices_settings: <boolean> ]
  48. # Export stats for cluster settings.
  49. [ cluster_settings: <boolean> ]
  50. # Export stats for shards in the cluster (implies indices).
  51. [ shards: <boolean> ]
  52. # Export stats for the cluster snapshots.
  53. [ snapshots: <boolean> ]
  54. # Cluster info update interval for the cluster label.
  55. [ clusterinfo_interval: <duration> | default = "5m" ]
  56. # Path to PEM file that contains trusted Certificate Authorities for the Elasticsearch connection.
  57. [ ca: <string> ]
  58. # Path to PEM file that contains the private key for client auth when connecting to Elasticsearch.
  59. [ client_private_key: <string> ]
  60. # Path to PEM file that contains the corresponding cert for the private key to connect to Elasticsearch.
  61. [ client_cert: <string> ]
  62. # Skip SSL verification when connecting to Elasticsearch.
  63. [ ssl_skip_verify: <boolean> ]