OLAP

Patroni OLAP模板,针对高并行,长查询,高吞吐实例优化

Patroni OLAP模板主要针对吞吐量与计算并行度进行优化

此模板针对的机型是Dell R740 64核/400GB内存,使用PCI-E SSD的节点。您可以根据自己的实际机型进行调整。

  1. #!/usr/bin/env patroni
  2. #==============================================================#
  3. # File : patroni.yml
  4. # Ctime : 2020-04-08
  5. # Mtime : 2020-12-22
  6. # Desc : patroni cluster definition for {{ pg_cluster }} (olap)
  7. # Path : /pg/bin/patroni.yml
  8. # Real Path : /pg/conf/{{ pg_instance }}.yml
  9. # Link : /pg/bin/patroni.yml -> /pg/conf/{{ pg_instance}}.yml
  10. # Note : Analysis Database Cluster Template
  11. # Doc : https://patroni.readthedocs.io/en/latest/SETTINGS.html
  12. # Copyright (C) 2018-2021 Ruohang Feng
  13. #==============================================================#
  14. # OLTP database are optimized for throughput
  15. # typical spec: 64 Core | 400 GB RAM | PCI-E SSD xTB
  16. ---
  17. #------------------------------------------------------------------------------
  18. # identity
  19. #------------------------------------------------------------------------------
  20. namespace: {{ pg_namespace }}/ # namespace
  21. scope: {{ pg_cluster }} # cluster name
  22. name: {{ pg_instance }} # instance name
  23. #------------------------------------------------------------------------------
  24. # log
  25. #------------------------------------------------------------------------------
  26. log:
  27. level: INFO # NOTEST|DEBUG|INFO|WARNING|ERROR|CRITICAL
  28. dir: /pg/log/ # default log file: /pg/log/patroni.log
  29. file_size: 100000000 # 100MB log triggers a log rotate
  30. # format: '%(asctime)s %(levelname)s: %(message)s'
  31. #------------------------------------------------------------------------------
  32. # dcs
  33. #------------------------------------------------------------------------------
  34. consul:
  35. host: 127.0.0.1:8500
  36. consistency: default # default|consistent|stale
  37. register_service: true
  38. service_check_interval: 15s
  39. service_tags:
  40. - {{ pg_cluster }}
  41. #------------------------------------------------------------------------------
  42. # api
  43. #------------------------------------------------------------------------------
  44. # how to expose patroni service
  45. # listen on all ipv4, connect via public ip, use same credential as dbuser_monitor
  46. restapi:
  47. listen: 0.0.0.0:{{ patroni_port }}
  48. connect_address: {{ inventory_hostname }}:{{ patroni_port }}
  49. authentication:
  50. verify_client: none # none|optional|required
  51. username: {{ pg_monitor_username }}
  52. password: '{{ pg_monitor_password }}'
  53. #------------------------------------------------------------------------------
  54. # ctl
  55. #------------------------------------------------------------------------------
  56. ctl:
  57. optional:
  58. insecure: true
  59. # cacert: '/path/to/ca/cert'
  60. # certfile: '/path/to/cert/file'
  61. # keyfile: '/path/to/key/file'
  62. #------------------------------------------------------------------------------
  63. # tags
  64. #------------------------------------------------------------------------------
  65. tags:
  66. nofailover: false
  67. clonefrom: true
  68. noloadbalance: false
  69. nosync: false
  70. {% if pg_upstream is defined %}
  71. replicatefrom: {{ pg_upstream }} # clone from another replica rather than primary
  72. {% endif %}
  73. #------------------------------------------------------------------------------
  74. # watchdog
  75. #------------------------------------------------------------------------------
  76. # available mode: off|automatic|required
  77. watchdog:
  78. mode: {{ patroni_watchdog_mode }}
  79. device: /dev/watchdog
  80. # safety_margin: 10s
  81. #------------------------------------------------------------------------------
  82. # bootstrap
  83. #------------------------------------------------------------------------------
  84. bootstrap:
  85. #----------------------------------------------------------------------------
  86. # bootstrap method
  87. #----------------------------------------------------------------------------
  88. method: initdb
  89. # add custom bootstrap method here
  90. # default bootstrap method: initdb
  91. initdb:
  92. - locale: C
  93. - encoding: UTF8
  94. # - data-checksums # enable data-checksum
  95. #----------------------------------------------------------------------------
  96. # bootstrap users
  97. #---------------------------------------------------------------------------
  98. # additional users which need to be created after initializing new cluster
  99. # replication user and monitor user are required
  100. users:
  101. {{ pg_replication_username }}:
  102. password: '{{ pg_replication_password }}'
  103. {{ pg_monitor_username }}:
  104. password: '{{ pg_monitor_password }}'
  105. {{ pg_admin_username }}:
  106. password: '{{ pg_admin_password }}'
  107. # bootstrap hba, allow local and intranet password access & replication
  108. # will be overwritten later
  109. pg_hba:
  110. - local all postgres ident
  111. - local all all md5
  112. - host all all 0.0.0.0/0 md5
  113. - local replication postgres ident
  114. - local replication all md5
  115. - host replication all 0.0.0.0/0 md5
  116. #----------------------------------------------------------------------------
  117. # template
  118. #---------------------------------------------------------------------------
  119. # post_init: /pg/bin/pg-init
  120. #----------------------------------------------------------------------------
  121. # bootstrap config
  122. #---------------------------------------------------------------------------
  123. # this section will be written to /{{ pg_namespace }}/{{ pg_cluster }}/config
  124. # if will NOT take any effect after cluster bootstrap
  125. dcs:
  126. {% if pg_role == 'primary' and pg_upstream is defined %}
  127. #----------------------------------------------------------------------------
  128. # standby cluster definition
  129. #---------------------------------------------------------------------------
  130. standby_cluster:
  131. host: {{ pg_upstream }}
  132. port: {{ pg_port }}
  133. # primary_slot_name: patroni # must be create manually on upstream server, if specified
  134. create_replica_methods:
  135. - basebackup
  136. {% endif %}
  137. #----------------------------------------------------------------------------
  138. # important parameters
  139. #---------------------------------------------------------------------------
  140. # constraint: ttl >: loop_wait + retry_timeout * 2
  141. # the number of seconds the loop will sleep. Default value: 10
  142. # this is patroni check loop interval
  143. loop_wait: 10
  144. # the TTL to acquire the leader lock (in seconds). Think of it as the length of time before initiation of the automatic failover process. Default value: 30
  145. # config this according to your network condition to avoid false-positive failover
  146. ttl: 30
  147. # timeout for DCS and PostgreSQL operation retries (in seconds). DCS or network issues shorter than this will not cause Patroni to demote the leader. Default value: 10
  148. retry_timeout: 10
  149. # the amount of time a master is allowed to recover from failures before failover is triggered (in seconds)
  150. # Max RTO: 2 loop wait + master_start_timeout
  151. master_start_timeout: 10
  152. # import: candidate will not be promoted if replication lag is higher than this
  153. # maximum RPO: 16MB (analysis tolerate more data loss)
  154. maximum_lag_on_failover: 16777216
  155. # The number of seconds Patroni is allowed to wait when stopping Postgres and effective only when synchronous_mode is enabled
  156. master_stop_timeout: 30
  157. # turns on synchronous replication mode. In this mode a replica will be chosen as synchronous and only the latest leader and synchronous replica are able to participate in leader election
  158. # set to true for RPO mode
  159. synchronous_mode: false
  160. # prevents disabling synchronous replication if no synchronous replicas are available, blocking all client writes to the master
  161. synchronous_mode_strict: false
  162. #----------------------------------------------------------------------------
  163. # postgres parameters
  164. #---------------------------------------------------------------------------
  165. postgresql:
  166. use_slots: true
  167. use_pg_rewind: true
  168. remove_data_directory_on_rewind_failure: true
  169. parameters:
  170. #----------------------------------------------------------------------
  171. # IMPORTANT PARAMETERS
  172. #----------------------------------------------------------------------
  173. max_connections: 400 # 100 -> 400
  174. superuser_reserved_connections: 10 # reserve 10 connection for su
  175. max_locks_per_transaction: 256 # 64 -> 256 (analysis)
  176. max_prepared_transactions: 0 # 0 disable 2PC
  177. track_commit_timestamp: on # enabled xact timestamp
  178. max_worker_processes: 64 # default 8 -> 64, SET THIS ACCORDING TO YOUR CPU CORES
  179. wal_level: logical # logical
  180. wal_log_hints: on # wal log hints to support rewind
  181. max_wal_senders: 16 # 10 -> 16
  182. max_replication_slots: 16 # 10 -> 16
  183. wal_keep_size: 100GB # keep at least 100GB WAL
  184. password_encryption: md5 # use traditional md5 auth
  185. #----------------------------------------------------------------------
  186. # RESOURCE USAGE (except WAL)
  187. #----------------------------------------------------------------------
  188. # memory: shared_buffers and maintenance_work_mem will be dynamically set
  189. shared_buffers: {{ pg_shared_buffers }}
  190. maintenance_work_mem: {{ pg_maintenance_work_mem }}
  191. work_mem: 128MB # 4MB -> 128MB (analysis)
  192. huge_pages: try # try huge pages
  193. temp_file_limit: 500GB # 0 -> 500GB (analysis)
  194. vacuum_cost_delay: 2ms # wait 2ms per 10000 cost
  195. vacuum_cost_limit: 10000 # 10000 cost each round
  196. bgwriter_delay: 10ms # check dirty page every 10ms
  197. bgwriter_lru_maxpages: 1600 # 100 -> 1600 (analysis)
  198. bgwriter_lru_multiplier: 5.0 # 2.0 -> 5.0 more cushion buffer
  199. max_parallel_workers: 64 # SET THIS ACCORDING TO YOUR CPU CORES
  200. max_parallel_workers_per_gather: 64 # SET THIS ACCORDING TO YOUR CPU CORES
  201. max_parallel_maintenance_workers: 4 # 2 -> 4
  202. #----------------------------------------------------------------------
  203. # WAL
  204. #----------------------------------------------------------------------
  205. wal_buffers: 16MB # max to 16MB
  206. wal_writer_delay: 20ms # wait period
  207. wal_writer_flush_after: 16MB # max allowed data loss (analysis)
  208. min_wal_size: 100GB # at least 100GB WAL
  209. max_wal_size: 400GB # at most 400GB WAL
  210. commit_delay: 20 # 200ms -> 20ms, increase speed
  211. commit_siblings: 10 # 5 -> 10
  212. checkpoint_timeout: 60min # checkpoint 5min -> 1h
  213. checkpoint_completion_target: 0.95 # 0.5 -> 0.95
  214. archive_mode: on
  215. archive_command: 'wal_dir=/pg/arcwal; [[ $(date +%H%M) == 1200 ]] && rm -rf ${wal_dir}/$(date -d"yesterday" +%Y%m%d); /bin/mkdir -p ${wal_dir}/$(date +%Y%m%d) && /usr/bin/lz4 -q -z %p > ${wal_dir}/$(date +%Y%m%d)/%f.lz4'
  216. #----------------------------------------------------------------------
  217. # REPLICATION
  218. #----------------------------------------------------------------------
  219. # synchronous_standby_names: ''
  220. vacuum_defer_cleanup_age: 0 # 0 (default)
  221. promote_trigger_file: promote.signal # default promote trigger file path
  222. max_standby_archive_delay: 10min # max delay before canceling queries when reading WAL from archive;
  223. max_standby_streaming_delay: 3min # max delay before canceling queries when reading streaming WAL;
  224. wal_receiver_status_interval: 1s # send replies at least this often
  225. hot_standby_feedback: on # send info from standby to prevent query conflicts
  226. wal_receiver_timeout: 60s # time that receiver waits for
  227. max_logical_replication_workers: 8 # 4 -> 8
  228. max_sync_workers_per_subscription: 8 # 4 -> 8
  229. #----------------------------------------------------------------------
  230. # QUERY TUNING
  231. #----------------------------------------------------------------------
  232. # planner
  233. enable_partitionwise_join: on # enable on analysis
  234. random_page_cost: 1.1 # 4 for HDD, 1.1 for SSD
  235. effective_cache_size: 320GB # max mem - shared buffer
  236. default_statistics_target: 1000 # stat bucket 100 -> 1000
  237. jit: on # default on
  238. jit_above_cost: 100000 # default jit threshold
  239. #----------------------------------------------------------------------
  240. # REPORTING AND LOGGING
  241. #----------------------------------------------------------------------
  242. log_destination: csvlog # use standard csv log
  243. logging_collector: on # enable csvlog
  244. log_directory: log # default log dir: /pg/data/log
  245. # log_filename: 'postgresql-%a.log' # weekly auto-recycle
  246. log_filename: 'postgresql-%Y-%m-%d.log' # YYYY-MM-DD full log retention
  247. log_checkpoints: on # log checkpoint info
  248. log_lock_waits: on # log lock wait info
  249. log_replication_commands: on # log replication info
  250. log_statement: ddl # log ddl change
  251. log_min_duration_statement: 1000 # log slow query (>1s)
  252. #----------------------------------------------------------------------
  253. # STATISTICS
  254. #----------------------------------------------------------------------
  255. track_io_timing: on # collect io statistics
  256. track_functions: all # track all functions (none|pl|all)
  257. track_activity_query_size: 8192 # max query length in pg_stat_activity
  258. #----------------------------------------------------------------------
  259. # AUTOVACUUM
  260. #----------------------------------------------------------------------
  261. log_autovacuum_min_duration: 1s # log autovacuum activity take more than 1s
  262. autovacuum_max_workers: 3 # default autovacuum worker 3
  263. autovacuum_naptime: 1min # default autovacuum naptime 1min
  264. autovacuum_vacuum_scale_factor: 0.08 # fraction of table size before vacuum 20% -> 8%
  265. autovacuum_analyze_scale_factor: 0.04 # fraction of table size before analyze 10% -> 4%
  266. autovacuum_vacuum_cost_delay: -1 # default vacuum cost delay: same as vacuum_cost_delay
  267. autovacuum_vacuum_cost_limit: -1 # default vacuum cost limit: same as vacuum_cost_limit
  268. autovacuum_freeze_max_age: 100000000 # age > 1 billion triggers force vacuum
  269. #----------------------------------------------------------------------
  270. # CLIENT
  271. #----------------------------------------------------------------------
  272. deadlock_timeout: 50ms # 50ms for deadlock
  273. idle_in_transaction_session_timeout: 0 # Disable idle in xact timeout in analysis database
  274. #----------------------------------------------------------------------
  275. # CUSTOMIZED OPTIONS
  276. #----------------------------------------------------------------------
  277. # extensions
  278. shared_preload_libraries: '{{ pg_shared_libraries | default("pg_stat_statements, auto_explain") }}'
  279. # auto_explain
  280. auto_explain.log_min_duration: 1s # auto explain query slower than 1s
  281. auto_explain.log_analyze: true # explain analyze
  282. auto_explain.log_verbose: true # explain verbose
  283. auto_explain.log_timing: true # explain timing
  284. auto_explain.log_nested_statements: true
  285. # pg_stat_statements
  286. pg_stat_statements.max: 10000 # 5000 -> 10000 queries
  287. pg_stat_statements.track: all # track all statements (all|top|none)
  288. pg_stat_statements.track_utility: off # do not track query other than CRUD
  289. pg_stat_statements.track_planning: off # do not track planning metrics
  290. #------------------------------------------------------------------------------
  291. # postgres
  292. #------------------------------------------------------------------------------
  293. postgresql:
  294. #----------------------------------------------------------------------------
  295. # how to connect to postgres
  296. #----------------------------------------------------------------------------
  297. bin_dir: {{ pg_bin_dir }}
  298. data_dir: {{ pg_data }}
  299. config_dir: {{ pg_data }}
  300. pgpass: {{ pg_dbsu_home }}/.pgpass
  301. listen: {{ pg_listen }}:{{ pg_port }}
  302. connect_address: {{ inventory_hostname }}:{{ pg_port }}
  303. use_unix_socket: true # default: /var/run/postgresql, /tmp
  304. #----------------------------------------------------------------------------
  305. # who to connect to postgres
  306. #----------------------------------------------------------------------------
  307. authentication:
  308. superuser:
  309. username: {{ pg_dbsu }}
  310. replication:
  311. username: {{ pg_replication_username }}
  312. password: '{{ pg_replication_password }}'
  313. rewind:
  314. username: {{ pg_replication_username }}
  315. password: '{{ pg_replication_password }}'
  316. #----------------------------------------------------------------------------
  317. # how to react to database operations
  318. #----------------------------------------------------------------------------
  319. # event callback script log: /pg/log/callback.log
  320. callbacks:
  321. on_start: /pg/bin/pg-failover-callback
  322. on_stop: /pg/bin/pg-failover-callback
  323. on_reload: /pg/bin/pg-failover-callback
  324. on_restart: /pg/bin/pg-failover-callback
  325. on_role_change: /pg/bin/pg-failover-callback
  326. # rewind policy: data checksum should be enabled before using rewind
  327. use_pg_rewind: true
  328. remove_data_directory_on_rewind_failure: true
  329. remove_data_directory_on_diverged_timelines: false
  330. #----------------------------------------------------------------------------
  331. # how to create replica
  332. #----------------------------------------------------------------------------
  333. # create replica method: default pg_basebackup
  334. create_replica_methods:
  335. - basebackup
  336. basebackup:
  337. - max-rate: '1000M'
  338. - checkpoint: fast
  339. - status-interva: 1s
  340. - verbose
  341. - progress
  342. #----------------------------------------------------------------------------
  343. # ad hoc parameters (overwrite with default)
  344. #----------------------------------------------------------------------------
  345. # parameters:
  346. #----------------------------------------------------------------------------
  347. # host based authentication, overwrite default pg_hba.conf
  348. #----------------------------------------------------------------------------
  349. # pg_hba:
  350. # - local all postgres ident
  351. # - local all all md5
  352. # - host all all 0.0.0.0/0 md5
  353. # - local replication postgres ident
  354. # - local replication all md5
  355. # - host replication all 0.0.0.0/0 md5
  356. ...

最后修改 2022-05-27: init commit (1e3e284)