背景

PostgreSQL 有3种停库模式:

“Smart” mode waits for all active clients to disconnect and any online backup to finish.
If the server is in hot standby, recovery and streaming replication will be terminated once all clients have disconnected.

“Fast” mode (the default) does not wait for clients to disconnect and will terminate an online backup in progress.
All active transactions are rolled back and clients are forcibly disconnected, then the server is shut down.

“Immediate” mode will abort all server processes immediately, without a clean shutdown.
This choice will lead to a crash-recovery cycle during the next server start.

smart :等用户进程自然退出。最后做检查点。

fast : 主动断开用户进程。最后做检查点。

immediate : 直接停库(不做检查点,最快)。

除了用户进程,还有归档进程、walsender进程。smart,fast停库时,这些进程又会如何处理呢?

如果数据库开启了归档,smart、fast 停库时会怎么处理 pgarch 进程?

postmaster 会通知 pgarch 进程发起最后一轮归档周期,对所有 .ready 状态的 wal 文件执行 archive_command。除非某个文件归档连续失败、达到重试上限(NUM_ARCHIVE_RETRIES)而放弃本轮归档,否则要等所有的 .ready 文件都执行完 archive_command 之后,pgarch 进程才会退出。

如果有 walsender 进程在,smart、fast 停库时会怎么处理 walsender 进程?

如果有walsender进程存在(例如有standby,有pg_basebackup,有pg_receivewal等利用流复制协议的客户端就有walsender进程),那么要等这个walsender将所有未发送完的wal日志都发送给下游。

src/backend/postmaster/postmaster.c

相关代码及注释如下:

  1. /*
  2. * Reaper -- signal handler to cleanup after a child process dies.
  3. */
  4. static void
  5. reaper(SIGNAL_ARGS)
  6. {
  7. .....................
  8. while ((pid = waitpid(-1, &exitstatus, WNOHANG)) > 0)
  9. {
  10. .......................
  11. /*
  12. * Was it the checkpointer?
  13. */
  14. if (pid == CheckpointerPID)
  15. {
  16. CheckpointerPID = 0;
  17. if (EXIT_STATUS_0(exitstatus) && pmState == PM_SHUTDOWN)
  18. {
  19. /*
  20. * OK, we saw normal exit of the checkpointer after it's been
  21. * told to shut down. We expect that it wrote a shutdown
  22. * checkpoint. (If for some reason it didn't, recovery will
  23. * occur on next postmaster start.)
  24. *
  25. * At this point we should have no normal backend children
  26. * left (else we'd not be in PM_SHUTDOWN state) but we might
  27. * have dead_end children to wait for.
  28. *
  29. * If we have an archiver subprocess, tell it to do a last
  30. * archive cycle and quit. Likewise, if we have walsender
  31. * processes, tell them to send any remaining WAL and quit.
  32. */
  33. Assert(Shutdown > NoShutdown);
  34. /* 唤醒归档进程 进行一轮归档 */
  35. /* Waken archiver for the last time */
  36. if (PgArchPID != 0)
  37. signal_child(PgArchPID, SIGUSR2);
  38. /* wal sender,发送完所有未发送的redo */
  39. /*
  40. * Waken walsenders for the last time. No regular backends
  41. * should be around anymore.
  42. */
  43. SignalChildren(SIGUSR2);
  44. pmState = PM_SHUTDOWN_2;
  45. /*
  46. * We can also shut down the stats collector now; there's
  47. * nothing left for it to do.
  48. */
  49. if (PgStatPID != 0)
  50. signal_child(PgStatPID, SIGQUIT);
  51. }

唤醒归档

src/backend/postmaster/pgarch.c

  1. /* SIGUSR2 signal handler for archiver process */
  2. static void
  3. pgarch_waken_stop(SIGNAL_ARGS)
  4. {
  5. int save_errno = errno;
  6. /* set flag to do a final cycle and shut down afterwards */
  7. /* 停库,触发最后一轮归档周期 */
  8. ready_to_stop = true;
  9. SetLatch(MyLatch);
  10. errno = save_errno;
  11. }
  1. /*
  2. * pgarch_MainLoop
  3. *
  4. * Main loop for archiver
  5. */
  6. static void
  7. pgarch_MainLoop(void)
  8. {
  9. pg_time_t last_copy_time = 0;
  10. bool time_to_stop;
  11. /*
  12. * We run the copy loop immediately upon entry, in case there are
  13. * unarchived files left over from a previous database run (or maybe the
  14. * archiver died unexpectedly). After that we wait for a signal or
  15. * timeout before doing more.
  16. */
  17. wakened = true;
  18. /*
  19. * There shouldn't be anything for the archiver to do except to wait for a
  20. * signal ... however, the archiver exists to protect our data, so she
  21. * wakes up occasionally to allow herself to be proactive.
  22. */
  23. do
  24. {
  25. ResetLatch(MyLatch);
  26. /* When we get SIGUSR2, we do one more archive cycle, then exit */
  27. /* 停库,触发最后一轮归档周期 */
  28. time_to_stop = ready_to_stop;
  29. /* Check for config update */
  30. if (got_SIGHUP)
  31. {
  32. got_SIGHUP = false;
  33. ProcessConfigFile(PGC_SIGHUP);
  34. }
  35. /*
  36. * If we've gotten SIGTERM, we normally just sit and do nothing until
  37. * SIGUSR2 arrives. However, that means a random SIGTERM would
  38. * disable archiving indefinitely, which doesn't seem like a good
  39. * idea. If more than 60 seconds pass since SIGTERM, exit anyway, so
  40. * that the postmaster can start a new archiver if needed.
  41. */
  42. if (got_SIGTERM)
  43. {
  44. time_t curtime = time(NULL);
  45. if (last_sigterm_time == 0)
  46. last_sigterm_time = curtime;
  47. else if ((unsigned int) (curtime - last_sigterm_time) >=
  48. (unsigned int) 60)
  49. break;
  50. }
  51. /* Do what we're here for */
  52. if (wakened || time_to_stop)
  53. {
  54. wakened = false;
  55. pgarch_ArchiverCopyLoop(); // 最后一次循环
  56. last_copy_time = time(NULL);
  57. }
  58. /*
  59. * Sleep until a signal is received, or until a poll is forced by
  60. * PGARCH_AUTOWAKE_INTERVAL having passed since last_copy_time, or
  61. * until postmaster dies.
  62. */
  63. if (!time_to_stop) /* Don't wait during last iteration */
  64. {
  65. pg_time_t curtime = (pg_time_t) time(NULL);
  66. int timeout;
  67. timeout = PGARCH_AUTOWAKE_INTERVAL - (curtime - last_copy_time);
  68. if (timeout > 0)
  69. {
  70. int rc;
  71. rc = WaitLatch(MyLatch,
  72. WL_LATCH_SET | WL_TIMEOUT | WL_POSTMASTER_DEATH,
  73. timeout * 1000L,
  74. WAIT_EVENT_ARCHIVER_MAIN);
  75. if (rc & WL_TIMEOUT)
  76. wakened = true;
  77. if (rc & WL_POSTMASTER_DEATH)
  78. time_to_stop = true;
  79. }
  80. else
  81. wakened = true;
  82. }
  83. /*
  84. * The archiver quits either when the postmaster dies (not expected)
  85. * or after completing one more archiving cycle after receiving
  86. * SIGUSR2.
  87. */
  88. } while (!time_to_stop); /* 停库,触发最后一轮归档周期 */
  89. }

归档所有未归档的日志,直到全部 .ready 文件对应的 wal 都归档完成,或者某个文件归档失败次数达到重试上限(NUM_ARCHIVE_RETRIES)后放弃本轮归档。

  1. /*
  2. * pgarch_ArchiverCopyLoop
  3. *
  4. * Archives all outstanding xlogs then returns
  5. */
  6. static void
  7. pgarch_ArchiverCopyLoop(void)
  8. {
  9. char xlog[MAX_XFN_CHARS + 1];
  10. /*
  11. * loop through all xlogs with archive_status of .ready and archive
  12. * them...mostly we expect this to be a single file, though it is possible
  13. * some backend will add files onto the list of those that need archiving
  14. * while we are still copying earlier archives
  15. */
  16. while (pgarch_readyXlog(xlog))
  17. {
  18. int failures = 0;
  19. int failures_orphan = 0;
  20. for (;;)
  21. {
  22. struct stat stat_buf;
  23. char pathname[MAXPGPATH];
  24. /*
  25. * Do not initiate any more archive commands after receiving
  26. * SIGTERM, nor after the postmaster has died unexpectedly. The
  27. * first condition is to try to keep from having init SIGKILL the
  28. * command, and the second is to avoid conflicts with another
  29. * archiver spawned by a newer postmaster.
  30. */
  31. if (got_SIGTERM || !PostmasterIsAlive())
  32. return;
  33. /*
  34. * Check for config update. This is so that we'll adopt a new
  35. * setting for archive_command as soon as possible, even if there
  36. * is a backlog of files to be archived.
  37. */
  38. if (got_SIGHUP)
  39. {
  40. got_SIGHUP = false;
  41. ProcessConfigFile(PGC_SIGHUP);
  42. }
  43. /* can't do anything if no command ... */
  44. if (!XLogArchiveCommandSet())
  45. {
  46. ereport(WARNING,
  47. (errmsg("archive_mode enabled, yet archive_command is not set")));
  48. return;
  49. }
  50. /*
  51. * Since archive status files are not removed in a durable manner,
  52. * a system crash could leave behind .ready files for WAL segments
  53. * that have already been recycled or removed. In this case,
  54. * simply remove the orphan status file and move on. unlink() is
  55. * used here as even on subsequent crashes the same orphan files
  56. * would get removed, so there is no need to worry about
  57. * durability.
  58. */
  59. snprintf(pathname, MAXPGPATH, XLOGDIR "/%s", xlog);
  60. if (stat(pathname, &stat_buf) != 0 && errno == ENOENT)
  61. {
  62. char xlogready[MAXPGPATH];
  63. StatusFilePath(xlogready, xlog, ".ready");
  64. if (unlink(xlogready) == 0)
  65. {
  66. ereport(WARNING,
  67. (errmsg("removed orphan archive status file \"%s\"",
  68. xlogready)));
  69. /* leave loop and move to the next status file */
  70. break;
  71. }
  72. if (++failures_orphan >= NUM_ORPHAN_CLEANUP_RETRIES)
  73. {
  74. ereport(WARNING,
  75. (errmsg("removal of orphan archive status file \"%s\" failed too many times, will try again later",
  76. xlogready)));
  77. /* give up cleanup of orphan status files */
  78. return;
  79. }
  80. /* wait a bit before retrying */
  81. pg_usleep(1000000L);
  82. continue;
  83. }
  84. if (pgarch_archiveXlog(xlog))
  85. {
  86. /* successful */
  87. pgarch_archiveDone(xlog);
  88. /*
  89. * Tell the collector about the WAL file that we successfully
  90. * archived
  91. */
  92. pgstat_send_archiver(xlog, false);
  93. break; /* out of inner retry loop */
  94. }
  95. else
  96. {
  97. /*
  98. * Tell the collector about the WAL file that we failed to
  99. * archive
  100. */
  101. pgstat_send_archiver(xlog, true);
  102. if (++failures >= NUM_ARCHIVE_RETRIES)
  103. {
  104. ereport(WARNING,
  105. (errmsg("archiving write-ahead log file \"%s\" failed too many times, will try again later",
  106. xlog)));
  107. return; /* give up archiving for now */
  108. }
  109. pg_usleep(1000000L); /* wait a bit before retrying */
  110. }
  111. }
  112. }
  113. }

那么 fast、smart 停库时,如果存在 walsender 进程或开启了归档,到底会有什么问题?

1、如果walsender有很多很多的wal没有发送完,则停库可能要很久很久(因为要等walsender发完)

2、同样的道理,如果有很多很多文件没有归档,并且归档过程中没有报错,则一个归档周期会非常漫长,也会导致停库可能要很久很久。

immediate 模式停库不受上述两个问题的影响(不会等待 walsender 发送完 wal,也不会等待归档完成),但是 immediate 停库不写检查点,下次启动数据库时需要先进行崩溃恢复(crash recovery)。