
Commit 7d7ebdb

Fix stalled lag columns in pg_stat_replication when replay LSN stops advancing.
Previously, when the replay LSN reported in feedback messages from a standby stopped advancing, for example due to a recovery conflict, the write_lag and flush_lag columns in pg_stat_replication would initially update but then stop progressing. This prevented users from correctly monitoring replication lag.

The problem occurred because, when any LSN stopped updating, the lag tracker's cyclic buffer became full (the write head reached the slowest read head). In that state, the lag tracker could no longer compute round-trip lag values correctly.

This commit fixes the issue by handling the slowest read entry (the one causing the buffer to fill) as a separate overflow entry and freeing its space so the write head and the other read heads can continue advancing in the buffer. As a result, write_lag and flush_lag now continue updating even if the reported replay LSN remains stalled.

Backpatch to all supported versions.

Author: Fujii Masao <masao.fujii@gmail.com>
Reviewed-by: Chao Li <lic@highgo.com>
Reviewed-by: Shinya Kato <shinya11.kato@gmail.com>
Reviewed-by: Xuneng Zhou <xunengzhou@gmail.com>
Discussion: https://postgr.es/m/CAHGQGwGdGQ=1-X-71Caee-LREBUXSzyohkoQJd4yZZCMt24C0g@mail.gmail.com
Backpatch-through: 13
1 parent 7d47278 commit 7d7ebdb
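Editor's aside: below is a minimal, self-contained C sketch of the overflow-entry idea described in the commit message, assuming a toy ring buffer with per-reader read heads is enough to show the behavior. It is not the PostgreSQL code; every name in it (Sample, Tracker, BUF_SIZE, N_READERS, tracker_write, tracker_read) is invented for illustration. The real structures and functions are LagTracker, LagTrackerWrite, and LagTrackerRead in src/backend/replication/walsender.c, shown in the diff further down.

/* lag_overflow_sketch.c -- hypothetical, simplified model of the overflow
 * entry approach; compile with e.g. "cc lag_overflow_sketch.c && ./a.out". */
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define BUF_SIZE  8          /* tiny ring so it fills up quickly */
#define N_READERS 3          /* stand-in for NUM_SYNC_REP_WAIT_MODE */

typedef struct { uint64_t lsn; int64_t time; } Sample;  /* WAL pos + flush time (us) */

typedef struct
{
    Sample buffer[BUF_SIZE];
    int    write_head;
    int    read_heads[N_READERS];  /* -1 means "parked in overflow slot" */
    Sample overflowed[N_READERS];
} Tracker;

/* Record a sample.  If the write head would collide with a reader's read
 * head (ring full), move that reader's oldest unread sample to its overflow
 * slot so the write head can keep advancing. */
static void
tracker_write(Tracker *t, uint64_t lsn, int64_t now)
{
    int new_write_head = (t->write_head + 1) % BUF_SIZE;

    for (int i = 0; i < N_READERS; i++)
    {
        if (new_write_head == t->read_heads[i])
        {
            t->overflowed[i] = t->buffer[t->read_heads[i]];
            t->read_heads[i] = -1;
        }
    }
    t->buffer[t->write_head].lsn = lsn;
    t->buffer[t->write_head].time = now;
    t->write_head = new_write_head;
}

/* Elapsed time for the newest sample whose LSN the reader has reached, or -1
 * if there is none.  A parked reader keeps measuring against its overflow
 * sample until the reported LSN passes it, then rejoins the ring. */
static int64_t
tracker_read(Tracker *t, int head, uint64_t lsn, int64_t now)
{
    int64_t time = -1;

    if (t->read_heads[head] == -1)
    {
        if (t->overflowed[head].lsn > lsn)
            return (now >= t->overflowed[head].time) ?
                now - t->overflowed[head].time : -1;

        time = t->overflowed[head].time;
        t->read_heads[head] = (t->write_head + 1) % BUF_SIZE;
    }

    while (t->read_heads[head] != t->write_head &&
           t->buffer[t->read_heads[head]].lsn <= lsn)
    {
        time = t->buffer[t->read_heads[head]].time;
        t->read_heads[head] = (t->read_heads[head] + 1) % BUF_SIZE;
    }
    return (time >= 0 && now >= time) ? now - time : -1;
}

int
main(void)
{
    Tracker t;

    memset(&t, 0, sizeof(t));

    /* The sender keeps sampling new WAL positions (times in microseconds)... */
    for (uint64_t lsn = 10; lsn <= 30; lsn++)
        tracker_write(&t, lsn, (int64_t) lsn * 1000);

    /* ...while the standby's reported LSN is stuck at 5: the ring filled up,
     * but the parked overflow sample still yields a growing elapsed time. */
    printf("lag = %lld us\n", (long long) tracker_read(&t, 0, 5, 45000));
    return 0;
}

In the final call, the reported LSN (5) is still behind the parked sample (LSN 10), so the reader keeps returning an elapsed time that grows with 'now' instead of going silent once the ring fills; that is the behavior the commit restores for write_lag and flush_lag.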

File tree: 1 file changed (+33, -17 lines)

src/backend/replication/walsender.c

Lines changed: 33 additions & 17 deletions
@@ -214,6 +214,7 @@ typedef struct
 	int			write_head;
 	int			read_heads[NUM_SYNC_REP_WAIT_MODE];
 	WalTimeSample last_read[NUM_SYNC_REP_WAIT_MODE];
+	WalTimeSample overflowed[NUM_SYNC_REP_WAIT_MODE];
 } LagTracker;
 
 static LagTracker *lag_tracker;
@@ -3545,7 +3546,6 @@ WalSndKeepaliveIfNecessary(void)
 static void
 LagTrackerWrite(XLogRecPtr lsn, TimestampTz local_flush_time)
 {
-	bool		buffer_full;
 	int			new_write_head;
 	int			i;
 
@@ -3567,25 +3567,19 @@ LagTrackerWrite(XLogRecPtr lsn, TimestampTz local_flush_time)
 	 * of space.
 	 */
 	new_write_head = (lag_tracker->write_head + 1) % LAG_TRACKER_BUFFER_SIZE;
-	buffer_full = false;
 	for (i = 0; i < NUM_SYNC_REP_WAIT_MODE; ++i)
 	{
+		/*
+		 * If the buffer is full, move the slowest reader to a separate
+		 * overflow entry and free its space in the buffer so the write head
+		 * can advance.
+		 */
 		if (new_write_head == lag_tracker->read_heads[i])
-			buffer_full = true;
-	}
-
-	/*
-	 * If the buffer is full, for now we just rewind by one slot and overwrite
-	 * the last sample, as a simple (if somewhat uneven) way to lower the
-	 * sampling rate. There may be better adaptive compaction algorithms.
-	 */
-	if (buffer_full)
-	{
-		new_write_head = lag_tracker->write_head;
-		if (lag_tracker->write_head > 0)
-			lag_tracker->write_head--;
-		else
-			lag_tracker->write_head = LAG_TRACKER_BUFFER_SIZE - 1;
+		{
+			lag_tracker->overflowed[i] =
+				lag_tracker->buffer[lag_tracker->read_heads[i]];
+			lag_tracker->read_heads[i] = -1;
+		}
 	}
 
 	/* Store a sample at the current write head position. */
@@ -3612,6 +3606,28 @@ LagTrackerRead(int head, XLogRecPtr lsn, TimestampTz now)
 {
 	TimestampTz time = 0;
 
+	/*
+	 * If 'lsn' has not passed the WAL position stored in the overflow entry,
+	 * return the elapsed time (in microseconds) since the saved local flush
+	 * time. If the flush time is in the future (due to clock drift), return
+	 * -1 to treat as no valid sample.
+	 *
+	 * Otherwise, switch back to using the buffer to control the read head and
+	 * compute the elapsed time. The read head is then reset to point to the
+	 * oldest entry in the buffer.
+	 */
+	if (lag_tracker->read_heads[head] == -1)
+	{
+		if (lag_tracker->overflowed[head].lsn > lsn)
+			return (now >= lag_tracker->overflowed[head].time) ?
+				now - lag_tracker->overflowed[head].time : -1;
+
+		time = lag_tracker->overflowed[head].time;
+		lag_tracker->last_read[head] = lag_tracker->overflowed[head];
+		lag_tracker->read_heads[head] =
+			(lag_tracker->write_head + 1) % LAG_TRACKER_BUFFER_SIZE;
+	}
+
 	/* Read all unread samples up to this LSN or end of buffer. */
 	while (lag_tracker->read_heads[head] != lag_tracker->write_head &&
 		   lag_tracker->buffer[lag_tracker->read_heads[head]].lsn <= lsn)
