@@ -6344,13 +6344,18 @@ heap_inplace_update_and_unlock(Relation relation,
63446344 HeapTupleHeader htup = oldtup -> t_data ;
63456345 uint32 oldlen ;
63466346 uint32 newlen ;
6347+ char * dst ;
6348+ char * src ;
63476349
63486350 Assert (ItemPointerEquals (& oldtup -> t_self , & tuple -> t_self ));
63496351 oldlen = oldtup -> t_len - htup -> t_hoff ;
63506352 newlen = tuple -> t_len - tuple -> t_data -> t_hoff ;
63516353 if (oldlen != newlen || htup -> t_hoff != tuple -> t_data -> t_hoff )
63526354 elog (ERROR , "wrong tuple length" );
63536355
6356+ dst = (char * ) htup + htup -> t_hoff ;
6357+ src = (char * ) tuple -> t_data + tuple -> t_data -> t_hoff ;
6358+
63546359 /*
63556360 * Construct shared cache inval if necessary. Note that because we only
63566361 * pass the new version of the tuple, this mustn't be used for any
@@ -6369,15 +6374,15 @@ heap_inplace_update_and_unlock(Relation relation,
63696374 */
63706375 PreInplace_Inval ();
63716376
6372- /* NO EREPORT(ERROR) from here till changes are logged */
6373- START_CRIT_SECTION ();
6374-
6375- memcpy ((char * ) htup + htup -> t_hoff ,
6376- (char * ) tuple -> t_data + tuple -> t_data -> t_hoff ,
6377- newlen );
6378-
63796377 /*----------
6380- * XXX A crash here can allow datfrozenxid() to get ahead of relfrozenxid:
6378+ * NO EREPORT(ERROR) from here till changes are complete
6379+ *
6380+ * Our buffer lock won't stop a reader having already pinned and checked
6381+ * visibility for this tuple. Hence, we write WAL first, then mutate the
6382+ * buffer. Like in MarkBufferDirtyHint() or RecordTransactionCommit(),
6383+ * checkpoint delay makes that acceptable. With the usual order of
6384+ * changes, a crash after memcpy() and before XLogInsert() could allow
6385+ * datfrozenxid to overtake relfrozenxid:
63816386 *
63826387 * ["D" is a VACUUM (ONLY_DATABASE_STATS)]
63836388 * ["R" is a VACUUM tbl]
@@ -6387,31 +6392,57 @@ heap_inplace_update_and_unlock(Relation relation,
63876392 * D: raise pg_database.datfrozenxid, XLogInsert(), finish
63886393 * [crash]
63896394 * [recovery restores datfrozenxid w/o relfrozenxid]
6395+ *
6396+ * Like in MarkBufferDirtyHint() subroutine XLogSaveBufferForHint(), copy
6397+ * the buffer to the stack before logging. Here, that facilitates an FPI
6398+ * of the post-mutation block before we accept other sessions seeing it.
63906399 */
6391-
6392- MarkBufferDirty (buffer );
6400+ Assert ((MyProc -> delayChkptFlags & DELAY_CHKPT_START ) == 0 );
6401+ START_CRIT_SECTION ();
6402+ MyProc -> delayChkptFlags |= DELAY_CHKPT_START ;
63936403
63946404 /* XLOG stuff */
63956405 if (RelationNeedsWAL (relation ))
63966406 {
63976407 xl_heap_inplace xlrec ;
6408+ PGAlignedBlock copied_buffer ;
6409+ char * origdata = (char * ) BufferGetBlock (buffer );
6410+ Page page = BufferGetPage (buffer );
6411+ uint16 lower = ((PageHeader ) page )-> pd_lower ;
6412+ uint16 upper = ((PageHeader ) page )-> pd_upper ;
6413+ uintptr_t dst_offset_in_block ;
6414+ RelFileLocator rlocator ;
6415+ ForkNumber forkno ;
6416+ BlockNumber blkno ;
63986417 XLogRecPtr recptr ;
63996418
64006419 xlrec .offnum = ItemPointerGetOffsetNumber (& tuple -> t_self );
64016420
64026421 XLogBeginInsert ();
64036422 XLogRegisterData ((char * ) & xlrec , SizeOfHeapInplace );
64046423
6405- XLogRegisterBuffer (0 , buffer , REGBUF_STANDARD );
6406- XLogRegisterBufData (0 , (char * ) htup + htup -> t_hoff , newlen );
6424+ /* register block matching what buffer will look like after changes */
6425+ memcpy (copied_buffer .data , origdata , lower );
6426+ memcpy (copied_buffer .data + upper , origdata + upper , BLCKSZ - upper );
6427+ dst_offset_in_block = dst - origdata ;
6428+ memcpy (copied_buffer .data + dst_offset_in_block , src , newlen );
6429+ BufferGetTag (buffer , & rlocator , & forkno , & blkno );
6430+ Assert (forkno == MAIN_FORKNUM );
6431+ XLogRegisterBlock (0 , & rlocator , forkno , blkno , copied_buffer .data ,
6432+ REGBUF_STANDARD );
6433+ XLogRegisterBufData (0 , src , newlen );
64076434
64086435 /* inplace updates aren't decoded atm, don't log the origin */
64096436
64106437 recptr = XLogInsert (RM_HEAP_ID , XLOG_HEAP_INPLACE );
64116438
6412- PageSetLSN (BufferGetPage ( buffer ) , recptr );
6439+ PageSetLSN (page , recptr );
64136440 }
64146441
6442+ memcpy (dst , src , newlen );
6443+
6444+ MarkBufferDirty (buffer );
6445+
64156446 LockBuffer (buffer , BUFFER_LOCK_UNLOCK );
64166447
64176448 /*
@@ -6424,6 +6455,7 @@ heap_inplace_update_and_unlock(Relation relation,
64246455 */
64256456 AtInplace_Inval ();
64266457
6458+ MyProc -> delayChkptFlags &= ~DELAY_CHKPT_START ;
64276459 END_CRIT_SECTION ();
64286460 UnlockTuple (relation , & tuple -> t_self , InplaceUpdateTupleLock );
64296461
0 commit comments