@@ -127,6 +127,9 @@ static void bt_check_every_level(Relation rel, Relation heaprel,
127127 bool readonly , bool heapallindexed );
128128static BtreeLevel bt_check_level_from_leftmost (BtreeCheckState * state ,
129129 BtreeLevel level );
130+ static bool bt_leftmost_ignoring_half_dead (BtreeCheckState * state ,
131+ BlockNumber start ,
132+ BTPageOpaque start_opaque );
130133static void bt_target_page_check (BtreeCheckState * state );
131134static ScanKey bt_right_page_check_scankey (BtreeCheckState * state );
132135static void bt_downlink_check (BtreeCheckState * state , BlockNumber childblock ,
@@ -716,7 +719,7 @@ bt_check_level_from_leftmost(BtreeCheckState *state, BtreeLevel level)
716719 */
717720 if (state -> readonly )
718721 {
719- if (!P_LEFTMOST ( opaque ))
722+ if (!bt_leftmost_ignoring_half_dead ( state , current , opaque ))
720723 ereport (ERROR ,
721724 (errcode (ERRCODE_INDEX_CORRUPTED ),
722725 errmsg ("block %u is not leftmost in index \"%s\"" ,
@@ -769,10 +772,14 @@ bt_check_level_from_leftmost(BtreeCheckState *state, BtreeLevel level)
769772 }
770773
771774 /*
772- * readonly mode can only ever land on live pages and half-dead pages,
773- * so sibling pointers should always be in mutual agreement
775+ * Sibling links should be in mutual agreement. There arises
776+ * leftcurrent == P_NONE && btpo_prev != P_NONE when the left sibling
777+ * of the parent's low-key downlink is half-dead. (A half-dead page
778+ * has no downlink from its parent.) Under heavyweight locking, the
779+ * last bt_leftmost_ignoring_half_dead() validated this btpo_prev.
774780 */
775- if (state -> readonly && opaque -> btpo_prev != leftcurrent )
781+ if (state -> readonly &&
782+ opaque -> btpo_prev != leftcurrent && leftcurrent != P_NONE )
776783 ereport (ERROR ,
777784 (errcode (ERRCODE_INDEX_CORRUPTED ),
778785 errmsg ("left link/right link pair in index \"%s\" not in agreement" ,
@@ -822,6 +829,67 @@ bt_check_level_from_leftmost(BtreeCheckState *state, BtreeLevel level)
822829 return nextleveldown ;
823830}
824831
832+ /*
833+ * Like P_LEFTMOST(start_opaque), but accept an arbitrarily-long chain of
834+ * half-dead, sibling-linked pages to the left. If a half-dead page appears
835+ * under state->readonly, the database exited recovery between the first-stage
836+ * and second-stage WAL records of a deletion.
837+ */
838+ static bool
839+ bt_leftmost_ignoring_half_dead (BtreeCheckState * state ,
840+ BlockNumber start ,
841+ BTPageOpaque start_opaque )
842+ {
843+ BlockNumber reached = start_opaque -> btpo_prev ,
844+ reached_from = start ;
845+ bool all_half_dead = true;
846+
847+ /*
848+ * To handle the !readonly case, we'd need to accept BTP_DELETED pages and
849+ * potentially observe nbtree/README "Page deletion and backwards scans".
850+ */
851+ Assert (state -> readonly );
852+
853+ while (reached != P_NONE && all_half_dead )
854+ {
855+ Page page = palloc_btree_page (state , reached );
856+ BTPageOpaque reached_opaque = (BTPageOpaque ) PageGetSpecialPointer (page );
857+
858+ CHECK_FOR_INTERRUPTS ();
859+
860+ /*
861+ * Try to detect btpo_prev circular links. _bt_unlink_halfdead_page()
862+ * writes that side-links will continue to point to the siblings.
863+ * Check btpo_next for that property.
864+ */
865+ all_half_dead = P_ISHALFDEAD (reached_opaque ) &&
866+ reached != start &&
867+ reached != reached_from &&
868+ reached_opaque -> btpo_next == reached_from ;
869+ if (all_half_dead )
870+ {
871+ XLogRecPtr pagelsn = PageGetLSN (page );
872+
873+ /* pagelsn should point to an XLOG_BTREE_MARK_PAGE_HALFDEAD */
874+ ereport (DEBUG1 ,
875+ (errcode (ERRCODE_NO_DATA ),
876+ errmsg_internal ("harmless interrupted page deletion detected in index \"%s\"" ,
877+ RelationGetRelationName (state -> rel )),
878+ errdetail_internal ("Block=%u right block=%u page lsn=%X/%X." ,
879+ reached , reached_from ,
880+ (uint32 ) (pagelsn >> 32 ),
881+ (uint32 ) pagelsn )));
882+
883+ reached_from = reached ;
884+ reached = reached_opaque -> btpo_prev ;
885+ }
886+
887+ pfree (page );
888+ }
889+
890+ return all_half_dead ;
891+ }
892+
825893/*
826894 * Function performs the following checks on target page, or pages ancillary to
827895 * target page:
0 commit comments