Skip to content

Commit fe8f25c

Browse files
authored
ROX-20769: Use pg_upgrade in init-entrypoint (#14447)
Introduce major upgrade logic into the CentralDB init-db script. The implementation is based on pg_upgrade [1] and postgresql-container [2]. We can't use the latter's upgrade scripts directly, because they require using the provided entrypoints, which would need some more work from our side. We also couldn't use the `postgresql-upgrade` script directly, because it turns out it doesn't allow keeping logs after the upgrade if something went wrong. The upgrade scenario looks like this: * Check if the binaries version is newer than the data version and an upgrade is needed. * Make sure the cluster was shut down properly. * Figure out if there is enough disk space available for the upgrade. The disk space needs to accommodate one mandatory backup and one temporary restored database to verify the backup. Hence the available size must be more than two times the existing database size. * Take a physical backup, and verify it by restoring a temporary copy of the database. * Spin up a new PG15 database. * Do the upgrade check first. * If successful, do the full upgrade, then swap the old and new databases. The script knows about a backup volume, and tries to use it for making a backup. If it's not present, the main data volume will be used. The script is not idempotent: * If a backup was already taken, it will not be recreated. * If a new PG15 database already exists, the upgrade will be aborted. * If there is not enough disk space available, the upgrade will be aborted. [1]: https://www.postgresql.org/docs/current/pgupgrade.html [2]: https://github.com/sclorg/postgresql-container/
1 parent 6ee9603 commit fe8f25c

File tree

2 files changed

+171
-3
lines changed

2 files changed

+171
-3
lines changed

image/postgres/Dockerfile

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
1-
FROM quay.io/sclorg/postgresql-13-c8s:latest AS final
1+
ARG PG_VERSION=13
2+
FROM quay.io/sclorg/postgresql-${PG_VERSION}-c8s:latest AS final
23

34
USER root
45

image/postgres/scripts/init-entrypoint.sh

Lines changed: 169 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,174 @@
22

33
set -Eeo pipefail
44

5+
# Inspect the file system that holds the specified path, and tell if its
# available disk space reaches the specified threshold. Both numbers are
# expressed in 1K blocks (the unit of "df -k" and "du -sk"). E.g. we have a
# 100GB file system mounted at "/some/path", out of which 80GB are taken. In
# this scenario the call:
#
#   check_available_space "/some/path" 26214400
#
# will fail, because the 20GB still available are less than the required 25GB
# (26214400 1K blocks).
#
# Arguments: $1 - path whose file system is inspected
#            $2 - required available space, in 1K blocks
# Outputs:   a human readable verdict on stdout
# Returns:   0 if enough space is available, 1 otherwise (including the cases
#            when there is no such volume or a value is not a number)
check_available_space () {
    local mount_point=$1
    local threshold=$2
    local available

    # -P keeps every file system on a single output line even if the device
    # name is long, -k pins the unit to 1K blocks regardless of environment.
    # A df failure must not abort the caller, it is reported as "no volume".
    available=$(df -Pk "${mount_point}" | tail -n 1 | awk '{print $4}') \
        || available=""

    if [ -z "${available}" ]; then
        echo "No volume at ${mount_point}"
        return 1
    fi

    # A non-numeric operand makes the "-lt" test below error out, and "if"
    # treats that error as "false", i.e. as "enough space". Reject such
    # values explicitly instead of silently reporting success.
    case "${threshold}" in
        ''|*[!0-9]*)
            echo "Invalid disk space threshold '${threshold}'"
            return 1
            ;;
    esac

    case "${available}" in
        *[!0-9]*)
            echo "Cannot determine available disk space at ${mount_point}" \
                "(got '${available}')"
            return 1
            ;;
    esac

    if [ "${available}" -lt "${threshold}" ]; then
        echo "Volume at ${mount_point} does not have enough available disk space"
        echo "Current value is ${available}, required ${threshold}"
        return 1
    fi

    echo "Volume at ${mount_point} has enough available disk space (${available})"
    return 0
}
33+
34+
# Common initdb options, meant for both initdb and postgresql-upgrade. The
# value is assembled piece by piece and exported afterwards, so that child
# processes see it as well.
PGSETUP_INITDB_OPTIONS="--auth-host=scram-sha-256"
PGSETUP_INITDB_OPTIONS+=" --auth-local=scram-sha-256"
PGSETUP_INITDB_OPTIONS+=" --pwfile /run/secrets/stackrox.io/secrets/password"
PGSETUP_INITDB_OPTIONS+=" --data-checksums"
export PGSETUP_INITDB_OPTIONS
39+
540
# Initialize DB if it does not exist
6-
if [ ! -s "$PGDATA/PG_VERSION" ]; then
7-
initdb --auth-host=scram-sha-256 --auth-local=scram-sha-256 --pwfile /run/secrets/stackrox.io/secrets/password --data-checksums
41+
# Run pg_upgrade from the old data directory into the new one. Any extra
# arguments (e.g. --check) are appended to the command line. Relies on
# NEW_BINARIES, OLD_BINARIES, PGDATA and PGDATA_NEW being set by the caller.
#
# NOTE(review): --clone and -k (--link) both select a file transfer mode;
# confirm which of the two is actually intended.
run_pg_upgrade () {
    PGPASSWORD=$(cat /run/secrets/stackrox.io/secrets/password) \
        "${NEW_BINARIES}/pg_upgrade" \
            --old-bindir="${OLD_BINARIES}" \
            --new-bindir="${NEW_BINARIES}" \
            --old-datadir="${PGDATA}" \
            --new-datadir="${PGDATA_NEW}" \
            --clone -j 4 -k "$@"
}

# Print every pg_upgrade server log found under the new data directory, to
# keep the diagnostics in the container output after a failed upgrade.
dump_pg_upgrade_server_log () {
    find "${PGDATA_NEW}" -name pg_upgrade_server.log -exec cat {} \;
}

if [ ! -s "${PGDATA}/PG_VERSION" ]; then
    # No data yet, create a brand new cluster.
    # shellcheck disable=SC2086
    initdb $PGSETUP_INITDB_OPTIONS
else
    # Verify if we need to perform major version upgrade
    PG_BINARY_VERSION=$(postgres -V |\
        sed 's/postgres (PostgreSQL) \([0-9]*\).\([0-9]*\).*/\1/')

    PG_DATA_VERSION=$(cat "${PGDATA}/PG_VERSION")

    if [ "$PG_DATA_VERSION" -lt "$PG_BINARY_VERSION" ]; then
        # Binaries version is newer, upgrade the data
        PGDATA_NEW="${PGDATA}-new"

        # Verify that the upgrade data directory does not exist. If it does,
        # there was an upgrade attempt already.
        if [ -d "$PGDATA_NEW" ]; then
            echo "Upgraded data directory already exists, stop."
            exit 1
        fi

        # This is the amount of disk space we currently consume. Normally we
        # could use df as well, since the data will be the only disk space
        # consumer, but in testing environment it might not be the case.
        # -k pins the unit to 1K blocks, independently of the environment.
        PG_DATA_USED=$(du -sk "${PGDATA}" | awk '{print $1}')
        PG_BACKUP_VOLUME="/backups"
        echo "Checking backup volume space..."

        # The backup volume needs to accommodate two copies of data, one is
        # the actual backup, and one is a restored copy, which will be
        # deleted later.
        if ! check_available_space "${PG_BACKUP_VOLUME}" $((PG_DATA_USED * 2)); then
            echo "Not enough space. Checking data volume space..."
            PG_BACKUP_VOLUME="${PGDATA}/../"

            # If no luck, check the main data volume. It has to accommodate
            # two extra copies of data as well, one is the backup and one is
            # the restored copy.
            if ! check_available_space "${PG_BACKUP_VOLUME}" $((PG_DATA_USED * 2)); then
                echo "Not enough disk space, upgrade is cancelled"
                exit 1
            fi
        fi

        # After this point we know there is enough available disk space.
        OLD_BINARIES="/usr/lib64/pgsql/postgresql-${PG_DATA_VERSION}/bin"
        NEW_BINARIES="/usr/bin"

        # Not sure how it works now, but during the upgrade group permissions
        # are rejected.
        chmod 0700 "${PGDATA}"

        # Try to restart cluster temporary to make sure it was shutdown properly
        "${OLD_BINARIES}/pg_ctl" start -w --timeout 86400 -o "-h 127.0.0.1"
        "${OLD_BINARIES}/pg_isready" -h 127.0.0.1
        "${OLD_BINARIES}/pg_ctl" stop -w

        # Extract the value of the "Database cluster state" line with all
        # whitespace removed.
        STATUS=$("${OLD_BINARIES}/pg_controldata" -D "${PGDATA}" |\
            grep "Database cluster state" |\
            awk -F ':' '{print $2}' |\
            tr -d '[:space:]')

        if [ "$STATUS" != "shutdown" ]; then
            echo "Cluster was not shutdown clearly"
            exit 1
        fi

        BACKUP_DIR="${PG_BACKUP_VOLUME}/backups/$PG_DATA_VERSION-$PG_BINARY_VERSION/"
        BACKUP_FILE="${BACKUP_DIR}/backup.tar"
        # Do not care about symlinks yet. Look at the archive itself rather
        # than at its directory: the directory alone may be a leftover of an
        # interrupted attempt, which never produced a complete backup.
        if [ -s "${BACKUP_FILE}" ]; then
            echo "An upgrade backup already exists, skip."
        else
            # Do a backup before upgrading. Since the database is stopped we
            # may as well simply take a filesystem backup. Alternatives would
            # be pg_dump or pg_basebackup, both require running database
            # cluster.
            echo "Backup..."
            mkdir -p "${BACKUP_DIR}"
            # Write under a temporary name and rename at the end, so that a
            # half-written archive is never mistaken for a finished backup.
            tar -cf "${BACKUP_FILE}.partial" -C "${PGDATA}" --checkpoint=1000 .
            sync "${BACKUP_FILE}.partial"
            mv "${BACKUP_FILE}.partial" "${BACKUP_FILE}"
        fi

        echo "Verify backup..."
        BACKUP_VERIFY_PGDATA="${BACKUP_DIR}/backup-restore-test"
        # Start from an empty directory, files left by an interrupted earlier
        # verification must not get mixed with the restored copy.
        rm -rf "${BACKUP_VERIFY_PGDATA:?}"
        mkdir -p "${BACKUP_VERIFY_PGDATA}"
        tar -xvf "${BACKUP_FILE}" -C "${BACKUP_VERIFY_PGDATA}"

        # The backup is considered valid if the old binaries can start and
        # stop a cluster on top of the restored copy.
        "${OLD_BINARIES}/pg_ctl" \
            -D "${BACKUP_VERIFY_PGDATA}" \
            -w start -o "-h 127.0.0.1"
        "${OLD_BINARIES}/pg_ctl" \
            -D "${BACKUP_VERIFY_PGDATA}" \
            -w stop

        rm -rf "${BACKUP_VERIFY_PGDATA:?}"

        echo "Upgrade..."
        # shellcheck disable=SC2086
        "${NEW_BINARIES}/initdb" $PGSETUP_INITDB_OPTIONS "${PGDATA_NEW}"

        # Good idea to --check first. The command is tested directly by "if":
        # with "set -e" in effect a separate "$?" check afterwards would never
        # be reached on failure, and the server log would not be printed.
        if ! run_pg_upgrade --check; then
            echo "Upgrade check failed"
            dump_pg_upgrade_server_log
            exit 1
        fi

        if ! run_pg_upgrade; then
            echo "Upgrade failed"
            dump_pg_upgrade_server_log
            exit 1
        fi

        # Carry the old configuration files over, then swap the old and the
        # new data directories.
        mv "${PGDATA}"/*.conf "${PGDATA_NEW}"
        rm -rf "${PGDATA:?}"
        mv "${PGDATA_NEW}" "${PGDATA}"
    fi
fi

0 commit comments

Comments
 (0)