Skip to content

Commit 1b29c95

Browse files
committed
Use separate disks for WAL and data
One of the performance optimization techniques for insertion, according to the TimescaleDB blog, is to use separate disks for WAL & data. According to the blog [1], ``` Use separate disks for WAL and data ----------------------------------- While this is a more advanced optimization that isn't always needed, if your disk becomes a bottleneck, you can further increase throughput by using a separate disk (tablespace) for the database's write-ahead log (WAL) and data. ``` [1] https://www.timescale.com/blog/13-tips-to-improve-postgresql-insert-performance/
1 parent 00d2f54 commit 1b29c95

7 files changed

Lines changed: 88 additions & 6 deletions

File tree

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
.idea

ENVIRONMENT.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ Environment Configuration Settings
1717
- **PGROOT**: a directory where we put the pgdata (by default /home/postgres/pgroot). One may adjust it to point to the mount point of the persistent volume, such as EBS.
1818
- **WALE_TMPDIR**: directory to store WAL-E temporary files. PGROOT/../tmp by default, make sure it has a few GBs of free space.
1919
- **PGDATA**: location of PostgreSQL data directory, by default PGROOT/pgdata.
20+
- **WAL_DIRECTORY**: location where the write-ahead log should be stored. By default it is stored inside PGDATA. This option is useful if you plan to use separate disks for WAL and data.
2021
- **PGUSER_STANDBY**: username for the replication user, 'standby' by default.
2122
- **PGPASSWORD_STANDBY**: a password for the replication user, 'standby' by default.
2223
- **STANDBY_HOST**: hostname or IP address of the primary to stream from.

postgres-appliance/bootstrap/clone_with_wale.py

Lines changed: 21 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,22 +22,23 @@ def read_configuration():
2222
parser = argparse.ArgumentParser(description="Script to clone from S3 with support for point-in-time-recovery")
2323
parser.add_argument('--scope', required=True, help='target cluster name')
2424
parser.add_argument('--datadir', required=True, help='target cluster postgres data directory')
25+
parser.add_argument('--waldir', required=True, help='target cluster postgres wal directory')
2526
parser.add_argument('--recovery-target-time',
2627
help='the timestamp up to which recovery will proceed (including time zone)',
2728
dest='recovery_target_time_string')
2829
parser.add_argument('--dry-run', action='store_true', help='find a matching backup and build the wal-e '
2930
'command to fetch that backup without running it')
3031
args = parser.parse_args()
3132

32-
options = namedtuple('Options', 'name datadir recovery_target_time dry_run')
33+
options = namedtuple('Options', 'name datadir recovery_target_time dry_run waldir')
3334
if args.recovery_target_time_string:
3435
recovery_target_time = parse(args.recovery_target_time_string)
3536
if recovery_target_time.tzinfo is None:
3637
raise Exception("recovery target time must contain a timezone")
3738
else:
3839
recovery_target_time = None
3940

40-
return options(args.scope, args.datadir, recovery_target_time, args.dry_run)
41+
return options(args.scope, args.datadir, recovery_target_time, args.dry_run, args.waldir)
4142

4243

4344
def build_wale_command(command, datadir=None, backup=None):
@@ -178,10 +179,28 @@ def run_clone_from_s3(options):
178179
return 0
179180

180181

182+
def create_symbolic_link_wal_directory(pg_data, wal_dir):
183+
pg_wal = f'{pg_data}/pg_wal'
184+
logger.info(f"Examining whether WAL already exists or not. directory={pg_wal}")
185+
186+
if not os.path.isdir(pg_wal):
187+
create_symbolic_link_wal_dir = ['ln', '-s', wal_dir, pg_wal]
188+
ret = subprocess.call(create_symbolic_link_wal_dir)
189+
if ret == 0:
190+
logger.info(f"Successfully created a wal directory with symbolic link to {wal_dir}")
191+
else:
192+
raise Exception("Creating a separate wal directory failed with exit code {0}".format(ret))
193+
else:
194+
logger.info(f"Wal directory with symbolic link to {wal_dir} already exists.")
195+
196+
181197
def main():
182198
options = read_configuration()
183199
try:
184200
run_clone_from_s3(options)
201+
logger.info(f'Found waldir={options.waldir}')
202+
if options.waldir:
203+
create_symbolic_link_wal_directory(options.datadir, options.waldir)
185204
except Exception:
186205
logger.exception("Clone failed")
187206
return 1

postgres-appliance/scripts/basebackup.sh

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,9 @@ while getopts ":-:" optchar; do
1414
retries=* )
1515
RETRIES=${OPTARG#*=}
1616
;;
17+
wal_dir=* )
18+
WAL_DIR=${OPTARG#*=}
19+
;;
1720
esac
1821
done
1922

@@ -27,6 +30,12 @@ else
2730
PG_BASEBACKUP_OPTS=()
2831
fi
2932

33+
if [[ -n "$WAL_DIR" ]]; then
34+
PG_WAL_OPTS=(--waldir="$WAL_DIR")
35+
else
36+
PG_WAL_OPTS=()
37+
fi
38+
3039
WAL_FAST=$(dirname "$DATA_DIR")/wal_fast
3140
readonly WAL_FAST
3241
mkdir -p "$WAL_FAST"
@@ -97,7 +106,7 @@ fi
97106

98107
ATTEMPT=0
99108
while [[ $((ATTEMPT++)) -le $RETRIES ]]; do
100-
pg_basebackup --pgdata="${DATA_DIR}" "${PG_BASEBACKUP_OPTS[@]}" --dbname="${CONNSTR}" &
109+
pg_basebackup --pgdata="${DATA_DIR}" "${PG_WAL_OPTS[@]}" "${PG_BASEBACKUP_OPTS[@]}" --dbname="${CONNSTR}" &
101110
basebackup_pid=$!
102111
wait $basebackup_pid
103112
EXITCODE=$?

postgres-appliance/scripts/configure_spilo.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -229,7 +229,7 @@ def deep_update(a, b):
229229
method: clone_with_wale
230230
clone_with_wale:
231231
command: envdir "{{CLONE_WALE_ENV_DIR}}" python3 /scripts/clone_with_wale.py
232-
--recovery-target-time="{{CLONE_TARGET_TIME}}"
232+
--recovery-target-time="{{CLONE_TARGET_TIME}}" --waldir="{{WAL_DIRECTORY}}"
233233
recovery_conf:
234234
restore_command: envdir "{{CLONE_WALE_ENV_DIR}}" timeout "{{WAL_RESTORE_TIMEOUT}}"
235235
/scripts/restore_command.sh "%f" "%p"
@@ -254,6 +254,9 @@ def deep_update(a, b):
254254
--port={{CLONE_PORT}} --user="{{CLONE_USER}}"
255255
{{/CLONE_WITH_BASEBACKUP}}
256256
initdb:
257+
{{#WAL_DIRECTORY}}
258+
- waldir: /home/postgres/wal
259+
{{/WAL_DIRECTORY}}
257260
- encoding: UTF8
258261
- locale: {{INITDB_LOCALE}}.UTF-8
259262
- data-checksums
@@ -370,17 +373,20 @@ def deep_update(a, b):
370373
threshold_backup_size_percentage: {{WALE_BACKUP_THRESHOLD_PERCENTAGE}}
371374
retries: 2
372375
no_master: 1
376+
wal_dir: "{{WAL_DIRECTORY}}"
373377
{{/USE_WALE}}
374378
basebackup_fast_xlog:
375379
command: /scripts/basebackup.sh
376380
retries: 2
381+
wal_dir: "{{WAL_DIRECTORY}}"
377382
{{#STANDBY_WITH_WALE}}
378383
bootstrap_standby_with_wale:
379384
command: envdir "{{STANDBY_WALE_ENV_DIR}}" bash /scripts/wale_restore.sh
380385
threshold_megabytes: {{WALE_BACKUP_THRESHOLD_MEGABYTES}}
381386
threshold_backup_size_percentage: {{WALE_BACKUP_THRESHOLD_PERCENTAGE}}
382387
retries: 2
383388
no_master: 1
389+
wal_dir: "{{WAL_DIRECTORY}}"
384390
{{/STANDBY_WITH_WALE}}
385391
'''
386392

@@ -524,6 +530,7 @@ def get_placeholders(provider):
524530
placeholders.setdefault('PGROOT', os.path.join(placeholders['PGHOME'], 'pgroot'))
525531
placeholders.setdefault('WALE_TMPDIR', os.path.abspath(os.path.join(placeholders['PGROOT'], '../tmp')))
526532
placeholders.setdefault('PGDATA', os.path.join(placeholders['PGROOT'], 'pgdata'))
533+
placeholders.setdefault('WAL_DIRECTORY', '')
527534
placeholders.setdefault('HUMAN_ROLE', 'zalandos')
528535
placeholders.setdefault('PGUSER_STANDBY', 'standby')
529536
placeholders.setdefault('PGPASSWORD_STANDBY', 'standby')

postgres-appliance/scripts/wale_restore.sh

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,9 @@ while getopts ":-:" optchar; do
2727
no_master=*|no-master=* )
2828
NO_MASTER=${OPTARG#*=}
2929
;;
30+
wal_dir=* )
31+
WAL_DIR=${OPTARG#*=}
32+
;;
3033
esac
3134
done
3235

@@ -91,8 +94,16 @@ while true; do
9194
if $WAL_E backup-fetch "$DATA_DIR" LATEST; then
9295
version=$(<"$DATA_DIR/PG_VERSION")
9396
[[ "$version" =~ \. ]] && wal_name=xlog || wal_name=wal
94-
readonly wal_dir=$DATA_DIR/pg_$wal_name
95-
[[ ! -d $wal_dir ]] && rm -f "$wal_dir" && mkdir "$wal_dir"
97+
readonly pg_wal_location=$DATA_DIR/pg_$wal_name
98+
99+
# Only create a symbolic link when a separate WAL directory is specified.
100+
if [[ -n "$WAL_DIR" ]]; then
101+
PG_WAL_OPTS=(ln -s "$WAL_DIR" "$pg_wal_location")
102+
else
103+
PG_WAL_OPTS=(mkdir "$pg_wal_location")
104+
fi
105+
106+
[[ ! -d $pg_wal_location ]] && rm -f "$pg_wal_location" && "${PG_WAL_OPTS[@]}"
96107
# remove broken symlinks from PGDATA
97108
find "$DATA_DIR" -xtype l -delete
98109
exit 0

postgres-appliance/tests/test_spilo.sh

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -235,6 +235,16 @@ function start_clone_with_basebackup_upgrade_container() {
235235
-d spilo3
236236
}
237237

238+
function start_separate_wal_directory_container() {
239+
local ID=$1
240+
241+
docker-compose run \
242+
-e SCOPE=separatewal \
243+
-e WAL_DIRECTORY="/home/postgres/wal" \
244+
--name "${PREFIX}separatewal$ID" \
245+
-d "spilo$ID"
246+
}
247+
238248
function verify_clone_upgrade() {
239249
local type=$2
240250
local from_version=$3
@@ -249,13 +259,26 @@ function verify_archive_mode_is_on() {
249259
[ "$archive_mode" = "on" ]
250260
}
251261

262+
function verify_wal_outside_data_directory() {
263+
local target_path="/home/postgres/wal"
264+
is_symbolic_link=$(
265+
docker_exec "$1" "
266+
[ -L '/home/postgres/pgdata/pgroot/data/pg_wal' ] &&
267+
readlink -f '/home/postgres/pgdata/pgroot/data/pg_wal' | grep -q \"$target_path\" &&
268+
echo true ||
269+
echo false"
270+
)
271+
[ "$is_symbolic_link" = true ]
272+
}
273+
252274

253275
# TEST SUITE 1 - In-place major upgrade 10->11->...->15
254276
# TEST SUITE 2 - Major upgrade 10->15 after wal-e clone (with CLONE_PGVERSION set)
255277
# TEST SUITE 3 - PITR (clone with wal-e) with unreachable target (13+)
256278
# TEST SUITE 4 - Major upgrade 10->11 after wal-e clone (no CLONE_PGVERSION)
257279
# TEST SUITE 5 - Replica bootstrap with wal-e
258280
# TEST SUITE 6 - Major upgrade 11->12 after clone with basebackup
281+
# TEST SUITE 7 - Form a fresh cluster that persists WALs outside of data directory
259282
function test_spilo() {
260283
# TEST SUITE 1
261284
local container=$1
@@ -355,6 +378,11 @@ function test_spilo() {
355378
basebackup_container=$(start_clone_with_basebackup_upgrade_container "$upgrade_container") # SCOPE=upgrade2 PGVERSION=12 CLONE: _SCOPE=upgrade
356379
log_info "[TS6] Started $basebackup_container for testing major upgrade 11->12 after clone with basebackup"
357380

381+
# TEST SUITE 7
382+
local seapate_wal_container="${PREFIX}separatewal1"
383+
start_separate_wal_directory_container 1 # WAL_DIRECTORY="/home/postgres/wal" SCOPE=separatewal
384+
start_separate_wal_directory_container 2 # WAL_DIRECTORY="/home/postgres/wal" SCOPE=separatewal
385+
log_info "[TS7] Started a fresh cluster to test for persisting WALs on a specified location"
358386

359387
# TEST SUITE 1
360388
# run_test test_pg_upgrade_to_15_check_failed "$container" # pg_upgrade --check complains about timescaledb
@@ -377,6 +405,12 @@ function test_spilo() {
377405
log_info "[TS6] Testing in-place major upgrade 11->12 after clone with basebackup"
378406
run_test verify_clone_upgrade "$basebackup_container" "basebackup" 11 12
379407
run_test verify_archive_mode_is_on "$basebackup_container"
408+
409+
# TEST SUITE 7
410+
wait_all_streaming "$seapate_wal_container" 1
411+
wait_zero_lag "$seapate_wal_container" 1
412+
run_test verify_wal_outside_data_directory "$seapate_wal_container"
413+
run_test verify_wal_outside_data_directory "${PREFIX}separatewal2"
380414
}
381415

382416
function main() {

0 commit comments

Comments
 (0)