#!/bin/bash

#
# Copyright 2011-2020 Nicolas Thauvin and contributors. All rights
# reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#
#  1. Redistributions of source code must retain the above copyright
#     notice, this list of conditions and the following disclaimer.
#  2. Redistributions in binary form must reproduce the above copyright
#     notice, this list of conditions and the following disclaimer in the
#     documentation and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND ANY EXPRESS OR
# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
# IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
# INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#

# Version string printed by the -V option
version='1.8'

# Built-in defaults. They apply until the configuration file or, where
# an option exists for them, the command line provides another value.
PGBK_CONFIG='/etc/pg_back/pg_back.conf'    # configuration file to source
PGBK_BACKUP_DIR='/var/backups/postgresql'  # where the dump files are stored
PGBK_TIMESTAMP='%Y-%m-%d_%H-%M-%S'         # date(1) format put in file names
PGBK_PURGE='30'                            # purge dumps older than this (days)
PGBK_PURGE_MIN_KEEP='0'                    # dumps to keep when purging, or "all"
PGBK_OPTS='-Fc'                            # extra options given to pg_dump
PGBK_WITH_TEMPLATES='no'                   # "yes" to dump template databases
PGBK_STANDBY_PAUSE_TIMEOUT='3600'          # seconds allowed to pause the replay
PGBK_CONNDB='postgres'                     # database used to run the queries
PGBK_PRE_BACKUP_COMMAND=''                 # command run before the dumps
PGBK_POST_BACKUP_COMMAND=''                # command run after the dumps
SIGNATURE_ALGO='none'                      # checksum program prefix, or "none"

# Print the help text on stdout, then terminate the script.
# Arguments:
#   $1 - exit status to terminate with (0 when omitted)
usage() {
    local algos

    # Offer every sha*sum program found in /usr/bin as a signature
    # algorithm. The errors of find are discarded so that a system
    # without any such program does not garble the help text.
    algos=$(find /usr/bin/sha*?sum -type f -printf '%f ' 2>/dev/null | sed 's/sum//g')

    echo "PostgreSQL simple backup script"
    # The directory part is stripped with a parameter expansion, which
    # copes with whitespace in the path of the script.
    echo "usage: ${0##*/} [OPTIONS] [DBNAME]..."
    echo "options:"
    echo "  -b dir        store dump files there (default: \"$PGBK_BACKUP_DIR\")"
    echo "  -c config     alternate config file (default: \"$PGBK_CONFIG\")"
    echo "  -P days       purge backups older than this number of days (default: \"$PGBK_PURGE\")"
    echo "  -S algo       signature algorithm (default: \"$SIGNATURE_ALGO\")"
    echo "                available algo.: none ${algos}"
    echo "  -K number     minimum number of backups to keep when purging or 'all' to keep"
    echo "                everything (default: \"$PGBK_PURGE_MIN_KEEP\")"
    echo "  -D db1,...    list of databases to exclude"
    echo "  -t            include templates"
    echo "  -T timeout    seconds of pausing attempt (default: \"$PGBK_STANDBY_PAUSE_TIMEOUT\")"
    echo
    echo "  -h hostname   database server host or socket directory (default: \"${PGHOST:-local socket}\")"
    echo "  -p port       database server port (default: \"$PGPORT\")"
    echo "  -U username   database user name (default: \"$PGUSER\")"
    echo "  -d db         database used for connection (default: \"$PGBK_CONNDB\")"
    echo
    echo "  -q            quiet mode"
    echo
    echo "  -V            print version"
    echo "  -?            print usage"
    echo
    exit "${1:-0}"
}

# Print the current date, time and time zone abbreviation on stdout,
# e.g. "2020-01-31 23:59:59 UTC".
now() {
    local stamp
    stamp=$(date "+%F %T %Z")
    printf '%s\n' "$stamp"
}

# Write an ERROR line, prefixed with the current timestamp, on stderr.
# Arguments: the words of the message, joined with spaces
error() {
    printf '%s  ERROR: %s\n' "$(now)" "$*" >&2
}

# Write a WARNING line, prefixed with the current timestamp, on stderr.
# Arguments: the words of the message, joined with spaces
warn() {
    printf '%s  WARNING: %s\n' "$(now)" "$*" >&2
}

# Write an INFO line, prefixed with the current timestamp, on stderr,
# unless the quiet mode is enabled (quiet="yes"). Always returns 0 so
# that it can be used anywhere without altering the control flow.
# Arguments: the words of the message, joined with spaces
info() {
    if [ "$quiet" != "yes" ]; then
        printf '%s  INFO: %s\n' "$(now)" "$*" >&2
    fi
    return 0
}

# Log an error message, then abort the script with exit status 1.
# Arguments: the words of the message
die() {
    # Quoted so that the message is neither re-split nor glob-expanded
    error "$@"
    exit 1
}

# Hooks
# Run the command configured in PGBK_PRE_BACKUP_COMMAND, if any. When
# the command fails, the script is aborted through die_and_hook.
pre_backup_hook() {
    # Nothing to do when no command is configured
    [ -n "$PGBK_PRE_BACKUP_COMMAND" ] || return 0

    info "running pre-backup command"
    # Left unquoted on purpose: the value is split into a command name
    # and its arguments.
    if $PGBK_PRE_BACKUP_COMMAND; then
        return 0
    fi
    die_and_hook "pre-backup command exited with a non-zero code"
}

# Run the command configured in PGBK_POST_BACKUP_COMMAND, if any. When
# the command fails, the script is aborted through die.
post_backup_hook() {
    # Nothing to do when no command is configured
    [ -n "$PGBK_POST_BACKUP_COMMAND" ] || return 0

    info "running post-backup command"
    # Left unquoted on purpose: the value is split into a command name
    # and its arguments.
    if $PGBK_POST_BACKUP_COMMAND; then
        return 0
    fi
    die "post-backup command exited with a non-zero code"
}

# post_backup_hook should always be run: this variant of die logs the
# error, runs the post-backup command, then exits with status 1.
# Arguments: the words of the message
die_and_hook() {
    # Quoted so that the message is neither re-split nor glob-expanded
    error "$@"
    post_backup_hook
    exit 1
}

# When the default config file does not exist, try the old path
[ -f "$PGBK_CONFIG" ] || PGBK_CONFIG="/etc/postgresql/pg_back.conf"

# Process command line. getopt(1) is used in its traditional form, whose
# output is not quoted: the arguments are deliberately left to word
# splitting here and below, so values containing whitespace are not
# supported.
if ! args=$(getopt "b:c:P:K:D:T:th:p:S:U:d:qV?" $*); then
    usage 2
fi

set -- $args

# Consume the normalized arguments from the front. Only "$1" is ever
# examined, so the value of an option is never mistaken for another
# option, even when it looks like one (e.g. "-d -t").
while [ $# -gt 0 ]; do
    case "$1" in
        -b) CLI_BACKUP_DIR=$2; shift 2;;
        -c) PGBK_CONFIG=$2; shift 2;;
        -P) CLI_PURGE=$2; shift 2;;
        -S) CLI_SIGNATURE_ALGO=$2; shift 2;;
        -K) CLI_PURGE_MIN_KEEP=$2; shift 2;;
        # The comma separated list becomes a space separated one
        -D) CLI_EXCLUDE=${2//,/ }; shift 2;;
        -t) CLI_WITH_TEMPLATES="yes"; shift;;
        -T) CLI_STANDBY_PAUSE_TIMEOUT=$2; shift 2;;
        -h) CLI_HOSTNAME=$2; shift 2;;
        -p) CLI_PORT=$2; shift 2;;
        -d) CLI_CONNDB=$2; shift 2;;
        -U) CLI_USERNAME=$2; shift 2;;
        -q) quiet="yes"; shift;;
        -V) echo "pg_back version $version"; exit 0;;
        -\?) usage 1;;
        --) shift; break;;
        # getopt puts every option before "--"; stop on anything else
        *) break;;
    esac
done

# The remaining arguments are the databases to dump
CLI_DBLIST="$*"

# Load configuration
if [ -f "$PGBK_CONFIG" ]; then
    # Quoted so that a path containing whitespace is sourced as one file
    . "$PGBK_CONFIG"
fi

# The backup directory overrides the one in the config file
if [ -n "$CLI_BACKUP_DIR" ]; then
    PGBK_BACKUP_DIR=$CLI_BACKUP_DIR
fi

# Override configuration with cli options: a value given on the command
# line always wins over the one of the configuration file
[ -n "$CLI_PURGE" ] && PGBK_PURGE=$CLI_PURGE
[ -n "$CLI_PURGE_MIN_KEEP" ] && PGBK_PURGE_MIN_KEEP=$CLI_PURGE_MIN_KEEP
[ -n "$CLI_EXCLUDE" ] && PGBK_EXCLUDE=$CLI_EXCLUDE
[ -n "$CLI_WITH_TEMPLATES" ] && PGBK_WITH_TEMPLATES=$CLI_WITH_TEMPLATES
[ -n "$CLI_STANDBY_PAUSE_TIMEOUT" ] && PGBK_STANDBY_PAUSE_TIMEOUT=$CLI_STANDBY_PAUSE_TIMEOUT
[ -n "$CLI_HOSTNAME" ] && PGBK_HOSTNAME=$CLI_HOSTNAME
[ -n "$CLI_PORT" ] && PGBK_PORT=$CLI_PORT
[ -n "$CLI_USERNAME" ] && PGBK_USERNAME=$CLI_USERNAME
[ -n "$CLI_DBLIST" ] && PGBK_DBLIST=$CLI_DBLIST
[ -n "$CLI_CONNDB" ] && PGBK_CONNDB=$CLI_CONNDB
[ -n "$CLI_SIGNATURE_ALGO" ] && SIGNATURE_ALGO=$CLI_SIGNATURE_ALGO

# Prepare common options for pg_dump and pg_dumpall
[ -n "$PGBK_HOSTNAME" ] && OPTS="$OPTS -h $PGBK_HOSTNAME"
[ -n "$PGBK_PORT" ] && OPTS="$OPTS -p $PGBK_PORT"
[ -n "$PGBK_USERNAME" ] && OPTS="$OPTS -U $PGBK_USERNAME"

# As of 1.8, it is recommended to use an array for PGBK_OPTS to set
# pg_dump cli options. Ensure backward compatibility: a plain string is
# split into words. The array attribute itself is looked for, so that a
# string carrying other attributes (e.g. an exported variable, shown as
# "declare -x") is still split into words.
_pgbk_array_re='^declare -[^ ]*a'
if [[ "$(declare -p PGBK_OPTS 2>/dev/null)" =~ $_pgbk_array_re ]]; then
    _PGBK_OPTS=("${PGBK_OPTS[@]}")
else
    declare -a _PGBK_OPTS=($PGBK_OPTS)
fi
unset _pgbk_array_re

info "preparing to dump"

# Check if some options are integers
[[ $PGBK_PURGE =~ ^[[:digit:]]+$ ]] || die "PGBK_PURGE (-P) '$PGBK_PURGE' is not an integer"
if [[ $PGBK_PURGE_MIN_KEEP != "all" ]]; then
    [[ $PGBK_PURGE_MIN_KEEP =~ ^[[:digit:]]+$ ]] || die "PGBK_PURGE_MIN_KEEP (-K) '$PGBK_PURGE_MIN_KEEP' is not an integer"
fi
[[ $PGBK_STANDBY_PAUSE_TIMEOUT =~ ^[[:digit:]]+$ ]] || die "PGBK_STANDBY_PAUSE_TIMEOUT (-T) '$PGBK_STANDBY_PAUSE_TIMEOUT' is not an integer"

# Ensure there is a trailing slash in the path to the binaries
if [ -n "$PGBK_BIN" ]; then
    PGBK_BIN=$PGBK_BIN/
    # Also, it must exist
    if [ ! -d "$PGBK_BIN" ]; then
        die "$PGBK_BIN directory does not exist"
    fi
fi

# The signature files are created with the <algo>sum program: check
# that it can be run. The "command -v" builtin is used for the lookup
# so that the check does not depend on which(1) being installed.
if [[ -n "${SIGNATURE_ALGO}" && "${SIGNATURE_ALGO}" != "none" ]]; then
    if ! command -v "${SIGNATURE_ALGO}sum" >/dev/null 2>&1; then
        die "Signature program ${SIGNATURE_ALGO}sum is not available to create signature file"
    fi
fi

# Create the backup directory if missing. The path is quoted so that a
# directory containing whitespace is handled as a single path; an empty
# path makes mkdir fail and the script stop.
if [ ! -d "$PGBK_BACKUP_DIR" ]; then
    info "creating directory $PGBK_BACKUP_DIR"
    if ! mkdir -p -- "$PGBK_BACKUP_DIR"; then
        die "could not create $PGBK_BACKUP_DIR"
    fi
fi

# Get version of the server, as the integer "server_version_num"
if ! PG_VERSION=$(${PGBK_BIN}psql -X $OPTS -At -c "SELECT setting FROM pg_settings WHERE name = 'server_version_num';" $PGBK_CONNDB 2>/dev/null); then
    die "could not get the version of the server"
fi

# The replay control functions are named after "xlog" before
# PostgreSQL 10 and after "wal" since then. The version is forced to
# base 10 so that a leading zero is not read as an octal number.
xlog_or_wal="xlog"
if (( 10#$PG_VERSION >= 100000 )); then
    xlog_or_wal="wal"
fi

info "target directory is $PGBK_BACKUP_DIR"
pre_backup_hook

# Check if replay pause is available: the query returns 1 only when the
# pause function exists and the server is in recovery
if ! PG_HASPAUSE=$(${PGBK_BIN}psql -X $OPTS -At -c "SELECT 1 FROM pg_proc WHERE proname='pg_${xlog_or_wal}_replay_pause' AND pg_is_in_recovery();" $PGBK_CONNDB 2>/dev/null); then
    info "could not check for replication control functions"
fi

# Pause replay if dumping from a standby
if [ "${PG_HASPAUSE}" = "1" ]; then
    info "pausing replication replay"

    # Wait for exclusive locks to get released on the standby before
    # pausing the replication replay. The lock count is checked every
    # PGBK_PAUSE seconds, each wait being deducted from the timeout.
    declare -i PGBK_EXCLLOCKS=1
    PGBK_PAUSE=10
    while (( PGBK_EXCLLOCKS > 0 )); do
        if ! PGBK_EXCLLOCKS=$(${PGBK_BIN}psql -X $OPTS -At -c "SELECT count(*) FROM pg_locks WHERE mode = 'AccessExclusiveLock';" $PGBK_CONNDB); then
            die_and_hook "could not get lock information"
        fi

        if (( PGBK_EXCLLOCKS > 0 )); then
            # Give up once the time budget is spent
            if [ $PGBK_STANDBY_PAUSE_TIMEOUT -le 0 ]; then
                die_and_hook "attempt to pause replication exceeded timeout"
            fi

            info "the standby database has exclusive locks (vacuum full, truncate or other locking command) running on primary"
            info "resuming replication for ${PGBK_PAUSE}s"
            (( PGBK_STANDBY_PAUSE_TIMEOUT -= PGBK_PAUSE ))
            sleep $PGBK_PAUSE
        elif ! ${PGBK_BIN}psql -X $OPTS -At -c "SELECT pg_${xlog_or_wal}_replay_pause() where pg_is_in_recovery();" $PGBK_CONNDB; then
            # No exclusive lock is left but the replay could not be paused
            die_and_hook "could not pause replication replay"
        fi
    done
fi

# Prepare the list of databases to dump: when none was given, ask the
# server for every database accepting connections
if [ -z "$PGBK_DBLIST" ]; then
    info "listing databases"

    # Templates are only part of the list on request
    case "$PGBK_WITH_TEMPLATES" in
        yes)
            DB_QUERY="SELECT datname FROM pg_database WHERE datallowconn;"
            ;;
        *)
            DB_QUERY="SELECT datname FROM pg_database WHERE datallowconn AND NOT datistemplate;"
            ;;
    esac

    if ! PGBK_DBLIST=$(${PGBK_BIN}psql -X $OPTS -At -c "$DB_QUERY" $PGBK_CONNDB); then
        die_and_hook "could not list databases"
    fi
fi

# Names of what has been successfully dumped, used later by the purge
dumped=()
DUMP_DATE=$(date "+${PGBK_TIMESTAMP}")
# Dump roles and tablespaces first
dump="$PGBK_BACKUP_DIR/pg_global_${DUMP_DATE}.sql"
info "dumping global objects into ${dump}"
if ${PGBK_BIN}pg_dumpall $OPTS -g > "$dump"; then
    dumped+=( "pg_global" )
else
    error "pg_dumpall -g failed"
    out_rc=1
fi

# Dump configuration using SHOW ALL. The connection options are passed
# so that the settings come from the server being dumped, not from the
# one psql would reach by default.
dump="$PGBK_BACKUP_DIR/pg_settings_${DUMP_DATE}.out"
info "saving output of SHOW ALL to ${dump}"
if ${PGBK_BIN}psql -X $OPTS -o "${dump}" -c "SHOW ALL;" $PGBK_CONNDB; then
    dumped+=( "pg_settings" )
else
    error "psql -c 'SHOW ALL' failed"
    out_rc=1
fi

# Dump database
for db in $PGBK_DBLIST
do
    # Do not dump excluded databases. The names are compared as whole,
    # literal strings, so that excluding "test-db" does not exclude
    # "test" as well. The list may be separated by spaces or commas.
    excluded="no"
    for excl in ${PGBK_EXCLUDE//,/ }; do
        if [[ "$excl" == "$db" ]]; then
            excluded="yes"
            break
        fi
    done
    if [[ "$excluded" == "yes" ]]; then
        continue
    fi

    # Try to lock a file named after the database we are going to dump
    # to prevent stacking pg_back processes if pg_dump lasts longer
    # than a schedule of pg_back. If the lock cannot be acquired, skip
    # the dump and exit with an error at the end.
    if [ -x /usr/bin/flock ]; then
        lockfile="${PGBK_BACKUP_DIR}/${db}.lock"
        if ! exec 3>"$lockfile"; then
            warn "unable to open lockfile $lockfile"
        else
            info "acquiring internal lock for $db"
            if ! /usr/bin/flock -n 3; then
                warn "unable to obtain the lock on $db, skipping this database"
                out_rc=1
                continue
            fi
            need_unlock="yes"
        fi
    fi

    # Recompute the date of dump so that the real time of the pg_dump
    # execution is close to the filenames of the group of files we are
    # going to create. This avoids misleading file names if dumping
    # takes hours.
    DUMP_DATE=$(date "+${PGBK_TIMESTAMP}")

    # Dump all statements necessary to recreate the database including
    # privileges with pg_dumpacl. See https://github.com/dalibo/pg_dumpacl
    if command -v "${PGBK_BIN}pg_dumpacl" >/dev/null 2>&1; then
        dump="${PGBK_BACKUP_DIR}/${db}_${DUMP_DATE}.createdb.sql"

        # Since -l is renamed -d in pg_dumpacl 0.2, find the good switch
        db_opt=$(${PGBK_BIN}pg_dumpacl --help | awk '$2 ~ /CONNSTR/ { if ($1 == "-c,") { print "-d" } else { print "-l" } }')
        info "dumping creation statement of database \"$db\" into ${dump}"
        if ! ${PGBK_BIN}pg_dumpacl $OPTS $db_opt "$db" -f "${dump}" ; then
            out_rc=1
            error "pg_dumpacl of database \"$db\" failed"
        fi
    fi

    # Dump and remember which db were properly dumped for purge
    dump_ok="no"
    dump="${PGBK_BACKUP_DIR}/${db}_${DUMP_DATE}.dump"
    info "dumping database \"$db\" into ${dump}"
    if ${PGBK_BIN}pg_dump $OPTS "${_PGBK_OPTS[@]}" -f "${dump}" "$db"; then
        dumped+=( "$db" )
        dump_ok="yes"
    else
        out_rc=1
        # Do not leave a partial dump behind
        rm -rf -- "${dump}"
        error "pg_dump of database \"$db\" failed"
    fi

    # Unlock and clean lockfile
    if [[ "$need_unlock" == "yes" ]]; then
        /usr/bin/flock -u 3
        exec 3>&-
        rm "$lockfile"
        unset need_unlock
    fi

    # Sign the dump. A failed dump has been removed: there is nothing
    # to sign and no signature file must be left for it.
    if [[ "$dump_ok" == "yes" && -n "${SIGNATURE_ALGO}" && "${SIGNATURE_ALGO}" != "none" ]]; then
        info "creating signature file for $(basename "${dump}") into $(basename "${dump}").${SIGNATURE_ALGO}"
        "${SIGNATURE_ALGO}sum" "${dump}" > "${dump}.${SIGNATURE_ALGO}"
    fi
done

# Resume replay if dumping from a standby, i.e. when the replay pause
# was found to be available before the dumps
if [[ "${PG_HASPAUSE}" == "1" ]]; then
    info "resuming replication replay"
    if ! ${PGBK_BIN}psql -X $OPTS -At -c "SELECT pg_${xlog_or_wal}_replay_resume();" $PGBK_CONNDB; then
        die_and_hook "could not resume replication replay"
    fi
fi

if [[ $PGBK_PURGE_MIN_KEEP == "all" ]]; then
    info "old backups kept as requested"
else
    # Purge old backups, only for what was successfully dumped by this run
    info "purging old backups"

    # Transform number of days to a timestamp for Epoch: files last
    # modified at or before this time are old enough to be removed
    limit_ts=$(( $(date +%s) - 86400 * $PGBK_PURGE ))

    for db in "${dumped[@]}"; do
        # List the files of the databases that were successfully dumped
        # along with the time of last modification since Epoch, as
        # "time|path" lines. For each type of file, sort them so that
        # the newest come first and skip the PGBK_PURGE_MIN_KEEP first
        # ones: the others are the candidates for removal.
        # NOTE(review): the glob also matches the files of any other
        # database whose name starts with "${db}_" - confirm whether
        # such names can coexist in the backup directory.
        to_purge=()
        for t in dump sql ${SIGNATURE_ALGO} out; do
            i=1
            # IFS= and -r keep the paths untouched (leading blanks,
            # backslashes)
            while IFS= read -r line; do
                if (( i > ${PGBK_PURGE_MIN_KEEP:-0} )); then
                    to_purge+=( "$line" )
                fi
                i=$(( i + 1 ))
            done < <(stat -c '%Y|%n' "$PGBK_BACKUP_DIR"/"${db}"_*.${t} 2>/dev/null | sort -rn)
        done

        # Check if the file/dir is old enough to be removed
        for dump in "${to_purge[@]}"; do
            # Split on the first "|" only: the path may contain some
            ts=${dump%%|*}
            filename=${dump#*|}

            if (( ts <= limit_ts )); then
                info "removing $filename"
                if ! rm -rf -- "$filename"; then
                    error "could not purge $filename"
                fi
            fi
        done
    done
fi

# The dumps are over: run the post-backup command and report the end
post_backup_hook
info "done"

# out_rc is only set, to 1, when something failed along the way; the
# script exits with 0 otherwise
exit ${out_rc:-0}
