暂无图片
暂无图片
暂无图片
暂无图片
暂无图片

postgresql14+repmgr+keepalived实现pg高可用与自动切换

原创 手机用户7549 2024-11-11
386

搭建postgresql repmgr集群
集群hosts
192.168.100.206 node1
192.168.100.207 node2
192.168.100.208 node3

修改各节点hostname
hostnamectl set-hostname node1
hostnamectl set-hostname node2
hostnamectl set-hostname node3

安装依赖包
yum -y install flex
yum -y install readline-devel
yum -y install zlib-devel
yum -y install gettext-devel
yum -y install openssl
yum -y install openssl-devel
yum -y install pam
yum -y install pam-devel
yum -y install libxml2
yum -y install libxml2-devel
yum -y install libxslt
yum -y install libxslt-devel
yum -y install perl
yum -y install perl-devel
yum -y install tcl-devel
yum -y install uuid-devel
yum -y install gcc
yum -y install gcc-c++
yum -y install make
yum -y install perl-ExtUtils*

创建用户和组
groupadd dba -g 1001
useradd postgres --gid 1001 --uid 1001 --create-home
echo "xxxxxxx"|passwd postgres --stdin

关闭并禁用防火墙
systemctl disable firewalld --now

内核参数调整
# vi /etc/sysctl.conf
fs.file-max = 76724200
kernel.sem = 10000 10240000 10000 1024
kernel.shmmni = 4096
kernel.shmall = 1073741824
kernel.shmmax = 4398046511104
net.ipv4.ip_local_port_range = 9000 65500
net.core.rmem_default = 262144
net.core.wmem_default = 262144
net.core.rmem_max = 4194304
net.core.wmem_max = 4194304
fs.aio-max-nr = 40960000
vm.dirty_ratio=20
vm.dirty_background_ratio=3
vm.dirty_writeback_centisecs=100
vm.dirty_expire_centisecs=500
vm.swappiness=10
vm.min_free_kbytes=524288

sysctl -p

用户limits设置
# vi /etc/security/limits.conf
postgres soft nofile 1048576
postgres hard nofile 1048576
postgres soft nproc 131072
postgres hard nproc 131072
postgres soft memlock unlimited
postgres hard memlock unlimited
postgres soft core unlimited
postgres hard core unlimited
postgres soft stack unlimited
postgres hard stack unlimited


目录规划
--软件目录
mkdir -p /home/postgres/pg14/
数据与日志目录
mkdir -p /data/pg/
mkdir -p /data/pg/{pgdata,pgxlog,archive,scripts,soft} && chown -R postgres:dba /data/pg/

节点互信
ssh-keygen -t rsa
ssh-copy-id -i .ssh/id_rsa.pub postgres@node1
ssh-copy-id -i .ssh/id_rsa.pub postgres@node2
ssh-copy-id -i .ssh/id_rsa.pub postgres@node3

上传安装包并利用互信scp到所有节点
repmgr-5.3.1.tar.gz
postgresql-14.2.tar.gz

scp *.tar.gz node2:/home/postgres/

编译安装
cd postgresql-14.2
./configure --prefix=/home/postgres/pg14/
gmake world && gmake install-world

初始化数据(node1)
cd /home/postgres/pg14/bin
./initdb -D /data/pg/pgdata -E UTF8 --lc-collate=C --lc-ctype=en_US.utf8 -U postgres -X /data/pg/pgxlog

修改postgres用户环境变量
vi .bash_profile

export PGDATA=/data/pg/pgdata
export PGHOME=/home/postgres/pg14
export PGPORT=6432
export LANG=en_US.UTF8
PATH=$PGHOME/bin:$PATH

修改参数
listen_addresses = '*'
wal_level = replica
synchronous_commit = on
archive_mode = on
archive_command = 'cp %p /data/pg/pgxlog/%f'
max_wal_senders=5
#wal_keep_segments=10
hot_standby = on
restart_after_crash = off
wal_receiver_status_interval = 2
max_standby_streaming_delay = -1
max_standby_archive_delay = -1
synchronous_commit = on
restart_after_crash = off
hot_standby_feedback = on
recovery_target_timeline = 'latest'
min_wal_size = 1GB
max_wal_size = 2GB
wal_keep_size = 1GB
logging_collector=on
log_directory='pg_log'
log_destination=csvlog
log_filename='pg_log_%u.log'
log_file_mode=0600
log_truncate_on_rotation=on
log_rotation_age=1d
log_min_messages=warning
log_min_duration_statement=30s
log_checkpoints=on
log_duration=on
log_lock_waits=on
log_statement='mod'
#shared_preload_libraries = 'repmgr'
wal_log_hints = on
port=6432


解压repmgr并安装
tar -zxvf repmgr-5.3.1.tar.gz
cd repmgr-5.3.1
./configure PG_CONFIG=/home/postgres/pg14/bin/pg_config
make && make install


启动节点1实例
pg_ctl start

配置pg_hba.conf 允许repmgr与replication
新增以下内容
# IPv6 local connections:
host repmgr repmgr 192.168.100.206/24 trust
host repmgr repmgr 192.168.100.207/24 trust
host repmgr repmgr 192.168.100.208/24 trust
host replication repmgr 192.168.100.206/24 trust
host replication repmgr 192.168.100.207/24 trust
host replication repmgr 192.168.100.208/24 trust
# Allow replication connections from localhost, by a user with the
# replication privilege.
host replication pgrepuser 0.0.0.0/0 trust

pg_ctl reload

配置用户
create role pgrepuser replication login password 'replica';
create user repmgr superuser ;
\c postgres repmgr
create database repmgr owner repmgr;
\c repmgr repmgr drop schema public ;
create schema repmgr authorization repmgr;
create extension repmgr with schema repmgr;
alter user repmgr with password '123456';

授权
grant execute on function pg_read_binary_file(text) to repmgr;
grant execute on function pg_read_binary_file(text, bigint, bigint) to repmgr;
grant execute on function pg_read_binary_file(text, bigint, bigint, boolean) to repmgr;
grant execute on function pg_ls_dir(text) to repmgr;
grant execute on function pg_ls_dir(text, boolean, boolean) to repmgr;
grant execute on function pg_stat_file(text) to repmgr;
grant execute on function pg_stat_file(text, boolean) to repmgr;

配置repmgr.conf文件
--可以指定路径,默认使用/etc/repmgr.conf
id/name/coninfo对应修改成节点匹配信息
node_id=1
node_name='node1'
conninfo='host=192.168.100.206 port=6432 user=repmgr dbname=repmgr connect_timeout=2'
data_directory='/data/pg/pgdata'
replication_user='repmgr'
replication_type='physical'
repmgr_bindir='/home/postgres/pg14/bin'
pg_bindir='/home/postgres/pg14/bin'
monitoring_history=yes
monitor_interval_secs=5
log_level='debug'
log_file='/data/pg/pgdata/repmgr.log'

添加/home/postgres/.pgpass文件
192.168.100.206:6432:repmgr:repmgr:123456
192.168.100.207:6432:repmgr:repmgr:123456
192.168.100.208:6432:repmgr:repmgr:123456



修改pass文件权限
chmod 0600 .pgpass
注册主节点
repmgr primary register
--实际为repmgr -f /etc/repmgr.conf primary register

node2/node3克隆
--模拟
repmgr -h 192.168.100.206 -U repmgr -f /etc/repmgr.conf standby clone --dry-run
--真实运行
repmgr -h 192.168.100.206 -U repmgr -f /etc/repmgr.conf standby clone
克隆成功后拉起实例并注册
pg_ctl start
repmgr -f /etc/repmgr.conf standby register --force

查看集群状态
repmgr cluster show

手动切换switchover
repmgr -f /etc/repmgr.conf standby switchover -U repmgr --verbose
repmgr standby switchover -U repmgr --verbose

时间线问题
repmgr cluster show可以观察到每个节点的时间线,可以简单理解为集群主备发生过切换的次数(起始为1)
默认情况下,时间线在切换后的校准需要一定时间
重启实例可以加速这个过程

配置autofailover
将以下内容添加至/etc/repmgr.conf
failover='automatic'
promote_command='/home/postgres/pg14/bin/repmgr standby promote --config-file=/etc/repmgr.conf --log-to-file'
follow_command='/home/postgres/pg14/bin/repmgr standby follow --config-file=/etc/repmgr.conf --log-to-file --upstream-node-id=%n'
monitoring_history=yes
connection_check_type=ping
reconnect_attempts=6
reconnect_interval=10
standby_disconnect_on_failover=true
repmgrd_pid_file='/tmp/repmgrd.pid'
repmgrd_service_start_command='/home/postgres/pg14/bin/repmgrd -f /etc/repmgr.conf start'
repmgrd_service_stop_command='kill -9 `cat /tmp/repmgrd.pid`'

启动repmgrd守护进程 --三个节点执行
--测试启动
repmgr --config-file=/etc/repmgr.conf daemon start --dry-run
--测试启动met后运行
repmgr --config-file=/etc/repmgr.conf daemon start

关闭主节点查看自动选主
pg_ctl stop
repmgr cluster show

主节点重新加入

repmgr node rejoin -f /etc/repmgr.conf -d 'host=192.168.100.206 dbname=repmgr user=repmgr' --force-rewind --config-files=postgresql.conf --verbose


安装keepalived
yum -y install gcc openssl openssl‐devel libnl libnl‐devel ipvsadm
yum -y install libnl libnl-devel
cd /usr/local/src/
tar -zxf keepalived-2.2.8.tar.gz
cd keepalived-2.2.8
./configure --prefix=/usr/local/keepalived
make && make install
cd /usr/local/keepalived
ln -s /usr/local/src/keepalived-2.2.8/keepalived/etc/init.d/keepalived /etc/init.d/
ln -s /usr/local/keepalived/etc/sysconfig/keepalived /etc/sysconfig/
ln -s /usr/local/keepalived/sbin/keepalived /usr/local/sbin/
mkdir /etc/keepalived/
ln -s /usr/local/keepalived/etc/keepalived/keepalived.conf /etc/keepalived/
cat /etc/keepalived/keepalived.conf



vi /etc/keepalived/keepalived.conf
#node1 keepalived conf
#tcpdump -nn -i any net 224.0.0.0/18 来确认VRID
global_defs {
router_id node1
script_user root
enable_script_security
}
vrrp_script checkpg {
script "/etc/keepalived/scripts/checkpg.sh"
interval 15
fall 3
rise 1
}
vrrp_instance VI_pgusdp {
state BACKUP
interface ens192
virtual_router_id 100
priority 90
advert_int 1
nopreempt
authentication {
auth_type PASS
auth_pass 123456
}
track_script {
checkpg
}
notify_master "/etc/keepalived/scripts/master.sh"
notify_backup "/etc/keepalived/scripts/slave.sh"
virtual_ipaddress {
192.168.100.204/24 dev ens192 label ens192:1
}
}


vi /etc/keepalived/keepalived.conf --node2和node3 配置都有差异
#node2 keepalived conf
global_defs {
router_id node2
script_user root
enable_script_security
}
vrrp_script checkpg {
script "/etc/keepalived/scripts/checkpg.sh"
interval 15
fall 3
rise 1
}
vrrp_instance VI_pgusdp {
state BACKUP
interface ens192
virtual_router_id 100
priority 30
advert_int 1
nopreempt
authentication {
auth_type PASS
auth_pass 123456
}
track_script {
checkpg
}
notify_master "/etc/keepalived/scripts/master.sh"
notify_backup "/etc/keepalived/scripts/slave.sh"
virtual_ipaddress {
192.168.100.204/24 dev ens192 label ens192:1
}
}



vi /etc/keepalived/scripts/checkpg.sh

#!/bin/bash
export PGDATABASE=postgres
export PGPORT=6432
export PGUSER=postgres
export PGBIN=/home/postgres/pg14/bin
export PGDATA=/data/pg/pgdata
LOGFILE=/etc/keepalived/log/keepalived.log
nc -w 3 localhost 6432
a=`echo $?`
if [ $a -eq 1 ] ;then
exit 1
else
SQL1='SELECT pg_is_in_recovery from pg_is_in_recovery();'
db_role=`echo $SQL1 | ${PGBIN}/psql -d $PGDATABASE -U $PGUSER -At -w`
if [ $db_role == t ];then
exit 1
fi
fi


vi /etc/keepalived/scripts/master.sh

#!/bin/bash
LOGFILE=/etc/keepalived/log/keepalived.log
export PGDATABASE=postgres
export PGPORT=6432
export PGUSER=postgres
export PGBIN=/home/postgres/pg14/bin
export PGDATA=/data/pg/pgdata
LOGFILE=/etc/keepalived/log/keepalived.log
SQL1='SELECT pg_is_in_recovery from pg_is_in_recovery();'
db_role=`echo $SQL1 | ${PGBIN}/psql -d $PGDATABASE -U $PGUSER -At -w`
if [ $db_role == t ];then
echo -e `date +"%F %T"` "the current database is standby DB! " >> $LOGFILE
exit 1
else
echo -e `date +"%F %T"` "the current database is master DB!" >> $LOGFILE
fi



vi /etc/keepalived/scripts/slave.sh

#!/bin/bash
LOGFILE=/etc/keepalived/log/keepalived.log
export PGDATABASE=postgres
export PGPORT=6432
export PGUSER=postgres
export PGBIN=/home/postgres/pg14/bin
export PGDATA=/data/pg/pgdata
LOGFILE=/etc/keepalived/log/keepalived.log
SQL1='SELECT pg_is_in_recovery from pg_is_in_recovery();'
db_role=`echo $SQL1 | ${PGBIN}/psql -d $PGDATABASE -U $PGUSER -At -w`
if [ $db_role == t ];then
echo -e `date +"%F %T"` "the current database is standby DB! " >> $LOGFILE
else
echo -e `date +"%F %T"` "the current database is master DB!" >> $LOGFILE
fi




「喜欢这篇文章,您的关注和赞赏是给作者最好的鼓励」
关注作者
【版权声明】本文为墨天轮用户原创内容,转载时必须标注文章的来源(墨天轮),文章链接,文章作者等基本信息,否则作者和墨天轮有权追究责任。如果您发现墨天轮中有涉嫌抄袭或者侵权的内容,欢迎发送邮件至:contact@modb.pro进行举报,并提供相关证据,一经查实,墨天轮将立刻删除相关内容。

评论