适用范围
磐维集中式,整个集群或集群中某个节点主机操作系统重装,磐维使用到的磁盘未受影响。
创建用户和组
groupadd -g 1101 dbgrp
useradd -g dbgrp -u 1101 -m omm
passwd omm
重新挂载磐维磁盘
创建目录(根据之前部署情况进行创建)
mkdir /database
mkdir /archive
mkdir /backup
挂载磁盘
mount /dev/mapper/database /database
mount /dev/mapper/archive /archive
mount /dev/mapper/backup /backup
永久挂载
vi /etc/fstab
/dev/mapper/database /database xfs defaults 0 0
/dev/mapper/archive /archive xfs defaults 0 0
/dev/mapper/ backup e /backup xfs defaults 0 0
安装依赖包
yum -y install libaio-devel flex bison ncurses-devel glibc-devel patch readline-devel python3 expect* bzip2 libnsl gcc gcc-c++ zlib-devel ncurses-devel expect bzip2 gcc
目录赋权
chown –R omm:dbgrp /database
chown –R omm:dbgrp /archive
chown –R omm:dbgrp /backup
chmod -R 755 /database
chmod -R 755 / archive
chmod -R 755 /database
chmod –R 700 /database/panweidb/data (这一步不做启库时会报错Permissions should be u=rwx (0700))
配置/etc/hosts文件
在/etc/hosts添加群集所有服务器的hostname与IP地址.
配置limit.conf文件
# panweidb
* soft nofile 1000000
* hard nofile 1000000
* soft nproc 655360
* hard nproc 655360
* soft memlock unlimited
* hard memlock unlimited
* soft core unlimited
* hard core unlimited
* soft stack unlimited
* hard stack unlimited
系统内核参数修改
fs.aio-max-nr=1048576
fs.file-max= 76724600
kernel.sem = 4096 2097152000 4096 512000
kernel.shmall = 26843545 # pages, 0.8 * MEM/PAGE_SIZE or higher
kernel.shmmax = 68719476736 # bytes, 0.5 * MEM or higher
kernel.shmmni = 819200
net.core.netdev_max_backlog = 10000
net.core.rmem_default = 262144
net.core.rmem_max = 4194304
net.core.wmem_default = 262144
net.core.wmem_max = 4194304
net.core.somaxconn = 4096
net.ipv4.tcp_fin_timeout = 5
vm.dirty_background_bytes = 409600000
vm.dirty_expire_centisecs = 3000
vm.dirty_ratio = 80
vm.dirty_writeback_centisecs = 50
vm.overcommit_memory = 0
vm.swappiness = 0
net.ipv4.ip_local_port_range = 40000 65535
fs.nr_open = 20480000
kernel.core_pattern = /database/panweidb/corefile/core-%e-%p-%t
立即生效
sysctl -p
sudo提权
visudo
在文件中找到root ALL=(ALL) ALL,在该行下方添加以下内容:
omm ALL=(ALL:ALL) NOPASSWD:/sbin/ip,/usr/sbin/arping,/sbin/iptables,/sbin/ifconfig
赋予/usr/bin/sudo命令执行权限。
chmod u+s /usr/bin/sudo
赋予各命令执行权限。
chmod +x /usr/sbin/ip
chmod +x /usr/sbin/arping
chmod +x /usr/sbin/iptables
chmod +x /usr/sbin/ifconfig
配置omm用户环境变量
vi /home/omm/.bashrc
# .bashrc
# Source global definitions
if [ -f /etc/bashrc ]; then
. /etc/bashrc
fi
# User specific environment
PATH=“HOME/bin:$PATH”
export PATH
# Uncomment the following line if you don’t like systemctl’s auto-paging feature:
# export SYSTEMD_PAGER=
# User specific aliases and functions
export UNPACKPATH=/database/panweidb/soft
export PGDATA=/database/panweidb/data
export PGPORT=17700
export PGDATABASE=postgres
export SSH_AUTH_SOCK=/home/omm/gaussdb_tmp/gauss_socket_tmp
export SSH_AGENT_PID=3715810
export GPHOME=/database/panweidb/tool
export PATH=GPHOME/script:$PATH
export LD_LIBRARY_PATH=LD_LIBRARY_PATH
export LD_LIBRARY_PATH=LD_LIBRARY_PATH
export PYTHONPATH=$GPHOME/lib
export PATH=/root/gauss_om/omm/script:$PATH
export GAUSSHOME=/database/panweidb/app
export PATH=PATH
export LD_LIBRARY_PATH=GAUSSHOME/jre/lib/amd64:LD_LIBRARY_PATH
export S3_CLIENT_CRT_FILE=$GAUSSHOME/lib/client.crt
export PANWEIDB_PKG_ENV=panweidb
export GAUSS_VERSION=2.0
export PGHOST=/database/panweidb/tmp
export GAUSSLOG=/database/panweidb/log/omm
umask 077
export GAUSS_ENV=2
export GS_CLUSTER_NAME=panweidb
添加omm定时任务
*/1 * * * * source ~/.bashrc;python3 /database/panweidb/tool/script/local/CheckSshAgent.py >>/dev/null 2>&1 &
*/1 * * * * source /etc/profile;(if [ -f ~/.profile ];then source ~/.profile;fi);source ~/.bashrc;nohup /database/panweidb/app/bin/om_monitor -L /database/pa
nweidb/log/omm/cm/om_monitor >>/dev/null 2>&1 &
30 00 */3 * * source /home/omm/.bashrc; sh /backup/probackup/PanweiDB_primary_backup.sh >> /backup/probackup/log/backup_ pri_$(date +\%Y\%m\%d).log 2>&1 &
10 00 */4 * * source /home/omm/.bashrc; gs_probackup delete -B /backup/probackup/ --status=ERROR --instance=panweidb > /dev/null 2>&1 &
0 */2 * * * sh /home/omm/move_and_clear_archive.sh 9
30 05 * * * source /home/omm/.bashrc;sh /home/omm/backup_lastest_check_xjpt.sh >>/tmp/backup_latest_check_state.txt
0 */12 * * * source /home/omm/.bashrc;sh /home/omm/license_monitor_check_xjpt.sh >/tmp/license_monitor_check_state.txt
0 */2 * * * source /home/omm/.bashrc;sh /home/omm/gsomctl_monitor_check_xjpt.sh >/tmp/gsomctl_monitor_check_state.txt
创建定时任务中的脚本
vi /home/omm/PanweiDB_primary_backup.sh
#!/bin/bash
source /home/omm/.bashrc
fullbackupdir="/backup/probackup"
#-------------------------------------------------------------------------
prim_role=`gsql -d postgres -c “select pg_is_in_recovery();” -t|awk ‘NF’|awk ‘{$1=$1}1’`
if [[ “prim_role” == ‘0’ ]]; then
##备份
echo “`date +”%Y/%m/%d %H:%M:%S"` 开始备份流程"
gs_probackup backup --backup-path=${fullbackupdir} --instance=panweidb --backup-mode=full --delete-expired --pgdatabase=postgres
# 参数出错
if [ $? -ne 0 ]; then
echo “---------------------------------------------------------------------------------------”
echo “`date +”%Y/%m/%d %H:%M:%S"` 备份失败"
else
#获最新的参数
echo “`date +”%Y/%m/%d %H:%M:%S"` 备份节点为:"`hostname -i`
echo “`date +”%Y/%m/%d %H:%M:%S"`备份情况如下:"
gs_probackup show -B ${fullbackupdir} --instance=panweidb
echo “---------------------------------------------------------------------------------------”
echo “PanweiDB全量备份时间:”`date +"%Y-%m-%d %H:%M:%S"`
backup_id=`gs_probackup show -B ${fullbackupdir} --instance=panweidb|grep panweidb|head -n 1| awk ‘{print $3}’`
echo “备份ID为:”${backup_id}
backup_date=`gs_probackup show -B ${fullbackupdir} --instance=panweidb|grep panweidb|head -n 1| awk ‘{print $4}’`
echo “备份时间为:”${backup_date}
echo “`date +”%Y/%m/%d %H:%M:%S"`备份流程结束"
echo “---------------------------------------------------------------------------------------”
fi
else
echo “`date +”%Y/%m/%d %H:%M:%S"` 进行角色检查,此节点为备节点,不进行备份操作"
exit 1
fi
chmod +x /backup/probackup/PanweiDB_primary_backup.sh
vi /home/omm/move_and_clear_archive.sh
#!/bin/bash
fnum=$1
echo “begin to clear archive log…”
find /archive -mtime +${fnum} -type f -name “0000*” | xargs rm -f
echo “archive files clean successfully”
vi /home/omm/backup_lastest_check_xjpt.sh
#!/bin/bash
source /home/omm/.bashrc
fullbackupdir="/backup/probackup"
export PGPORT=17700
export PGUSER=omm
export PGDATABASE=postgres
######################
if [ -f /tmp/backup_latest_check_state.txt ];then
chmod 755 /tmp/backup_latest_check_state.txt
else
touch /tmp/backup_latest_check_state.txt
chmod 755 /tmp/backup_latest_check_state.txt
fi
######################
daychecks=`crontab -l|grep -v “#”|grep “PanweiDB_primary_backup.sh”|awk ‘{print $3}’`
if [[ “daychecks" = "\*/1" \]\] || \[\[ "daychecks” = “*” ]]; then
######################
prim_role=`gsql -d postgres -c “select pg_is_in_recovery();” -t|grep -v ‘Warning’|awk ‘NF’|awk ‘{$1=$1}1’`
if [[ “prim\_role" = 'f' \]\] || \[\[ "prim_role” = ‘0’ ]]; then
#查看前一天的备份
data_check=`date +"%Y-%m-%d" -d “-1 days”`
aaa=`gs_probackup show -B $fullbackupdir --instance=panweidb| grep -v “Instance”|grep FULL|grep ${data_check}|awk ‘{print $1" “$3” "$4 " “$5” “$6” “$9” “$10” "NF}’|wc -l`
if [ “$aaa” = ‘1’ ]; then
mdaa=`gs_probackup show -B $fullbackupdir --instance=panweidb| grep -v “Instance”|grep FULL|grep ${data_check}|awk ‘{print $1" “$3” "$4 " “$5” “$6” “$9” “$10” "NF}’`
echo $fullbackupdir " " $mdaa
else
echo $fullbackupdir’ panweidb IAM_PRIMARY '`date +"%Y-%m-%d %H:%M:%S" -d “-1 days”` ‘FULL 0s 0MB 0MB ERROR’
fi
else
echo $fullbackupdir’ panweidb IAM_standby '`date +"%Y-%m-%d %H:%M:%S" -d “-1 days”` ‘FULL 0s 0MB 0MB OK’
fi
######################
elif [[ “$daychecks” = “*/3” ]]; then
prim_role=`gsql -d postgres -c “select pg_is_in_recovery();” -t|grep -v ‘Warning’|awk ‘NF’|awk ‘{$1=$1}1’`
if [[ “prim\_role" = 'f' \]\] || \[\[ "prim_role” = ‘0’ ]]; then
#查看前三天的备份
data_check1=`date +"%Y-%m-%d" -d “-1 days”`
data_check2=`date +"%Y-%m-%d" -d “-2 days”`
data_check3=`date +"%Y-%m-%d" -d “-3 days”`
auc1=`gs_probackup show -B $fullbackupdir --instance=panweidb| grep -v “Instance”|grep FULL|grep ${data_check1}|awk ‘{print $1" “$3” "$4 " “$5” “$9” “$10” "NF}’|wc -l`
auc2=`gs_probackup show -B $fullbackupdir --instance=panweidb| grep -v “Instance”|grep FULL|grep ${data_check2}|awk ‘{print $1" “$3” "$4 " “$5” “$9” “$10” "NF}’|wc -l`
auc3=`gs_probackup show -B $fullbackupdir --instance=panweidb| grep -v “Instance”|grep FULL|grep ${data_check3}|awk ‘{print $1" “$3” "$4 " “$5” “$9” “$10” "NF}’|wc -l`
aucall=$((auc1+auc2+auc3))
if [ “$auc3” = ‘1’ ]; then
mdaa3=`gs_probackup show -B $fullbackupdir --instance=panweidb| grep -v “Instance”|grep FULL|grep ${data_check3}|awk ‘{print $1" “$3” "$4 " “$5” “$6” “$9” “$10” "NF}’ `
echo $fullbackupdir " " $mdaa3
elif [ “$auc2” = ‘1’ ]; then
mdaa2=`gs_probackup show -B $fullbackupdir --instance=panweidb| grep -v “Instance”|grep FULL|grep ${data_check2}|awk ‘{print $1" “$3” "$4 " “$5” “$6” “$9” “$10” "NF}’ `
echo $fullbackupdir " " $mdaa2
elif [ “$auc1” = ‘1’ ]; then
mdaa1=`gs_probackup show -B $fullbackupdir --instance=panweidb| grep -v “Instance”|grep FULL|grep ${data_check1}|awk ‘{print $1" “$3” "$4 " “$5” “$6” “$9” “$10” "NF}’ `
echo $fullbackupdir " " $mdaa1
elif [ “$aucall” = ‘0’ ]; then
echo $fullbackupdir’ panweidb IAM_PRIMARY '`date +"%Y-%m-%d %H:%M:%S" -d “-1 days”` ‘FULL 0s 0MB 0MB ERROR’
fi
else
echo $fullbackupdir’ panweidb IAM_standby '`date +"%Y-%m-%d %H:%M:%S" -d “-1 days”` ‘FULL 0s 0MB 0MB OK’
fi
fi
chmod +x /home/omm/backup_lastest_check_xjpt.sh
vi /home/omm/license_monitor_check_xjpt.sh
#!/bin/bash
export PGUSER=omm
export PGPORT=17700
export PGDATABASE=postgres
export PGDATA=/database/panweidb/data
if [ -f /tmp/license_monitor_check_state.txt ];then
chmod 755 /tmp/license_monitor_check_state.txt
else
touch /tmp/license_monitor_check_state.txt
chmod 755 /tmp/license_monitor_check_state.txt
fi
if [ -z “$PGDATA” ]; then
echo “{\“result\”: \“err\”, \“status\”: 0, \“msg\”: \“error, PGDATA environment variable not set\”}”
exit 1
fi
# 定义配置文件路径
CONF_FILE="$PGDATA/postgresql.conf"
if [ ! -f “$CONF_FILE” ]; then
echo “{\“result\”: \“err\”, \“status\”: 0, \“msg\”: \“error, postgresql.conf not found in $PGDATA\”}”
exit 1
fi
# ------------------------- 主许可证检查逻辑 -------------------------
#1.0的版本不需要license,把license剩余时间直接置为99999
versiona=`gsql --pset=pager=off -qAt -c “select count(*) as version from pg_proc where proname=‘pw_version’;”`
if [ $versiona -eq 0 ];
then
echo “99999”
else
##2.0的03b85d1版本不需要license,把license剩余时间直接置为99999
versionab=`gsql --pset=pager=off -qAt -c “select pw_version();”| awk ‘{printf "%s ", $0}’| grep 03b85d1| wc -l`
if [ $versionab -eq 1 ];
then
echo “99999”
else
LICENSE_PATH=""
USE_TEMP_LICENSE=0 # 标记是否使用临时许可证
# 尝试从配置文件中提取license_path
LICENSE_LINE=(grep -E "^\[\[:space:\]\]\*license\_path\[\[:space:\]\]\*=" "CONF_FILE" | grep -v “^#” | tail -n 1)
if [ -n “$LICENSE_LINE” ]; then
# 清理路径参数
LICENSE_PATH=LICENSE_LINE" | sed -E ‘s/^[[:space:]]*license_path[[:space:]]*=[[:space:]]*//’ | sed -e “s/^[’\”]//" -e “s/[’\”]$//")
# 验证许可证目录
if [ ! -d “$LICENSE_PATH” ]; then
# echo “{\“result\”: \“err\”, \“status\”: 0, \“msg\”: \“error, License directory $LICENSE_PATH does not exist\”}” >&2
echo “0”
exit 1
fi
# 检查.lic文件
if [ -z “LICENSE_PATH” -maxdepth 1 -type f -name ‘*.lic’ -print -quit 2>/dev/null)" ]; then
#echo “{\“result\”: \“err\”, \“status\”: 0, \“msg\”: \“error, No .lic files in $LICENSE_PATH\”}” >&2
echo “0”
exit 1
fi
# 未配置license_path时检查默认路径
else
DEFAULT_PATH="/etc/panweidb/license"
if [ -d “DEFAULT\_PATH" \] && \[ -n "(find “$DEFAULT_PATH” -maxdepth 1 -type f -name ‘*.lic’ -print -quit 2>/dev/null)” ]; then
LICENSE_PATH="$DEFAULT_PATH"
else
USE_TEMP_LICENSE=1 # 标记需要检查临时许可证
fi
fi
# ------------------------- 临时许可证检查逻辑 -------------------------
if [ $USE_TEMP_LICENSE -eq 1 ]; then
OUTPUT=$(pw_licensetool --view -temporary 2>&1)
if [ $? -ne 0 ]; then
echo “{\“result\”: \“err\”, \“status\”: 0, \“msg\”: \“error, No valid license found (both configured and default paths), and temp license check failed: $OUTPUT\”}”
echo “0”
exit 1
fi
# 提取临时许可证时间(示例输出格式)
# Temporary license information:
# Begins On: 2024-05-15 00:00:00
# Expires On: 2024-05-22 23:59:59
BEGIN_TIME=OUTPUT" | sed -n “s/.*Begins On:’\([^’]*\)’.*/\1/p”)
EXPIRE_TIME=OUTPUT" | sed -n “s/.*Expires On:’\([^’]*\)’.*/\1/p”)
if [ -z “BEGIN\_TIME" \] || \[ -z "EXPIRE_TIME” ]; then
echo “0”
exit 1
fi
# ------------------------- 正常许可证处理逻辑 -------------------------
else
# 执行正常许可证检查
OUTPUT=LICENSE_PATH" 2>&1)
if [ $? -ne 0 ]; then
echo “0”
exit 1
fi
# 提取时间信息(示例输出格式)
# License Begins On:‘2024-01-01’ Expires On:‘2025-01-01’
BEGIN_TIME=OUTPUT" | sed -n “s/.*Begins On:’\([^’]*\)’.*/\1/p”)
EXPIRE_TIME=OUTPUT" | sed -n “s/.*Expires On:’\([^’]*\)’.*/\1/p”)
if [ -z “BEGIN\_TIME" \] || \[ -z "EXPIRE_TIME” ]; then
echo “0”
exit 1
fi
fi
# ------------------------- 统一有效期计算逻辑 -------------------------
# 转换时间格式为timestamp(兼容两种格式)
current_timestamp=$(date +%s)
expire_timestamp=EXPIRE_TIME" “+%s” 2>/dev/null || date -d “${EXPIRE_TIME//-/}” “+%s” 2>/dev/null)
if [ -z “$expire_timestamp” ]; then
echo “0”
exit 1
fi
time_left=$(( expire_timestamp - current_timestamp ))
if [ $time_left -lt 0 ]; then
echo “0”
exit 1
else
days_left=$(( (time_left + 86399) / 86400 )) # 向上取整
if [ $USE_TEMP_LICENSE -eq 1 ]; then
echo “0”
else
echo $days_left
fi
fi
fi
fi
exit 0
vi /home/omm/gsomctl_monitor_check_xjpt.sh
#!/bin/bash
export PGDATA=/database/panweidb/data
if [ -f /tmp/gsomctl_monitor_check_state.txt ];then
chmod 755 /tmp/gsomctl_monitor_check_state.txt
else
touch /tmp/gsomctl_monitor_check_state.txt
chmod 755 /tmp/gsomctl_monitor_check_state.txt
fi
value_gsquery=`gs_ctl query -D $PGDATA 2> /dev/null`
if [ $? -eq 0 ];
then
aaa=0
else
aaa=1
fi
value_omquery=`gs_om -t query 2>/dev/null`
if [ $? -eq 0 ];
then
bbb=0
else
bbb=1
fi
value_omdetail=`gs_om -t status --detail 2> /dev/null`
if [ $? -eq 0 ];
then
ccc=0
else
ccc=1
fi
echo $((aaa+bbb+ccc))
chmod +x /home/omm/gsomctl_monitor_check_xjpt.sh
上传license文件
查看之前配置文件中license的位置
cat /database/panweidb/data/postgresql.conf|grep license_path
将license文件上传至license_path位置下,并给license文件赋权。
启动集群及检查
cm_ctl start
检查集群状态
gs_om –t status –detaill
在主节点检查vip
ip a




