暂无图片
暂无图片
暂无图片
暂无图片
暂无图片

磐维数据库重装系统后集群恢复

徐伟东 2025-08-26
153

适用范围

磐维集中式,整个集群或集群中某个节点主机操作系统重装,磐维使用到的磁盘未受影响。

创建用户和组

groupadd -g 1101 dbgrp

useradd -g dbgrp -u 1101 -m omm

passwd omm

重新挂载磐维磁盘

创建目录(根据之前部署情况进行创建)

mkdir /database

mkdir /archive

mkdir /backup

挂载磁盘

mount /dev/mapper/database /database

mount /dev/mapper/archive /archive

mount /dev/mapper/backup /backup

永久挂载

vi /etc/fstab

/dev/mapper/database /database xfs defaults 0 0

/dev/mapper/archive /archive xfs defaults 0 0

/dev/mapper/ backup e /backup xfs defaults 0 0

安装依赖包

yum -y install libaio-devel flex bison ncurses-devel glibc-devel patch readline-devel python3 expect* bzip2 libnsl gcc gcc-c++ zlib-devel ncurses-devel expect bzip2 gcc

目录赋权

chown –R omm:dbgrp /database

chown –R omm:dbgrp /archive

chown –R omm:dbgrp /backup

chmod -R 755 /database

chmod -R 755 / archive

chmod -R 755 /database

chmod –R 700 /database/panweidb/data (这一步不做启库时会报错Permissions should be u=rwx (0700))

配置/etc/hosts文件

在/etc/hosts添加群集所有服务器的hostname与IP地址.

配置limit.conf文件

# panweidb

* soft nofile 1000000

* hard nofile 1000000

* soft nproc 655360

* hard nproc 655360

* soft memlock unlimited

* hard memlock unlimited

* soft core unlimited

* hard core unlimited

* soft stack unlimited

* hard stack unlimited

系统内核参数修改

fs.aio-max-nr=1048576

fs.file-max= 76724600

kernel.sem = 4096 2097152000 4096 512000

kernel.shmall = 26843545 # pages, 0.8 * MEM/PAGE_SIZE or higher

kernel.shmmax = 68719476736 # bytes, 0.5 * MEM or higher

kernel.shmmni = 819200

net.core.netdev_max_backlog = 10000

net.core.rmem_default = 262144

net.core.rmem_max = 4194304

net.core.wmem_default = 262144

net.core.wmem_max = 4194304

net.core.somaxconn = 4096

net.ipv4.tcp_fin_timeout = 5

vm.dirty_background_bytes = 409600000

vm.dirty_expire_centisecs = 3000

vm.dirty_ratio = 80

vm.dirty_writeback_centisecs = 50

vm.overcommit_memory = 0

vm.swappiness = 0

net.ipv4.ip_local_port_range = 40000 65535

fs.nr_open = 20480000

kernel.core_pattern = /database/panweidb/corefile/core-%e-%p-%t

立即生效

sysctl -p

sudo提权

visudo

在文件中找到root ALL=(ALL) ALL,在该行下方添加以下内容:

omm ALL=(ALL:ALL) NOPASSWD:/sbin/ip,/usr/sbin/arping,/sbin/iptables,/sbin/ifconfig

赋予/usr/bin/sudo命令执行权限。

chmod u+s /usr/bin/sudo

赋予各命令执行权限。

chmod +x /usr/sbin/ip

chmod +x /usr/sbin/arping

chmod +x /usr/sbin/iptables

chmod +x /usr/sbin/ifconfig

配置omm用户环境变量

vi /home/omm/.bashrc

# .bashrc

# Source global definitions

if [ -f /etc/bashrc ]; then

. /etc/bashrc

fi

# User specific environment

PATH=“HOME/.local/bin:HOME/.local/bin:HOME/bin:$PATH”

export PATH

# Uncomment the following line if you don’t like systemctl’s auto-paging feature:

# export SYSTEMD_PAGER=

# User specific aliases and functions

export UNPACKPATH=/database/panweidb/soft

export PGDATA=/database/panweidb/data

export PGPORT=17700

export PGDATABASE=postgres

export SSH_AUTH_SOCK=/home/omm/gaussdb_tmp/gauss_socket_tmp

export SSH_AGENT_PID=3715810

export GPHOME=/database/panweidb/tool

export PATH=GPHOME/script/gspylib/pssh/bin:GPHOME/script/gspylib/pssh/bin:GPHOME/script:$PATH

export LD_LIBRARY_PATH=GPHOME/script/gspylib/clib:GPHOME/script/gspylib/clib:LD_LIBRARY_PATH

export LD_LIBRARY_PATH=GPHOME/lib:GPHOME/lib:LD_LIBRARY_PATH

export PYTHONPATH=$GPHOME/lib

export PATH=/root/gauss_om/omm/script:$PATH

export GAUSSHOME=/database/panweidb/app

export PATH=GAUSSHOME/bin:GAUSSHOME/bin:PATH

export LD_LIBRARY_PATH=GAUSSHOME/lib:GAUSSHOME/lib:GAUSSHOME/jre/lib/amd64:GAUSSHOME/jre/lib/amd64/server:GAUSSHOME/jre/lib/amd64/server:LD_LIBRARY_PATH

export S3_CLIENT_CRT_FILE=$GAUSSHOME/lib/client.crt

export PANWEIDB_PKG_ENV=panweidb

export GAUSS_VERSION=2.0

export PGHOST=/database/panweidb/tmp

export GAUSSLOG=/database/panweidb/log/omm

umask 077

export GAUSS_ENV=2

export GS_CLUSTER_NAME=panweidb

添加omm定时任务

*/1 * * * * source ~/.bashrc;python3 /database/panweidb/tool/script/local/CheckSshAgent.py >>/dev/null 2>&1 &

*/1 * * * * source /etc/profile;(if [ -f ~/.profile ];then source ~/.profile;fi);source ~/.bashrc;nohup /database/panweidb/app/bin/om_monitor -L /database/pa

nweidb/log/omm/cm/om_monitor >>/dev/null 2>&1 &

30 00 */3 * * source /home/omm/.bashrc; sh /backup/probackup/PanweiDB_primary_backup.sh >> /backup/probackup/log/backup_ pri_$(date +\%Y\%m\%d).log 2>&1 &

10 00 */4 * * source /home/omm/.bashrc; gs_probackup delete -B /backup/probackup/ --status=ERROR --instance=panweidb > /dev/null 2>&1 &

0 */2 * * * sh /home/omm/move_and_clear_archive.sh 9

30 05 * * * source /home/omm/.bashrc;sh /home/omm/backup_lastest_check_xjpt.sh >>/tmp/backup_latest_check_state.txt

0 */12 * * * source /home/omm/.bashrc;sh /home/omm/license_monitor_check_xjpt.sh >/tmp/license_monitor_check_state.txt

0 */2 * * * source /home/omm/.bashrc;sh /home/omm/gsomctl_monitor_check_xjpt.sh >/tmp/gsomctl_monitor_check_state.txt

创建定时任务中的脚本

vi /home/omm/PanweiDB_primary_backup.sh

#!/bin/bash

source /home/omm/.bashrc

fullbackupdir="/backup/probackup"

#-------------------------------------------------------------------------

prim_role=`gsql -d postgres -c “select pg_is_in_recovery();” -t|awk ‘NF’|awk ‘{$1=$1}1’`

if [[ “prim_role"==f"prim\_role" == 'f' || "prim_role” == ‘0’ ]]; then

##备份

echo “`date +”%Y/%m/%d %H:%M:%S"` 开始备份流程"

gs_probackup backup --backup-path=${fullbackupdir} --instance=panweidb --backup-mode=full --delete-expired --pgdatabase=postgres

# 参数出错

if [ $? -ne 0 ]; then

echo “---------------------------------------------------------------------------------------”

echo “`date +”%Y/%m/%d %H:%M:%S"` 备份失败"

else

#获最新的参数

echo “`date +”%Y/%m/%d %H:%M:%S"` 备份节点为:"`hostname -i`

echo “`date +”%Y/%m/%d %H:%M:%S"`备份情况如下:"

gs_probackup show -B ${fullbackupdir} --instance=panweidb

echo “---------------------------------------------------------------------------------------”

echo “PanweiDB全量备份时间:”`date +"%Y-%m-%d %H:%M:%S"`

backup_id=`gs_probackup show -B ${fullbackupdir} --instance=panweidb|grep panweidb|head -n 1| awk ‘{print $3}’`

echo “备份ID为:”${backup_id}

backup_date=`gs_probackup show -B ${fullbackupdir} --instance=panweidb|grep panweidb|head -n 1| awk ‘{print $4}’`

echo “备份时间为:”${backup_date}

echo “`date +”%Y/%m/%d %H:%M:%S"`备份流程结束"

echo “---------------------------------------------------------------------------------------”

fi

else

echo “`date +”%Y/%m/%d %H:%M:%S"` 进行角色检查,此节点为备节点,不进行备份操作"

exit 1

fi

chmod +x /backup/probackup/PanweiDB_primary_backup.sh

vi /home/omm/move_and_clear_archive.sh

#!/bin/bash

fnum=$1

echo “begin to clear archive log…”

find /archive -mtime +${fnum} -type f -name “0000*” | xargs rm -f

echo “archive files clean successfully”

vi /home/omm/backup_lastest_check_xjpt.sh

#!/bin/bash

source /home/omm/.bashrc

fullbackupdir="/backup/probackup"

export PGPORT=17700

export PGUSER=omm

export PGDATABASE=postgres

######################

if [ -f /tmp/backup_latest_check_state.txt ];then

chmod 755 /tmp/backup_latest_check_state.txt

else

touch /tmp/backup_latest_check_state.txt

chmod 755 /tmp/backup_latest_check_state.txt

fi

######################

daychecks=`crontab -l|grep -v “#”|grep “PanweiDB_primary_backup.sh”|awk ‘{print $3}’`

if [[ “daychecks" = "\*/1" \]\] || \[\[ "daychecks” = “*” ]]; then

######################

prim_role=`gsql -d postgres -c “select pg_is_in_recovery();” -t|grep -v ‘Warning’|awk ‘NF’|awk ‘{$1=$1}1’`

if [[ “prim\_role" = 'f' \]\] || \[\[ "prim_role” = ‘0’ ]]; then

#查看前一天的备份

data_check=`date +"%Y-%m-%d" -d “-1 days”`

aaa=`gs_probackup show -B $fullbackupdir --instance=panweidb| grep -v “Instance”|grep FULL|grep ${data_check}|awk ‘{print $1" “$3” "$4 " “$5” “$6” “$9” “$10” "11""11" "NF}’|wc -l`

if [ “$aaa” = ‘1’ ]; then

mdaa=`gs_probackup show -B $fullbackupdir --instance=panweidb| grep -v “Instance”|grep FULL|grep ${data_check}|awk ‘{print $1" “$3” "$4 " “$5” “$6” “$9” “$10” "11""11" "NF}’`

echo $fullbackupdir " " $mdaa

else

echo $fullbackupdir’ panweidb IAM_PRIMARY '`date +"%Y-%m-%d %H:%M:%S" -d “-1 days”` ‘FULL 0s 0MB 0MB ERROR’

fi

else

echo $fullbackupdir’ panweidb IAM_standby '`date +"%Y-%m-%d %H:%M:%S" -d “-1 days”` ‘FULL 0s 0MB 0MB OK’

fi

######################

elif [[ “$daychecks” = “*/3” ]]; then

prim_role=`gsql -d postgres -c “select pg_is_in_recovery();” -t|grep -v ‘Warning’|awk ‘NF’|awk ‘{$1=$1}1’`

if [[ “prim\_role" = 'f' \]\] || \[\[ "prim_role” = ‘0’ ]]; then

#查看前三天的备份

data_check1=`date +"%Y-%m-%d" -d “-1 days”`

data_check2=`date +"%Y-%m-%d" -d “-2 days”`

data_check3=`date +"%Y-%m-%d" -d “-3 days”`

auc1=`gs_probackup show -B $fullbackupdir --instance=panweidb| grep -v “Instance”|grep FULL|grep ${data_check1}|awk ‘{print $1" “$3” "$4 " “$5” “$9” “$10” "11""11" "NF}’|wc -l`

auc2=`gs_probackup show -B $fullbackupdir --instance=panweidb| grep -v “Instance”|grep FULL|grep ${data_check2}|awk ‘{print $1" “$3” "$4 " “$5” “$9” “$10” "11""11" "NF}’|wc -l`

auc3=`gs_probackup show -B $fullbackupdir --instance=panweidb| grep -v “Instance”|grep FULL|grep ${data_check3}|awk ‘{print $1" “$3” "$4 " “$5” “$9” “$10” "11""11" "NF}’|wc -l`

aucall=$((auc1+auc2+auc3))

if [ “$auc3” = ‘1’ ]; then

mdaa3=`gs_probackup show -B $fullbackupdir --instance=panweidb| grep -v “Instance”|grep FULL|grep ${data_check3}|awk ‘{print $1" “$3” "$4 " “$5” “$6” “$9” “$10” "11""11" "NF}’ `

echo $fullbackupdir " " $mdaa3

elif [ “$auc2” = ‘1’ ]; then

mdaa2=`gs_probackup show -B $fullbackupdir --instance=panweidb| grep -v “Instance”|grep FULL|grep ${data_check2}|awk ‘{print $1" “$3” "$4 " “$5” “$6” “$9” “$10” "11""11" "NF}’ `

echo $fullbackupdir " " $mdaa2

elif [ “$auc1” = ‘1’ ]; then

mdaa1=`gs_probackup show -B $fullbackupdir --instance=panweidb| grep -v “Instance”|grep FULL|grep ${data_check1}|awk ‘{print $1" “$3” "$4 " “$5” “$6” “$9” “$10” "11""11" "NF}’ `

echo $fullbackupdir " " $mdaa1

elif [ “$aucall” = ‘0’ ]; then

echo $fullbackupdir’ panweidb IAM_PRIMARY '`date +"%Y-%m-%d %H:%M:%S" -d “-1 days”` ‘FULL 0s 0MB 0MB ERROR’

fi

else

echo $fullbackupdir’ panweidb IAM_standby '`date +"%Y-%m-%d %H:%M:%S" -d “-1 days”` ‘FULL 0s 0MB 0MB OK’

fi

fi

chmod +x /home/omm/backup_lastest_check_xjpt.sh

vi /home/omm/license_monitor_check_xjpt.sh

#!/bin/bash

export PGUSER=omm

export PGPORT=17700

export PGDATABASE=postgres

export PGDATA=/database/panweidb/data

if [ -f /tmp/license_monitor_check_state.txt ];then

chmod 755 /tmp/license_monitor_check_state.txt

else

touch /tmp/license_monitor_check_state.txt

chmod 755 /tmp/license_monitor_check_state.txt

fi

if [ -z “$PGDATA” ]; then

echo “{\“result\”: \“err\”, \“status\”: 0, \“msg\”: \“error, PGDATA environment variable not set\”}”

exit 1

fi

# 定义配置文件路径

CONF_FILE="$PGDATA/postgresql.conf"

if [ ! -f “$CONF_FILE” ]; then

echo “{\“result\”: \“err\”, \“status\”: 0, \“msg\”: \“error, postgresql.conf not found in $PGDATA\”}”

exit 1

fi

# ------------------------- 主许可证检查逻辑 -------------------------

#1.0的版本不需要license,把license剩余时间直接置为99999

versiona=`gsql --pset=pager=off -qAt -c “select count(*) as version from pg_proc where proname=‘pw_version’;”`

if [ $versiona -eq 0 ];

then

echo “99999”

else

##2.0的03b85d1版本不需要license,把license剩余时间直接置为99999

versionab=`gsql --pset=pager=off -qAt -c “select pw_version();”| awk ‘{printf "%s ", $0}’| grep 03b85d1| wc -l`

if [ $versionab -eq 1 ];

then

echo “99999”

else

LICENSE_PATH=""

USE_TEMP_LICENSE=0 # 标记是否使用临时许可证

# 尝试从配置文件中提取license_path

LICENSE_LINE=(grep -E "^\[\[:space:\]\]\*license\_path\[\[:space:\]\]\*=" "CONF_FILE" | grep -v “^#” | tail -n 1)

if [ -n “$LICENSE_LINE” ]; then

# 清理路径参数

LICENSE_PATH=(echo"(echo "LICENSE_LINE" | sed -E ‘s/^[[:space:]]*license_path[[:space:]]*=[[:space:]]*//’ | sed -e “s/^[’\”]//" -e “s/[’\”]$//")

# 验证许可证目录

if [ ! -d “$LICENSE_PATH” ]; then

# echo “{\“result\”: \“err\”, \“status\”: 0, \“msg\”: \“error, License directory $LICENSE_PATH does not exist\”}” >&2

echo “0”

exit 1

fi

# 检查.lic文件

if [ -z “(find"(find "LICENSE_PATH” -maxdepth 1 -type f -name ‘*.lic’ -print -quit 2>/dev/null)" ]; then

#echo “{\“result\”: \“err\”, \“status\”: 0, \“msg\”: \“error, No .lic files in $LICENSE_PATH\”}” >&2

echo “0”

exit 1

fi

# 未配置license_path时检查默认路径

else

DEFAULT_PATH="/etc/panweidb/license"

if [ -d “DEFAULT\_PATH" \] && \[ -n "(find “$DEFAULT_PATH” -maxdepth 1 -type f -name ‘*.lic’ -print -quit 2>/dev/null)” ]; then

LICENSE_PATH="$DEFAULT_PATH"

else

USE_TEMP_LICENSE=1 # 标记需要检查临时许可证

fi

fi

# ------------------------- 临时许可证检查逻辑 -------------------------

if [ $USE_TEMP_LICENSE -eq 1 ]; then

OUTPUT=$(pw_licensetool --view -temporary 2>&1)

if [ $? -ne 0 ]; then

echo “{\“result\”: \“err\”, \“status\”: 0, \“msg\”: \“error, No valid license found (both configured and default paths), and temp license check failed: $OUTPUT\”}”

echo “0”

exit 1

fi

# 提取临时许可证时间(示例输出格式)

# Temporary license information:

# Begins On: 2024-05-15 00:00:00

# Expires On: 2024-05-22 23:59:59

BEGIN_TIME=(echo"(echo "OUTPUT" | sed -n “s/.*Begins On:’\([^’]*\)’.*/\1/p”)

EXPIRE_TIME=(echo"(echo "OUTPUT" | sed -n “s/.*Expires On:’\([^’]*\)’.*/\1/p”)

if [ -z “BEGIN\_TIME" \] || \[ -z "EXPIRE_TIME” ]; then

echo “0”

exit 1

fi

# ------------------------- 正常许可证处理逻辑 -------------------------

else

# 执行正常许可证检查

OUTPUT=(pw_licensetooldump="(pw\_licensetool --dump="LICENSE_PATH" 2>&1)

if [ $? -ne 0 ]; then

echo “0”

exit 1

fi

# 提取时间信息(示例输出格式)

# License Begins On:‘2024-01-01’ Expires On:‘2025-01-01’

BEGIN_TIME=(echo"(echo "OUTPUT" | sed -n “s/.*Begins On:’\([^’]*\)’.*/\1/p”)

EXPIRE_TIME=(echo"(echo "OUTPUT" | sed -n “s/.*Expires On:’\([^’]*\)’.*/\1/p”)

if [ -z “BEGIN\_TIME" \] || \[ -z "EXPIRE_TIME” ]; then

echo “0”

exit 1

fi

fi

# ------------------------- 统一有效期计算逻辑 -------------------------

# 转换时间格式为timestamp(兼容两种格式)

current_timestamp=$(date +%s)

expire_timestamp=(dated"(date -d "EXPIRE_TIME" “+%s” 2>/dev/null || date -d “${EXPIRE_TIME//-/}” “+%s” 2>/dev/null)

if [ -z “$expire_timestamp” ]; then

echo “0”

exit 1

fi

time_left=$(( expire_timestamp - current_timestamp ))

if [ $time_left -lt 0 ]; then

echo “0”

exit 1

else

days_left=$(( (time_left + 86399) / 86400 )) # 向上取整

if [ $USE_TEMP_LICENSE -eq 1 ]; then

echo “0”

else

echo $days_left

fi

fi

fi

fi

exit 0

vi /home/omm/gsomctl_monitor_check_xjpt.sh

#!/bin/bash

export PGDATA=/database/panweidb/data

if [ -f /tmp/gsomctl_monitor_check_state.txt ];then

chmod 755 /tmp/gsomctl_monitor_check_state.txt

else

touch /tmp/gsomctl_monitor_check_state.txt

chmod 755 /tmp/gsomctl_monitor_check_state.txt

fi

value_gsquery=`gs_ctl query -D $PGDATA 2> /dev/null`

if [ $? -eq 0 ];

then

aaa=0

else

aaa=1

fi

value_omquery=`gs_om -t query 2>/dev/null`

if [ $? -eq 0 ];

then

bbb=0

else

bbb=1

fi

value_omdetail=`gs_om -t status --detail 2> /dev/null`

if [ $? -eq 0 ];

then

ccc=0

else

ccc=1

fi

echo $((aaa+bbb+ccc))

chmod +x /home/omm/gsomctl_monitor_check_xjpt.sh

上传license文件

查看之前配置文件中license的位置

cat /database/panweidb/data/postgresql.conf|grep license_path

将license文件上传至license_path位置下,并给license文件赋权。

启动集群及检查

cm_ctl start

检查集群状态

gs_om –t status –detaill

在主节点检查vip

ip a

最后修改时间:2025-08-31 12:52:06
「喜欢这篇文章,您的关注和赞赏是给作者最好的鼓励」
关注作者
【版权声明】本文为墨天轮用户原创内容,转载时必须标注文章的来源(墨天轮),文章链接,文章作者等基本信息,否则作者和墨天轮有权追究责任。如果您发现墨天轮中有涉嫌抄袭或者侵权的内容,欢迎发送邮件至:contact@modb.pro进行举报,并提供相关证据,一经查实,墨天轮将立刻删除相关内容。

评论