#!/bin/bash
# Script Name: gaussdb_upgrade.sh
# Author: Jeff.Cui
#######################################################################################
# REVISIONS: #
# Ver Date Author Description #
# ----- ------------ ------------- --------------------------------------- #
# 1.0 2023/09/08 Jeff.Cui Initial version, for Gauss 100 version upgrades (standalone and primary/standby) #
# Usage: #
# sh gaussdb_upgrade.sh root_password omm_password #
#######################################################################################
# Positional parameters: $1 is the root password, $2 is the omm password.
# NOTE(review): passwords given on the command line are visible in the
# process list while the script runs.
rootpwd="${1}"
ommpwd="${2}"
# Helpers for the section separator and the success / failure banners.
#######################################
# Print a bold yellow-on-black separator line carrying the current
# timestamp, followed by a caption on the next line.
# Arguments: $1 - caption text (backslash escapes are interpreted)
# Outputs:   writes the banner to stdout
#######################################
split1() {
  local now
  now=$(date +'%F %T')
  # %b expands backslash escapes the same way `echo -e` does
  printf '%b\n' "\033[1;40;33m\n\n ===================================== ${now} =====================================\n$1\033[0m"
}
#######################################
# Print a red failure banner with the current timestamp.
# Arguments: $1 - message text (backslash escapes are interpreted)
# Outputs:   writes the banner to stdout
#######################################
result_err() {
  local now
  now=$(date +'%F %T')
  # %b expands backslash escapes the same way `echo -e` does
  printf '%b\n' "\033[31m\n (*>﹏<*) ${now} # $1 (*>﹏<*)\n\033[0m"
}
#######################################
# Print a green success banner with the current timestamp.
# Arguments: $1 - message text (backslash escapes are interpreted)
# Outputs:   writes the banner to stdout
#######################################
result_ok() {
  local now
  now=$(date +'%F %T')
  # %b expands backslash escapes the same way `echo -e` does
  printf '%b\n' "\n\033[32m (*^_^*) ${now} # $1 (*^_^*)\033[0m\n"
}
#######################################
# Report whether a file contains at least one line matching a pattern.
# Arguments: $1 - grep basic regular expression to look for
#            $2 - path of the file to search
# Returns:   0 if some line matches, non-zero otherwise (also when the
#            file cannot be read)
#######################################
contain_str() {
  # The path is quoted so file names with whitespace stay one argument;
  # -e / -- keep a pattern or path starting with "-" from being read as an option.
  grep -q -e "$1" -- "$2"
}
# Make sure both the root password and the omm password were supplied.
if [ $# -ne 2 ]; then
  # The usage text names this script (gaussdb_upgrade.sh), matching the header.
  result_err "请正确输入参数,Usage:sh gaussdb_upgrade.sh root_password omm_password"
  echo "
# Usage: #
# sh gaussdb_upgrade.sh root_password omm_password #"
  exit 99
fi
# Check the upgrade directory and the upgrade package.
# Every failing check exits, so the checks are written as independent guards.

# The directory must exist; it is created for the operator before exiting (98).
if [ ! -d /opt/software/gauss_upgrade ]; then
  mkdir /opt/software/gauss_upgrade
  result_err "请将升级包(GaussDB_T*CENTOS7*.gz)上传到 /opt/software/gauss_upgrade 目录"
  exit 98
fi

# At least one entry whose name contains both "CENTOS7" and "GaussDB_T" (97).
if [ "$(cd /opt/software/gauss_upgrade; ls | grep "CENTOS7" | grep -c "GaussDB_T")" -eq 0 ]; then
  result_err "/opt/software/gauss_upgrade 目录不存在升级包(GaussDB_T*CENTOS7*.gz)"
  exit 97
fi

# No entry whose name lacks "CENTOS7" may be present (96).
if [ "$(cd /opt/software/gauss_upgrade; ls | grep -vc "CENTOS7")" -ge 1 ]; then
  result_err "/opt/software/gauss_upgrade 目录下存在其他文件,请备份或者清理"
  exit 96
fi
# The upgrade relies on /opt/software/config.xml; stop with code 95 when it
# is missing and show the copy command that would create it.
[ -f /opt/software/config.xml ] || {
  result_err "升级依赖的配置文件不存在,考虑如下命令复制文件:\nsu - omm -c 'cp \$GPHOME/script/config/clusterconfig.xml /opt/software/config.xml'"
  exit 95
}
# 1. Stop backup jobs and monitoring (only a reminder banner is printed here).
split1 "确认停掉备份任务、crontab等自动化任务"
# 2. Make sure /opt/software/config.xml matches the configuration the DB is using.
split1 "确认 /opt/software/config.xml 文件配置是否同 DB 现用配置一致"
# Decide on diff's exit status (0 = identical, 1 = different, 2 = trouble)
# instead of counting output lines: a diff that fails, e.g. because a file is
# unreadable, prints nothing on stdout and would otherwise pass as "identical".
if ! su - omm -c 'diff $GPHOME/script/config/clusterconfig.xml /opt/software/config.xml' >/dev/null; then
  result_err "/opt/software/config.xml 文件同在用配置文件有差异,请手工检查"
  exit 2
fi
# Number of lines in config.xml containing "share"; step 12 uses it to decide
# whether the shared-disk post-upgrade actions are needed.
sharedisk=$(grep -c share /opt/software/config.xml)
# 3. Back up omm's crontab entries.
split1 "备份 crontab 定时任务"
# Build the file name once so that writing, reporting and displaying all use
# the same file even if the date changes while this step runs.
crontab_bak="/tmp/omm_crontab_$(date +'%F')"
su - omm -c "crontab -l >${crontab_bak}"
result_ok "crontab 定时任务配置备份至:${crontab_bak}"
cat "${crontab_bak}"
# 4. Check the cluster service status.
split1 "检查集群服务状态"
# The status report is kept in /tmp/omstatus.txt; later steps read the node
# list from it.
su - omm -c "gs_om -t status" >/tmp/omstatus.txt
cat /tmp/omstatus.txt
# Last field of the line starting with "cluster_state"
clstatus=$(grep ^cluster_state /tmp/omstatus.txt | awk '{print $NF}')
# Lines containing "STATUS:" that do not contain ONLINE
moduleerr=$(grep "STATUS:" /tmp/omstatus.txt | grep -vc ONLINE)
# "INSTANCE:DB" lines whose third field does not contain ONLINE
abdbnum=$(grep ^INSTANCE:DB /tmp/omstatus.txt | awk '{print $3}' | grep -vc ONLINE)
# Total number of "INSTANCE:DB" lines
dbnum=$(grep -c ^INSTANCE:DB /tmp/omstatus.txt)
if [ "$dbnum" -ge 1 ] && [ "$abdbnum" -eq 0 ] && [ "$clstatus" == "Normal" ] && [ "$moduleerr" -eq 0 ]; then
  result_ok "集群服务检查:DB节点数为-->$dbnum , 异常节点数-->$abdbnum , 集群状态-->$clstatus, 不是ONLINE组件数量-->$moduleerr"
else
  # Append the captured status itself; the escaped "\$(cat ...)" used to print
  # the command text literally instead of the file content.
  result_err "当前集群或数据库实例状态异常\n$(cat /tmp/omstatus.txt)"
  exit 4
fi
# 5. Check the Gauss-T version information.
split1 "检查 Gauss-T 版本信息"
# For each output line of `zengine -v`, take the first whitespace-separated
# field and keep the part that follows its first "GaussDB_T_" marker.
DB_Now_Ver=$(su - omm -c "zengine -v " | awk '{ split($1, piece, "GaussDB_T_"); print piece[2] }')
result_ok "检查数据库版本:${DB_Now_Ver}"
sleep 2
# 6. Check the backup space the upgrade may need (possibly the DB undo,
#    sysaux, system, redo and ctrl files).
split1 "检查升级可能需要的备份空间"
# Minimum backup space in MB: size of the matching files under
# $GSDB_DATA/data. xargs may invoke du more than once for long file lists,
# giving several "total" lines, so the totals are summed.
DB_bak_size_mb=$(su - omm -c 'cd $GSDB_DATA/data;ls | grep -Ei "redo|system|sysaux|undo|cntl" | xargs du -cm | grep total' | awk '{sum += $1} END {if (NR) print sum}')
# Free space in MB on the filesystem holding $GAUSSTMP
left_size_mb=$(su - omm -c 'df -Pm $GAUSSTMP | grep -v Filesystem' | awk '{print $4}')
# Both figures must be plain integers. An empty or malformed value would make
# the numeric test below fail and fall into the "enough space" branch.
for size_val in "$DB_bak_size_mb" "$left_size_mb"; do
  case "$size_val" in
    '' | *[!0-9]*)
      result_err "无法获取所需备份空间或剩余空间大小,退出脚本, 不进行升级操作"
      exit 6
      ;;
  esac
done
# Require the files' size plus a 2048 MB margin to be below the free space.
if [ $((DB_bak_size_mb + 2048)) -ge "$left_size_mb" ]; then
  result_err "剩余空间不足,退出脚本, 不进行升级操作"
  exit 6
else
  echo "剩余空间足够,需要:$((DB_bak_size_mb + 2048))mb,剩余:${left_size_mb}mb"
fi
# 7. Unpack the upgrade package and read the package version.
split1 "解压升级包,并查看升级包版本"
# Everything below uses relative paths and globs, so a failed cd must stop
# the script before tar / mv / rm run in the wrong directory.
cd /opt/software/gauss_upgrade || {
  result_err "解压升级包失败,无法进入 /opt/software/gauss_upgrade 目录"
  exit 7
}
tar -zxf GaussDB_T*CENTOS7*.gz || {
  result_err "解压升级包失败,请检查 /opt/software/gauss_upgrade 目录下的升级包"
  exit 7
}
# If the archive unpacked into exactly one directory, move its content up one
# level and remove the now-empty directory.
if [ "$(ls -l | grep '^d' | wc -l)" -eq 1 ]; then
  dir=$(ls -l | grep '^d' | awk '{print $NF}')
  echo "mv ${dir}/* .;rm -rf ${dir}"
  # ${dir:?} aborts instead of expanding to "/*" should dir ever be empty
  mv "${dir:?}"/* .
  rm -rf "${dir:?}"
fi
# Unpack the nested archives. The trailing "/" on the cd glob restricts the
# match to the extracted directory; without it the pattern also matches the
# DATABASE .tar.gz file and cd receives two arguments.
if ! { tar -zxf GaussDB_T*-CLUSTER-CENTOS*.tar.gz \
  && tar -zxf GaussDB_T*-DATABASE-CENTOS*.tar.gz \
  && cd GaussDB_T*-DATABASE-CENTOS*/ \
  && tar -zxf GaussDB_T*-RUN-CENTOS*.tar.gz; }; then
  result_err "解压升级包失败,请检查 /opt/software/gauss_upgrade 目录下的升级包"
  exit 7
fi
# Package version: second double-quote-delimited field of the line starting
# with "version" in package.xml, minus the "GaussDB_T_" prefix.
pkg_ver=$(grep ^version /opt/software/gauss_upgrade/GaussDB_T*-DATABASE-CENTOS*/GaussDB_T*-RUN-CENTOS*/package.xml | cut -d'"' -f 2 | awk -F"GaussDB_T_" '{print $2}')
result_ok "/opt/software/gauss_upgrade 目录下存在安装包的版本是:${pkg_ver}"
# 8. Decide whether the package version allows an upgrade (it must be newer
#    than the running version).
# Version comparison helpers found online; they depend on `sort -V`
# (-V, --version-sort: natural sort of (version) numbers within text).
#######################################
# Succeed when $1 is the highest of the given versions (>=).
# Arguments: version strings; all arguments are joined and split on spaces
#######################################
version_ge() {
  local highest
  highest=$(echo "$@" | tr " " "\n" | sort -rV | head -n 1)
  [ "$highest" == "$1" ]
}
#######################################
# Succeed when $1 is not the lowest of the given versions (>).
# Arguments: version strings; all arguments are joined and split on spaces
#######################################
version_gt() {
  local lowest
  lowest=$(echo "$@" | tr " " "\n" | sort -V | head -n 1)
  [ "$lowest" != "$1" ]
}
#######################################
# Succeed when $1 is the lowest of the given versions (<=).
# Arguments: version strings; all arguments are joined and split on spaces
#######################################
version_le() {
  local lowest
  lowest=$(echo "$@" | tr " " "\n" | sort -V | head -n 1)
  [ "$lowest" == "$1" ]
}
#######################################
# Succeed when $1 is not the highest of the given versions (<).
# Arguments: version strings; all arguments are joined and split on spaces
#######################################
version_lt() {
  local highest
  highest=$(echo "$@" | tr " " "\n" | sort -rV | head -n 1)
  [ "$highest" != "$1" ]
}
# Compare the package version with the running DB version to decide whether
# the upgrade may proceed. The letters S, P, C and B are deleted from both
# version strings before the comparison.
pkg_ver_cmp=$(echo $pkg_ver | tr -d "SPCB")
db_ver_cmp=$(echo $DB_Now_Ver | tr -d "SPCB")
# Both operands stay unquoted on purpose: an empty version then disappears
# from the argument list, exactly as in the inline form of this call.
if ! version_le $pkg_ver_cmp $db_ver_cmp; then
  result_ok "安装包版本 > DB当前版本, 可以进行升级"
else
  result_err "安装包版本 ≤ DB当前版本, 不能进行升级"
  exit 8
fi
# 9. Ask whether a full database backup should be taken first.
split1 "判断是否需要备份,及手动继续升级"
read -r -p "Do you need to backup the full database? [Y/N]:" bkup
case "$bkup" in
  y | Y)
    echo "# You want to backup database(你指定了需要备份数据库)."
    read -r -p "Please enter a directory(Make sure there is enough space to store the backup files) to store the backup [default /data01/gaussdb/bak]:" bkdir
    # Use the default only when nothing was entered. The former unquoted
    # `[ -n $bkdir ]` was always true and discarded the operator's directory.
    if [ -z "$bkdir" ]; then
      bkdir=/data01/gaussdb/bak
    fi
    if [ ! -d "$bkdir" ]; then
      mkdir -p "$bkdir"
    fi
    # The backup runs as omm, so omm must own the target directory.
    chown omm "$bkdir"
    # NOTE(review): the inner single quotes close and reopen the outer -c
    # string, so the backup path reaches zsql without quotes around it.
    # Confirm that zsql accepts the unquoted path.
    su - omm -c "zsql / as sysdba -q -c 'backup database full format '$bkdir/full_$(date +'%Y%m%d_%H%M').bak' parallelism 2 buffer size 128M;'"
    ;;
  n | N)
    echo "# You have specified that you do not need to back up the database(你指定了不需要备份数据库)."
    ;;
  *)
    # Any other answer only prints a hint; no backup is taken and the script
    # moves on to the confirmation prompt.
    echo "# Please enter the correct parameter(请输入正确的参数)"
    ;;
esac
# Manual confirmation before the upgrade continues: Y proceeds, N exits with
# code 9. Any other answer gets one more chance.
echo "# 请人工确认是否需要继续升级数据库,输入'Y'后继续,输入'N'退出"
read -r -p "Do you want to continue upgrading the database? [Y/N]:" goonbak
case "$goonbak" in
  n | N)
    echo "# 退出,不进行数据库upgrade操作"
    exit 9
    ;;
  y | Y)
    echo "# 继续数据库upgrade操作"
    ;;
  *)
    echo "# 输入参数异常,请重新输入,仅剩一次机会,否则直接退出"
    read -r -p "Do you want to continue upgrading the database? [Y/N]:" gobak
    if [ "$gobak" == "y" ] || [ "$gobak" == "Y" ]; then
      echo "# 继续数据库upgrade操作"
    else
      echo "# 退出,不进行数据库upgrade操作"
      # Actually stop here: without this exit the upgrade went ahead even
      # though the message says it will not.
      exit 9
    fi
    ;;
esac
# 10. DB upgrade pre-check (gs_preinstall; same command for standalone and
#     primary/standby).
split1 "升级预检查 gs_preinstall, 单机和主备命令一致"
preupg="/opt/software/gauss_upgrade/script/gs_preinstall -U omm -G dbgrp -X /opt/software/config.xml --alarm-type=1 --operation=upgrade"
# Fix the log name once so writing, displaying and checking use the same file.
preupg_log="/tmp/gs_preinstall_$(date +'%Y%m%d').log"
echo -e "\n\n##### ${preupg} >${preupg_log}\n"
# Drive gs_preinstall with expect.
#  - The command and both passwords reach expect through the environment and
#    the heredoc is quoted, so no password is written to a file in /tmp,
#    printed to the terminal, or re-interpreted by the shell or by Tcl.
#  - The tool asks "Please enter password for root." / "Password:" first and
#    then the same for omm; the prompts look identical, so they are answered
#    in that order with a counter.
#  - No "#" lines are placed inside the expect { } list: there they are
#    pattern/action words, not comments.
GS_PREUPG_CMD="$preupg" GS_ROOT_PWD="$rootpwd" GS_OMM_PWD="$ommpwd" expect <<'EOF' >"$preupg_log"
set timeout 300
set passwords [list $env(GS_ROOT_PWD) $env(GS_OMM_PWD)]
set answered 0
eval spawn $env(GS_PREUPG_CMD)
expect {
    "(yes/no)" { send "yes\r"; exp_continue }
    "Password:" {
        if {$answered >= [llength $passwords]} { exit 1 }
        send -- "[lindex $passwords $answered]\r"
        incr answered
        exp_continue
    }
    eof {}
}
EOF
cat "$preupg_log"
if contain_str "Preinstallation succeeded." "$preupg_log"; then
  result_ok "gs_preinstall Check result: OK."
else
  result_err "gs_preinstall Check result: Abnormal. Please check the result"
  exit 9
fi
# 11. Actual DB upgrade: choose the gs_upgradectl parameters.
split1 "升级预检查 gs_upgradectl, 单机和主备命令进行区分"
# Start from the standalone parameters ...
upgtype="offline-upgrade"
rolerec=
# ... and switch to the primary/standby parameters when the status report
# listed two or more DB instances.
if [ $dbnum -ge 2 ]; then
  upgtype="online-upgrade"
  rolerec="--role-recovery"
fi
# NOTE(review): this starts a separate login shell that exits right away, so
# it does not change the environment of the commands that follow.
su - omm -lc 'source ~/.bashrc'
# precheck
# Fix the log name once so writing, displaying and checking use the same file
# even if the date changes while the step runs.
precheck_log="/tmp/gs_upgrade_precheck_$(date +'%Y%m%d').log"
echo -e "\n\n##### su - omm -c \"gs_upgradectl -t precheck --upgrade-type=$upgtype\" >${precheck_log}"
su - omm -lc "gs_upgradectl -t precheck --upgrade-type=$upgtype" >"${precheck_log}"
cat "${precheck_log}"
if contain_str "Check result: OK." "${precheck_log}"; then
  result_ok "precheck Check result: OK."
else
  result_err "precheck Check result: Abnormal. Please check the result"
  exit 101
fi
# upgrade
# Fix the log name before the upgrade starts: a run that crosses midnight
# would otherwise write one file and then inspect another, reporting a
# successful upgrade as failed.
upgrade_log="/tmp/gs_upgrade_upgrade_$(date +'%Y%m%d').log"
echo -e "\n\n##### su - omm -c \"gs_upgradectl -t $upgtype -X /opt/software/config.xml $rolerec\" >${upgrade_log}"
# Show where the detailed logs are. Values already set in this shell are kept;
# otherwise they are read from omm's login environment.
# NOTE(review): assumes GAUSSLOG and GSDB_DATA are defined in omm's profile — confirm.
gauss_log_dir=${GAUSSLOG:-$(su - omm -c 'echo $GAUSSLOG')}
gsdb_data_dir=${GSDB_DATA:-$(su - omm -c 'echo $GSDB_DATA')}
echo "### 升级日志:tail -100 ${gauss_log_dir}/om/gs_upgradectl.log
### 若 Do single run 报错查看dn升级日志:tail -100 ${gsdb_data_dir}/log/upgrade.log"
su - omm -lc "gs_upgradectl -t $upgtype -X /opt/software/config.xml $rolerec" >"${upgrade_log}"
cat "${upgrade_log}"
if contain_str "systable-upgrade succeeded." "${upgrade_log}"; then
  result_ok "upgrade Check result: OK."
else
  result_err "upgrade Check result: Abnormal. Please check the result"
  exit 102
fi
# NOTE(review): this starts a separate login shell that exits right away, so
# it does not change the environment of the commands that follow.
su - omm -lc 'source ~/.bashrc'
# postcheck
# Fix the log name once so writing, displaying and checking use the same file
# even if the date changes while the step runs.
postcheck_log="/tmp/gs_upgrade_postcheck_$(date +'%Y%m%d').log"
echo -e "\n\n##### su - omm -lc \"gs_upgradectl -t postcheck --upgrade-type=$upgtype\" >${postcheck_log}"
su - omm -lc "gs_upgradectl -t postcheck --upgrade-type=$upgtype" >"${postcheck_log}"
cat "${postcheck_log}"
if contain_str "Check result: OK." "${postcheck_log}"; then
  result_ok "postcheck Check result: OK."
else
  result_err "postcheck Check result: Abnormal. Please check the result"
  exit 103
fi
# 12. With a shared disk, grant the capability on every node after the upgrade.
if [ $dbnum -ge 2 ] && [ $sharedisk -ge 1 ]; then
  split1 "存在共享盘,所有节点执行升级后执行提权操作步骤"
  # Command executed by root on each remote node. It is single-quoted here so
  # that $CM_HOME is resolved from omm's environment on the remote host, the
  # same way the local branch below resolves it.
  remote_setcap='cmhome=$(su - omm -c "echo \$CM_HOME/cm"); getcap "$cmhome"; setcap CAP_SYS_RAWIO=ep "$cmhome"'
  #1. Set the capability on all primary/standby nodes
  for h in $(grep ^INSTANCE:DB /tmp/omstatus.txt | awk '{print $4}' | cut -d: -f2); do
    if [ "$h" == "$(hostname)" ]; then
      su - omm -c 'getcap $CM_HOME/cm'
      cmhome=$(su - omm -c 'echo $CM_HOME/cm')
      setcap CAP_SYS_RAWIO=ep ${cmhome}
    else
      # Host, command and password are passed through the environment and the
      # heredoc is quoted, so neither the shell nor Tcl re-interprets them.
      # exp_continue keeps answering until ssh ends; the exit status of the
      # remote command is handed back through `wait`.
      GS_REMOTE_HOST="$h" GS_REMOTE_CMD="$remote_setcap" GS_ROOT_PWD="$rootpwd" expect <<'EOF'
set timeout 300
spawn ssh -o StrictHostKeyChecking=no $env(GS_REMOTE_HOST) $env(GS_REMOTE_CMD)
expect {
    "(yes/no)" { send "yes\r"; exp_continue }
    "*assword" { send -- "$env(GS_ROOT_PWD)\r"; exp_continue }
    timeout { exit 1 }
    eof {}
}
set result [wait]
if {[lindex $result 2] != 0} { exit 1 }
exit [lindex $result 3]
EOF
      if [ $? -ne 0 ]; then
        result_err "ssh $h to execute command Failed."
        exit 11
      else
        result_ok "Test $h to execute command Successful."
      fi
    fi
  done
  #2. Restart etcd
  su - omm -c 'gs_om -t startetcd'
fi
# 13. Status check after the upgrade.
split1 "升级后状态检查"
echo "# 升级后集群状态:"
su - omm -c 'gs_om -t status'
echo -e "\n# 升级后版本信息:"
su - omm -c 'zengine -v'
# Re-read the running version: first whitespace-separated field of each
# output line, keeping the part after its first "GaussDB_T_" marker.
DB_Now_Ver=$(su - omm -c "zengine -v " | awk '{ split($1, piece, "GaussDB_T_"); print piece[2] }')
# The upgrade counts as successful only when the running version equals the
# package version.
if [ "${DB_Now_Ver}" != "${pkg_ver}" ]; then
  result_err "Gauss-T Failed Upgrade To The Given Software Version $pkg_ver"
  exit 12
fi
result_ok "Gauss-T Successfully Upgraded To The Given Software Version $pkg_ver"
# 14. Repair for the known issue where a missing lib file makes DPA backups fail.
# The heredoc delimiter is quoted so the script is written out literally.
# Unquoted, ${GSHOME}, ${dpafile} and $(find ...) were expanded while the
# file was being generated, leaving a script that tested /lib/libgaussdbmml.so
# and carried values computed on this host only.
cat >/tmp/dpa_lib_repair.sh<<'dparep'
#!/bin/bash
# Restore ${GAUSS_HOME}/lib/libgaussdbmml.so from another copy on this host
# when it is missing.
GSHOME=$(su - omm -lc 'echo $GAUSS_HOME')
if [ -z "${GSHOME}" ];then
  echo "### cannot determine GAUSS_HOME of user omm"
  exit 1
fi
if [ ! -f "${GSHOME}/lib/libgaussdbmml.so" ];then
  echo "### lib 文件 ${GSHOME}/lib/libgaussdbmml.so 不存在"
  dpafile=$(find /usr /opt /home /etc /lib -type f -iname libgaussdbmml.so | head -1)
  if [ -n "${dpafile}" ];then
    cp "${dpafile}" "${GSHOME}/lib/libgaussdbmml.so"
    chown omm:dbgrp "${GSHOME}/lib/libgaussdbmml.so"
    chmod 600 "${GSHOME}/lib/libgaussdbmml.so"
  fi
  ls -lh "${GSHOME}/lib/libgaussdbmml.so"
else
  echo "### 存在 lib 文件 ${GSHOME}/lib/libgaussdbmml.so"
  ls -lh "${GSHOME}/lib/libgaussdbmml.so"
fi
dparep
chmod +x /tmp/dpa_lib_repair.sh
sh /tmp/dpa_lib_repair.sh
# With more than one DB instance, run the same repair on every other node.
if [ $dbnum -ge 2 ];then
  # -x -F excludes only the exact local host name, so a node whose name merely
  # contains it (node1 vs node10) is still processed.
  for h in $(grep ^INSTANCE:DB /tmp/omstatus.txt | awk '{print $4}' | cut -d: -f2 | grep -vxF "$(hostname)");do
    scp /tmp/dpa_lib_repair.sh ${h}:/tmp
    ssh ${h} "sh /tmp/dpa_lib_repair.sh"
  done
fi
# End Script
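# --- Article footer from the page this script was published on (not part of the script) ---
# Last modified: 2023-11-15 09:35:09
# "If you like this article, your follow and appreciation are the best encouragement for the author."
# Follow the author
# [Copyright notice] This article is original content by a Modb (墨天轮) user. When reposting, you must state the source (Modb), the article link, the author and other basic information; otherwise the author and Modb reserve the right to pursue liability. If you find content on Modb suspected of plagiarism or infringement, please email contact@modb.pro to report it with supporting evidence; once verified, Modb will remove the content immediately.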




