MogDB数据库在运行过程中,可能会因为各种原因导致数据文件产生坏块。针对这一场景,本文先通过dd命令模拟产生坏块,再实际测试并验证坏块的修复过程。
准备环境
1.数据库版本:MogDB 5.0.6
2.数据库架构:主从架构
3.安装Toolkits-5.0.6-CentOS-x86_64.tar.gz插件
4.准备测试数据
MogDB=# drop table if exists tctest;
DROP TABLE
MogDB=# create table tctest(
MogDB(# id int ,
MogDB(# info varchar(500),
MogDB(# val_int int,
MogDB(# val_float decimal(12,2),
MogDB(# crt_date date,
MogDB(# crt_time timestamp,
MogDB(# remark char(50)
MogDB(# )
MogDB-# WITH (autovacuum_enabled = off, toast.autovacuum_enabled = off);
CREATE TABLE
MogDB=#
MogDB=# insert into tctest(id,info,val_int,val_float,crt_date,crt_time,remark)
MogDB-# select
MogDB-# generate_series(1,1999999999999999) id,
MogDB-# md5(random()::text) info,
MogDB-# generate_series(100,1999999999999999) val_int,
MogDB-# random()*(10^5) val_float,
MogDB-# sysdate crt_date,--clock_timestamp(),
MogDB-# now() crt_time,
MogDB-# 'first'||(random()*(10^3))::integer remark
MogDB-# --, 'second'||(random()*(10^3))::integer second2
MogDB-# limit 15000000;
INSERT 0 15000000
MogDB=# select now();
now
-------------------------------
2024-04-23 17:17:01.267921+08
(1 row)
MogDB=#
MogDB=# \dx
List of installed extensions
Name | Version | Schema | Description
-----------------+---------+------------+-----------------------------------------------------------
dist_fdw | 1.0 | pg_catalog | foreign-data wrapper for distfs access
file_fdw | 1.0 | pg_catalog | foreign-data wrapper for flat file access
hstore | 1.1 | pg_catalog | data type for storing sets of (key, value) pairs
log_fdw | 1.0 | pg_catalog | Foreign Data Wrapper for accessing logging data
pageinspect | 1.0 | public | inspect the contents of database pages at a low level
pg_freespacemap | 1.0 | public | examine the free space map (FSM)
plpgsql | 1.0 | pg_catalog | PL/pgSQL procedural language
security_plugin | 1.0 | pg_catalog | provides security functionality
tidrangescan | 1.0 | pg_catalog | example implementation for custom-scan-provider interface
(9 rows)
MogDB=#
MogDB=# select pg_relation_filepath('tctest');
pg_relation_filepath
----------------------
base/16257/159310
(1 row)
MogDB=# \! ls -ltrh $PGDATA/base/16257/159310*
-rw------- 1 omm omm 1.0G Apr 23 17:15 /data/mogdb5.0/data/base/16257/159310
-rw------- 1 omm omm 1.0G Apr 23 17:17 /data/mogdb5.0/data/base/16257/159310.1
-rw------- 1 omm omm 584K Apr 23 17:17 /data/mogdb5.0/data/base/16257/159310_fsm
-rw------- 1 omm omm 206M Apr 23 17:17 /data/mogdb5.0/data/base/16257/159310.2
MogDB=#
模拟坏块产生和修复
查看第0个数据块,共有52条数据记录。
MogDB=# SELECT * FROM heap_page_items(get_raw_page('tctest', 0));
lp | lp_off | lp_flags | lp_len | t_xmin | t_xmax | t_field3 | t_ctid | t_infomask2 | t_infomask | t_hoff | t_bits | t_oid
----+--------+----------+--------+--------+--------+----------+--------+-------------+------------+--------+--------+-------
1 | 8040 | 1 | 147 | 325271 | 0 | 0 | (0,1) | 7 | 2050 | 24 | |
2 | 7888 | 1 | 147 | 325271 | 0 | 0 | (0,2) | 7 | 2050 | 24 | |
3 | 7736 | 1 | 147 | 325271 | 0 | 0 | (0,3) | 7 | 2050 | 24 | |
4 | 7584 | 1 | 147 | 325271 | 0 | 0 | (0,4) | 7 | 2050 | 24 | |
5 | 7432 | 1 | 147 | 325271 | 0 | 0 | (0,5) | 7 | 2050 | 24 | |
6 | 7280 | 1 | 147 | 325271 | 0 | 0 | (0,6) | 7 | 2050 | 24 | |
7 | 7128 | 1 | 147 | 325271 | 0 | 0 | (0,7) | 7 | 2050 | 24 | |
8 | 6976 | 1 | 147 | 325271 | 0 | 0 | (0,8) | 7 | 2050 | 24 | |
9 | 6824 | 1 | 147 | 325271 | 0 | 0 | (0,9) | 7 | 2050 | 24 | |
10 | 6672 | 1 | 147 | 325271 | 0 | 0 | (0,10) | 7 | 2050 | 24 | |
11 | 6520 | 1 | 147 | 325271 | 0 | 0 | (0,11) | 7 | 2050 | 24 | |
12 | 6368 | 1 | 147 | 325271 | 0 | 0 | (0,12) | 7 | 2050 | 24 | |
13 | 6216 | 1 | 147 | 325271 | 0 | 0 | (0,13) | 7 | 2050 | 24 | |
14 | 6064 | 1 | 147 | 325271 | 0 | 0 | (0,14) | 7 | 2050 | 24 | |
15 | 5912 | 1 | 147 | 325271 | 0 | 0 | (0,15) | 7 | 2050 | 24 | |
16 | 5760 | 1 | 147 | 325271 | 0 | 0 | (0,16) | 7 | 2050 | 24 | |
17 | 5608 | 1 | 147 | 325271 | 0 | 0 | (0,17) | 7 | 2050 | 24 | |
18 | 5456 | 1 | 147 | 325271 | 0 | 0 | (0,18) | 7 | 2050 | 24 | |
19 | 5304 | 1 | 147 | 325271 | 0 | 0 | (0,19) | 7 | 2050 | 24 | |
20 | 5152 | 1 | 147 | 325271 | 0 | 0 | (0,20) | 7 | 2050 | 24 | |
21 | 5000 | 1 | 147 | 325271 | 0 | 0 | (0,21) | 7 | 2050 | 24 | |
22 | 4848 | 1 | 147 | 325271 | 0 | 0 | (0,22) | 7 | 2050 | 24 | |
23 | 4696 | 1 | 147 | 325271 | 0 | 0 | (0,23) | 7 | 2050 | 24 | |
24 | 4544 | 1 | 147 | 325271 | 0 | 0 | (0,24) | 7 | 2050 | 24 | |
25 | 4392 | 1 | 147 | 325271 | 0 | 0 | (0,25) | 7 | 2050 | 24 | |
26 | 4240 | 1 | 147 | 325271 | 0 | 0 | (0,26) | 7 | 2050 | 24 | |
27 | 4088 | 1 | 147 | 325271 | 0 | 0 | (0,27) | 7 | 2050 | 24 | |
28 | 3936 | 1 | 147 | 325271 | 0 | 0 | (0,28) | 7 | 2050 | 24 | |
29 | 3784 | 1 | 147 | 325271 | 0 | 0 | (0,29) | 7 | 2050 | 24 | |
30 | 3632 | 1 | 147 | 325271 | 0 | 0 | (0,30) | 7 | 2050 | 24 | |
31 | 3480 | 1 | 147 | 325271 | 0 | 0 | (0,31) | 7 | 2050 | 24 | |
32 | 3328 | 1 | 147 | 325271 | 0 | 0 | (0,32) | 7 | 2050 | 24 | |
33 | 3176 | 1 | 147 | 325271 | 0 | 0 | (0,33) | 7 | 2050 | 24 | |
34 | 3024 | 1 | 147 | 325271 | 0 | 0 | (0,34) | 7 | 2050 | 24 | |
35 | 2872 | 1 | 147 | 325271 | 0 | 0 | (0,35) | 7 | 2050 | 24 | |
36 | 2720 | 1 | 147 | 325271 | 0 | 0 | (0,36) | 7 | 2050 | 24 | |
37 | 2568 | 1 | 147 | 325271 | 0 | 0 | (0,37) | 7 | 2050 | 24 | |
38 | 2416 | 1 | 147 | 325271 | 0 | 0 | (0,38) | 7 | 2050 | 24 | |
39 | 2264 | 1 | 147 | 325271 | 0 | 0 | (0,39) | 7 | 2050 | 24 | |
40 | 2112 | 1 | 147 | 325271 | 0 | 0 | (0,40) | 7 | 2050 | 24 | |
41 | 1960 | 1 | 147 | 325271 | 0 | 0 | (0,41) | 7 | 2050 | 24 | |
42 | 1808 | 1 | 147 | 325271 | 0 | 0 | (0,42) | 7 | 2050 | 24 | |
43 | 1656 | 1 | 147 | 325271 | 0 | 0 | (0,43) | 7 | 2050 | 24 | |
44 | 1504 | 1 | 147 | 325271 | 0 | 0 | (0,44) | 7 | 2050 | 24 | |
45 | 1352 | 1 | 147 | 325271 | 0 | 0 | (0,45) | 7 | 2050 | 24 | |
46 | 1200 | 1 | 147 | 325271 | 0 | 0 | (0,46) | 7 | 2050 | 24 | |
47 | 1048 | 1 | 147 | 325271 | 0 | 0 | (0,47) | 7 | 2050 | 24 | |
48 | 896 | 1 | 147 | 325271 | 0 | 0 | (0,48) | 7 | 2050 | 24 | |
49 | 744 | 1 | 147 | 325271 | 0 | 0 | (0,49) | 7 | 2050 | 24 | |
50 | 592 | 1 | 147 | 325271 | 0 | 0 | (0,50) | 7 | 2050 | 24 | |
51 | 440 | 1 | 147 | 325271 | 0 | 0 | (0,51) | 7 | 2050 | 24 | |
52 | 288 | 1 | 147 | 325271 | 0 | 0 | (0,52) | 7 | 2050 | 24 | |
(52 rows)
MogDB=# select count(*),now(),get_hostname() from tctest;
count | now | get_hostname
----------+-------------------------------+--------------
15000000 | 2024-04-23 17:47:52.381779+08 | mogdb114
(1 row)
使用dd模拟第0个数据块损坏。注意:使用dd模拟数据块损坏时需要加conv=notrunc,该选项会阻止dd截断表数据文件的其余部分;如果不加conv=notrunc,将会丢失大量数据。
MogDB=# \! dd bs=8192 seek=0 count=1 of=/data/mogdb5.0/data/base/16257/159310 if=/dev/urandom conv=notrunc
1+0 records in
1+0 records out
8192 bytes (8.2 kB) copied, 0.000223079 s, 36.7 MB/s
MogDB=#
MogDB=# select * from tctest where id=17;
WARNING: page verification failed, calculated checksum 25552 but expected 28289, the block num is 0
WARNING: invalid page in block 0 of relation base/16257/159310, try to remote read
id | info | val_int | val_float | crt_date | crt_time | remark
----+----------------------------------+---------+-----------+---------------------+----------------------------+----------------------------------------------------
17 | f515553b743c64fa6698afafdf663213 | 116 | 89336.24 | 2024-04-23 17:12:25 | 2024-04-23 17:12:25.216547 | first283
(1 row)
MogDB=# select * from local_bad_block_info();
node_name | spc_node | db_node | rel_node | bucket_node | fork_num | block_num | file_path | check_time | repair_time
-------------------+----------+---------+----------+-------------+----------+-----------+-------------------+-------------------------------+-------------------------------
dn_6001_6002_6003 | 1663 | 16257 | 159310 | -1 | 0 | 0 | base/16257/159310 | 2024-04-23 19:12:16.262506+08 | 2024-04-23 19:12:16.282364+08
(1 row)
MogDB=#
MogDB=# select count(*),now(),get_hostname() from tctest;
count | now | get_hostname
----------+-------------------------------+--------------
15000000 | 2024-04-23 19:14:29.989648+08 | mogdb114
(1 row)
MogDB=#
MogDB=# select * from local_clear_bad_block_info(); <<<<<<<<<<清理local_bad_block_info中已修复页面的数据
result
--------
t
(1 row)
MogDB=# select * from local_bad_block_info();
node_name | spc_node | db_node | rel_node | bucket_node | fork_num | block_num | file_path | check_time | repair_time
-----------+----------+---------+----------+-------------+----------+-----------+-----------+------------+-------------
(0 rows)
MogDB=#
小结
MogDB主库如果发生了坏块,访问到该坏块的操作会自动从备库读取对应的正常数据页(remote read),并修复主库上损坏的数据块。local_bad_block_info()函数会记录坏块对应的数据文件和坏块的编号,建议将该函数的查询结果纳入监控告警,以便及时发现主库是否出现坏块。
最后修改时间:2024-04-25 23:16:06
「喜欢这篇文章,您的关注和赞赏是给作者最好的鼓励」
关注作者
【版权声明】本文为墨天轮用户原创内容,转载时必须标注文章的来源(墨天轮),文章链接,文章作者等基本信息,否则作者和墨天轮有权追究责任。如果您发现墨天轮中有涉嫌抄袭或者侵权的内容,欢迎发送邮件至:contact@modb.pro进行举报,并提供相关证据,一经查实,墨天轮将立刻删除相关内容。




