暂无图片
暂无图片
暂无图片
暂无图片
暂无图片

Postgres WAL 知识点归纳

原创 Yongtao 2023-04-19
702

零、常用术语

  • IUD:Insert;Upadte;Delete。
  • WAL:Write Ahead Log,指的事务日志,或者将行为写入事务日志的实现机制。
  • XLOG记录 约等于 WAL数据:PG将所有修改(IUD)作为历史数据写入持久化存储(硬盘)。
  • 日志序列号(LSN,Log Sequence Number)。

一、概述

1.1 WAL的流程简要说明

当IUD发生时,PG将XLOG记录写入内存中的WAL缓存区。
当事务提交时,PG将XLOG记录立即写入到持久化存储(硬盘)的WAL段文件中。

简单说就是两步骤,第一步写入内存,第二步写入硬盘。

1.2 WAL存在的必要性

记录历史,用于出现故障后的数据恢复。
也是时间点恢复(Point-inTime Recovery)和流复制(Stream Replication)的实现基础。

1.3 没有WAL可能会出现的问题(复述问题出现的流程)
  1. 发起IUD到TableA
  2. PG将TableA的页面加载到内存的共享缓冲池
  3. IUD修改此页面,形成脏页
  4. 断电,此时脏页尚未持久化回硬盘,数据丢失

二、WAL段文件

2.1 WAL段文件概述

WAL段文件的大小为16MB,可以通过wal_segsize配置,默认大小为16MB。

gpadmin@gpadmin-virtual-machine ~/pg1212> cd $PG_HOME/data/pg_wal/ gpadmin@gpadmin-virtual-machine ~/p/d/pg_wal> pwd /home/gpadmin/pg1212/data/pg_wal gpadmin@gpadmin-virtual-machine ~/p/d/pg_wal> ls -lh total 17M -rw------- 1 gpadmin gpadmin 16M 8月 23 17:38 000000010000000000000001 drwx------ 2 gpadmin gpadmin 4.0K 8月 22 17:53 archive_status/ gpadmin@gpadmin-virtual-machine ~/p/d/pg_wal> pg_waldump 000000010000000000000001
2.2 WAL段文件内部布局

WAL段文件默认大小16MB,内包含2000个8KB的页面。

其中,XLogRecord包含两个重要变量xl_rmid(资源管理器)和xl_info。

/* * The overall layout of an XLOG record is: * Fixed-size header (XLogRecord struct) * XLogRecordBlockHeader struct * XLogRecordBlockHeader struct * ... * XLogRecordDataHeader[Short|Long] struct * block data * block data * ... * main data * * There can be zero or more XLogRecordBlockHeaders, and 0 or more bytes of * rmgr-specific data not associated with a block. XLogRecord structs * always start on MAXALIGN boundaries in the WAL files, but the rest of * the fields are not aligned. * * The XLogRecordBlockHeader, XLogRecordDataHeaderShort and * XLogRecordDataHeaderLong structs all begin with a single 'id' byte. It's * used to distinguish between block references, and the main data structs. */ typedef struct XLogRecord { uint32 xl_tot_len; /* total len of entire record */ TransactionId xl_xid; /* xact id */ XLogRecPtr xl_prev; /* ptr to previous record in log */ uint8 xl_info; /* flag bits, see below */ RmgrId xl_rmid; /* resource manager for this record */ /* 2 bytes of padding here, initialize to zero */ pg_crc32c xl_crc; /* CRC for this record */ /* XLogRecordBlockHeaders and XLogRecordDataHeader follow, no padding */ } XLogRecord;
语句 XlogRecord 故障恢复执行函数
insert xl_rmid = RM_HEAP; xl_info = XLOG_HEAP_INSERT heap_xlog_insert()
update xl_rmid = RM_HEAP; xl_info = XLOG_HEAP_UPDATE heap_xlog_update()
事务提交 xl_rmid = RM_XACT; xl_info = XLOG_XACT_COMMIT xact_redo_commit()
/* * List of resource manager entries. Note that order of entries defines the * numerical values of each rmgr's ID, which is stored in WAL records. New * entries should be added at the end, to avoid changing IDs of existing * entries. * * Changes to this list possibly need an XLOG_PAGE_MAGIC bump. */ /* symbol name, textual name, redo, desc, identify, startup, cleanup */ PG_RMGR(RM_XLOG_ID, "XLOG", xlog_redo, xlog_desc, xlog_identify, NULL, NULL, NULL) PG_RMGR(RM_XACT_ID, "Transaction", xact_redo, xact_desc, xact_identify, NULL, NULL, NULL) PG_RMGR(RM_SMGR_ID, "Storage", smgr_redo, smgr_desc, smgr_identify, NULL, NULL, NULL) PG_RMGR(RM_CLOG_ID, "CLOG", clog_redo, clog_desc, clog_identify, NULL, NULL, NULL) PG_RMGR(RM_DBASE_ID, "Database", dbase_redo, dbase_desc, dbase_identify, NULL, NULL, NULL) PG_RMGR(RM_TBLSPC_ID, "Tablespace", tblspc_redo, tblspc_desc, tblspc_identify, NULL, NULL, NULL) PG_RMGR(RM_MULTIXACT_ID, "MultiXact", multixact_redo, multixact_desc, multixact_identify, NULL, NULL, NULL) PG_RMGR(RM_RELMAP_ID, "RelMap", relmap_redo, relmap_desc, relmap_identify, NULL, NULL, NULL) PG_RMGR(RM_STANDBY_ID, "Standby", standby_redo, standby_desc, standby_identify, NULL, NULL, NULL) PG_RMGR(RM_HEAP2_ID, "Heap2", heap2_redo, heap2_desc, heap2_identify, NULL, NULL, heap_mask) PG_RMGR(RM_HEAP_ID, "Heap", heap_redo, heap_desc, heap_identify, NULL, NULL, heap_mask) PG_RMGR(RM_BTREE_ID, "Btree", btree_redo, btree_desc, btree_identify, NULL, NULL, btree_mask) PG_RMGR(RM_HASH_ID, "Hash", hash_redo, hash_desc, hash_identify, NULL, NULL, hash_mask) PG_RMGR(RM_GIN_ID, "Gin", gin_redo, gin_desc, gin_identify, gin_xlog_startup, gin_xlog_cleanup, gin_mask) PG_RMGR(RM_GIST_ID, "Gist", gist_redo, gist_desc, gist_identify, gist_xlog_startup, gist_xlog_cleanup, gist_mask) PG_RMGR(RM_SEQ_ID, "Sequence", seq_redo, seq_desc, seq_identify, NULL, NULL, seq_mask) PG_RMGR(RM_SPGIST_ID, "SPGist", spg_redo, spg_desc, spg_identify, spg_xlog_startup, spg_xlog_cleanup, spg_mask) PG_RMGR(RM_BRIN_ID, "BRIN", brin_redo, brin_desc, brin_identify, NULL, NULL, brin_mask) PG_RMGR(RM_COMMIT_TS_ID, "CommitTs", commit_ts_redo, commit_ts_desc, commit_ts_identify, NULL, NULL, NULL) PG_RMGR(RM_REPLORIGIN_ID, "ReplicationOrigin", replorigin_redo, replorigin_desc, replorigin_identify, NULL, NULL, NULL) PG_RMGR(RM_GENERIC_ID, "Generic", generic_redo, generic_desc, generic_identify, NULL, NULL, generic_mask) PG_RMGR(RM_LOGICALMSG_ID, "LogicalMessage", logicalmsg_redo, logicalmsg_desc, logicalmsg_identify, NULL, NULL, NULL)
/* * WAL record definitions for heapam.c's WAL operations * * XLOG allows to store some information in high 4 bits of log * record xl_info field. We use 3 for opcode and one for init bit. */ #define XLOG_HEAP_INSERT 0x00 #define XLOG_HEAP_DELETE 0x10 #define XLOG_HEAP_UPDATE 0x20 #define XLOG_HEAP_TRUNCATE 0x30 #define XLOG_HEAP_HOT_UPDATE 0x40 #define XLOG_HEAP_CONFIRM 0x50 #define XLOG_HEAP_LOCK 0x60 #define XLOG_HEAP_INPLACE 0x70 #define XLOG_HEAP_OPMASK 0x70 /* * When we insert 1st item on new page in INSERT, UPDATE, HOT_UPDATE, * or MULTI_INSERT, we can (and we do) restore entire page in redo */ #define XLOG_HEAP_INIT_PAGE 0x80

三、WAL日志的写入和写入进程

3.1 发生的时间
  • 运行中的事务提交或者中止
  • WAL缓冲区被写入的元组填满(与wal_buffers有关)
  • WAL写入进程周期性执行写入操作
3.2 WAL写入进程
  • 后台进程,每200ms检查WAL缓冲区并写入(wal_write_delay = 200ms)
3.3 WAL记录的写入流程(以insert为例子)
调用栈 功能
exec_simple_query() psql执行IUD
ExtendCLOG() 将当前事务的状态IN_PROGRESS写入内存CLOG
heap_insert() 插入元组
XLogInsert() 将XLOG记录写入WAL缓冲区(内存)
XLogWrite() 将XLOG记录写入WAL段文件(硬盘)
TransactionIdCommitTree() 将CLOG状态从IN_PROGRESS变成COMMITED
void heap_insert(Relation relation, HeapTuple tup, CommandId cid, int options, BulkInsertState bistate) { TransactionId xid = GetCurrentTransactionId(); /* 省略若干行 */ if (!(options & HEAP_INSERT_SKIP_WAL) && RelationNeedsWAL(relation)) { uint8 info = XLOG_HEAP_INSERT; // 写入日志前的准备 XLogBeginInsert(); // 注册生成日志相关的数据 XLogRegisterData((char *) &xlrec, SizeOfHeapInsert); // 执行日志插入 recptr = XLogInsert(RM_HEAP_ID, info); // 设置page的LSN PageSetLSN(page, recptr); } }

引用文献

-《PostgresSQL指南-内幕探索》铃木启修

最后修改时间:2023-04-27 11:26:16
「喜欢这篇文章,您的关注和赞赏是给作者最好的鼓励」
关注作者
【版权声明】本文为墨天轮用户原创内容,转载时必须标注文章的来源(墨天轮),文章链接,文章作者等基本信息,否则作者和墨天轮有权追究责任。如果您发现墨天轮中有涉嫌抄袭或者侵权的内容,欢迎发送邮件至:contact@modb.pro进行举报,并提供相关证据,一经查实,墨天轮将立刻删除相关内容。

评论