零、常用术语
- IUD:Insert;Upadte;Delete。
- WAL:Write Ahead Log,指的事务日志,或者将行为写入事务日志的实现机制。
- XLOG记录 约等于 WAL数据:PG将所有修改(IUD)作为历史数据写入持久化存储(硬盘)。
- 日志序列号(LSN,Log Sequence Number)。
一、概述
1.1 WAL的流程简要说明
当IUD发生时,PG将XLOG记录写入内存中的WAL缓存区。
当事务提交时,PG将XLOG记录立即写入到持久化存储(硬盘)的WAL段文件中。
简单说就是两步骤,第一步写入内存,第二步写入硬盘。
1.2 WAL存在的必要性
记录历史,用于出现故障后的数据恢复。
也是时间点恢复(Point-inTime Recovery)和流复制(Stream Replication)的实现基础。
1.3 没有WAL可能会出现的问题(复述问题出现的流程)
- 发起IUD到TableA
- PG将TableA的页面加载到内存的共享缓冲池
- IUD修改此页面,形成脏页
- 断电,此时脏页尚未持久化回硬盘,数据丢失
二、WAL段文件
2.1 WAL段文件概述
WAL段文件的大小为16MB,可以通过wal_segsize配置,默认大小为16MB。
gpadmin@gpadmin-virtual-machine ~/pg1212> cd $PG_HOME/data/pg_wal/
gpadmin@gpadmin-virtual-machine ~/p/d/pg_wal> pwd
/home/gpadmin/pg1212/data/pg_wal
gpadmin@gpadmin-virtual-machine ~/p/d/pg_wal> ls -lh
total 17M
-rw------- 1 gpadmin gpadmin 16M 8月 23 17:38 000000010000000000000001
drwx------ 2 gpadmin gpadmin 4.0K 8月 22 17:53 archive_status/
gpadmin@gpadmin-virtual-machine ~/p/d/pg_wal> pg_waldump 000000010000000000000001
2.2 WAL段文件内部布局
WAL段文件默认大小16MB,内包含2000个8KB的页面。
其中,XLogRecord包含两个重要变量xl_rmid(资源管理器)和xl_info。
/*
* The overall layout of an XLOG record is:
* Fixed-size header (XLogRecord struct)
* XLogRecordBlockHeader struct
* XLogRecordBlockHeader struct
* ...
* XLogRecordDataHeader[Short|Long] struct
* block data
* block data
* ...
* main data
*
* There can be zero or more XLogRecordBlockHeaders, and 0 or more bytes of
* rmgr-specific data not associated with a block. XLogRecord structs
* always start on MAXALIGN boundaries in the WAL files, but the rest of
* the fields are not aligned.
*
* The XLogRecordBlockHeader, XLogRecordDataHeaderShort and
* XLogRecordDataHeaderLong structs all begin with a single 'id' byte. It's
* used to distinguish between block references, and the main data structs.
*/
typedef struct XLogRecord
{
uint32 xl_tot_len; /* total len of entire record */
TransactionId xl_xid; /* xact id */
XLogRecPtr xl_prev; /* ptr to previous record in log */
uint8 xl_info; /* flag bits, see below */
RmgrId xl_rmid; /* resource manager for this record */
/* 2 bytes of padding here, initialize to zero */
pg_crc32c xl_crc; /* CRC for this record */
/* XLogRecordBlockHeaders and XLogRecordDataHeader follow, no padding */
} XLogRecord;
| 语句 | XlogRecord | 故障恢复执行函数 |
|---|---|---|
| insert | xl_rmid = RM_HEAP; xl_info = XLOG_HEAP_INSERT | heap_xlog_insert() |
| update | xl_rmid = RM_HEAP; xl_info = XLOG_HEAP_UPDATE | heap_xlog_update() |
| 事务提交 | xl_rmid = RM_XACT; xl_info = XLOG_XACT_COMMIT | xact_redo_commit() |
/*
* List of resource manager entries. Note that order of entries defines the
* numerical values of each rmgr's ID, which is stored in WAL records. New
* entries should be added at the end, to avoid changing IDs of existing
* entries.
*
* Changes to this list possibly need an XLOG_PAGE_MAGIC bump.
*/
/* symbol name, textual name, redo, desc, identify, startup, cleanup */
PG_RMGR(RM_XLOG_ID, "XLOG", xlog_redo, xlog_desc, xlog_identify, NULL, NULL, NULL)
PG_RMGR(RM_XACT_ID, "Transaction", xact_redo, xact_desc, xact_identify, NULL, NULL, NULL)
PG_RMGR(RM_SMGR_ID, "Storage", smgr_redo, smgr_desc, smgr_identify, NULL, NULL, NULL)
PG_RMGR(RM_CLOG_ID, "CLOG", clog_redo, clog_desc, clog_identify, NULL, NULL, NULL)
PG_RMGR(RM_DBASE_ID, "Database", dbase_redo, dbase_desc, dbase_identify, NULL, NULL, NULL)
PG_RMGR(RM_TBLSPC_ID, "Tablespace", tblspc_redo, tblspc_desc, tblspc_identify, NULL, NULL, NULL)
PG_RMGR(RM_MULTIXACT_ID, "MultiXact", multixact_redo, multixact_desc, multixact_identify, NULL, NULL, NULL)
PG_RMGR(RM_RELMAP_ID, "RelMap", relmap_redo, relmap_desc, relmap_identify, NULL, NULL, NULL)
PG_RMGR(RM_STANDBY_ID, "Standby", standby_redo, standby_desc, standby_identify, NULL, NULL, NULL)
PG_RMGR(RM_HEAP2_ID, "Heap2", heap2_redo, heap2_desc, heap2_identify, NULL, NULL, heap_mask)
PG_RMGR(RM_HEAP_ID, "Heap", heap_redo, heap_desc, heap_identify, NULL, NULL, heap_mask)
PG_RMGR(RM_BTREE_ID, "Btree", btree_redo, btree_desc, btree_identify, NULL, NULL, btree_mask)
PG_RMGR(RM_HASH_ID, "Hash", hash_redo, hash_desc, hash_identify, NULL, NULL, hash_mask)
PG_RMGR(RM_GIN_ID, "Gin", gin_redo, gin_desc, gin_identify, gin_xlog_startup, gin_xlog_cleanup, gin_mask)
PG_RMGR(RM_GIST_ID, "Gist", gist_redo, gist_desc, gist_identify, gist_xlog_startup, gist_xlog_cleanup, gist_mask)
PG_RMGR(RM_SEQ_ID, "Sequence", seq_redo, seq_desc, seq_identify, NULL, NULL, seq_mask)
PG_RMGR(RM_SPGIST_ID, "SPGist", spg_redo, spg_desc, spg_identify, spg_xlog_startup, spg_xlog_cleanup, spg_mask)
PG_RMGR(RM_BRIN_ID, "BRIN", brin_redo, brin_desc, brin_identify, NULL, NULL, brin_mask)
PG_RMGR(RM_COMMIT_TS_ID, "CommitTs", commit_ts_redo, commit_ts_desc, commit_ts_identify, NULL, NULL, NULL)
PG_RMGR(RM_REPLORIGIN_ID, "ReplicationOrigin", replorigin_redo, replorigin_desc, replorigin_identify, NULL, NULL, NULL)
PG_RMGR(RM_GENERIC_ID, "Generic", generic_redo, generic_desc, generic_identify, NULL, NULL, generic_mask)
PG_RMGR(RM_LOGICALMSG_ID, "LogicalMessage", logicalmsg_redo, logicalmsg_desc, logicalmsg_identify, NULL, NULL, NULL)
/*
* WAL record definitions for heapam.c's WAL operations
*
* XLOG allows to store some information in high 4 bits of log
* record xl_info field. We use 3 for opcode and one for init bit.
*/
#define XLOG_HEAP_INSERT 0x00
#define XLOG_HEAP_DELETE 0x10
#define XLOG_HEAP_UPDATE 0x20
#define XLOG_HEAP_TRUNCATE 0x30
#define XLOG_HEAP_HOT_UPDATE 0x40
#define XLOG_HEAP_CONFIRM 0x50
#define XLOG_HEAP_LOCK 0x60
#define XLOG_HEAP_INPLACE 0x70
#define XLOG_HEAP_OPMASK 0x70
/*
* When we insert 1st item on new page in INSERT, UPDATE, HOT_UPDATE,
* or MULTI_INSERT, we can (and we do) restore entire page in redo
*/
#define XLOG_HEAP_INIT_PAGE 0x80
三、WAL日志的写入和写入进程
3.1 发生的时间
- 运行中的事务提交或者中止
- WAL缓冲区被写入的元组填满(与wal_buffers有关)
- WAL写入进程周期性执行写入操作
3.2 WAL写入进程
- 后台进程,每200ms检查WAL缓冲区并写入(wal_write_delay = 200ms)
3.3 WAL记录的写入流程(以insert为例子)
| 调用栈 | 功能 |
|---|---|
| exec_simple_query() | psql执行IUD |
| ExtendCLOG() | 将当前事务的状态IN_PROGRESS写入内存CLOG |
| heap_insert() | 插入元组 |
| XLogInsert() | 将XLOG记录写入WAL缓冲区(内存) |
| XLogWrite() | 将XLOG记录写入WAL段文件(硬盘) |
| TransactionIdCommitTree() | 将CLOG状态从IN_PROGRESS变成COMMITED |
void
heap_insert(Relation relation, HeapTuple tup, CommandId cid,
int options, BulkInsertState bistate)
{
TransactionId xid = GetCurrentTransactionId();
/* 省略若干行 */
if (!(options & HEAP_INSERT_SKIP_WAL) && RelationNeedsWAL(relation))
{
uint8 info = XLOG_HEAP_INSERT;
// 写入日志前的准备
XLogBeginInsert();
// 注册生成日志相关的数据
XLogRegisterData((char *) &xlrec, SizeOfHeapInsert);
// 执行日志插入
recptr = XLogInsert(RM_HEAP_ID, info);
// 设置page的LSN
PageSetLSN(page, recptr);
}
}
引用文献
-《PostgresSQL指南-内幕探索》铃木启修
-
postgresql 12.12 源码
-
WAL日志博客
①- 日志格式
②- 日志初始化
③- 日志的注册
④- 日志组装
⑤- 日志写入WAL Buffer
⑥- 等待日志完成WAL Buffer写入
⑦- 日志落盘上次函数XLogFlush
⑧ - 日志真正落盘函数 XLogWrite
⑨ - 从insert记录看日志写入整体流程
⑩ - 全页写机制
最后修改时间:2023-04-27 11:26:16
「喜欢这篇文章,您的关注和赞赏是给作者最好的鼓励」
关注作者
【版权声明】本文为墨天轮用户原创内容,转载时必须标注文章的来源(墨天轮),文章链接,文章作者等基本信息,否则作者和墨天轮有权追究责任。如果您发现墨天轮中有涉嫌抄袭或者侵权的内容,欢迎发送邮件至:contact@modb.pro进行举报,并提供相关证据,一经查实,墨天轮将立刻删除相关内容。




