openGauss每日一练第19天 openGauss收集统计信息、打印执行计划、垃圾收集和checkpoint
1.准备数据
-- Sandbox schema plus the customer_address table that the remaining steps
-- of this exercise operate on. Only the surrogate key is mandatory.
CREATE SCHEMA tpcds;

CREATE TABLE tpcds.customer_address (
    ca_address_sk     integer      NOT NULL,
    ca_address_id     char(16),
    ca_street_number  char(10),
    ca_street_name    varchar(60),
    ca_street_type    char(15),
    ca_suite_number   char(10),
    ca_city           varchar(60),
    ca_county         varchar(30),
    ca_state          char(2),
    ca_zip            char(10),
    ca_country        varchar(20),
    ca_gmt_offset     numeric(5,2),
    ca_location_type  char(20)
);
-- Seed three fully populated sample rows. String literals use ASCII single
-- quotes; typographic quotes are not string delimiters in SQL.
INSERT INTO tpcds.customer_address (
    ca_address_sk, ca_address_id, ca_street_number, ca_street_name,
    ca_street_type, ca_suite_number, ca_city, ca_county, ca_state,
    ca_zip, ca_country, ca_gmt_offset, ca_location_type
) VALUES
    (1, 'AAAAAAAABAAAAAAA', '18', 'Jackson', 'Parkway', 'Suite 280', 'Fairfield', 'Maricopa County', 'AZ', '86192', 'United States', -7.00, 'condo'),
    (2, 'AAAAAAAACAAAAAAA', '362', 'Washington 6th', 'RD', 'Suite 80', 'Fairview', 'Taos County', 'NM', '85709', 'United States', -7.00, 'condo'),
    (3, 'AAAAAAAADAAAAAAA', '585', 'Dogwood Washington', 'Circle', 'Suite Q', 'Pleasant Valley', 'York County', 'PA', '12477', 'United States', -5.00, 'single family');

-- Bulk-load rows with generate_series: one row per key from 10 to 10000.
-- Only ca_address_sk is supplied, so the other columns are left NULL.
INSERT INTO tpcds.customer_address (ca_address_sk)
VALUES (generate_series(10, 10000));
2.收集统计信息
-- Inspect the page and row estimates recorded for the table in pg_class.
-- The lookup is schema-qualified so a same-named table in another schema
-- cannot show up in the result.
SELECT c.relname, c.relpages, c.reltuples
FROM pg_class AS c
INNER JOIN pg_namespace AS n
    ON n.oid = c.relnamespace
WHERE n.nspname = 'tpcds'
  AND c.relname = 'customer_address';

-- Refresh the statistics with ANALYZE VERBOSE, which also prints
-- information about the table while it runs.
ANALYZE VERBOSE tpcds.customer_address;

-- Run the same catalog query again to see the refreshed values.
SELECT c.relname, c.relpages, c.reltuples
FROM pg_class AS c
INNER JOIN pg_namespace AS n
    ON n.oid = c.relnamespace
WHERE n.nspname = 'tpcds'
  AND c.relname = 'customer_address';
3.打印执行计划
-- Use the default plan output format.
SET explain_perf_mode = normal;

-- Execution plan of a simple query on the table.
EXPLAIN SELECT * FROM tpcds.customer_address;

-- Execution plan rendered as JSON (when explain_perf_mode is normal).
EXPLAIN (FORMAT JSON) SELECT * FROM tpcds.customer_address;

-- Execution plan with cost estimates suppressed.
EXPLAIN (COSTS FALSE) SELECT * FROM tpcds.customer_address;

-- Execution plan of a query that uses an aggregate function.
EXPLAIN SELECT SUM(ca_address_sk) FROM tpcds.customer_address WHERE ca_address_sk < 100;

-- Execution plan of a query whose filter column is indexed: create the
-- index first, then explain the filtered query.
CREATE INDEX customer_address_idx ON tpcds.customer_address (ca_address_sk);
EXPLAIN SELECT * FROM tpcds.customer_address WHERE ca_address_sk < 100;
4.垃圾收集
-- VACUUM reclaims the storage occupied by deleted rows in a table or
-- B-Tree index. The UPDATE below rewrites the rows with ca_address_sk < 100
-- so that the table has been modified before VACUUM runs; VERBOSE prints
-- progress details and ANALYZE refreshes the statistics in the same pass.
UPDATE tpcds.customer_address
SET ca_address_sk = ca_address_sk + 1
WHERE ca_address_sk < 100;

VACUUM (VERBOSE, ANALYZE) tpcds.customer_address;
5.事务日志检查点
-- A checkpoint is a point in the transaction log at which all data files
-- have been updated to reflect the information in the log; all data files
-- are flushed to disk.
CHECKPOINT;
6.清理数据
-- Remove the exercise schema together with every object created inside it.
DROP SCHEMA tpcds CASCADE;
课后作业
1.创建分区表,并用generate_series(1,N)函数对表插入数据
omm=# create schema my_schema;
CREATE SCHEMA
omm=# create table my_schema.product
omm-# (
omm(# product_id integer,
omm(# product_name char(30)
omm(# )
omm-# partition by range(product_id)
omm-# (
omm(# partition p0 values less than (500),
omm(# partition p1 values less than (1000),
omm(# partition p2 values less than (2000),
omm(# partition p3 values less than (maxvalue)
omm(# );
CREATE TABLE
omm=# insert into my_schema.product values(generate_series(1,10000));
INSERT 0 10000
omm=# select * from my_schema.product limit 10;
product_id | product_name
------------+--------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
(10 rows)
2.收集表统计信息
omm=# analyze VERBOSE my_schema.product;
INFO: analyzing "my_schema.product"(gaussdb pid=1)
INFO: ANALYZE INFO : "product": scanned 3 of 3 pages, containing 499 live rows and 0 dead rows; 499 rows in sample, 499 estimated total rows(gaussdb pid=1)
INFO: ANALYZE INFO : "product": scanned 3 of 3 pages, containing 500 live rows and 0 dead rows; 500 rows in sample, 500 estimated total rows(gaussdb pid=1)
INFO: ANALYZE INFO : "product": scanned 5 of 5 pages, containing 1000 live rows and 0 dead rows; 1000 rows in sample, 1000 estimated total rows(gaussdb pid=1)
INFO: ANALYZE INFO : "product": scanned 36 of 36 pages, containing 8001 live rows and 0 dead rows; 8001 rows in sample, 8001 estimated total rows(gaussdb pid=1)
ANALYZE
omm=# select relname, relpages, reltuples from pg_class where relname = 'product';
relname | relpages | reltuples
---------+----------+------------
product | 47 | 10000
(1 row)
3.显示简单查询的执行计划;建立索引并显示有索引条件的执行计划
omm=# SET explain_perf_mode=normal;
SET
omm=# explain select * from my_schema.product;
QUERY PLAN
Partition Iterator (cost=0.00..147.00 rows=10000 width=128)
Iterations: 4
-> Partitioned Seq Scan on product (cost=0.00..147.00 rows=10000 width=128)
Selected Partitions: 1..4
(4 rows)
omm=# create index product_id_index on my_schema.product(product_id);
CREATE INDEX
omm=# explain select * from my_schema.product where product_id > 5000;
QUERY PLAN
Index Scan using product_id_index on product (cost=0.00..112.75 rows=5000 width=128)
Index Cond: (product_id > 5000)
(2 rows)
4.更新表数据,并做垃圾收集
omm=# update my_schema.product set product_id = product_id - 100 where product_id > 3000;
UPDATE 7000
omm=# VACUUM (VERBOSE, ANALYZE) my_schema.product;
INFO: vacuuming "my_schema.product"(gaussdb pid=1)
DETAIL: 0 dead row versions cannot be removed yet.
There were 0 unused item pointers.
0 pages are entirely empty.
CPU 0.00s/0.00u sec elapsed 0.00 sec.
INFO: index "product_id_index" now contains 499 row versions in 68 pages(gaussdb pid=1)
DETAIL: 0 index row versions were removed.
0 index pages have been deleted, 0 are currently reusable.
CPU 0.00s/0.00u sec elapsed 0.00 sec.
INFO: "product": found 0 removable, 499 nonremovable row versions in 3 out of 3 pages(gaussdb pid=1)
INFO: vacuuming "my_schema.product"(gaussdb pid=1)
INFO: index "product_id_index" now contains 500 row versions in 68 pages(gaussdb pid=1)
DETAIL: 0 index row versions were removed.
0 index pages have been deleted, 0 are currently reusable.
CPU 0.00s/0.00u sec elapsed 0.00 sec.
INFO: "product": found 0 removable, 500 nonremovable row versions in 3 out of 3 pages(gaussdb pid=1)
DETAIL: 0 dead row versions cannot be removed yet.
There were 0 unused item pointers.
0 pages are entirely empty.
CPU 0.00s/0.00u sec elapsed 0.00 sec.
INFO: vacuuming "my_schema.product"(gaussdb pid=1)
INFO: index "product_id_index" now contains 1000 row versions in 68 pages(gaussdb pid=1)
DETAIL: 0 index row versions were removed.
0 index pages have been deleted, 0 are currently reusable.
CPU 0.00s/0.00u sec elapsed 0.00 sec.
INFO: "product": found 0 removable, 1000 nonremovable row versions in 5 out of 5 pages(gaussdb pid=1)
DETAIL: 0 dead row versions cannot be removed yet.
There were 0 unused item pointers.
0 pages are entirely empty.
CPU 0.00s/0.00u sec elapsed 0.00 sec.
INFO: vacuuming "my_schema.product"(gaussdb pid=1)
INFO: index "product_id_index" now contains 15001 row versions in 68 pages(gaussdb pid=1)
DETAIL: 0 index row versions were removed.
0 index pages have been deleted, 0 are currently reusable.
CPU 0.00s/0.00u sec elapsed 0.00 sec.
INFO: "product": found 0 removable, 15001 nonremovable row versions in 67 out of 67 pages(gaussdb pid=1)
DETAIL: 7000 dead row versions cannot be removed yet.
There were 0 unused item pointers.
0 pages are entirely empty.
CPU 0.00s/0.00u sec elapsed 0.00 sec.
INFO: scanned index "product_id_index" to remove 0.000000 invisible rows(gaussdb pid=1)
DETAIL: CPU 0.00s/0.00u sec elapsed 0.00 sec.
INFO: analyzing "my_schema.product"(gaussdb pid=1)
INFO: ANALYZE INFO : "product": scanned 3 of 3 pages, containing 499 live rows and 0 dead rows; 499 rows in sample, 499 estimated total rows(gaussdb pid=1)
INFO: ANALYZE INFO : "product": scanned 3 of 3 pages, containing 500 live rows and 0 dead rows; 500 rows in sample, 500 estimated total rows(gaussdb pid=1)
INFO: ANALYZE INFO : "product": scanned 5 of 5 pages, containing 1000 live rows and 0 dead rows; 1000 rows in sample, 1000 estimated total rows(gaussdb pid=1)
INFO: ANALYZE INFO : "product": scanned 67 of 67 pages, containing 8001 live rows and 7000 dead rows; 8001 rows in sample, 8001 estimated total rows(gaussdb pid=1)
VACUUM
omm=# CHECKPOINT;
CHECKPOINT
5.清理数据
omm=# drop schema my_schema cascade;
NOTICE: drop cascades to table my_schema.product
DROP SCHEMA




