openGauss每日一练第 19 天 | 系统操作

原创海潮 2021-12-31

270

👉openGauss SQL学习参考

学习目标

学习openGauss收集统计信息、打印执行计划、垃圾收集和 checkpoint

课程学习

连接openGauss

root@modb:~# su - omm
omm@modb:~$ gsql -r

1.准备数据

mydb=# Create schema tpcds;
CREATE SCHEMA
mydb=# CREATE TABLE tpcds.customer_address
(
  ca_address_sk     integer NOT NULL ,
  ca_address_id     character(16),
  ca_street_number  character(10) ,
  ca_street_name    character varying(60) ,
  ca_street_type    character(15) ,
  ca_suite_number   character(10) ,
  ca_city           character varying(60) ,
  ca_county         character varying(30) ,
  ca_state          character(2) ,
  ca_zip            character(10) ,
  ca_country        character varying(20) ,
  ca_gmt_offset     numeric(5,2) ,
  ca_location_type  character(20)
);
CREATE TABLE
mydb=# insert into tpcds.customer_address values
(1, 'AAAAAAAABAAAAAAA', '18', 'Jackson', 'Parkway', 'Suite 280', 'Fairfield', 'Maricopa County', 'AZ', '86192' ,'United States', -7.00, 'condo'),
(2, 'AAAAAAAACAAAAAAA', '362', 'Washington 6th', 'RD', 'Suite 80', 'Fairview', 'Taos County', 'NM', '85709', 'United States', -7.00, 'condo'),
(3, 'AAAAAAAADAAAAAAA', '585', 'Dogwood Washington', 'Circle', 'Suite Q', 'Pleasant Valley', 'York County', 'PA', '12477', 'United States', -5.00, 'single family');

mydb=# select * from tpcds.customer_address;
 ca_address_sk |  ca_address_id   | ca_street_number |   ca_street_name   | ca_street_type  | ca_suite_number |     ca_city     |    ca_county    | ca_state |   ca_zip   |  ca_country   | ca_gmt_offset |   ca_location_type   
---------------+------------------+------------------+--------------------+-----------------+-----------------+-----------------+-----------------+----------+------------+---------------+---------------+----------------------
             1 | AAAAAAAABAAAAAAA | 18               | Jackson            | Parkway         | Suite 280       | Fairfield       | Maricopa County | AZ       | 86192      | United States |         -7.00 | condo               
             2 | AAAAAAAACAAAAAAA | 362              | Washington 6th     | RD              | Suite 80        | Fairview        | Taos County     | NM       | 85709      | United States |         -7.00 | condo               
             3 | AAAAAAAADAAAAAAA | 585              | Dogwood Washington | Circle          | Suite Q         | Pleasant Valley | York County     | PA       | 12477      | United States |         -5.00 | single family       
(3 rows)

mydb=# 

--使用 generate_series(start, stop) 函数生成连续整数，向表中批量插入数据（此处为 10 到 10000，其余列为空）
mydb=# insert into tpcds.customer_address values(generate_series(10, 10000));
INSERT 0 9991
mydb=#

2.收集统计信息

--查看系统表中表的统计信息（尚未执行 ANALYZE，relpages 和 reltuples 均为 0）
mydb=# select relname, relpages, reltuples from pg_class where relname = 'customer_address';
     relname      | relpages | reltuples 
------------------+----------+-----------
 customer_address |        0 |         0
(1 row)

mydb=#

--使用ANALYZE VERBOSE语句更新统计信息，并输出表的相关信息
mydb=# analyze VERBOSE tpcds.customer_address;
INFO:  analyzing "tpcds.customer_address"(dn_6001 pid=9824)
INFO:  ANALYZE INFO : "customer_address": scanned 55 of 55 pages, containing 9994 live rows and 0 dead rows; 9994 rows in sample, 9994 estimated total rows(dn_6001 pid=9824)
ANALYZE
mydb=# 

--再次查看系统表中表的统计信息（ANALYZE 之后 relpages 和 reltuples 已更新）
mydb=# select relname, relpages, reltuples from pg_class where relname = 'customer_address';
     relname      | relpages | reltuples 
------------------+----------+-----------
 customer_address |       55 |      9994
(1 row)

mydb=#

3.打印执行计划

--使用默认的打印格式
mydb=# SET explain_perf_mode=normal;
SET
mydb=# 

--显示表简单查询的执行计划
mydb=#  EXPLAIN SELECT * FROM tpcds.customer_address;
                              QUERY PLAN                               
-----------------------------------------------------------------------
 Seq Scan on customer_address  (cost=0.00..154.94 rows=9994 width=151)
(1 row)

mydb=# 

--以JSON格式输出的执行计划（explain_perf_mode 为 normal 时）
mydb=# EXPLAIN(FORMAT JSON) SELECT * FROM tpcds.customer_address;
                 QUERY PLAN                 
--------------------------------------------
 [                                         +
   {                                       +
     "Plan": {                             +
       "Node Type": "Seq Scan",            +
       "Relation Name": "customer_address",+
       "Alias": "customer_address",        +
       "Startup Cost": 0.00,               +
       "Total Cost": 154.94,               +
       "Plan Rows": 9994,                  +
       "Plan Width": 151                   +
     }                                     +
   }                                       +
 ]
(1 row)

mydb=# 

--禁止开销估计的执行计划
mydb=# EXPLAIN(COSTS FALSE) SELECT * FROM tpcds.customer_address;
          QUERY PLAN          
------------------------------
 Seq Scan on customer_address
(1 row)

mydb=# 

--带有聚集函数查询的执行计划
mydb=# EXPLAIN SELECT SUM(ca_address_sk) FROM tpcds.customer_address WHERE ca_address_sk<100;
                               QUERY PLAN                                
-------------------------------------------------------------------------
 Aggregate  (cost=180.16..180.17 rows=1 width=12)
   ->  Seq Scan on customer_address  (cost=0.00..179.93 rows=94 width=4)
         Filter: (ca_address_sk < 100)
(3 rows)

mydb=# 

--有索引条件的执行计划
mydb=# create index customer_address_idx on tpcds.customer_address(ca_address_sk);
CREATE INDEX
mydb=# EXPLAIN SELECT * FROM tpcds.customer_address WHERE ca_address_sk<100;
                                           QUERY PLAN                                           
------------------------------------------------------------------------------------------------
 [Bypass]
 Index Scan using customer_address_idx on customer_address  (cost=0.00..9.89 rows=94 width=151)
   Index Cond: (ca_address_sk < 100)
(3 rows)

mydb=#

4.垃圾收集

--VACUUM回收表或 B-Tree 索引中已经删除的行所占据的存储空间
mydb=# update tpcds.customer_address set ca_address_sk = ca_address_sk + 1 where ca_address_sk <100;
UPDATE 93
--更新了93行

mydb=# VACUUM (VERBOSE, ANALYZE) tpcds.customer_address;
INFO:  vacuuming "tpcds.customer_address"(dn_6001 pid=9824)
INFO:  index "customer_address_idx" now contains 10087 row versions in 31 pages(dn_6001 pid=9824)
DETAIL:  0 index row versions were removed.
0 index pages have been deleted, 0 are currently reusable.
CPU 0.00s/0.00u sec elapsed 0.00 sec.
INFO:  "customer_address": found 0 removable, 10087 nonremovable row versions in 55 out of 55 pages(dn_6001 pid=9824)
DETAIL:  93 dead row versions cannot be removed yet. There were 0 unused item pointers. 0 pages are entirely empty. CPU 0.00s/0.00u sec elapsed 0.00 sec.
INFO:  analyzing "tpcds.customer_address"(dn_6001 pid=9824)
INFO:  ANALYZE INFO : "customer_address": scanned 55 of 55 pages, containing 9994 live rows and 93 dead rows; 9994 rows in sample, 9994 estimated total rows(dn_6001 pid=9824)
VACUUM
mydb=# --输出显示：UPDATE 产生的 93 个 dead row versions 暂时还不能被回收（cannot be removed yet），ANALYZE 统计到 93 dead rows

5.事务日志检查点

--检查点（CHECKPOINT）是一个事务日志中的点，所有数据文件都在该点被更新以反映日志中的信息，所有数据文件都将被刷新到磁盘
mydb=# CHECKPOINT;
CHECKPOINT
mydb=#

6.清理数据

mydb=# drop schema tpcds cascade;
NOTICE:  drop cascades to table tpcds.customer_address
DROP SCHEMA
mydb=#

课后作业

1.创建分区表，并用 generate_series(1,N) 函数对表插入数据

create table yhc_part1 (c1 serial, c2 timestamp)
partition by range (c2)
(
        partition p1 values less than (date '2022-01-01'),
        partition p2 values less than (date '2022-02-01'),
        partition p3 values less than (date '2022-03-01')
);

mydb=# insert into yhc_part1(c2) values(generate_series('2021-12-01'::date,'2022-02-28', '1 days'));
INSERT 0 90
mydb=# select * from yhc_part1 partition (p1) limit 2; 
 c1 |         c2          
----+---------------------
 92 | 2021-12-01 00:00:00
 93 | 2021-12-02 00:00:00
(2 rows)

mydb=# select * from yhc_part1 partition (p2) limit 2;
 c1  |         c2          
-----+---------------------
 123 | 2022-01-01 00:00:00
 124 | 2022-01-02 00:00:00
(2 rows)

mydb=# select * from yhc_part1 partition (p3) limit 2; 
 c1  |         c2          
-----+---------------------
 154 | 2022-02-01 00:00:00
 155 | 2022-02-02 00:00:00
(2 rows)

2.收集表统计信息

mydb=# analyze VERBOSE yhc_part1;
INFO:  analyzing "public.yhc_part1"(dn_6001 pid=9824)
INFO:  ANALYZE INFO : "yhc_part1": scanned 1 of 1 pages, containing 31 live rows and 31 dead rows; 31 rows in sample, 31 estimated total rows(dn_6001 pid=9824)
INFO:  ANALYZE INFO : "yhc_part1": scanned 1 of 1 pages, containing 31 live rows and 31 dead rows; 31 rows in sample, 31 estimated total rows(dn_6001 pid=9824)
INFO:  ANALYZE INFO : "yhc_part1": scanned 1 of 1 pages, containing 28 live rows and 28 dead rows; 28 rows in sample, 28 estimated total rows(dn_6001 pid=9824)
ANALYZE
mydb=# select relname, relpages, reltuples from pg_class where relname = 'yhc_part1';
  relname  | relpages | reltuples 
-----------+----------+-----------
 yhc_part1 |        3 |        90
(1 row)

3.显示简单查询的执行计划；建立索引并显示有索引条件的执行计划

mydb=# EXPLAIN SELECT * FROM yhc_part1;
                                 QUERY PLAN                                  
-----------------------------------------------------------------------------
 Partition Iterator  (cost=0.00..3.90 rows=90 width=12)
   Iterations: 3
   ->  Partitioned Seq Scan on yhc_part1  (cost=0.00..3.90 rows=90 width=12)
         Selected Partitions:  1..3
(4 rows)

mydb=# create index yhc_part1_idx1 on yhc_part1(c2);
CREATE INDEX
mydb=# explain select * from yhc_part1 where c2 >= date '2022-01-01' and c2 < date '2022-02-01' ;
                                                                    QUERY PLAN                                                                    
--------------------------------------------------------------------------------------------------------------------------------------------------
 Partition Iterator  (cost=0.00..2.35 rows=31 width=12)
   Iterations: 1
   ->  Partitioned Seq Scan on yhc_part1  (cost=0.00..2.35 rows=31 width=12)
         Filter: ((c2 >= '2022-01-01 00:00:00'::timestamp(0) without time zone) AND (c2 < '2022-02-01 00:00:00'::timestamp(0) without time zone))
         Selected Partitions:  2
(5 rows)

mydb=# --注：上面的计划仍是分区顺序扫描（Partitioned Seq Scan），并未使用索引 yhc_part1_idx1；可能是因为表数据量很小，优化器认为顺序扫描代价更低

4.更新表数据，并做垃圾收集

mydb=# update yhc_part1 set c1 = c1 +1 where c2 >= date '2022-01-01' and c2 < date '2022-02-01' ;
UPDATE 31
mydb=# VACUUM (VERBOSE, ANALYZE) yhc_part1;
INFO:  vacuuming "public.yhc_part1"(dn_6001 pid=9824)
INFO:  scanned index "yhc_part1_idx1" to remove 31 row versions(dn_6001 pid=9824)
DETAIL:  CPU 0.00s/0.00u sec elapsed 0.00 sec.
INFO:  index "yhc_part1_idx1" now contains 31 row versions in 2 pages(dn_6001 pid=9824)
DETAIL:  0 index row versions were removed.
0 index pages have been deleted, 0 are currently reusable.
CPU 0.00s/0.00u sec elapsed 0.00 sec.
INFO:  "yhc_part1": found 31 removable, 31 nonremovable row versions in 1 out of 1 pages(dn_6001 pid=9824)
DETAIL:  0 dead row versions cannot be removed yet. There were 0 unused item pointers. 0 pages are entirely empty. CPU 0.00s/0.00u sec elapsed 0.00 sec.
INFO:  vacuuming "public.yhc_part1"(dn_6001 pid=9824)
INFO:  scanned index "yhc_part1_idx1" to remove 31 row versions(dn_6001 pid=9824)
DETAIL:  CPU 0.00s/0.00u sec elapsed 0.00 sec.
INFO:  index "yhc_part1_idx1" now contains 62 row versions in 2 pages(dn_6001 pid=9824)
DETAIL:  0 index row versions were removed.
0 index pages have been deleted, 0 are currently reusable.
CPU 0.00s/0.00u sec elapsed 0.00 sec.
INFO:  "yhc_part1": found 31 removable, 62 nonremovable row versions in 1 out of 1 pages(dn_6001 pid=9824)
DETAIL:  31 dead row versions cannot be removed yet. There were 0 unused item pointers. 0 pages are entirely empty. CPU 0.00s/0.00u sec elapsed 0.00 sec.
INFO:  vacuuming "public.yhc_part1"(dn_6001 pid=9824)
INFO:  scanned index "yhc_part1_idx1" to remove 28 row versions(dn_6001 pid=9824)
DETAIL:  CPU 0.00s/0.00u sec elapsed 0.00 sec.
INFO:  index "yhc_part1_idx1" now contains 28 row versions in 2 pages(dn_6001 pid=9824)
DETAIL:  0 index row versions were removed.
0 index pages have been deleted, 0 are currently reusable.
CPU 0.00s/0.00u sec elapsed 0.00 sec.
INFO:  "yhc_part1": found 28 removable, 28 nonremovable row versions in 1 out of 1 pages(dn_6001 pid=9824)
DETAIL:  0 dead row versions cannot be removed yet. There were 0 unused item pointers. 0 pages are entirely empty. CPU 0.00s/0.00u sec elapsed 0.00 sec.
INFO:  scanned index "yhc_part1_idx1" to remove 0.000000 invisible rows(dn_6001 pid=9824)
DETAIL:  CPU 0.00s/0.00u sec elapsed 0.00 sec.
INFO:  analyzing "public.yhc_part1"(dn_6001 pid=9824)
INFO:  ANALYZE INFO : "yhc_part1": scanned 1 of 1 pages, containing 31 live rows and 0 dead rows; 31 rows in sample, 31 estimated total rows(dn_6001 pid=9824)
INFO:  ANALYZE INFO : "yhc_part1": scanned 1 of 1 pages, containing 31 live rows and 31 dead rows; 31 rows in sample, 31 estimated total rows(dn_6001 pid=9824)
INFO:  ANALYZE INFO : "yhc_part1": scanned 1 of 1 pages, containing 28 live rows and 0 dead rows; 28 rows in sample, 28 estimated total rows(dn_6001 pid=9824)
VACUUM
mydb=#

5.清理数据

mydb=# drop table yhc_part1;
DROP TABLE
mydb=#

opengauss

最后修改时间：2022-01-04 23:30:23

「喜欢这篇文章，您的关注和赞赏是给作者最好的鼓励」

关注作者

openGauss每日一练第 19 天 | 系统操作

学习目标

课程学习

连接openGauss

1.准备数据

2.收集统计信息

3.打印执行计划

4.垃圾收集

5.事务日志检查点

6.清理数据

课后作业

1.创建分区表，并用 generate_series(1,N) 函数对表插入数据

2.收集表统计信息

3.显示简单查询的执行计划；建立索引并显示有索引条件的执行计划

4.更新表数据，并做垃圾收集

5.清理数据

评论