Doris数据库查询入门：SQL语句与函数使用

数码百科 2024-05-01

448

一、语法整体结构

SELECT
[ALL | DISTINCT | DISTINCTROW ]            -- 对查询字段的结果是否需要去重，还是全部保留等参数
select_expr [, select_expr ...]            -- select的查询字段
[FROM table_references
[PARTITION partition_list]                 -- from 哪个库里面的那张表甚至哪一个(几个)分区
[WHERE where_condition]                    -- WHERE 查询
[GROUP BY {col_name | expr | position}     -- group by  聚合
[ASC | DESC], ... [WITH ROLLUP]]
[HAVING where_condition]                   -- having 针对聚合函数的再一次过滤
[ORDER BY {col_name | expr | position}     -- 对结果数据按照字段进行排序
[ASC | DESC], ...]                                       -- 排序规则
[LIMIT {[offset,] row_count | row_count OFFSET offset}]  -- 限制输出多少行内容
[INTO OUTFILE 'file_name']                 -- 将查询的结果导出到文件中

二、函数

1、条件函数

1、if条件函数

if(boolean condition, type valueTrue, type valueFalseOrNull)
--如果表达式 condition 成立，返回结果 valueTrue；否则，返回结果 valueFalseOrNull
--返回值类型：valueTrue 表达式结果的类型

mysql> select  user_id, if(user_id = 1, "true", "false") as test_if from test;
+---------+---------+
| user_id | test_if |
+---------+---------+
| 1       | true    |
| 2       | false   |
+---------+---------+

2、ifnull,nvl,coalesce,nullif函数

ifnull(expr1, expr2)
--如果 expr1 的值不为 NULL 则返回 expr1，否则返回 expr2

nvl(expr1, expr2)
--如果 expr1 的值不为 NULL 则返回 expr1，否则返回 expr2

coalesce(expr1, expr2, ...., expr_n))
--返回参数中的第一个非空表达式（从左向右）

nullif(expr1, expr2)
-- 如果两个参数相等，则返回NULL。否则返回第一个参数的值


mysql> select ifnull(1,0);
+--------------+
| ifnull(1, 0) |
+--------------+
|            1 |
+--------------+

mysql> select nvl(null,10);
+------------------+
| nvl(null,10)     |
+------------------+
|               10 |
+------------------+

mysql> select coalesce(NULL, '1111', '0000');
+--------------------------------+
| coalesce(NULL, '1111', '0000') |
+--------------------------------+
| 1111                           |
+--------------------------------+

mysql> select coalesce(NULL, NULL,NULL,'0000', NULL);
+----------------------------------------+
| coalesce(NULL, NULL,NULL,'0000', NULL) |
+----------------------------------------+
| 0000                                   |
+----------------------------------------+

mysql> select nullif(1,1);
+--------------+
| nullif(1, 1) |
+--------------+
|         NULL |
+--------------+

mysql> select nullif(1,0);
+--------------+
| nullif(1, 0) |
+--------------+
|            1 |
+--------------+

3、case

-- 方式一
CASE expression
    WHEN condition1 THEN result1
    [WHEN condition2 THEN result2]
    ...
    [WHEN conditionN THEN resultN]
    [ELSE result]
END

-- 方式二
CASE WHEN condition1 THEN result1
    [WHEN condition2 THEN result2]
    ...
    [WHEN conditionN THEN resultN]
    [ELSE result]
END

-- 将表达式和多个可能的值进行比较，当匹配时返回相应的结果


mysql> select user_id, 
case user_id 
when 1 then 'user_id = 1' 
when 2 then 'user_id = 2' 
else 'user_id not exist' 
end as test_case 
from test;
+---------+-------------+
| user_id | test_case   |
+---------+-------------+
| 1       | user_id = 1 |
| 2       | user_id = 2 |
| 3       | 'user_id not exist' |
+---------+-------------+
 
mysql> select user_id, 
case 
when user_id = 1 then 'user_id = 1' 
when user_id = 2 then 'user_id = 2' 
else 'user_id not exist' 
end as test_case 
from test;
+---------+-------------+
| user_id | test_case   |
+---------+-------------+
| 1       | user_id = 1 |
| 2       | user_id = 2 |
+---------+-------------+

2、聚合函数

1、min,max,sum,avg,count

2、min_by和max_by

MAX_BY(expr1, expr2)
返回expr2最大值所在行的 expr1 字段值 （求分组top1的简介函数）

MySQL > select * from tbl;
+------+------+------+------+
| k1   | k2   | k3   | k4   |
+------+------+------+------+
|    0 | 3    | 2    |  100 |
|    1 | 2    | 3    |    4 |
|    4 | 3    | 2    |    2 |
|    3 | 4    | 2    |    1 |
+------+------+------+------+

MySQL > select max_by(k1, k4) from tbl;

select max_by(k1, k4) from tbl;
--取k4这个列中的最大值对应的k1这个列的值
+--------------------+
| max_by(`k1`, `k4`) |
+--------------------+
|                  0 |
+--------------------+

-- 求每门课程成绩最高分的那个人
select  
subject,max_by(name,score) as name
from score
group by subject


+---------+------+
| subject | name |
+---------+------+
| English | www  |
| math    | lss  |
| chinese | www  |
+---------+------+

3、group_concat

简单来说：就是分组聚合的指定字段进行聚合，默认以 “，”进行分隔

求：每一个人有考试成绩的所有科目
select name， group_concat(subject,‘,’) as all_subject from score group by name

VARCHAR GROUP_CONCAT([DISTINCT] VARCHAR 列名[, VARCHAR sep]

该函数是类似于 sum() 的聚合函数，group_concat 将结果集中的多行结果连接成一个字符串

-- group_concat对于收集的字段只能是string，varchar，char类型  
--当不指定分隔符的时候，默认使用 ','

VARCHAR ：代表GROUP_CONCAT函数返回值类型
[DISTINCT]：可选参数，针对需要拼接的列的值进行去重  
[, VARCHAR sep]：拼接成字符串的分隔符，默认是 ','

select 
gender,
group_concat(name,',') as gc_name
from example 
group by gender;
+--------+---------------+
| gender | gc_name       |
+--------+---------------+
| female | zl,lss        |
| male   | zss,ww,tq,mly |
+--------+---------------+

select 
gender,
group_concat(DISTINCT cast(age as string)) as gc_age
from example 
group by gender;

+--------+------------+
| gender | gc_age     |
+--------+------------+
| female | 48, 28     |
| male   | 58, 38, 18 |
+--------+------------+

4、collect_list，collect_set

ARRAY<T> collect_list(expr)
--返回一个包含 expr 中所有元素(不包括NULL)的数组，数组中元素顺序是不确定的。

ARRAY<T> collect_set(expr)
--返回一个包含 expr 中所有去重后元素(不包括NULL)的数组，数组中元素顺序是不确定的。

3、日期函数

1、获取当前时间

curdate,current_date，now，curtime,current_time，current_timestamp

select current_date();
+----------------+
| current_date() |
+----------------+
| 2022-11-25     |
+----------------+

select curdate();
+------------+
| curdate()  |
+------------+
| 2022-11-25 |
+------------+

 select now();
+---------------------+
| now()               |
+---------------------+
| 2022-11-25 00:55:15 |
+---------------------+

select curtime();
+-----------+
| curtime() |
+-----------+
| 00:42:13  |
+-----------+

select current_timestamp();
+---------------------+
| current_timestamp() |
+---------------------+
| 2022-11-25 00:42:30 |
+---------------------+

2、last_day

DATE last_day(DATETIME date) 
-- 返回输入日期中月份的最后一天； 
--'28'(非闰年的二月份), 
--'29'(闰年的二月份), 
--'30'(四月，六月，九月，十一月), 
--'31'(一月，三月，五月，七月，八月，十月，十二月)


select last_day('2000-03-03 01:00:00'); -- 给我返回这个月份中的最后一天的日期  年月日

3、unix_timestamp

UNIX_TIMESTAMP(), 
UNIX_TIMESTAMP(DATETIME date), 
UNIX_TIMESTAMP(DATETIME date, STRING fmt) -- 给一个日期，指定这个日期的格式
-- 将日期转换成时间戳，返回值是一个int类型

-- 获取当前日期的时间戳
select unix_timestamp();
+------------------+
| unix_timestamp() |
+------------------+
|       1669309722 |
+------------------+

-- 获取指定日期的时间戳
select unix_timestamp('2022-11-26 01:09:01');
+---------------------------------------+
| unix_timestamp('2022-11-26 01:09:01') |
+---------------------------------------+
|                            1669396141 |
+---------------------------------------+

-- 给定一个特殊日期格式的时间戳，指定格式
select unix_timestamp('2022-11-26 01:09-01', '%Y-%m-%d %H:%i-%s');
+------------------------------------------------------------+
| unix_timestamp('2022-11-26 01:09-01', '%Y-%m-%d %H:%i-%s') |
+------------------------------------------------------------+
|                                                 1669396141 |
+------------------------------------------------------------+

4、to_date

DATE TO_DATE(DATETIME)
--返回 DATETIME 类型中的日期部分。

select to_date("2022-11-20 00:00:00");     
+--------------------------------+
| to_date('2022-11-20 00:00:00') |
+--------------------------------+
| 2022-11-20                     |
+--------------------------------+

5、date_add，date_sub,datediff

DATE_ADD(DATETIME date,INTERVAL expr type)

DATE_SUB(DATETIME date,INTERVAL expr type)

DATEDIFF(DATETIME expr1,DATETIME expr2)
-- 计算两个日期相差多少天，结果精确到天。

-- 向日期添加指定的时间间隔。
-- date 参数是合法的日期表达式。
-- expr 参数是您希望添加的时间间隔。
-- type 参数可以是下列值：YEAR, MONTH, DAY, HOUR, MINUTE, SECOND

elect date_add('2010-11-30 23:59:59', INTERVAL 2 DAY);
+-------------------------------------------------+
| date_add('2010-11-30 23:59:59', INTERVAL 2 DAY) |
+-------------------------------------------------+
| 2010-12-02 23:59:59                             |
+-------------------------------------------------+


--传一个负数进去也就等同于date_sub
select date_add('2010-11-30 23:59:59', INTERVAL -2 DAY);
+--------------------------------------------------+
| date_add('2010-11-30 23:59:59', INTERVAL -2 DAY) |
+--------------------------------------------------+
| 2010-11-28 23:59:59                              |
+--------------------------------------------------+


mysql> select datediff('2022-11-27 22:51:56','2022-11-24 22:50:56');
+--------------------------------------------------------+
| datediff('2022-11-27 22:51:56', '2022-11-24 22:50:56') |
+--------------------------------------------------------+
|                                                      3 |
+--------------------------------------------------------+

6、date_format

VARCHAR DATE_FORMAT(DATETIME date, VARCHAR format)
--将日期类型按照format的类型转化为字符串

select date_format('2007-10-04 22:23:00', '%H:%i:%s');
+------------------------------------------------+
| date_format('2007-10-04 22:23:00', '%H:%i:%s') |
+------------------------------------------------+
| 22:23:00                                       |
+------------------------------------------------+

select date_format('2007-10-04 22:23:00', 'yyyy-MM-dd');
+------------------------------------------------+
| date_format('2007-10-04 22:23:00', '%Y-%m-%d') |
+------------------------------------------------+
| 2007-10-04                                     |
+------------------------------------------------+

4、字符串函数

1、length，lower，upper，reverse
获取到字符串的长度，对字符串转大小写和字符串的反转

2、lpad，rpad

VARCHAR rpad(VARCHAR str, INT len, VARCHAR pad)

VARCHAR lpad(VARCHAR str, INT len, VARCHAR pad)

-- 返回 str 中长度为 len（从首字母开始算起）的字符串。
--如果 len 大于 str 的长度，则在 str 的后面不断补充 pad  字符，
--直到该字符串的长度达到 len 为止。如果 len 小于 str 的长度，
--该函数相当于截断 str 字符串，只返回长度为 len  的字符串。
--len 指的是字符长度而不是字节长度。

-- 向左边补齐
SELECT lpad("1", 5, "0");
+---------------------+
| lpad("1", 5, "0") |
+---------------------+
| 00001             |
+---------------------+

-- 向右边补齐
SELECT rpad('11', 5, '0');
+---------------------+
| rpad('11', 5, '0')  |
+---------------------+
| 11000               |
+---------------------+

3、concat，concat_ws

select concat("a", "b");
+------------------+
| concat('a', 'b') |
+------------------+
| ab               |
+------------------+

select concat("a", "b", "c");
+-----------------------+
| concat('a', 'b', 'c') |
+-----------------------+
| abc                   |
+-----------------------+

-- concat中，如果有一个值为null，那么得到的结果就是null
mysql> select concat("a", null, "c");
+------------------------+
| concat('a', NULL, 'c') |
+------------------------+
| NULL                   |
+------------------------+


--使用第一个参数 sep 作为连接符
--将第二个参数以及后续所有参数(或ARRAY中的所有字符串)拼接成一个字符串。
-- 如果分隔符是 NULL，返回 NULL。concat_ws函数不会跳过空字符串，会跳过 NULL 值。
mysql> select concat_ws("_", "a", "b");
+----------------------------+
| concat_ws("_", "a", "b")   |
+----------------------------+
| a_b                        |
+----------------------------+

mysql> select concat_ws(NULL, "d", "is");
+----------------------------+
| concat_ws(NULL, 'd', 'is') |
+----------------------------+
| NULL                       |
+----------------------------+

4、substr

-求子字符串，返回第一个参数描述的字符串中从start开始长度为len的部分字符串。
--首字母的下标为1。
mysql> select substr("Hello doris", 2, 1);
+-----------------------------+
| substr('Hello doris', 2, 1) |
+-----------------------------+
| e                           |
+-----------------------------+
mysql> select substr("Hello doris", 1, 2);
+-----------------------------+
| substr('Hello doris', 1, 2) |
+-----------------------------+
| He                          |
+-----------------------------+

5、ends_with，starts_with

BOOLEAN ENDS_WITH (VARCHAR str, VARCHAR suffix)
--如果字符串以指定后缀结尾，返回true。否则，返回false。
--任意参数为NULL，返回NULL。

BOOLEAN STARTS_WITH (VARCHAR str, VARCHAR prefix)
--如果字符串以指定前缀开头，返回true。否则，返回false。
--任意参数为NULL，返回NULL。

mysql> select ends_with("Hello doris", "doris");
+-----------------------------------+
| ends_with('Hello doris', 'doris') |
+-----------------------------------+
|                                 1 | 
+-----------------------------------+

mysql> select ends_with("Hello doris", "Hello");
+-----------------------------------+
| ends_with('Hello doris', 'Hello') |
+-----------------------------------+
|                                 0 | 
+-----------------------------------+


MySQL [(none)]> select starts_with("hello world","hello");
+-------------------------------------+
| starts_with('hello world', 'hello') |
+-------------------------------------+
|                                   1 |
+-------------------------------------+

MySQL [(none)]> select starts_with("hello world","world");
+-------------------------------------+
| starts_with('hello world', 'world') |
+-------------------------------------+
|                                   0 |
+-------------------------------------+

6、trim，ltrim，rtrim

VARCHAR trim(VARCHAR str)
-- 将参数 str 中左侧和右侧开始部分连续出现的空格去掉
mysql> SELECT trim('   ab d   ') str;
+------+
| str  |
+------+
| ab d |
+------+

VARCHAR ltrim(VARCHAR str)
-- 将参数 str 中从左侧部分开始部分连续出现的空格去掉
mysql> SELECT ltrim('   ab d') str;
+------+
| str  |
+------+
| ab d |
+------+

VARCHAR rtrim(VARCHAR str)
--将参数 str 中从右侧部分开始部分连续出现的空格去掉

mysql> SELECT rtrim('ab d   ') str;
+------+
| str  |
+------+
| ab d |
+------+

7、null_or_empty，not_null_or_empty

BOOLEAN NULL_OR_EMPTY (VARCHAR str)

-- 如果字符串为空字符串或者NULL，返回true。否则，返回false。
MySQL [(none)]> select null_or_empty(null);
+---------------------+
| null_or_empty(NULL) |
+---------------------+
|                   1 |
+---------------------+

MySQL [(none)]> select null_or_empty("");
+-------------------+
| null_or_empty('') |
+-------------------+
|                 1 |
+-------------------+

MySQL [(none)]> select null_or_empty("a");
+--------------------+
| null_or_empty('a') |
+--------------------+
|                  0 |
+--------------------+

BOOLEAN NOT_NULL_OR_EMPTY (VARCHAR str)
如果字符串为空字符串或者NULL，返回false。否则，返回true。

MySQL [(none)]> select not_null_or_empty(null);
+-------------------------+
| not_null_or_empty(NULL) |
+-------------------------+
|                       0 |
+-------------------------+

MySQL [(none)]> select not_null_or_empty("");
+-----------------------+
| not_null_or_empty('') |
+-----------------------+
|                     0 |
+-----------------------+

MySQL [(none)]> select not_null_or_empty("a");
+------------------------+
| not_null_or_empty('a') |
+------------------------+
|                      1 |
+------------------------+

8、replace

VARCHAR REPLACE (VARCHAR str, VARCHAR old, VARCHAR new)
-- 将str字符串中的old子串全部替换为new串

mysql> select replace("http://www.baidu.com:9090", "9090", "");
+------------------------------------------------------+
| replace('http://www.baidu.com:9090', '9090', '') |
+------------------------------------------------------+
| http://www.baidu.com:                                |
+------------------------------------------------------+

9、split_part

VARCHAR split_part(VARCHAR content, VARCHAR delimiter, INT field)
-- 根据分割符拆分字符串, 返回指定的分割部分(从一开始计数)。

mysql> select split_part("hello world", " ", 1);
+----------------------------------+
| split_part('hello world', ' ', 1) |
+----------------------------------+
| hello                            |
+----------------------------------+


mysql> select split_part("hello world", " ", 2);
+----------------------------------+
| split_part('hello world', ' ', 2) |
+----------------------------------+
| world                             |
+----------------------------------+

mysql> select split_part("2019年7月8号", "月", 1);
+-----------------------------------------+
| split_part('2019年7月8号', '月', 1)     |
+-----------------------------------------+
| 2019年7                                 |
+-----------------------------------------+

mysql> select split_part("abca", "a", 1);
+----------------------------+
| split_part('abca', 'a', 1) |
+----------------------------+
|                            |
+----------------------------+

10、money_format

VARCHAR money_format(Number)
-- 将数字按照货币格式输出，整数部分每隔3位用逗号分隔，小数部分保留2位

mysql> select money_format(17014116);
+------------------------+
| money_format(17014116) |
+------------------------+
| 17,014,116.00          |
+------------------------+

mysql> select money_format(1123.456);
+------------------------+
| money_format(1123.456) |
+------------------------+
| 1,123.46               |
+------------------------+

mysql> select money_format(1123.4);
+----------------------+
| money_format(1123.4) |
+----------------------+
| 1,123.40             |
+----------------------+

5、数字函数

1、ceil和floor
向上取整和向下取整

BIGINT ceil(DOUBLE x) 
-- 返回大于或等于x的最小整数值.
mysql> select ceil(1);

+-----------+
| ceil(1.0) |
+-----------+
|         1 |
+-----------+
mysql> select ceil(2.4);
+-----------+
| ceil(2.4) |
+-----------+
|         3 |
+-----------+
mysql> select ceil(-10.3);
+-------------+
| ceil(-10.3) |
+-------------+
|         -10 |
+-------------+

BIGINT floor(DOUBLE x) 
-- 返回小于或等于x的最大整数值.
mysql> select floor(1);
+------------+
| floor(1.0) |
+------------+
|          1 |
+------------+
mysql> select floor(2.4);
+------------+
| floor(2.4) |
+------------+
|          2 |
+------------+
mysql> select floor(-10.3);
+--------------+
| floor(-10.3) |
+--------------+
|          -11 |
+--------------+

2、round

四舍五入

round(x), round(x, d) 
-- 将x四舍五入后保留d位小数，d默认为0。
-- 如果d为负数，则小数点左边d位为0。如果x或d为null，返回null。

mysql> select round(2.4);
+------------+
| round(2.4) |
+------------+
|          2 |
+------------+
mysql> select round(2.5);
+------------+
| round(2.5) |
+------------+
|          3 |
+------------+
mysql> select round(-3.4);
+-------------+
| round(-3.4) |
+-------------+
|          -3 |
+-------------+
mysql> select round(-3.5);
+-------------+
| round(-3.5) |
+-------------+
|          -4 |
+-------------+
mysql> select round(1667.2725, 2);
+---------------------+
| round(1667.2725, 2) |
+---------------------+
|             1667.27 |
+---------------------+
mysql> select round(1667.2725, -2);
+----------------------+
| round(1667.2725, -2) |
+----------------------+
|                 1700 |
+----------------------+

3、abs
绝对值

数值类型 abs(数值类型 x) 
-- 返回x的绝对值.

mysql> select abs(-2);
+---------+
| abs(-2) |
+---------+
|       2 |
+---------+
mysql> select abs(3.254655654);
+------------------+
| abs(3.254655654) |
+------------------+
|      3.254655654 |
+------------------+
mysql> select abs(-3254654236547654354654767);
+---------------------------------+
| abs(-3254654236547654354654767) |
+---------------------------------+
| 3254654236547654354654767       |
+---------------------------------+

4、pow
求幂

DOUBLE pow(DOUBLE a, DOUBLE b) 
-- 求幂次：返回a的b次方.

mysql> select pow(2,0);
+---------------+
| pow(2.0, 0.0) |
+---------------+
|             1 |
+---------------+
mysql> select pow(2,3);
+---------------+
| pow(2.0, 3.0) |
+---------------+
|             8 |
+---------------+
mysql> select round(pow(3,2.4),2);
+--------------------+
| pow(3.0, 2.4)      |
+--------------------+
| 13.966610165238235 |
+--------------------+

5、greatest和 least

greatest(col_a, col_b, …, col_n)
-- 返回一行中 n个column的最大值.若column中有NULL，则返回NULL.

least(col_a, col_b, …, col_n)
-- 返回一行中 n个column的最小值.若column中有NULL，则返回NULL.

mysql> select greatest(-1, 0, 5, 8);
+-----------------------+
| greatest(-1, 0, 5, 8) |
+-----------------------+
|                     8 |
+-----------------------+
mysql> select greatest(-1, 0, 5, NULL);
+--------------------------+
| greatest(-1, 0, 5, NULL) |
+--------------------------+
| NULL                     |
+--------------------------+
mysql> select greatest(6.3, 4.29, 7.6876);
+-----------------------------+
| greatest(6.3, 4.29, 7.6876) |
+-----------------------------+
|                      7.6876 |
+-----------------------------+
mysql> select greatest("2022-02-26 20:02:11","2020-01-23 20:02:11","2020-06-22 20:02:11");
+-------------------------------------------------------------------------------+
| greatest('2022-02-26 20:02:11', '2020-01-23 20:02:11', '2020-06-22 20:02:11') |
+-------------------------------------------------------------------------------+
| 2022-02-26 20:02:11                                                           |
+-------------------------------------------------------------------------------+

6、数组函数

仅支持向量化引擎中使用

1、array

ARRAY<T> array(T, ...)
-- 把多个字段构造成一个数组

mysql> set enable_vectorized_engine=true;

mysql> select array("1", 2, 1.1);
+----------------------+
| array('1', 2, '1.1') |
+----------------------+
| ['1', '2', '1.1']    |
+----------------------+
1 row in set (0.00 sec)


mysql> select array(null, 1);
+----------------+
| array(NULL, 1) |
+----------------+
| [NULL, 1]      |
+----------------+
1 row in set (0.00 sec)

mysql> select array(1, 2, 3);
+----------------+
| array(1, 2, 3) |
+----------------+
| [1, 2, 3]      |
+----------------+
1 row in set (0.00 sec)

2、array_min，array_max，array_avg，array_sum，array_size

求数组中的最小值，最大值，平均值，数组中所有元素的和，数组的长度
-- 数组中的NULL值会被跳过。空数组以及元素全为NULL值的数组，结果返回NULL值。

3、array_remove

ARRAY<T> array_remove(ARRAY<T> arr, T val)
-- 返回移除所有的指定元素后的数组，如果输入参数为NULL，则返回NULL

mysql> set enable_vectorized_engine=true;

mysql> select array_remove(['test', NULL, 'value'], 'value');
+-----------------------------------------------------+
| array_remove(ARRAY('test', NULL, 'value'), 'value') |
+-----------------------------------------------------+
| [test, NULL]                                        |
+-----------------------------------------------------+

mysql> select k1, k2, array_remove(k2, 1) from array_type_table_1;
+------+--------------------+-----------------------+
| k1   | k2                 | array_remove(`k2`, 1) |
+------+--------------------+-----------------------+
|    1 | [1, 2, 3]          | [2, 3]                |
|    2 | [1, 3]             | [3]                   |
|    3 | NULL               | NULL                  |
|    4 | [1, 3]             | [3]                   |
|    5 | [NULL, 1, NULL, 2] | [NULL, NULL, 2]       |
+------+--------------------+-----------------------+

mysql> select k1, k2, array_remove(k2, k1) from array_type_table_1;
+------+--------------------+--------------------------+
| k1   | k2                 | array_remove(`k2`, `k1`) |
+------+--------------------+--------------------------+
|    1 | [1, 2, 3]          | [2, 3]                   |
|    2 | [1, 3]             | [1, 3]                   |
|    3 | NULL               | NULL                     |
|    4 | [1, 3]             | [1, 3]                   |
|    5 | [NULL, 1, NULL, 2] | [NULL, 1, NULL, 2]       |
+------+--------------------+--------------------------+

4、array_sort

ARRAY<T> array_sort(ARRAY<T> arr)
-- 返回按升序排列后的数组，如果输入数组为NULL，则返回NULL。 
-- 如果数组元素包含NULL, 则输出的排序数组会将NULL放在最前面。

mysql> set enable_vectorized_engine=true;
mysql> select k1, k2, array_sort(k2) array_test;
+------+-----------------------------+-----------------------------+
| k1   | k2                          | array_sort(`k2`)            |
+------+-----------------------------+-----------------------------+
|  1   | [1, 2, 3, 4, 5]             | [1, 2, 3, 4, 5]             |
|  2   | [6, 7, 8]                   | [6, 7, 8]                   |
|  3   | []                          | []                          |
|  4   | NULL                        | NULL                        |
|  5   | [1, 2, 3, 4, 5, 4, 3, 2, 1] | [1, 1, 2, 2, 3, 3, 4, 4, 5] |
|  6   | [1, 2, 3, NULL]             | [NULL, 1, 2, 3]             |
|  7   | [1, 2, 3, NULL, NULL]       | [NULL, NULL, 1, 2, 3]       |
|  8   | [1, 1, 2, NULL, NULL]       | [NULL, NULL, 1, 1, 2]       |
|  9   | [1, NULL, 1, 2, NULL, NULL] | [NULL, NULL, NULL, 1, 1, 2] |
+------+-----------------------------+-----------------------------

5、array_contains

BOOLEAN array_contains(ARRAY<T> arr, T value)

-- 判断数组中是否包含value。返回结果如下：
-- 1    - value在数组arr中存在；
-- 0    - value不存在数组arr中；
-- NULL - arr为NULL时。

mysql> set enable_vectorized_engine=true;

mysql> SELECT id,c_array,array_contains(c_array, 5) FROM `array_test`;
+------+-----------------+------------------------------+
| id   | c_array         | array_contains(`c_array`, 5) |
+------+-----------------+------------------------------+
|    1 | [1, 2, 3, 4, 5] |                            1 |
|    2 | [6, 7, 8]       |                            0 |
|    3 | []              |                            0 |
|    4 | NULL            |                         NULL |
+------+-----------------+------------------------------+

6、array_except

ARRAY<T> array_except(ARRAY<T> array1, ARRAY<T> array2)
-- 返回一个数组，包含所有在array1内但不在array2内的元素，会对返回的结果数组去重
-- 类似于取差集，将返回的差集结果数组去重

mysql> set enable_vectorized_engine=true;

mysql> select k1,k2,k3,array_except(k2,k3) from array_type_table;
+------+-----------------+--------------+--------------------------+
| k1   | k2              | k3           | array_except(`k2`, `k3`) |
+------+-----------------+--------------+--------------------------+
|    1 | [1, 2, 3]       | [2, 4, 5]    | [1, 3]                   |
|    2 | [2, 3]          | [1, 5]       | [2, 3]                   |
|    3 | [1, 1, 1]       | [2, 2, 2]    | [1]                      |
+------+-----------------+--------------+--------------------------+

7、array_intersect

ARRAY<T> array_intersect(ARRAY<T> array1, ARRAY<T> array2)

-- 返回一个数组，包含array1和array2的交集中的所有元素，不包含重复项
-- 两个数组去交集后。将返回的结果去重

mysql> set enable_vectorized_engine=true;

mysql> select k1,k2,k3,array_intersect(k2,k3) from array_type_table;
+------+-----------------+--------------+-----------------------------+
| k1   | k2              | k3           | array_intersect(`k2`, `k3`) |
+------+-----------------+--------------+-----------------------------+
|    1 | [1, 2, 3]       | [2, 4, 5]    | [2]                         |
|    2 | [2, 3]          | [1, 5]       | []                          |
|    3 | [1, 1, 1]       | [2, 2, 2]    | []                          |
+------+-----------------+--------------+-----------------------------+

mysql> select k1,k2,k3,array_intersect(k2,k3) from array_type_table_nullable;
+------+-----------------+--------------+-----------------------------+
| k1   | k2              | k3           | array_intersect(`k2`, `k3`) |
+------+-----------------+--------------+-----------------------------+
|    1 | [1, NULL, 3]    | [1, 3, 5]    | [1, 3]                      |
|    2 | [NULL, NULL, 2] | [2, NULL, 4] | [NULL, 2]                   |
|    3 | NULL            | [1, 2, 3]    | NULL                        |
+------+-----------------+--------------+-----------------------------+

8、array_union
union自动去重

ARRAY<T> array_union(ARRAY<T> array1, ARRAY<T> array2)
-- 返回一个数组，包含array1和array2的并集中的所有元素，不包含重复项
-- 取两个数组的并集，将返回的结果去重

mysql> set enable_vectorized_engine=true;

mysql> select k1,k2,k3,array_union(k2,k3) from array_type_table;
+------+-----------------+--------------+-------------------------+
| k1   | k2              | k3           | array_union(`k2`, `k3`) |
+------+-----------------+--------------+-------------------------+
|    1 | [1, 2, 3]       | [2, 4, 5]    | [1, 2, 3, 4, 5]         |
|    2 | [2, 3]          | [1, 5]       | [2, 3, 1, 5]            |
|    3 | [1, 1, 1]       | [2, 2, 2]    | [1, 2]                  |
+------+-----------------+--------------+-------------------------+

9、array_distinct

ARRAY<T> array_distinct(ARRAY<T> arr)

-- 返回去除了重复元素的数组，如果输入数组为NULL，则返回NULL。

mysql> set enable_vectorized_engine=true;

mysql> select k1, k2, array_distinct(k2) from array_test;
+------+-----------------------------+---------------------------+
| k1   | k2                          | array_distinct(k2)        |
+------+-----------------------------+---------------------------+
| 1    | [1, 2, 3, 4, 5]             | [1, 2, 3, 4, 5]           |
| 2    | [6, 7, 8]                   | [6, 7, 8]                 |
| 3    | []                          | []                        |
| 4    | NULL                        | NULL                      |
| 5    | [1, 2, 3, 4, 5, 4, 3, 2, 1] | [1, 2, 3, 4, 5]           |
| 6    | [1, 2, 3, NULL]             | [1, 2, 3, NULL]           |
| 7    | [1, 2, 3, NULL, NULL]       | [1, 2, 3, NULL]           |
+------+-----------------------------+---------------------------+

7、json函数

1、get_json_double，get_json_int，get_json_string

语法：
DOUBLE get_json_int(VARCHAR json_str, VARCHAR json_path)
INT get_json_int(VARCHAR json_str, VARCHAR json_path)
VARCHAR get_json_string(VARCHAR json_str, VARCHAR json_path)

-- 解析并获取 json 字符串内指定路径的double，int，string 类型的内容。 
-- 其中 json_path 必须以 $ 符号作为开头，使用 . 作为路径分割符。
-- 如果路径中包含 . ，则可以使用双引号包围。 
-- 使用 [ ] 表示数组下标，从 0 开始。 
-- path 的内容不能包含 ", [ 和 ]。 
-- 如果 json_string 格式不对，或 json_path 格式不对，或无法找到匹配项，则返回 NULL。

--1.获取到k1对应的value的值
mysql> select  id, get_json_string(json_string,'$.k1') as k1 from test_json;
+------+------+
| id   | k1   |
+------+------+
|    2 | v32  |
|    4 | v31  |
|    5 | v31  |
|    6 | v31  |
|    1 | v31  |
|    3 | v33  |
+------+------+

--2.获取到key 为a1 里面的数组
mysql> select  id, get_json_string(json_string,'$.a1') as arr from test_json;
+------+------------------------------------+
| id   | arr                                |
+------+------------------------------------+
|    1 | [{"k1":"v41","k2":400},1,"a",3.14] |
|    3 | [{"k1":"v41","k2":400},3,"a",5.14] |
|    2 | [{"k1":"v41","k2":400},2,"a",4.14] |
|    4 | NULL                               |
|    5 | NULL                               |
|    6 | []                                 |
+------+------------------------------------+


--3.获取到key 为a1 里面的数组中第一个元素的值
mysql> select  id, get_json_string(json_string,'$.a1[0]') as arr from test_json;
+------+-----------------------+
| id   | arr                   |
+------+-----------------------+
|    2 | {"k1":"v41","k2":400} |
|    1 | {"k1":"v41","k2":400} |
|    3 | {"k1":"v41","k2":400} |
|    4 | NULL                  |
|    5 | NULL                  |
|    6 | NULL                  |
+------+-----------------------+

--4.获取到key 为a1 里面的数组中第一个元素的值（这个值是一个json串，再次获取到这个字符串中）
select  id, get_json_string(get_json_string(json_string,'$.a1[0]'),'$.k1') as arr from test_json; 
+------+------+
| id   | arr  |
+------+------+
|    2 | v41  |
|    1 | v41  |
|    3 | v41  |
|    4 | NULL |
|    5 | NULL |
|    6 | NULL |
+------+------+
6 rows in set (0.02 sec)

2、json_object

VARCHAR json_object(VARCHAR,...)
-- 生成一个包含指定Key-Value对的json object, 
-- 传入的参数是key,value对，且key不能为null

87

MySQL> select json_object('time',curtime());
+--------------------------------+
| json_object('time', curtime()) |
+--------------------------------+
| {"time": "10:49:18"}           |
+--------------------------------+


MySQL> SELECT json_object('id', 87, 'name', 'carrot');
+-----------------------------------------+
| json_object('id', 87, 'name', 'carrot') |
+-----------------------------------------+
| {"id": 87, "name": "carrot"}            |
+-----------------------------------------+

json_object('id', 87, 'name', 'carrot');

MySQL> select json_object('username',null);
+---------------------------------+
| json_object('username', 'NULL') |
+---------------------------------+
| {"username": NULL}              |
+---------------------------------+

8、窗口函数

over为进行怎么开窗，over前面放置开窗之后的处理函数

1、ROW_NUMBER()，DENSE_RANK()，RANK()

elect x, y, rank() over(partition by x order by y) as rank from int_t;

| x  | y    | rank     |
|----|------|----------|
| 1  | 1    | 1        |
| 1  | 2    | 2        |
| 1  | 2    | 2        |
| 2  | 1    | 1        |
| 2  | 2    | 2        |
| 2  | 3    | 3        |
| 3  | 1    | 1        |
| 3  | 1    | 1        |
| 3  | 2    | 3        |


-- 测试dense_rank(),名词相同会并列排名，比如两个第一名，就是1 1 然后第二名会显示2
select x, y, dense_rank() over(partition by x order by y) as rank from int_t;
 | x  | y    | rank     |
 |----|------|----------|
 | 1  | 1    | 1        |
 | 1  | 2    | 2        |
 | 1  | 2    | 2        |
 | 2  | 1    | 1        |
 | 2  | 2    | 2        |
 | 2  | 3    | 3        |
 | 3  | 1    | 1        |
 | 3  | 1    | 1        |
 | 3  | 2    | 2        |
 
 -- 测试ROW_NUMBER() 按照分组排序要求，返回的编号依次底层，1 2 3 4 5 ，
 -- 不会有重复值，也不会有空缺值，就是连续递增的整数，从1 开始
 select x, y, row_number() over(partition by x order by y) as rank from int_t;

| x | y    | rank     |
|---|------|----------|
| 1 | 1    | 1        |
| 1 | 2    | 2        |
| 1 | 2    | 3        |
| 2 | 1    | 1        |
| 2 | 2    | 2        |
| 2 | 3    | 3        |
| 3 | 1    | 1        |
| 3 | 1    | 2        |
| 3 | 2    | 3        |

2、min，max，sum，avg，count

min(x)over()   -- 取窗口中x列的最小值
max(x)over()   -- 取窗口中x列的最大值
sum(x)over()   -- 取窗口中x列的数据总和
avg(x)over()   -- 取窗口中x列的数据平均值
count(x)over() -- 取窗口中x列有多少行

3、LEAD() ，LAG()

-- LAG() 方法用来计算当前行向前数若干行的值。
LAG(expr, offset, default) OVER (partition_by_clause order_by_clause)

-- LEAD() 方法用来计算当前行向后数若干行的值。
LEAD(expr, offset, default]) OVER (partition_by_clause order_by_clause)

文章转载自数码百科，如果涉嫌侵权，请发送邮件至：contact@modb.pro进行举报，并提供相关证据，一经查实，墨天轮将立刻删除相关内容。