通过docker运行clickhouse,并导入一些sample数据,运行一些查询示例。速度很给力。也通过python将csv文件导入clickhouse。
创建表
-- Time-series table of device telemetry readings.
-- MergeTree partitioned by month (toYYYYMM) so old partitions can be dropped cheaply;
-- sorting key (device_id, time) matches the per-device time-range queries used below.
-- IF NOT EXISTS makes the statement safe to re-run.
CREATE TABLE IF NOT EXISTS xy.readings
(
    time DateTime,
    device_id String,
    battery_level Float64,
    battery_status String,
    battery_temperature Float64,
    bssid String,
    cpu_avg_1min Float64,
    cpu_avg_5min Float64,
    cpu_avg_15min Float64,
    mem_free Float64,
    mem_used Float64,
    rssi Float64,
    ssid String
)
ENGINE = MergeTree()
PARTITION BY toYYYYMM(time)
ORDER BY (device_id, time)
SETTINGS index_granularity = 8192;  -- 8192 is the ClickHouse default granule size
导入数据
# Copy the readings CSV into the container's filesystem.
docker cp ./devices_med_readings.csv xy-clickhouse-server:/var/lib/clickhouse/
# Stream the CSV straight into the table (run inside the container, where the file was copied).
clickhouse-client --query "INSERT INTO xy.readings FORMAT CSV" --max_insert_block_size=100000 < devices_med_readings.csv
通过代码导入,安装驱动pip install clickhouse-driver
# Bulk-load a CSV of device readings into ClickHouse in fixed-size batches.
from clickhouse_driver import Client
import time
import csv
from datetime import datetime

BATCH_SIZE = 1000  # rows per INSERT; batching amortizes network round-trips
# CSV column indexes that hold numeric readings (battery_level, temperature, cpu, mem, rssi).
FLOAT_COLUMNS = (2, 4, 6, 7, 8, 9, 10, 11)

time_start = time.time()
# NOTE(review): clickhouse-driver speaks the native TCP protocol (default 9000),
# not HTTP (8123) -- confirm 9123 is really the mapped native port.
client = Client(host='192.168.1.71', port='9123', database='xy', user='default', password='')

record_list = []
# `with` guarantees the CSV file is closed even if an insert raises.
with open('D:/PostgreSQL/test/devices_med_readings2.csv', 'r') as csv_file:
    for row in csv.reader(csv_file):
        # Column 0 is the reading timestamp, e.g. '2016-11-15 12:00:00'.
        row[0] = datetime.strptime(row[0], '%Y-%m-%d %H:%M:%S')
        for idx in FLOAT_COLUMNS:
            row[idx] = float(row[idx])
        record_list.append(row)
        # Flush exactly every BATCH_SIZE rows.
        # (Original used `> 1000`, which silently made batches of 1001 rows,
        # and printed the whole batch as debug output.)
        if len(record_list) >= BATCH_SIZE:
            client.execute('INSERT INTO readings VALUES', record_list, types_check=True)
            record_list.clear()
            print('插入1000条记录')
    # Flush the final partial batch, if any.
    if record_list:
        client.execute('INSERT INTO readings VALUES', record_list, types_check=True)
        print('插入记录', len(record_list))

time_end = time.time()
print('total cost', time_end - time_start)
导入1000w条数据,耗时total cost 401.68127632141113
单表查询测试
为方便本地连接portainer管理访问docker。访问portainer http://192.168.101.XXX:9000/
-- Row-count sanity check after the bulk load.
SELECT count(*) FROM readings;

-- Ten most recent readings taken while the device was charging.
SELECT
    time,
    device_id,
    battery_temperature
FROM readings
WHERE battery_status = 'charging'
ORDER BY time DESC
LIMIT 10;
-- Hourly min/max battery level over the first half of November 2016.
-- Half-open range (>= start, < end) instead of BETWEEN, so the final second
-- of the window cannot be silently excluded or double-counted at a boundary.
SELECT
    formatDateTime(time, '%Y-%m-%d %H') AS "hour",
    min(battery_level) AS min_battery_level,
    max(battery_level) AS max_battery_level
FROM readings AS r
WHERE r.time >= toDateTime('2016-11-01 00:00:00')
  AND r.time <  toDateTime('2016-11-17 00:00:00')
GROUP BY "hour"
ORDER BY "hour" ASC
LIMIT 12;
日期时间函数参见 https://clickhouse.tech/docs/zh/sql-reference/functions/date-time-functions/
join查询测试
clickhouse-client -m 进入sql交互界面
use hmfm;
-- Dimension table: static per-device metadata, joined to readings on device_id.
-- IF NOT EXISTS makes the statement safe to re-run.
CREATE TABLE IF NOT EXISTS xy.device_info
(
    device_id String,
    api_version String,
    manufacturer String,
    model String,
    os_name String
)
ENGINE = MergeTree()
ORDER BY (device_id);
# Copy the device-info CSV into the container's filesystem.
docker cp ./devices_med_device_info.csv xy-clickhouse-server:/var/lib/clickhouse/
# Stream the CSV into the dimension table (run inside the container, where the file was copied).
clickhouse-client --query "INSERT INTO xy.device_info FORMAT CSV" --max_insert_block_size=100000 < devices_med_device_info.csv
-- Top five discharging, low-battery readings ranked by 1-minute CPU load,
-- enriched with the device model from the dimension table.
SELECT
    r.time,
    r.device_id,
    r.cpu_avg_1min,
    r.battery_level,
    r.battery_status,
    d.model
FROM readings AS r
INNER JOIN device_info AS d
    ON r.device_id = d.device_id
WHERE r.battery_level < 33
  AND r.battery_status = 'discharging'
ORDER BY r.cpu_avg_1min DESC, r.time DESC
LIMIT 5;
-- 5 rows in set. Elapsed: 0.662 sec. Processed 10.01 million rows, 588.91 MB (15.11 million rows/s., 889.56 MB/s.)
-- Hourly min/max battery level, restricted to two device models.
SELECT
    formatDateTime(time, '%Y-%m-%d %H') AS "hour",
    min(battery_level) AS min_battery_level,
    max(battery_level) AS max_battery_level
FROM readings AS r
WHERE r.device_id IN (
    -- DISTINCT dropped: IN already treats the subquery result as a set.
    SELECT device_id
    FROM device_info
    WHERE model IN ('pinto', 'focus')  -- folds the OR chain into one membership test
)
GROUP BY "hour"
ORDER BY "hour" ASC
LIMIT 12;
-- 12 rows in set. Elapsed: 0.363 sec. Processed 10.00 million rows, 309.96 MB (27.55 million rows/s., 854.01 MB/s.)
文章转载自架构学习圈,如果涉嫌侵权,请发送邮件至:contact@modb.pro进行举报,并提供相关证据,一经查实,墨天轮将立刻删除相关内容。




