1. 测试准备
在日常开发中,我们经常需要处理用户的多选数据(如权限设置、功能开关等)。面对这种需求,开发者往往会面临多种存储方案的选择。本文将全面对比四种常见的MySQL多选存储方案,通过严谨的性能测试,为您揭示各种方案的优劣。
测试环境与方法论
测试环境配置
硬件配置:
处理器:第13代英特尔酷睿i5-13500H
内存:16GB DDR4
存储:高性能SSD
软件环境:
MySQL版本:5.7.26(InnoDB引擎)
字符集:utf8
测试方法论
我们设计了四种存储方案,每种方案都存储相同的32个布尔选项,通过百万级数据测试进行全方位对比:
32个BOOL字段方案 - 传统列式存储
JSON数组方案 - 现代半结构化存储
CSV字符串方案 - 传统字符串存储
BIT(32)方案 - 二进制位存储
2. 测试表结构设计
方案1:32个BOOL字段(经典列式存储)
CREATE TABLE bool_columns (id INT AUTO_INCREMENT PRIMARY KEY,-- 基础选项 (1-10)option1 BOOLEAN DEFAULT 0 COMMENT '系统管理员标志',option2 BOOLEAN DEFAULT 0 COMMENT '内容审核员标志',option3 BOOLEAN DEFAULT 0 COMMENT 'VIP用户标志',option4 BOOLEAN DEFAULT 0 COMMENT '账户已验证',option5 BOOLEAN DEFAULT 0 COMMENT '账户已锁定',option6 BOOLEAN DEFAULT 0 COMMENT '允许接收邮件通知',option7 BOOLEAN DEFAULT 0 COMMENT '允许接收短信通知',option8 BOOLEAN DEFAULT 0 COMMENT '启用双重认证',option9 BOOLEAN DEFAULT 0 COMMENT '显示在线状态',option10 BOOLEAN DEFAULT 0 COMMENT '允许数据收集',-- 内容偏好选项 (11-20)option11 BOOLEAN DEFAULT 0 COMMENT '使用暗黑模式',option12 BOOLEAN DEFAULT 0 COMMENT '使用紧凑布局',option13 BOOLEAN DEFAULT 0 COMMENT '自动播放视频',option14 BOOLEAN DEFAULT 0 COMMENT '显示NSFW内容',option15 BOOLEAN DEFAULT 0 COMMENT '显示缩略图',option16 BOOLEAN DEFAULT 0 COMMENT '启用键盘快捷键',option17 BOOLEAN DEFAULT 0 COMMENT '使用高级编辑器',option18 BOOLEAN DEFAULT 0 COMMENT '显示时间戳',option19 BOOLEAN DEFAULT 0 COMMENT '显示用户位置',option20 BOOLEAN DEFAULT 0 COMMENT '显示相关推荐',-- 功能开关选项 (21-30)option21 BOOLEAN DEFAULT 0 COMMENT '启用实验功能',option22 BOOLEAN DEFAULT 0 COMMENT '加入用户体验计划',option23 BOOLEAN DEFAULT 0 COMMENT '允许个性化广告',option24 BOOLEAN DEFAULT 0 COMMENT '接受营销信息',option25 BOOLEAN DEFAULT 0 COMMENT '接受隐私政策',option26 BOOLEAN DEFAULT 0 COMMENT '启用自动保存',option27 BOOLEAN DEFAULT 0 COMMENT '显示教程提示',option28 BOOLEAN DEFAULT 0 COMMENT '同步跨设备数据',option29 BOOLEAN DEFAULT 0 COMMENT '启用语音控制',option30 BOOLEAN DEFAULT 0 COMMENT '使用生物识别登录',-- 系统标志选项 (31-32)option31 BOOLEAN DEFAULT 0 COMMENT '系统内部使用标志1',option32 BOOLEAN DEFAULT 0 COMMENT '系统内部使用标志2',-- 索引配置(为常用查询条件建立索引)INDEX idx_option1 (option1) COMMENT '系统管理员查询索引',INDEX idx_option2 (option2) COMMENT '内容审核员查询索引',INDEX idx_option3 (option3) COMMENT 'VIP用户查询索引',INDEX idx_option4 (option4) COMMENT '已验证账户查询索引',INDEX idx_option6_option7 (option6, option7) COMMENT '通知偏好联合索引',INDEX idx_option11_option12 (option11, option12) COMMENT '界面偏好联合索引',INDEX idx_option21_option22 (option21, option22) COMMENT '实验功能联合索引') ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COMMENT='32个布尔选项标志表';
优点:
查询性能最优
支持完善的索引策略
字段含义明确
缺点:
表结构固定,扩展性差
占用存储空间较大
方案2:JSON数组(现代方案)
CREATE TABLE json_array (id INT AUTO_INCREMENT PRIMARY KEY,options JSON NOT NULL COMMENT '存储32个布尔选项的JSON数组') ENGINE=InnoDB COMMENT='使用JSON数组存储32个布尔选项';-- 为特定JSON路径创建虚拟列并索引ALTER TABLE json_arrayADD COLUMN option1 BOOLEAN GENERATED ALWAYS AS (JSON_EXTRACT(options, '$[0]')) VIRTUAL,ADD COLUMN option2 BOOLEAN GENERATED ALWAYS AS (JSON_EXTRACT(options, '$[1]')) VIRTUAL,ADD COLUMN option3 BOOLEAN GENERATED ALWAYS AS (JSON_EXTRACT(options, '$[2]')) VIRTUAL,ADD COLUMN option4 BOOLEAN GENERATED ALWAYS AS (JSON_EXTRACT(options, '$[3]')) VIRTUAL,ADD INDEX idx_option1 (option1),ADD INDEX idx_option2 (option2),ADD INDEX idx_option3 (option3),ADD INDEX idx_option4 (option4);
优点:
结构灵活,易于扩展
支持部分索引
现代MySQL对JSON有良好支持
缺点:
查询性能略低于BOOL方案
更新操作较复杂
方案3:CSV字符串(传统方案)
CREATE TABLE csv_string (id INT AUTO_INCREMENT PRIMARY KEY,options VARCHAR(255),INDEX idx_options (options));
优点:
实现简单
存储紧凑
缺点:
查询性能最差
难以维护和扩展
缺乏类型安全
方案4:BIT(32)
CREATE TABLE bit_mask (id INT AUTO_INCREMENT PRIMARY KEY,options BIT(32),INDEX idx_options (options));
优点:
存储空间最小
位运算性能高
缺点:
可读性差
查询复杂度高
扩展性有限
3. 完整的测试数据生成存储过程
CREATE DEFINER=`root`@`localhost` PROCEDURE `generate_test_data1`()BEGINDECLARE i INT DEFAULT 0;DECLARE batch_size INT DEFAULT 1000;DECLARE commit_counter INT DEFAULT 0;DECLARE bit_str VARCHAR(64);-- 禁用自动提交以提高性能SET autocommit = 0;WHILE i < 1000000 DO-- BOOL方案 - 完整32个字段插入INSERT INTO bool_columns (option1, option2, option3, option4, option5,option6, option7, option8, option9, option10,option11, option12, option13, option14, option15,option16, option17, option18, option19, option20,option21, option22, option23, option24, option25,option26, option27, option28, option29, option30,option31, option32) VALUES (ROUND(RAND()), ROUND(RAND()), ROUND(RAND()), ROUND(RAND()), ROUND(RAND()),ROUND(RAND()), ROUND(RAND()), ROUND(RAND()), ROUND(RAND()), ROUND(RAND()),ROUND(RAND()), ROUND(RAND()), ROUND(RAND()), ROUND(RAND()), ROUND(RAND()),ROUND(RAND()), ROUND(RAND()), ROUND(RAND()), ROUND(RAND()), ROUND(RAND()),ROUND(RAND()), ROUND(RAND()), ROUND(RAND()), ROUND(RAND()), ROUND(RAND()),ROUND(RAND()), ROUND(RAND()), ROUND(RAND()), ROUND(RAND()), ROUND(RAND()),ROUND(RAND()), ROUND(RAND()));-- JSON方案 - 使用完全展开的CONCAT构建紧凑JSONSET @json_data = CONCAT('[',ROUND(RAND()), ',', ROUND(RAND()), ',', ROUND(RAND()), ',', ROUND(RAND()), ',', ROUND(RAND()), ',',ROUND(RAND()), ',', ROUND(RAND()), ',', ROUND(RAND()), ',', ROUND(RAND()), ',', ROUND(RAND()), ',',ROUND(RAND()), ',', ROUND(RAND()), ',', ROUND(RAND()), ',', ROUND(RAND()), ',', ROUND(RAND()), ',',ROUND(RAND()), ',', ROUND(RAND()), ',', ROUND(RAND()), ',', ROUND(RAND()), ',', ROUND(RAND()), ',',ROUND(RAND()), ',', ROUND(RAND()), ',', ROUND(RAND()), ',', ROUND(RAND()), ',', ROUND(RAND()), ',',ROUND(RAND()), ',', ROUND(RAND()), ',', ROUND(RAND()), ',', ROUND(RAND()), ',', ROUND(RAND()), ',',ROUND(RAND()), ',', ROUND(RAND()),']');-- 最佳实践: 使用参数化查询PREPARE stmt FROM 'INSERT INTO json_array (options) VALUES (?)';EXECUTE stmt USING @json_data;DEALLOCATE PREPARE stmt;-- CSV方案 - 完整32个值的CSV字符串INSERT INTO csv_string (options) VALUES (CONCAT_WS(',',ROUND(RAND()), ROUND(RAND()), ROUND(RAND()), ROUND(RAND()), ROUND(RAND()),ROUND(RAND()), ROUND(RAND()), ROUND(RAND()), ROUND(RAND()), ROUND(RAND()),ROUND(RAND()), ROUND(RAND()), ROUND(RAND()), ROUND(RAND()), ROUND(RAND()),ROUND(RAND()), ROUND(RAND()), ROUND(RAND()), ROUND(RAND()), ROUND(RAND()),ROUND(RAND()), ROUND(RAND()), ROUND(RAND()), ROUND(RAND()), ROUND(RAND()),ROUND(RAND()), ROUND(RAND()), ROUND(RAND()), ROUND(RAND()), ROUND(RAND()),ROUND(RAND()), ROUND(RAND())));-- BIT方案 - 完整32位二进制字符串SET bit_str = CONCAT(ROUND(RAND()), ROUND(RAND()), ROUND(RAND()), ROUND(RAND()), ROUND(RAND()),ROUND(RAND()), ROUND(RAND()), ROUND(RAND()), ROUND(RAND()), ROUND(RAND()),ROUND(RAND()), ROUND(RAND()), ROUND(RAND()), ROUND(RAND()), ROUND(RAND()),ROUND(RAND()), ROUND(RAND()), ROUND(RAND()), ROUND(RAND()), ROUND(RAND()),ROUND(RAND()), ROUND(RAND()), ROUND(RAND()), ROUND(RAND()), ROUND(RAND()),ROUND(RAND()), ROUND(RAND()), ROUND(RAND()), ROUND(RAND()), ROUND(RAND()),ROUND(RAND()), ROUND(RAND()));INSERT INTO bit_mask (options) VALUES (CAST(CONV(bit_str, 2, 10) AS UNSIGNED));SET i = i + 1;SET commit_counter = commit_counter + 1;-- 分批提交(每1000行)IF commit_counter >= batch_size THENCOMMIT;SET commit_counter = 0;END IF;END WHILE;-- 提交剩余事务COMMIT;SET autocommit = 1;END
存储空间测量
1. 精确测量表大小
SELECTtable_name,ROUND(data_length/1024/1024, 2) AS 'Data Size (MB)',ROUND(index_length/1024/1024, 2) AS 'Index Size (MB)',ROUND((data_length+index_length)/1024/1024, 2) AS 'Total Size (MB)'FROM information_schema.TABLESWHERE table_schema = DATABASE();
查询性能测试
1. 测试查询设计
测试1:单条件查询
-- 启用性能分析SET profiling = 1;-- BOOL方案SELECT COUNT(*) FROM bool_columns WHERE option1 = 1;-- JSON方案SELECT COUNT(*) FROM json_array WHERE JSON_EXTRACT(options, '$[0]') = 1;-- CSV方案SELECT COUNT(*) FROM csv_string WHERE FIND_IN_SET('1', options) > 0;-- BIT方案SELECT COUNT(*) FROM bit_mask WHERE options & b'1' = b'1';-- 查看耗时SHOW PROFILES;

测试2:多条件组合查询
-- 启用性能分析SET profiling = 1;-- BOOL方案SELECT COUNT(*) FROM bool_columns WHERE option1 = 1 AND option5 = 1;-- JSON方案SELECT COUNT(*) FROM json_arrayWHERE JSON_EXTRACT(options, '$[0]') = 1AND JSON_EXTRACT(options, '$[4]') = 1;-- CSV方案SELECT COUNT(*) FROM csv_stringWHERE FIND_IN_SET('1', options) > 0AND SUBSTRING_INDEX(SUBSTRING_INDEX(options, ',', 5), ',', -1) = '1';-- BIT方案SELECT COUNT(*) FROM bit_maskWHERE options & b'10001' = b'10001';-- 查看耗时SHOW PROFILES;

更新性能测试
-- 启用性能分析SET profiling = 1;-- BOOL方案单字段更新UPDATE bool_columns SET option1 = 0 WHERE id < 1000;-- JSON方案单字段更新UPDATE json_array SET options = JSON_SET(options, '$[0]', 0) WHERE id < 1000;-- BOOL方案多字段更新UPDATE bool_columns SET option1 = 1, option5 = 0, option10 = 1 WHERE id < 1000;-- JSON方案多字段更新UPDATE json_arraySET options = JSON_SET(JSON_SET(JSON_SET(options, '$[0]', 1),'$[4]', 0),'$[9]', 1)WHERE id < 1000;-- 查看耗时SHOW PROFILES;

结论
经过全面测试,我们可以得出以下结论:
BOOL字段方案在查询和更新性能上全面领先,适合对性能要求高的核心系统
JSON方案在保持较好性能的同时提供了灵活性,是现代应用的不错选择
BIT方案存储空间最优,适合存储海量标记数据
CSV方案在各测试中表现最差,不推荐在新项目中使用
MySQL官方文档参考
MySQL BOOLEAN类型
https://dev.mysql.com/doc/refman/8.0/en/boolean-literals.html官方对BOOL/BOOLEAN类型的说明,实际存储为TINYINT(1)。MySQL JSON数据类型
https://dev.mysql.com/doc/refman/8.0/en/json.html包含JSON数据的存储、查询和索引优化方法。生成列(Generated Columns)
https://dev.mysql.com/doc/refman/8.0/en/create-table-generated-columns.htmlJSON方案中虚拟列的实现原理。BIT数据类型
https://dev.mysql.com/doc/refman/8.0/en/bit-type.htmlBIT类型的存储格式和位运算函数。
性能优化参考
MySQL索引最佳实践
https://dev.mysql.com/doc/refman/8.0/en/optimization-indexes.html解释了BOOL方案中多列索引的优势。
扩展阅读
数据库设计模式(Martin Fowler)
https://martinfowler.com/eaaCatalog/包含Flag和Metadata Mapping等相关模式。Stack Overflow讨论:BIT vs BOOL
https://stackoverflow.com/questions/2897277/bit-vs-boolean-tinyint-vs-enum开发者社区对几种方案的实践讨论。

最后修改时间:2025-06-18 15:39:15
文章转载自程序员极光,如果涉嫌侵权,请发送邮件至:contact@modb.pro进行举报,并提供相关证据,一经查实,墨天轮将立刻删除相关内容。




