vlambda博客
学习文章列表

Mysql对JSON对象的解析

       JSON(JavaScript Object Notation)主要用于互联网应用服务之间的数据交换。MySQL 从5.7版本开始支持RFC 7159定义的 JSON 规范,主要有 JSON 对象  JSON 数组 两种类型,8.x系列使得支持更加成熟。JSON 类型的另一个好处是无须预定义字段 ,字段可以无限扩展。而传统关系型数据库的列都需预先定义,想要扩展需要执行 ALTER TABLE ... ADD COLUMN ... 这样比较重的操作。

  • 使用 JSON 数据类型,推荐用 MySQL 8.0.17 以上的版本,性能更好,同时也支持 Multi-Valued Indexes;
  • JSON 数据类型的好处是无须预先定义列,数据本身就具有很好的描述性;
  • 不要将有明显关系型的数据用 JSON 存储,如用户余额、用户姓名、用户身份证等,这些都是每个用户必须包含的数据;
  • JSON 数据类型推荐使用在不经常更新的静态数据存储。
  • JSON分析常用函数

一. 数据生成

create database json_db;use json_db;DROP TABLE IF EXISTS UserLogin;
CREATE TABLE UserLogin ( userId BIGINT NOT NULL, loginInfo JSON, PRIMARY KEY(userId)); desc UserLogin;+-----------+------------+------+-----+---------+-------+| Field | Type | Null | Key | Default | Extra |+-----------+------------+------+-----+---------+-------+| userId | bigint(20) | NO | PRI | NULL | || loginInfo | json | YES | | NULL | |+-----------+------------+------+-----+---------+-------+2 rows in set (0.07 sec)
# 插入数据SET @a = '{ "cellphone" : "13918888888", "wxchat" : "破产码农", "QQ" : "82946772"}';
SET @b = '{ "cellphone" : "15026888888"}';
INSERT INTO UserLogin VALUES (1,@a);INSERT INTO UserLogin VALUES (2,@b);
select * from UserLogin;+--------+--------------------------------------------------------------------------+| userId | loginInfo |+--------+--------------------------------------------------------------------------+| 1 | {"QQ": "82946772", "wxchat": "破产码农", "cellphone": "13918888888"} || 2 | {"cellphone": "15026888888"} |+--------+--------------------------------------------------------------------------+2 rows in set (0.00 sec)
# 插入json 数组DROP TABLE IF EXISTS UserTag;CREATE TABLE UserTag ( userId bigint NOT NULL, userTags JSON, PRIMARY KEY (userId));
INSERT INTO UserTag VALUES (1,'[2,6,8,10]');INSERT INTO UserTag VALUES (2,'[3,10,12]');select * from UserTag;+--------+---------------+| userId | userTags |+--------+---------------+| 1 | [2, 6, 8, 10] || 2 | [3, 10, 12] |+--------+---------------+2 rows in set (0.00 sec)

二. 数据分析

SELECT userId, JSON_UNQUOTE(JSON_EXTRACT(loginInfo,"$.cellphone")) cellphone, JSON_UNQUOTE(JSON_EXTRACT(loginInfo,"$.wxchat")) wxchatFROM UserLogin;+--------+-------------+--------------+| userId | cellphone | wxchat |+--------+-------------+--------------+| 1 | 13918888888 | 破产码农 || 2 | 15026888888 | NULL |+--------+-------------+--------------+2 rows in set (0.00 sec)
# 每次写 JSON_EXTRACT、JSON_UNQUOTE 非常麻烦,MySQL 还提供了 ->> 表达式,和上述 SQL 效果完全一样:SELECT userId, loginInfo->>"$.cellphone" cellphone, loginInfo->>"$.wxchat" wxchatFROM UserLogin;+--------+-------------+--------------+| userId | cellphone | wxchat |+--------+-------------+--------------+| 1 | 13918888888 | 破产码农 || 2 | 15026888888 | NULL |+--------+-------------+--------------+2 rows in set (0.00 sec)
# 当 JSON数据量非常大,用户希望对 JSON 数据进行有效检索时,# 可以利用 MySQL的函数索引功能对JSON的某个字段进行索引。# 通过计算,先创建一个虚拟列cellphone,为后续建立索引ALTER TABLE UserLogin ADD COLUMN cellphone VARCHAR(255AS (loginInfo->>"$.cellphone");select * from UserLogin;+--------+--------------------------------------------------------------------------+-------------+| userId | loginInfo | cellphone |+--------+--------------------------------------------------------------------------+-------------+| 1 | {"QQ": "82946772", "wxchat": "破产码农", "cellphone": "13918888888"} | 13918888888 || 2 | {"cellphone": "15026888888"} | 15026888888 |+--------+--------------------------------------------------------------------------+-------------+2 rows in set (0.00 sec)# 添加唯一索引ALTER TABLE UserLogin ADD UNIQUE INDEX idx_cellphone(cellphone);# 查看优化器,查询可以走idx_cellphoneexplain SELECT * FROM UserLogin WHERE cellphone = '13918888888';+----+-------------+-----------+------------+-------+---------------+---------------+---------+-------+------+----------+-------+| id | select_type | table | partitions | type | possible_keys | key | key_len | ref | rows | filtered | Extra |+----+-------------+-----------+------------+-------+---------------+---------------+---------+-------+------+----------+-------+| 1 | SIMPLE | UserLogin | NULL | const | idx_cellphone | idx_cellphone | 258 | const | 1 | 100.00 | NULL |+----+-------------+-----------+------------+-------+---------------+---------------+---------+-------+------+----------+-------+1 row in set, 1 warning (0.00 sec)
# MySQL 8.0.17 版本开始支持 Multi-Valued Indexes,# 用于在 JSON 数组上创建索引,并通过函数 member of、json_contains、# json_overlaps 来快速检索索引数据。# 所以可以在表 UserTag 上创建 Multi-Valued Indexes:ALTER TABLE UserTagADD INDEX idx_user_tags ((cast((userTags->"$") as unsigned array)));# 查询用户画像为常看电影的用户,可以使用函数 MEMBER OF:EXPLAIN SELECT * FROM UserTag WHERE 10 MEMBER OF(userTags->"$")\G# 查询画像为 80后,且常看电影的用户,可以使用函数 JSON_CONTAINS:EXPLAIN SELECT * FROM UserTag WHERE JSON_CONTAINS(userTags->"$"'[2,10]')\G# 查询画像为 80 后、90 后,且常看电影的用户,则可以使用函数 JSON_OVERLAP:EXPLAIN SELECT * FROM UserTag WHERE JSON_OVERLAPS(userTags->"$"'[2,3,10]')+--------+---------------+| userId | userTags |+--------+---------------+| 1 | [2, 6, 8, 10] || 2 | [3, 10, 12] |+--------+---------------+2 rows in set (0.00 sec)

三.案例实操

  • 数据准备

# 查看glibc版本为2.17ldd --versionldd (GNU libc) 2.17# 下载安装最新版本mysql-8.0.28# https://repo.huaweicloud.com/mysql/Downloads/MySQL-8.0/# 解压xz格式的压缩包。tar -Jxf mysql-8.0.28-linux-glibc2.17-x86_64-minimal.tar.xz -C /usr/local/mysqld --verbose --help# 对于最新的8.0.28 初始化命令由原来的mysql_install_db 变为了mysqld。mysqld --initialize-insecureservice mysqld startALTER USER 'root'@'localhost' IDENTIFIED BY 'Test!123'; create user 'wgw@%' identified by '123456';grant all on *.* to 'wgw'@'%';flush privileges;# 创建库。create database approbeuse approbe;DROP TABLE IF EXISTS stainfo;CREATE TABLE stainfo ( Id bigint auto_increment NOT NULL, content JSON, PRIMARY KEY (Id));# 添加导入目录secure-file-priv = /data/mysqldbset global secure_file_priv='/opt/mysql/my3306/';ERROR 1238 (HY000): Variable 'secure_file_priv' is a read only variable# 分析去重后的mac地址数量cut -c 1-50 rmsdb1.stainfo.2022-02-22.0|egrep "\"MacAddress\":\"([0-9A-Z]){12}\"" -o|sort -u|wc -l116617  load data infile "/opt/mysql/my3306/data/rmsdb1.stainfo.2022-02-22.0" into table stainfo fields terminated by ',' OPTIONALLY ENCLOSED BY '"'; load data infile "/data/mysqldb/rms_10" into table stainfo (content);
  • 常用函数

# 提取json信息的函数select json_keys(content) from stainfo limit 2;+------------------------------+| json_keys(content) |+------------------------------+| ["MAC", "data", "timestamp"] || ["MAC", "data", "timestamp"] |+------------------------------+# 是否包含子文档select Id,content->>"$.MAC" mac,content->>"$.timestamp" createtime from stainfo  where json_contains(content,'{"MAC":"8C73A04738A0"}');+----+--------------+---------------+| Id | mac | createtime |+----+--------------+---------------+| 2 | 8C73A04738A0 | 1645509973681 |+----+--------------+---------------+1 row in set (0.00 sec)# 返回json串长度select json_length(contentfrom stainfo# 返回json的深度。select json_depth(content)  from stainfo;# 返回json值得类型select json_type(content) from stainfo ;# 判断是否为合法json文档select json_valid(content) from stainfo ;
  • 简单数据分析

1. json解析

SELECT  Id, content->>"$.MAC" mac, content->>"$.timestamp" createtime, content->>"$.data.STANumber" STANumber, content->>"$.data.Devices[1].RSSI" RSSI, content->>"$.data.Devices[1].Radio" Radio, content->>"$.data.Devices[1].STAType" STAType, content->>"$.data.Devices[1].HostName" HostName, content->>"$.data.Devices[1].UpTime" UpTime, content->>"$.data.Devices[1].AverRxRate" AverRxRate, content->>"$.data.Devices[1].AverTxRate" AverTxRateFROM stainfo;

2. 取出数组中的成员值。

SELECT  Id, content->>"$.MAC" mac, content->>"$.timestamp" createtime, content->>"$.data.STANumber" STANumber, json_extract(content,'$.data.Devices[*].STAType') STAType, content->>"$.data.Devices[*].AverTxRate" AverTxRateFROM stainfo;

3.查询mac为8C73A04738A0 的终端信息

SELECT  Id, content->>"$.MAC" mac, content->>"$.timestamp" createtime, content->>"$.data.STANumber" STANumber, content->>"$.data.Devices[1].RSSI" RSSI, content->>"$.data.Devices[1].Radio" Radio, content->>"$.data.Devices[1].STAType" STAType, content->>"$.data.Devices[1].HostName" HostName, content->>"$.data.Devices[1].UpTime" UpTime, content->>"$.data.Devices[1].AverRxRate" AverRxRate, content->>"$.data.Devices[1].AverTxRate" AverTxRateFROM stainfo WHERE '8C73A04738A0' MEMBER OF(content->"$.MAC");+----+--------------+---------------+-----------+------+-------+---------+---------------------------+--------+------------+------------+| Id | mac | createtime | STANumber | RSSI | Radio | STAType | HostName | UpTime | AverRxRate | AverTxRate |+----+--------------+---------------+-----------+------+-------+---------+---------------------------+--------+------------+------------+| 2 | 8C73A04738A0 | 1645509973681 | 4 | -67 | 2.4G | Phone | HONOR_Play3-bd1ee10b1fad1 | 9272 | 0.00 | 0.00 |+----+--------------+---------------+-----------+------+-------+---------+---------------------------+--------+------------+------------+1 row in set (0.00 sec)

4.查询mac为3085EB953060 时间为1645509973251的终端

SELECT  Id, content->>"$.MAC" mac, content->>"$.timestamp" createtime, content->>"$.data.STANumber" STANumber, content->>"$.data.Devices[1].RSSI" RSSI, content->>"$.data.Devices[1].Radio" Radio, content->>"$.data.Devices[1].STAType" STAType, content->>"$.data.Devices[1].HostName" HostName, content->>"$.data.Devices[1].UpTime" UpTime, content->>"$.data.Devices[1].AverRxRate" AverRxRate, content->>"$.data.Devices[1].AverTxRate" AverTxRateFROM stainfo WHERE JSON_CONTAINS(content->"$", '[1645509973251,"3085EB953060"]');

5. 查询mac为8C73A04738A0,A87484F20DDC的终端

SELECT  Id, content->>"$.MAC" mac, content->>"$.timestamp" createtime, content->>"$.data.STANumber" STANumber, content->>"$.data.Devices[1].RSSI" RSSI, content->>"$.data.Devices[1].Radio" Radio, content->>"$.data.Devices[1].STAType" STAType, content->>"$.data.Devices[1].HostName" HostName, content->>"$.data.Devices[1].UpTime" UpTime, content->>"$.data.Devices[1].AverRxRate" AverRxRate, content->>"$.data.Devices[1].AverTxRate" AverTxRateFROM stainfo WHERE JSON_OVERLAPS(content->"$.MAC", '["8C73A04738A0","A87484F20DDC"]');+----+--------------+---------------+-----------+------+-------+---------+---------------------------+--------+------------+------------+| Id | mac | createtime | STANumber | RSSI | Radio | STAType | HostName | UpTime | AverRxRate | AverTxRate |+----+--------------+---------------+-----------+------+-------+---------+---------------------------+--------+------------+------------+| 2 | 8C73A04738A0 | 1645509973681 | 4 | -67 | 2.4G | Phone | HONOR_Play3-bd1ee10b1fad1 | 9272 | 0.00 | 0.00 || 9 | A87484F20DDC | 1645509974429 | 1 | NULL | NULL | NULL | NULL | NULL | NULL | NULL |+----+--------------+---------------+-----------+------+-------+---------+---------------------------+--------+------------+------------+2 rows in set (0.00 sec)

6.平均每户下挂终端数及运行时长

select t.Radio,avg(t.STANumber) STANumber ,avg(t.UpTime) UpTime from(SELECT  Id, content->>"$.MAC" mac, content->>"$.timestamp" createtime, content->>"$.data.STANumber" STANumber, content->>"$.data.Devices[1].RSSI" RSSI,    content->>"$.data.Devices[1].Radio" Radio,        content->>"$.data.Devices[1].UpTime" UpTimeFROM stainfo)tgroup by Radio;+-------+--------------------+--------------------+| Radio | STANumber | UpTime |+-------+--------------------+--------------------+| NULL | 0.5600578871201157 | NULL || 2.4G | 2.9991568296795954 | 202308.10708263068 || | 2.991228070175439 | 716069.4166666666 || 5G | 2.925925925925926 | 118172.07407407407 |+-------+--------------------+--------------------+