Mysql对JSON对象的解析
JSON(JavaScript Object Notation)主要用于互联网应用服务之间的数据交换。MySQL 从5.7版本开始支持RFC 7159定义的 JSON 规范,主要有 JSON 对象 和 JSON 数组 两种类型,8.x系列使得支持更加成熟。JSON 类型的另一个好处是无须预定义字段 ,字段可以无限扩展。而传统关系型数据库的列都需预先定义,想要扩展需要执行 ALTER TABLE ... ADD COLUMN ... 这样比较重的操作。
-
使用 JSON 数据类型,推荐用 MySQL 8.0.17 以上的版本,性能更好,同时也支持 Multi-Valued Indexes; -
JSON 数据类型的好处是无须预先定义列,数据本身就具有很好的描述性; -
不要将有明显关系型的数据用 JSON 存储,如用户余额、用户姓名、用户身份证等,这些都是每个用户必须包含的数据; -
JSON 数据类型推荐使用在不经常更新的静态数据存储。 -
JSON分析常用函数
一. 数据生成
create database json_db;
use json_db;
DROP TABLE IF EXISTS UserLogin;
CREATE TABLE UserLogin (
userId BIGINT NOT NULL,
loginInfo JSON,
PRIMARY KEY(userId)
);
desc UserLogin;
+-----------+------------+------+-----+---------+-------+
| Field | Type | Null | Key | Default | Extra |
+-----------+------------+------+-----+---------+-------+
| userId | bigint(20) | NO | PRI | NULL | |
| loginInfo | json | YES | | NULL | |
+-----------+------------+------+-----+---------+-------+
2 rows in set (0.07 sec)
# 插入数据
SET @a = '
{
"cellphone" : "13918888888",
"wxchat" : "破产码农",
"QQ" : "82946772"
}
';
SET @b = '
{
"cellphone" : "15026888888"
}
';
INSERT INTO UserLogin VALUES (1,@a);
INSERT INTO UserLogin VALUES (2,@b);
select * from UserLogin;
+--------+--------------------------------------------------------------------------+
| userId | loginInfo |
+--------+--------------------------------------------------------------------------+
| 1 | {"QQ": "82946772", "wxchat": "破产码农", "cellphone": "13918888888"} |
| 2 | {"cellphone": "15026888888"} |
+--------+--------------------------------------------------------------------------+
2 rows in set (0.00 sec)
# 插入json 数组
DROP TABLE IF EXISTS UserTag;
CREATE TABLE UserTag (
userId bigint NOT NULL,
userTags JSON,
PRIMARY KEY (userId)
);
INSERT INTO UserTag VALUES (1,'[2,6,8,10]');
INSERT INTO UserTag VALUES (2,'[3,10,12]');
select * from UserTag;
+--------+---------------+
| userId | userTags |
+--------+---------------+
| 1 | [2, 6, 8, 10] |
| 2 | [3, 10, 12] |
+--------+---------------+
2 rows in set (0.00 sec)
二. 数据分析
SELECT
userId,
JSON_UNQUOTE(JSON_EXTRACT(loginInfo,"$.cellphone")) cellphone,
JSON_UNQUOTE(JSON_EXTRACT(loginInfo,"$.wxchat")) wxchat
FROM UserLogin;
+--------+-------------+--------------+
| userId | cellphone | wxchat |
+--------+-------------+--------------+
| 1 | 13918888888 | 破产码农 |
| 2 | 15026888888 | NULL |
+--------+-------------+--------------+
2 rows in set (0.00 sec)
# 每次写 JSON_EXTRACT、JSON_UNQUOTE 非常麻烦,MySQL 还提供了 ->> 表达式,和上述 SQL 效果完全一样:
SELECT
userId,
loginInfo->>"$.cellphone" cellphone,
loginInfo->>"$.wxchat" wxchat
FROM UserLogin;
+--------+-------------+--------------+
| userId | cellphone | wxchat |
+--------+-------------+--------------+
| 1 | 13918888888 | 破产码农 |
| 2 | 15026888888 | NULL |
+--------+-------------+--------------+
2 rows in set (0.00 sec)
# 当 JSON数据量非常大,用户希望对 JSON 数据进行有效检索时,
# 可以利用 MySQL的函数索引功能对JSON的某个字段进行索引。
# 通过计算,先创建一个虚拟列cellphone,为后续建立索引
ALTER TABLE UserLogin ADD COLUMN cellphone VARCHAR(255) AS (loginInfo->>"$.cellphone");
select * from UserLogin;
+--------+--------------------------------------------------------------------------+-------------+
| userId | loginInfo | cellphone |
+--------+--------------------------------------------------------------------------+-------------+
| 1 | {"QQ": "82946772", "wxchat": "破产码农", "cellphone": "13918888888"} | 13918888888 |
| 2 | {"cellphone": "15026888888"} | 15026888888 |
+--------+--------------------------------------------------------------------------+-------------+
2 rows in set (0.00 sec)
# 添加唯一索引
ALTER TABLE UserLogin ADD UNIQUE INDEX idx_cellphone(cellphone);
# 查看优化器,查询可以走idx_cellphone
explain SELECT * FROM UserLogin WHERE cellphone = '13918888888';
+----+-------------+-----------+------------+-------+---------------+---------------+---------+-------+------+----------+-------+
| id | select_type | table | partitions | type | possible_keys | key | key_len | ref | rows | filtered | Extra |
+----+-------------+-----------+------------+-------+---------------+---------------+---------+-------+------+----------+-------+
| 1 | SIMPLE | UserLogin | NULL | const | idx_cellphone | idx_cellphone | 258 | const | 1 | 100.00 | NULL |
+----+-------------+-----------+------------+-------+---------------+---------------+---------+-------+------+----------+-------+
1 row in set, 1 warning (0.00 sec)
# MySQL 8.0.17 版本开始支持 Multi-Valued Indexes,
# 用于在 JSON 数组上创建索引,并通过函数 member of、json_contains、
# json_overlaps 来快速检索索引数据。
# 所以可以在表 UserTag 上创建 Multi-Valued Indexes:
ALTER TABLE UserTag
ADD INDEX idx_user_tags ((cast((userTags->"$") as unsigned array)));
# 查询用户画像为常看电影的用户,可以使用函数 MEMBER OF:
EXPLAIN SELECT * FROM UserTag WHERE 10 MEMBER OF(userTags->"$")\G
# 查询画像为 80后,且常看电影的用户,可以使用函数 JSON_CONTAINS:
EXPLAIN SELECT * FROM UserTag WHERE JSON_CONTAINS(userTags->"$", '[2,10]')\G
# 查询画像为 80 后、90 后,且常看电影的用户,则可以使用函数 JSON_OVERLAP:
EXPLAIN SELECT * FROM UserTag WHERE JSON_OVERLAPS(userTags->"$", '[2,3,10]')
+--------+---------------+
| userId | userTags |
+--------+---------------+
| 1 | [2, 6, 8, 10] |
| 2 | [3, 10, 12] |
+--------+---------------+
2 rows in set (0.00 sec)
三.案例实操
数据准备
# 查看glibc版本为2.17
ldd --version
ldd (GNU libc) 2.17
# 下载安装最新版本mysql-8.0.28
# https://repo.huaweicloud.com/mysql/Downloads/MySQL-8.0/
# 解压xz格式的压缩包。
tar -Jxf mysql-8.0.28-linux-glibc2.17-x86_64-minimal.tar.xz -C /usr/local/
mysqld --verbose --help
# 对于最新的8.0.28 初始化命令由原来的mysql_install_db 变为了mysqld。
mysqld --initialize-insecure
service mysqld start
ALTER USER 'root'@'localhost' IDENTIFIED BY 'Test!123';
create user 'wgw@%' identified by '123456';
grant all on *.* to 'wgw'@'%';
flush privileges;
# 创建库。
create database approbe
use approbe;
DROP TABLE IF EXISTS stainfo;
CREATE TABLE stainfo (
Id bigint auto_increment NOT NULL,
content JSON,
PRIMARY KEY (Id)
);
# 添加导入目录
/data/mysqldb =
'/opt/mysql/my3306/'; =
ERROR 1238 (HY000): Variable 'secure_file_priv' is a read only variable
# 分析去重后的mac地址数量
cut -c 1-50 rmsdb1.stainfo.2022-02-22.0|egrep "\"MacAddress\":\"([0-9A-Z]){12}\"" -o|sort -u|wc -l
116617
load data infile "/opt/mysql/my3306/data/rmsdb1.stainfo.2022-02-22.0" into table stainfo fields terminated by ',' OPTIONALLY ENCLOSED BY '"';
load data infile "/data/mysqldb/rms_10" into table stainfo (content);
常用函数
select json_keys(content) from stainfo limit 2;
+------------------------------+
| json_keys(content) |
+------------------------------+
| ["MAC", "data", "timestamp"] |
| ["MAC", "data", "timestamp"] |
+------------------------------+
select
Id,
content->>"$.MAC" mac,
content->>"$.timestamp" createtime
from stainfo where json_contains(content,'{"MAC":"8C73A04738A0"}');
+----+--------------+---------------+
| Id | mac | createtime |
+----+--------------+---------------+
| 2 | 8C73A04738A0 | 1645509973681 |
+----+--------------+---------------+
1 row in set (0.00 sec)
# 返回json串长度
select json_length(content) from stainfo
# 返回json的深度。
select json_depth(content) from stainfo;
select json_type(content) from stainfo ;
select json_valid(content) from stainfo ;
简单数据分析
1. json解析
SELECT
Id,
content->>"$.MAC" mac,
content->>"$.timestamp" createtime,
content->>"$.data.STANumber" STANumber,
content->>"$.data.Devices[1].RSSI" RSSI,
content->>"$.data.Devices[1].Radio" Radio,
content->>"$.data.Devices[1].STAType" STAType,
content->>"$.data.Devices[1].HostName" HostName,
content->>"$.data.Devices[1].UpTime" UpTime,
content->>"$.data.Devices[1].AverRxRate" AverRxRate,
content->>"$.data.Devices[1].AverTxRate" AverTxRate
FROM stainfo;
2. 取出数组中的成员值。
SELECT
Id,
content->>"$.MAC" mac,
content->>"$.timestamp" createtime,
content->>"$.data.STANumber" STANumber,
json_extract(content,'$.data.Devices[*].STAType') STAType,
content->>"$.data.Devices[*].AverTxRate" AverTxRate
FROM stainfo;
3.查询mac为8C73A04738A0 的终端信息
SELECT
Id,
content->>"$.MAC" mac,
content->>"$.timestamp" createtime,
content->>"$.data.STANumber" STANumber,
content->>"$.data.Devices[1].RSSI" RSSI,
content->>"$.data.Devices[1].Radio" Radio,
content->>"$.data.Devices[1].STAType" STAType,
content->>"$.data.Devices[1].HostName" HostName,
content->>"$.data.Devices[1].UpTime" UpTime,
content->>"$.data.Devices[1].AverRxRate" AverRxRate,
content->>"$.data.Devices[1].AverTxRate" AverTxRate
FROM stainfo WHERE '8C73A04738A0' MEMBER OF(content->"$.MAC");
+----+--------------+---------------+-----------+------+-------+---------+---------------------------+--------+------------+------------+
| Id | mac | createtime | STANumber | RSSI | Radio | STAType | HostName | UpTime | AverRxRate | AverTxRate |
+----+--------------+---------------+-----------+------+-------+---------+---------------------------+--------+------------+------------+
| 2 | 8C73A04738A0 | 1645509973681 | 4 | -67 | 2.4G | Phone | HONOR_Play3-bd1ee10b1fad1 | 9272 | 0.00 | 0.00 |
+----+--------------+---------------+-----------+------+-------+---------+---------------------------+--------+------------+------------+
1 row in set (0.00 sec)
4.查询mac为3085EB953060 时间为1645509973251的终端
SELECT
Id,
content->>"$.MAC" mac,
content->>"$.timestamp" createtime,
content->>"$.data.STANumber" STANumber,
content->>"$.data.Devices[1].RSSI" RSSI,
content->>"$.data.Devices[1].Radio" Radio,
content->>"$.data.Devices[1].STAType" STAType,
content->>"$.data.Devices[1].HostName" HostName,
content->>"$.data.Devices[1].UpTime" UpTime,
content->>"$.data.Devices[1].AverRxRate" AverRxRate,
content->>"$.data.Devices[1].AverTxRate" AverTxRate
FROM stainfo WHERE JSON_CONTAINS(content->"$", '[1645509973251,"3085EB953060"]');
5. 查询mac为8C73A04738A0,A87484F20DDC的终端
SELECT
Id,
content->>"$.MAC" mac,
content->>"$.timestamp" createtime,
content->>"$.data.STANumber" STANumber,
content->>"$.data.Devices[1].RSSI" RSSI,
content->>"$.data.Devices[1].Radio" Radio,
content->>"$.data.Devices[1].STAType" STAType,
content->>"$.data.Devices[1].HostName" HostName,
content->>"$.data.Devices[1].UpTime" UpTime,
content->>"$.data.Devices[1].AverRxRate" AverRxRate,
content->>"$.data.Devices[1].AverTxRate" AverTxRate
FROM stainfo WHERE JSON_OVERLAPS(content->"$.MAC", '["8C73A04738A0","A87484F20DDC"]');
+----+--------------+---------------+-----------+------+-------+---------+---------------------------+--------+------------+------------+
| Id | mac | createtime | STANumber | RSSI | Radio | STAType | HostName | UpTime | AverRxRate | AverTxRate |
+----+--------------+---------------+-----------+------+-------+---------+---------------------------+--------+------------+------------+
| 2 | 8C73A04738A0 | 1645509973681 | 4 | -67 | 2.4G | Phone | HONOR_Play3-bd1ee10b1fad1 | 9272 | 0.00 | 0.00 |
| 9 | A87484F20DDC | 1645509974429 | 1 | NULL | NULL | NULL | NULL | NULL | NULL | NULL |
+----+--------------+---------------+-----------+------+-------+---------+---------------------------+--------+------------+------------+
2 rows in set (0.00 sec)
6.平均每户下挂终端数及运行时长
select t.Radio,avg(t.STANumber) STANumber ,avg(t.UpTime) UpTime from(
SELECT
Id,
content->>"$.MAC" mac,
content->>"$.timestamp" createtime,
content->>"$.data.STANumber" STANumber,
content->>"$.data.Devices[1].RSSI" RSSI,
content->>"$.data.Devices[1].Radio" Radio,
content->>"$.data.Devices[1].UpTime" UpTime
FROM stainfo)t
group by Radio;
+-------+--------------------+--------------------+
| Radio | STANumber | UpTime |
+-------+--------------------+--------------------+
| NULL | 0.5600578871201157 | NULL |
| 2.4G | 2.9991568296795954 | 202308.10708263068 |
| | 2.991228070175439 | 716069.4166666666 |
| 5G | 2.925925925925926 | 118172.07407407407 |
+-------+--------------------+--------------------+