数据去重方案

oracle列转行函数LISTAGG

背景:在订单操作历史表中查询出最早操作改订单的人.

思考:百度搜索oracle列转行函数LISTAGG,然后再截取,达到去重。可是mysql库没有这样的函数。在去搜索mysql的类似方式,发现用GROUP_CONCAT 、GROUP和 ORFER BY 关键字即可实现。发现oracle 去重也有另一个方法去实现行去重 。

1.1 oracle列转行函数LISTAGG用

-- Create table
create table TBL_A
(
  col_id           NVARCHAR2(32) not null,
  col_order_id     NVARCHAR2(64),
  col_creator      NVARCHAR2(64),
  col_created_time DATE
)tablespace USERS;
-- Create/Recreate primary, unique and foreign key constraints 
alter table TBL_A
  add constraint COL_ID primary key (COL_ID)
  using index 
  tablespace USERS;
  insert into tbl_a (COL_ID, COL_ORDER_ID, COL_CREATOR, COL_CREATED_TIME)
values ('1001', 'd1001', '张三', to_date('15-07-2022 16:43:54', 'dd-mm-yyyy hh24:mi:ss'));
insert into tbl_a (COL_ID, COL_ORDER_ID, COL_CREATOR, COL_CREATED_TIME)
values ('1002', 'd1001', '李四', to_date('16-07-2022 16:44:51', 'dd-mm-yyyy hh24:mi:ss'));
insert into tbl_a (COL_ID, COL_ORDER_ID, COL_CREATOR, COL_CREATED_TIME)
values ('1003', 'd1001', '王五', to_date('17-07-2022 16:45:42', 'dd-mm-yyyy hh24:mi:ss'));
insert into tbl_a (COL_ID, COL_ORDER_ID, COL_CREATOR, COL_CREATED_TIME)
values ('1004', 'd1002', '诸葛', to_date('18-07-2022 16:43:54', 'dd-mm-yyyy hh24:mi:ss'));
insert into tbl_a (COL_ID, COL_ORDER_ID, COL_CREATOR, COL_CREATED_TIME)
values ('1005', 'd1002', '赵云', to_date('16-07-2022 16:44:51', 'dd-mm-yyyy hh24:mi:ss'));
insert into tbl_a (COL_ID, COL_ORDER_ID, COL_CREATOR, COL_CREATED_TIME)
values ('1006', 'd1002', '曹操', to_date('17-07-2022 16:45:42', 'dd-mm-yyyy hh24:mi:ss'));

listagg 最基本用法

SELECT listagg(to_char(col_creator),',') within group(order by col_created_time) name from tbl_a;

-- 结果:  name 
--       张三,李四,赵云,曹操,王五,诸葛

分组用法

SELECT col_order_id,listagg(to_char(col_creator),',') within group(order by col_created_time) NAME from tbl_a group by col_order_id;
-- 结果
  row_number col_order_id       NAME
-- 1	     d1001	         张三,李四,王五
-- 2	     d1002	         赵云,曹操,诸葛

查询出最早操作改订单的人

SELECT M, COL_ORDER_ID, COL_CREATOR, COL_CREATED_TIME
  FROM (SELECT ROW_NUMBER() OVER(PARTITION BY COL_ORDER_ID ORDER BY COL_CREATED_TIME DESC) M,
               COL_ORDER_ID,
               COL_CREATOR,
               COL_CREATED_TIME  FROM TBL_A)
WHERE M = 1

mysql 替代方式

GROUP_CONCAT行转列

select col_order_id,GROUP_CONCAT(col_creator order by col_created_time) col_creator
from tbl_a group by col_order_id;

-- 结果
col_order_id        col_creator
d1001,              "张三,李四,王五"
d1002,              "赵云,曹操,诸葛"

实现数据去重

select col_order_id,SUBSTRING_INDEX(GROUP_CONCAT(col_creator order by col_created_time),',',1) col_creator
from tbl_a group by col_order_id;


-- 结果
col_order_id  col_creator
d1001,        张三
d1002,        赵云

建表语句

create table tbl_a
(
    col_id           varchar(32) not null
        primary key,
    col_order_id     varchar(64) null,
    col_creator      varchar(64) null,
    col_created_time datetime    null
)
    collate = utf8_bin;
INSERT INTO allowpublickeyretrieval.tbl_a (col_id, col_order_id, col_creator, col_created_time) VALUES ('1001', 'd1001', '张三', '2022-07-15 16:43:54');
INSERT INTO allowpublickeyretrieval.tbl_a (col_id, col_order_id, col_creator, col_created_time) VALUES ('1002', 'd1001', '李四', '2022-07-16 16:44:51');
INSERT INTO allowpublickeyretrieval.tbl_a (col_id, col_order_id, col_creator, col_created_time) VALUES ('1003', 'd1001', '王五', '2022-07-17 16:45:42');
INSERT INTO allowpublickeyretrieval.tbl_a (col_id, col_order_id, col_creator, col_created_time) VALUES ('1004', 'd1002', '诸葛', '2022-07-18 16:43:54');
INSERT INTO allowpublickeyretrieval.tbl_a (col_id, col_order_id, col_creator, col_created_time) VALUES ('1005', 'd1002', '赵云', '2022-07-16 16:44:51');
INSERT INTO allowpublickeyretrieval.tbl_a (col_id, col_order_id, col_creator, col_created_time) VALUES ('1006', 'd1002', '曹操', '2022-07-17 16:45:42');
Logo

DAMO开发者矩阵,由阿里巴巴达摩院和中国互联网协会联合发起,致力于探讨最前沿的技术趋势与应用成果,搭建高质量的交流与分享平台,推动技术创新与产业应用链接,围绕“人工智能与新型计算”构建开放共享的开发者生态。

更多推荐