MYSQL按照分隔符拆分字段
问题建表和数据功能实现与分析遗漏了重复数据最恰当方法问题
有俩张表,表1 Td,表2 Tc,Td表的case_num字段包含Tc的case_num字段,查找td的case_num字段的值全部在Tc状态为1中。
建表和数据
create table Td (
id varchar(10),
case_num varchar(10),
document_name varchar(200),
PRIMARY KEY (ID)
);
CREATE TABLE Tc (
id varchar(10),
case_num varchar(10),
CASE_NAME varchar(20),
STATUS varchar(10),
PRIMARY KEY (id)
);
SELECT * FROM Td;
SELECT * FROM Tc;
INSERT INTO td (id, case_num, document_name) VALUES (‘1’, ‘A,G,C’, ‘A’);
INSERT INTO td (id, case_num, document_name) VALUES (‘2’, ‘A,F’, ‘B’);
INSERT INTO td (id, case_num, document_name) VALUES (‘3’, ‘F,W’, ‘C’);
INSERT INTO td (id, case_num, document_name) VALUES (‘4’, ‘A,W,A’, ‘D’);
INSERT INTO td (id, case_num, document_name) VALUES (‘5’, ‘A,A,A,AA,A’, ‘F’);
INSERT INTO tc (id, case_num, CASE_NAME, STATUS) VALUES (‘1’, ‘A’, ‘A’, ‘1’);
INSERT INTO tc (id, case_num, CASE_NAME, STATUS) VALUES (‘2’, ‘G’, ‘G’, ‘1’);
INSERT INTO tc (id, case_num, CASE_NAME, STATUS) VALUES (‘3’, ‘C’, ‘C’, ‘1’);
INSERT INTO tc (id, case_num, CASE_NAME, STATUS) VALUES (‘4’, ‘W’, ‘W’, ‘1’);
INSERT INTO tc (id, case_num, CASE_NAME, STATUS) VALUES (‘5’, ‘F’, ‘F’, ‘2’);
INSERT INTO tc (id, case_num, CASE_NAME, STATUS) VALUES (‘6’, ‘AA’, ‘G’, ‘1’);
功能实现与分析
经过各种思考,在不拆分Td的情况下,使用包含 Find_in_set,locate,re grex,like 总有遗漏的数据。
遗漏了重复数据
当Td中的字段包含同一个Tc字段中的case_num或者Td的部分case_num包含Tc字段中的case_num时,无法得到正确数据。
select a.case_num,LOCATE(b.case_num,a.case_num ),b.case_num from Td a , tc b
where LOCATE(b.case_num,a.case_num)>0 and b.status=1
GROUP BY a.case_num having count(1)=LENGTH(a.case_num)-LENGTH(replace(a.case_num,’,’,’’))+1;
select a.case_num,LOCATE(b.case_num,a.case_num ),b.case_num from Td a , tc b
where find_in_set(b.case_num,a.case_num) and b.status=1
GROUP BY a.case_num having count(1)=LENGTH(a.case_num)-LENGTH(replace(a.case_num,’,’,’’))+1;
以上只得到Td的A,G,C
select a.case_num,b.case_num ,replace(a.case_num,b.case_num,’’), (LENGTH(a.case_num) - LENGTH(replace(a.case_num,b.case_num,’’)) ) /LENGTH(b.case_num) cnt from Td a , tc b
where LENGTH(a.case_num)-LENGTH(replace(a.case_num,b.case_num,’’))>0 and b.status=1
GROUP BY a.case_num having sum(cnt)=LENGTH(a.case_num)-LENGTH(replace(a.case_num,’,’,’’))+1;
以上只得到Td的A,G,C 和 A,W,A
最恰当方法
建立一个序列表,序列表为分隔符最大的从1-最大值的一张有序表。(由于我的数据库没有访问mysql.help_topic的权限,所以只能自己制作一个序列表了)
create table Ts(
seq varchar(10)
);
insert into Ts values (1),(2),(3),(4),(5);
select splite_table.document_name,max_seq from
( select document_name, b.seq,
#a.case_num,
SUBSTRING_INDEX(SUBSTRING_INDEX(a.case_num,’,’,b.seq) ,’,’,-1) as case_num, (LENGTH(a.case_num) - LENGTH(replace(a.case_num,’,’,’’)) ) + 1 max_seq from Td a, Ts b
where seq<=LENGTH(a.case_num)-LENGTH(replace(a.case_num,’,’,’’))+1 ORDER BY a.document_name ) splite_table, Tc b
where splite_table.case_num=b.case_num and b.status=1 GROUP BY splite_table.document_name having count(1)=splite_table.max_seq
以上得到了所有Td的A,G,C 和 A,W,A, A,A,A,AA,A