-- 파싱 대상 테이블 생성(테이블들과 필터링조건은 생략)
select d.gserial
, convert(nvarchar(max), ginfo) as ginfo
into ijchoi.dbo.cr_goods
from xxx a with(nolock);
-- 2939
-- 아래 로우로 분리하는 펑션 사용시 <> 형태나 '&'의 html 태그가 있으면 에러 나므로
-- 데이터를 변경하거나 펑션을 변경해야함.
-- 정제
select gserial
, replace(replace(replace(replace(replace(replace(replace(replace(replace(replace(replace(replace(replace(replace(replace(
left(ginfo, charindex(N'</p>', ginfo) - 1), N'전성분표시', '')
, N'전성분]', '')
, N'전성분', '')
, N'<br />', '')
, N'</strong>', '')
, N'</span>', '')
, N'<strong>', '')
, N'<span style="font-size:12px">', '')
, N'<span style="background-color:rgb(255, 255, 255)">', '')
, N'<span style="color:rgb(70, 70, 70); font-family:맑은 고딕">', '')
, N'<span style="background-color:rgb(255, 255, 255); color:rgb(34, 34, 34); font-family:나눔고딕,nanumgothic,helvetica neue,apple sd gothic neo,applegothic,돋움,dotum,sans-serif; font-size:12px">', '')
, N' ', '')
, N' ', '')
, CHAR(10), '')
, CHAR(13), '') as ginfo
into ijchoi.dbo.cr_goods_2
from (
select gserial
, substring(ginfo, charindex(N'전성분', ginfo), len(ginfo) - charindex(N'전성분', ginfo)) as ginfo
from ijchoi.dbo.cr_goods
) a
where ginfo like N'%전성분%'
-- 972
select *
from ijchoi.dbo.cr_goods_2
order by ginfo desc
/*
-- 로우분리용 펑션 생성
use ijchoi
go
drop function [dbo].[SplitStrings]
go
CREATE FUNCTION [dbo].[SplitStrings]
(
@List NVARCHAR(MAX),
@Delimiter NVARCHAR(255)
)
RETURNS TABLE
AS
RETURN (
SELECT Item = CONVERT(nvarchar, Item)
FROM (
SELECT Item = x.i.value(N'(./text())[1]', N'nvarchar(max)')
FROM (
SELECT [XML] = CONVERT(XML, N'<i>' + REPLACE(@List, @Delimiter, N'</i><i>') + N'</i>').query(N'.')
) AS a
CROSS APPLY [XML].nodes(N'i') AS x(i)
) AS y
WHERE Item IS NOT NULL
)
go
*/
select a.gserial, b.Item, a.ginfo
from ijchoi.dbo.cr_goods_2 a
cross apply ijchoi.dbo.SplitStrings(a.ginfo, N',') b
order by b.Item;
select b.Item, count(*)
from ijchoi.dbo.cr_goods_2 a
cross apply ijchoi.dbo.SplitStrings(a.ginfo, N',') b
group by b.Item
order by b.Item;
select b.Item, count(distinct a.Gserial)
from ijchoi.dbo.cr_goods_2 a
cross apply ijchoi.dbo.SplitStrings(a.ginfo, N',') b
group by b.Item
order by count(distinct a.Gserial) desc;
판매중 : 2,969
전성분 : 972
정제및콤마 : 19,836
그룹핑 : 2,465