Physical

파싱 후 집계 샘플

Zac 2021. 7. 16. 15:48
-- 파싱 대상 테이블 생성(테이블들과 필터링조건은 생략)
select d.gserial
     , convert(nvarchar(max), ginfo) as ginfo
  into ijchoi.dbo.cr_goods
  from xxx a with(nolock);
-- 2939

-- 아래 로우로 분리하는 펑션 사용시 <> 형태나 '&'의 html 태그가 있으면 에러 나므로
-- 데이터를 변경하거나 펑션을 변경해야함.
-- 정제
select gserial
     , replace(replace(replace(replace(replace(replace(replace(replace(replace(replace(replace(replace(replace(replace(replace(
         left(ginfo, charindex(N'</p>', ginfo) - 1), N'전성분표시', '')
         , N'전성분]', '')
         , N'전성분', '')
         , N'<br />', '')
         , N'</strong>', '')
         , N'</span>', '')
         , N'<strong>', '')
         , N'<span style="font-size:12px">', '')
         , N'<span style="background-color:rgb(255, 255, 255)">', '')
         , N'<span style="color:rgb(70, 70, 70); font-family:맑은 고딕">', '')
         , N'<span style="background-color:rgb(255, 255, 255); color:rgb(34, 34, 34); font-family:나눔고딕,nanumgothic,helvetica neue,apple sd gothic neo,applegothic,돋움,dotum,sans-serif; font-size:12px">', '')
         , N'&nbsp;', '')
         , N' ', '')
         , CHAR(10), '')
         , CHAR(13), '') as ginfo
  into ijchoi.dbo.cr_goods_2
  from (
        select gserial
             , substring(ginfo, charindex(N'전성분', ginfo), len(ginfo) - charindex(N'전성분', ginfo)) as ginfo
          from ijchoi.dbo.cr_goods
       ) a
 where ginfo like N'%전성분%'
-- 972

select *
  from ijchoi.dbo.cr_goods_2
 order by ginfo desc


/*
-- 로우분리용 펑션 생성
use ijchoi
go

drop function [dbo].[SplitStrings]
go

CREATE FUNCTION [dbo].[SplitStrings]
(
   @List      NVARCHAR(MAX),
   @Delimiter NVARCHAR(255)
)
RETURNS TABLE
AS
RETURN (
        SELECT Item = CONVERT(nvarchar, Item)
          FROM (
                SELECT Item = x.i.value(N'(./text())[1]', N'nvarchar(max)')
                  FROM (
                        SELECT [XML] = CONVERT(XML, N'<i>' + REPLACE(@List, @Delimiter, N'</i><i>') + N'</i>').query(N'.')
                       ) AS a
                 CROSS APPLY [XML].nodes(N'i') AS x(i)
               ) AS y
         WHERE Item IS NOT NULL
       )
go
*/

select a.gserial, b.Item, a.ginfo
  from ijchoi.dbo.cr_goods_2 a
 cross apply ijchoi.dbo.SplitStrings(a.ginfo, N',') b
 order by b.Item;

select b.Item, count(*)
  from ijchoi.dbo.cr_goods_2 a
 cross apply ijchoi.dbo.SplitStrings(a.ginfo, N',') b
 group by b.Item
 order by b.Item;

select b.Item, count(distinct a.Gserial)
  from ijchoi.dbo.cr_goods_2 a
 cross apply ijchoi.dbo.SplitStrings(a.ginfo, N',') b
 group by b.Item
 order by count(distinct a.Gserial) desc;

판매중 : 2,969
전성분 : 972
정제및콤마 : 19,836
그룹핑 : 2,465