ABOUT ME

-

Today
-
Yesterday
-
Total
-
  • 파싱 후 집계 샘플
    Physical 2021. 7. 16. 15:48
    -- 파싱 대상 테이블 생성(테이블들과 필터링조건은 생략)
    select d.gserial
         , convert(nvarchar(max), ginfo) as ginfo
      into ijchoi.dbo.cr_goods
      from xxx a with(nolock);
    -- 2939
    
    -- 아래 로우로 분리하는 펑션 사용시 <> 형태나 '&'의 html 태그가 있으면 에러 나므로
    -- 데이터를 변경하거나 펑션을 변경해야함.
    -- 정제
    select gserial
         , replace(replace(replace(replace(replace(replace(replace(replace(replace(replace(replace(replace(replace(replace(replace(
             left(ginfo, charindex(N'</p>', ginfo) - 1), N'전성분표시', '')
             , N'전성분]', '')
             , N'전성분', '')
             , N'<br />', '')
             , N'</strong>', '')
             , N'</span>', '')
             , N'<strong>', '')
             , N'<span style="font-size:12px">', '')
             , N'<span style="background-color:rgb(255, 255, 255)">', '')
             , N'<span style="color:rgb(70, 70, 70); font-family:맑은 고딕">', '')
             , N'<span style="background-color:rgb(255, 255, 255); color:rgb(34, 34, 34); font-family:나눔고딕,nanumgothic,helvetica neue,apple sd gothic neo,applegothic,돋움,dotum,sans-serif; font-size:12px">', '')
             , N'&nbsp;', '')
             , N' ', '')
             , CHAR(10), '')
             , CHAR(13), '') as ginfo
      into ijchoi.dbo.cr_goods_2
      from (
            select gserial
                 , substring(ginfo, charindex(N'전성분', ginfo), len(ginfo) - charindex(N'전성분', ginfo)) as ginfo
              from ijchoi.dbo.cr_goods
           ) a
     where ginfo like N'%전성분%'
    -- 972
    
    select *
      from ijchoi.dbo.cr_goods_2
     order by ginfo desc
    
    
    /*
    -- 로우분리용 펑션 생성
    use ijchoi
    go
    
    drop function [dbo].[SplitStrings]
    go
    
    CREATE FUNCTION [dbo].[SplitStrings]
    (
       @List      NVARCHAR(MAX),
       @Delimiter NVARCHAR(255)
    )
    RETURNS TABLE
    AS
    RETURN (
            SELECT Item = CONVERT(nvarchar, Item)
              FROM (
                    SELECT Item = x.i.value(N'(./text())[1]', N'nvarchar(max)')
                      FROM (
                            SELECT [XML] = CONVERT(XML, N'<i>' + REPLACE(@List, @Delimiter, N'</i><i>') + N'</i>').query(N'.')
                           ) AS a
                     CROSS APPLY [XML].nodes(N'i') AS x(i)
                   ) AS y
             WHERE Item IS NOT NULL
           )
    go
    */
    
    select a.gserial, b.Item, a.ginfo
      from ijchoi.dbo.cr_goods_2 a
     cross apply ijchoi.dbo.SplitStrings(a.ginfo, N',') b
     order by b.Item;
    
    select b.Item, count(*)
      from ijchoi.dbo.cr_goods_2 a
     cross apply ijchoi.dbo.SplitStrings(a.ginfo, N',') b
     group by b.Item
     order by b.Item;
    
    select b.Item, count(distinct a.Gserial)
      from ijchoi.dbo.cr_goods_2 a
     cross apply ijchoi.dbo.SplitStrings(a.ginfo, N',') b
     group by b.Item
     order by count(distinct a.Gserial) desc;
    
    판매중 : 2,969
    전성분 : 972
    정제및콤마 : 19,836
    그룹핑 : 2,465

     

     

Designed by Tistory.