T-SQL Type 2 SCD с BINARY_CHECKSUM на лету - PullRequest
0 голосов
/ 11 января 2019

Я пытаюсь реализовать тип 2 SCD для моей работы в T-SQL. Я придерживаюсь замечательного подхода, приведенного в этой ссылке: https://www.mssqltips.com/sqlservertip/2883/using-the-sql-server-merge-statement-to-process-type-2-slowly-changing-dimensions/

Однако в моем случае в таблицах нет встроенного столбца BinaryCheckSum. Я должен вычислять то же самое на лету. Я публикую свой код ниже (с некоторыми образцами данных) для этой реализации.

Может ли кто-нибудь дать мне знать, если этот подход работает нормально? Или мне нужно иметь DimensionCheckSum в самом столбце? Кроме того, в моем случае мне не нужно суррогатное удостоверение личности. (Прошу прощения за любые синтаксические ошибки)

/*
--CREATING A SAMPLE SCD TABLE WITH NO DATA
CREATE TABLE SCD_ACCOUNT
(
     SCD_ACCOUNT_ID INT IDENTITY(1,1) NOT NULL,
     SCD_SRC_ID INT NULL,
     SCD_SF_SRC_ID INT NULL,
     SCD_SF_ATTR1 VARCHAR(100) NULL,
     SCD_SF_ATTR2 VARCHAR(100) NULL,
     SCD_SF_ATTR3 VARCHAR(100) NULL,
     SCD_SF_ATTR4 VARCHAR(100) NULL,
     SCD_SF_ATTR5 VARCHAR(100) NULL,
     SCD_EFF_DATE DATETIME NOT NULL CONSTRAINT DF_SCD_EFF_DATE DEFAULT GETDATE(),
     SCD_END_DATE DATETIME NOT NULL CONSTRAINT DF_SCD_END_DATE DEFAULT '12/31/9999',
     SCD_IS_ACTIVE VARCHAR(10) NULL CONSTRAINT DF_SCD_IS_ACTIVE DEFAULT 'Y',
     SCD_LAST_UPD_DATE DATETIME NOT NULL CONSTRAINT DF_SCD_LAST_UPD_DATE DEFAULT GETDATE(),
     SCD_LAST_UPD_BY VARCHAR(100) NOT NULL CONSTRAINT DF_SCD_LAST_UPD_BY DEFAULT SUSER_SNAME());

SELECT * FROM SCD_ACCOUNT;

--CREATING A SOURCE TABLE WHICH WILL INSERT/UPDATE DATA INTO SCD TABLE
--THIS WILL HAVE THE SAME COLUMNS AS ABOVE TABLE EXCEPT SCD_ACCOUNT_ID, SCD_SRC_ID, SCD_IS_ACTIVE, SCD_EFF_DATE, SCD_END_DATE

CREATE TABLE SF_SRC
(
     SF_SRC_ID INT NULL,
     SF_ATTR1 VARCHAR(100) NULL,
     SF_ATTR2 VARCHAR(100) NULL,
     SF_ATTR3 VARCHAR(100) NULL,
     SF_ATTR4 VARCHAR(100) NULL,
     SF_ATTR5 VARCHAR(100) NULL);

--INSERTING DATA INTO SF_SRC TABLE

INSERT INTO SF_SRC VALUES(100,'Row1Attr1','Row1Attr2','Row1Attr3','Row1Attr4','Row1Attr5');
INSERT INTO SF_SRC VALUES(200,'Row2Attr1','Row2Attr2','Row2Attr3','Row2Attr4','Row2Attr5');
INSERT INTO SF_SRC VALUES(300,'Row3Attr1','Row3Attr2','Row3Attr3','Row3Attr4','Row3Attr5');

SELECT * FROM SF_SRC;

--DROP TABLE SCD_ACCOUNT;
--DROP TABLE SF_SRC;   
*/

-- BEGIN OF INSERT USING MERGE
INSERT INTO dbo.SCD_ACCOUNT
( --TABLE AND COLUMNS IN WHICH TO INSERT THE DATA
  SCD_SF_SRC_ID,
  SCD_SF_ATTR1,
  SCD_SF_ATTR2,
  SCD_SF_ATTR3,
  SCD_SF_ATTR4,
  SCD_SF_ATTR5,
  SCD_EFF_DATE,
  SCD_END_DATE,
  BINARY_CHECKSUM(SCD_SF_SRC_ID, SCD_SF_ATTR1, SCD_SF_ATTR2, SCD_SF_ATTR3, SCD_SF_ATTR4, SCD_SF_ATTR5) AS SCD_DIMENSION_CHECKSUM
)
-- SELECT THE ROWS/COLUMNS TO INSERT THAT ARE OUTPUT FROM THIS MERGE STATEMENT 
-- IN THIS EXAMPLE, THE ROWS TO BE INSERTED ARE THE ROWS THAT HAVE CHANGED (UPDATE).
SELECT    
  SCD_SF_SRC_ID,
  SCD_SF_ATTR1,
  SCD_SF_ATTR2,
  SCD_SF_ATTR3,
  SCD_SF_ATTR4,
  SCD_SF_ATTR5,
  SCD_EFF_DATE,
  SCD_END_DATE,
  BINARY_CHECKSUM(SCD_SF_SRC_ID, SCD_SF_ATTR1, SCD_SF_ATTR2, SCD_SF_ATTR3, SCD_SF_ATTR4, SCD_SF_ATTR5) AS SCD_DIMENSION_CHECKSUM
FROM
(
  -- THIS IS THE BEGINNING OF THE MERGE STATEMENT.
  -- THE TARGET MUST BE DEFINED, IN THIS EXAMPLE IT IS OUR SLOWLY CHANGING
  -- DIMENSION TABLE
  MERGE INTO dbo.SCD_ACCOUNT AS target
  -- THE SOURCE MUST BE DEFINED WITH THE USING CLAUSE
  USING 
  (
    -- THE SOURCE IS MADE UP OF THE ATTRIBUTE COLUMNS FROM THE STAGING TABLE.
    SELECT 
    SF_SRC_ID,
    SF_ATTR1,
    SF_ATTR2,
    SF_ATTR3,
    SF_ATTR4
    SF_ATTR5,
    BINARY_CHECKSUM(SF_SRC_ID, SF_ATTR1, SF_ATTR2, SF_ATTR3, SF_ATTR4, SF_ATTR5) AS SRC_DIMENSION_CHECKSUM
    FROM dbo.SF_SRC
  ) AS SOURCE 
  ( 
    SF_SRC_ID,
    SF_ATTR1,
    SF_ATTR2,
    SF_ATTR3,
    SF_ATTR4,
    SF_ATTR5,
    BINARY_CHECKSUM(SF_SRC_ID, SF_ATTR1, SF_ATTR2, SF_ATTR3, SF_ATTR4, SF_ATTR5) AS SRC_DIMENSION_CHECKSUM
  ) ON --WE ARE MATCHING ON THE SOURCESYSTEMID IN THE TARGET TABLE AND THE SOURCE TABLE.
  (
    TARGET.SCD_SF_SRC_ID = SOURCE.SF_SRC_ID
  )
  -- IF THE ID'S MATCH BUT THE CHECKSUMS ARE DIFFERENT, THEN THE RECORD HAS CHANGED;
  -- THEREFORE, UPDATE THE EXISTING RECORD IN THE TARGET, END DATING THE RECORD 
  -- AND SET THE CURRENTRECORD FLAG TO N
  WHEN MATCHED AND TARGET.BINARY_CHECKSUM(SCD_SF_SRC_ID, SCD_SF_ATTR1, SCD_SF_ATTR2, SCD_SF_ATTR3, SCD_SF_ATTR4, SF_ATTR5) 
                <> SOURCE.BINARY_CHECKSUM(SF_SRC_ID, SF_ATTR1, SF_ATTR2, SF_ATTR3, SF_ATTR4, SF_ATTR5) 
               AND TARGET.SCD_IS_ACTIVE = 'Y'

  THEN 
  UPDATE SET 
    SCD_END_DATE = GETDATE()-1, 
    SCD_IS_ACTIVE = 'N', 
    SCD_LAST_UPD_DATE = GETDATE(), 
    SCD_LAST_UPD_BY = SUSER_SNAME()
  -- IF THE ID'S DO NOT MATCH, THEN THE RECORD IS NEW;
  -- THEREFORE, INSERT THE NEW RECORD INTO THE TARGET USING THE VALUES FROM THE SOURCE.

  WHEN NOT MATCHED THEN  
  INSERT 
  (
      SCD_SF_SRC_ID,
      SCD_SF_ATTR1,
      SCD_SF_ATTR2,
      SCD_SF_ATTR3,
      SCD_SF_ATTR4,
      SCD_SF_ATTR5
  )
  VALUES 
  (
    SOURCE.SF_SRC_ID, 
    SOURCE.SF_ATTR1,
    SOURCE.SF_ATTR2,
    SOURCE.SF_ATTR3,
    SOURCE.SF_ATTR4,
    SOURCE.SF_ATTR5
  )
  OUTPUT $ACTION, 
    SOURCE.SF_SRC_ID, 
    SOURCE.SF_ATTR1,
    SOURCE.SF_ATTR2,
    SOURCE.SF_ATTR3,
    SOURCE.SF_ATTR4,
    SOURCE.SF_ATTR5,
    GETDATE(),
    '12/31/9999'
) -- THE END OF THE MERGE STATEMENT
--THE CHANGES OUTPUT BELOW ARE THE RECORDS THAT HAVE CHANGED AND WILL NEED
--TO BE INSERTED INTO THE SLOWLY CHANGING DIMENSION.
AS CHANGES
(
  ACTION, 
  SCD_SF_SRC_ID, 
  SCD_SF_ATTR1,
  SCD_SF_ATTR2,
  SCD_SF_ATTR3,
  SCD_SF_ATTR4,
  SCD_SF_ATTR5,
  SCD_EFF_DATE,
  SCD_END_DATE
)
WHERE ACTION='UPDATE';

1 Ответ

0 голосов
/ 14 января 2019

Мне удалось найти решение самостоятельно. Разделите это ниже на случай, если кому-то понадобится:

INSERT INTO dbo.SCD_ACCOUNT
(SCD_SF_SRC_ID,  SCD_SF_ATTR1,  SCD_SF_ATTR2,  SCD_SF_ATTR3,  SCD_SF_ATTR4,  SCD_SF_ATTR5,  SCD_EFF_DATE,  SCD_END_DATE)
SELECT SCD_SF_SRC_ID,  SCD_SF_ATTR1,  SCD_SF_ATTR2,  SCD_SF_ATTR3,  SCD_SF_ATTR4,  SCD_SF_ATTR5,  SCD_EFF_DATE,  SCD_END_DATE
FROM
(MERGE INTO dbo.SCD_ACCOUNT AS TARGET USING 
  (SELECT SF_SRC_ID,SF_ATTR1,SF_ATTR2,SF_ATTR3,SF_ATTR4,SF_ATTR5 FROM dbo.SF_SRC) AS SOURCE 
    (SF_SRC_ID,SF_ATTR1,SF_ATTR2,SF_ATTR3,SF_ATTR4, SF_ATTR5) ON 
    (TARGET.SCD_SF_SRC_ID = SOURCE.SF_SRC_ID)
  WHEN MATCHED AND BINARY_CHECKSUM(TARGET.SCD_SF_SRC_ID, TARGET.SCD_SF_ATTR1, TARGET.SCD_SF_ATTR2, TARGET.SCD_SF_ATTR3, TARGET.SCD_SF_ATTR4, TARGET.SCD_SF_ATTR5) 
                <> BINARY_CHECKSUM(SOURCE.SF_SRC_ID, SOURCE.SF_ATTR1, SOURCE.SF_ATTR2, SOURCE.SF_ATTR3, SOURCE.SF_ATTR4, SOURCE.SF_ATTR5) 
               AND TARGET.SCD_IS_ACTIVE = 'Y'
THEN UPDATE SET SCD_END_DATE = GETDATE()-1, SCD_IS_ACTIVE = 'N', SCD_LAST_UPD_DATE = GETDATE(), SCD_LAST_UPD_BY = SUSER_SNAME()
WHEN NOT MATCHED THEN INSERT (SCD_SF_SRC_ID,SCD_SF_ATTR1,SCD_SF_ATTR2,SCD_SF_ATTR3,SCD_SF_ATTR4,SCD_SF_ATTR5) VALUES 
        (SOURCE.SF_SRC_ID,SOURCE.SF_ATTR1,SOURCE.SF_ATTR2,SOURCE.SF_ATTR3,SOURCE.SF_ATTR4,SOURCE.SF_ATTR5)
         OUTPUT $ACTION, SOURCE.SF_SRC_ID,SOURCE.SF_ATTR1,SOURCE.SF_ATTR2,SOURCE.SF_ATTR3,SOURCE.SF_ATTR4,
         SOURCE.SF_ATTR5,GETDATE(),'12/31/9999')AS CHANGES
    (ACTION,SCD_SF_SRC_ID,SCD_SF_ATTR1,SCD_SF_ATTR2,SCD_SF_ATTR3,SCD_SF_ATTR4,SCD_SF_ATTR5,SCD_EFF_DATE,SCD_END_DATE)
WHERE ACTION='UPDATE';
Добро пожаловать на сайт PullRequest, где вы можете задавать вопросы и получать ответы от других членов сообщества.
...