• Marek Vavrovic

HuffnerTextile - SSIS Initial data load.


Examples of BI joins


-Dimensions


DimEmployee

Select

[EmployeeID] [EmployeeKey],

case

when [EmployeeID] = 2 then 1

when [EmployeeID] in (5,6,7,8,9,10,11,12,13,14) then 4

when [EmployeeID] = 16 then 15

when [EmployeeID] = 19 then 18

when [EmployeeID] in (21,22) then 20

when [EmployeeID] in (25,26,27,28) then 24

when [EmployeeID] in (1,3,4,15,17,18,20,23,24,30) then 29

when [EmployeeID] in (31,32) then 30

when [EmployeeID] = 33 then 31

when [EmployeeID] in (34,35,36,37,38,39,40) then 32

end as [ParentEmployeeKey],

e.[CountryID] [CountryKey],

[FirstName],

[LastName],

a.AddressLine CompanyAddressLine,

a.PostalCode,

a.City,

a.CityLatitude,

a.CityLongitude,

d.Name DepartmentName,

dg.Name DepartmentGroup,

e.StartDate HireDate,

[DOB] BirthDate,

[Gender],

[Status] MaritalStatus,

ee.EmployeesEmail,

ep.CountryPhoneCode,

ep.PhoneNumber,

e.[Active] CurrentFlag,

e.[StartDate],

e.[EndDate]

from HuffnerTextile.HumanResources.Employees e

left join HuffnerTextile.HumanResources.EmployeesEmail ee

on e.EmailID = ee.EmployeesEmailID

left join HuffnerTextile.HumanResources.EmployeesPhone ep

on e.PhoneID = ep.EmployeePhoneID

inner join HumanResources.Department d

on e.DepartmentID = d.DepartmentID

inner join HumanResources.DepartmentGroupName dg

on d.GroupNameID = dg.GroupNameID

inner join [HuffnerTextile].[Person].[Address] a

on e.AddressID = a.AddressID


This query is used to populate DimEmploye table which is the result of joining another 5 tables together. Each data flow uses a simple logic: from source to the destination. This is a source query.



DimSubDepotEmployee


DimSubDepotEmployee table contains information about the employees from Huffner's sub depots. There are ten of them and each employee has its boss. ParentPersonKey column doesn't exist in the transactional database, and it is created during the DW load using the case statement. A sample of the table lies under the script.

SELECT

[PersonID] [PersonKey]

, CASE

WHEN [PersonID] IN (2,3,4) THEN 1

WHEN [PersonID] IN (6,7,8) THEN 5

WHEN [PersonID] IN (10,11) THEN 9

WHEN [PersonID] IN (13,14,15) THEN 12

WHEN [PersonID] IN (17,18) THEN 16

WHEN [PersonID] IN (20,21) THEN 19

WHEN [PersonID] IN (23,24,25) THEN 22

WHEN [PersonID] IN (26,28,29) THEN 27

WHEN [PersonID] IN (31,32) THEN 30

WHEN [PersonID] IN (34,35,36,37) THEN 33

END AS ParentPersonKey

,[WarehouseID] [WarehouseKey]

,a.CountryID CountryKey

,[FirstName]

,[LastName]

,a.AddressLine CompanyAddressLine

,a.PostalCode

,a.City

,a.CityLatitude

,a.CityLongitude

,e.Email

,[Gender]

,t.ContactTitle

,[DOB] [BirthDate]

,p.[StartDate]

,p.[EndDate]

,p.[Active] [CurrentFlag]

FROM [HuffnerTextile].[Person].[WarehousePeople] p

inner join Person.Email e

on p.EmailID = e.EmailID

inner join Person.ContactType t

on p.ContactTypeID = t.ContactTypeID

inner join [HuffnerTextile].[Person].[Address] a

on p.AddressID = a.AddressID


DimSupplier


Select

[SupplierID] [SupplierKey],

[SupplierName] SupplierCompany,

c.FirstName,

c.LastName,

a.AddressLine CompanyAddressLine,

a.PostalCode,

a.City,

a.CityLatitude,

a.CityLongitude,

c.Gender,

c.DOB BirthDate,

p.CountryPhoneCode,

p.PhoneNumber,

e.SupplierEmail,

a.CountryID CountryKey

from Purchasing.Supplier s

inner join Purchasing.SupplierContact c

on s.ContactID = c.ContactID

inner join Purchasing.SupplierPhone p

on c.SupplierPhoneID = p.SupplierPhoneID

inner join Purchasing.SupplierEmail e

on c.SupplierEmailID = e.SupplierEmailID

inner join [Person].[Address] a

on s.AddressID = a.AddressID


sample data from the table

DimDate


DECLARE @StartDate date = '20110101';


DECLARE @CutoffDate date = DATEADD(DAY, -1, DATEADD(YEAR, 8, @StartDate));


;WITH seq(n) AS

(

SELECT 0

UNION ALL

SELECT n + 1 FROM seq

WHERE n < DATEDIFF(DAY, @StartDate, @CutoffDate)

),

d(d) AS

(

SELECT DATEADD(DAY, n, @StartDate) FROM seq

),

src AS

(

SELECT

[DateKey] = CONVERT(date, d),

[Day] = DATEPART(DAY, d),

[DayName] = DATENAME(WEEKDAY, d),

[Week] = DATEPART(WEEK, d),

[DayOfWeek] = DATEPART(WEEKDAY, d),

[Month] = DATEPART(MONTH, d),

[MonthName] = DATENAME(MONTH, d),

[Quarter] = DATEPART(Quarter, d),

[Year] = DATEPART(YEAR, d),

TheFirstOfMonth = DATEFROMPARTS(YEAR(d), MONTH(d), 1)

FROM d

),

dim AS

(

SELECT

DateSK = CONVERT(char(8), [DateKey], 112),

[DateKey],

[Day],

[DayName],

[DayOfWeek],

IsWeekend = CASE WHEN [DayOfWeek] IN (CASE @@DATEFIRST WHEN 1 THEN 6 WHEN 7 THEN 1 END,7)

THEN 1 ELSE 0 END,

[Week],

[Month],

[MonthName],

[Quarter],

QuarterDescr = CONCAT('Q',[Quarter]),

YearQuarterDescr =CONCAT(CONVERT(char(4), [Year]),CONCAT('Q',[Quarter])),

YearQuarterSort =CONCAT([Year],0,[Quarter]),

[Year],

IsLeapYear = CONVERT(bit, CASE WHEN ([Year] % 400 = 0)

OR ([Year] % 4 = 0 AND [Year] % 100 <> 0)

THEN 1 ELSE 0 END),

YYYYMMSort =CONVERT(char(6), [DateKey], 112),

YYYYMMDescr =CONCAT(CONVERT(char(4), [Year]),'_',FORMAT([datekey],'MMM'))



FROM src

)

SELECT * INTO HuffnerTextileDW.dbo.DimDate FROM dim

ORDER BY [DateKey]

OPTION (MAXRECURSION 0);


Fact table load (initial)


FactProductProduction


SELECT

CONVERT(char(8), [DateOfManufacture], 112) [DateOfManufactureKey],

[ProductProductionID] [ProductProductionKey]

,[SupplierID] [SupplierKey]

,[ChannelID] [ChanneKey]

,[ProductID] [ProductKey]

,ProductPrice

,ProductCost

,ProductWeight ProductWeight_g

,[RunningMeters]

,([RunningMeters]-[Defected]) Undefected

,[Defected]

,[DefectID] [DefectKey]

,[ProductPrice] * [RunningMeters] [RunningMetersTotalPrice]

,[ProductCost] * [RunningMeters] [RunningMetersTotalCost]

,Convert(decimal(8,2),([ProductWeight] * [RunningMeters]) /1000) [RunningMetersTotalWeight_kg]

,Convert(decimal(8,2),([RunningMeters]-[Defected]) * ProductPrice) UndefectedTotalPrice

,Convert(decimal(8,2),([RunningMeters]-[Defected]) * ProductCost) UndefectedTotalCost

,Convert(decimal(8,2),(([RunningMeters]-[Defected]) * ProductWeight) / 1000) UndefectedTotalWeight_kg

,Convert(decimal(8,2),([Defected] * ProductPrice)) DefectedTotalPrice

,Convert(decimal(8,2),([Defected] * ProductCost)) DefectedTotalCost

,Convert(decimal(8,2),([Defected] * ProductWeight) / 1000) DefectedTotalWeight_kg

,[DateOfManufacture]

FROM [HuffnerTextile].[Production].[ProductProduction]


Data flow in SSIS uses OLE DB Source and destination.

transaction table, sample data: This table doesn't contain any calculations. The calculations are done during the data loading.

data warehouse table, sample data:

FactSubDepotSales


SELECT

CONVERT(char(8), [ShipDate], 112) [ShipDateKey]

,[ShipDate]

,[SubDepotOrderID] [SubDepotOrderKey]

,[StockInventoryID] [StockInventoryKey]

,[EmployeeID] [EmployeeKey]

,DeliveryMethodID DeliveryMethodKey

,PaymentMethodID PaymentMethodKey

,s.[ProductID] [ProductKey]

,[WarehouseID] [WarehouseKey]

,[CountryID] [CountryKey]

,[PersonID] [PersonKey]

,[ChannelID] [ChannelKey]

,[Defected]

,[ProductLength]

,[Quantity]

,[SubTotal]

,[TaxAmount]

,[Discount] [Discount_pct]

,[TotalDue]

,Convert(decimal(6, 2),([ProductLength] * p.ProductWeight) / 1000) ProductWeight_kg

,[OrderDate]

,CONVERT(char(8), [OrderDate], 112) [OrderDateKey]

,[DateOfManufacture]

,CONVERT(char(8), [DateOfManufacture], 112) [DateOfManufactureKey]

FROM [HuffnerTextile].[Sales].[SubDepotSales] s

inner join [HuffnerTextile].Production.Product p

on s.ProductID = p.ProductID


All the ID's columns are aliased as Key columns so the mapping in SSIS is easier. This is a standard naming convention for primary or foreign key columns in DW.


FactClientSales


SELECT

CONVERT(char(8), [DeliveryDate], 112) [DeliveryDateKey]

,[DeliveryDate]

,[ClientOrderID] [ClientOrderKey]

,[EmployeeID] [EmployeeKey]

,DeliveryMethodID DeliveryMethodKey

,PaymentMethodID PaymentMethodKey

,[WarehouseID] [WarehouseKey]

,[CountryID] [CountryKey]

,[PersonID] [PersonKey]

,[ClientID] [ClientKey]

,[ChannelID] [ChannelKey]

,[Defected]

,p.[ProductID] [ProductKey]

,[ProductLength]

,[Quantity]

,[SubTotal]

,[TaxAmount]

,[TotalDue]

,Convert(decimal(6, 2),([ProductLength] * p.ProductWeight) / 1000) ProductWeight_kg

,[OrderDate]

,[DateOfManufacture]

,[ShipDate]

,CONVERT(char(8), [OrderDate], 112) [OrderDateKey]

,CONVERT(char(8), [DateOfManufacture], 112) [DateOfManufactureKey]

,CONVERT(char(8), [ShipDate], 112) [ShipDateKey]

FROM [HuffnerTextile].[Sales].[ClientSales] c

inner join [HuffnerTextile].Production.Product p

on c.ProductID = p.ProductID


If you have a look on the table, you can see that all the foreign keys are on the left side of the table and facts are on the right side. When a tabular model is created it is easier to hide these unnecessary columns from the end user, because they are all next to each other.


22 views0 comments