Submitted byCategory
Review Cycle
.
Public
Joachim Mutter/sysarc
on 01/02/2009 at 03:29 PM
SSiS\Code

Lookup Task replacement

Problem:


Control Flow


Data Flow Task "Fill Recordset"


Data Flow Task "Lookup Dataflow"





' This Script provides the ability to use a former filled recordset as lookup table
' If a Key of a given Input Row exist in the internal row, we add a row for the existingBuffer
' If not, we add a row for the missing Buffer

Imports System
Imports System.Data
Imports System.Math
Imports Microsoft.SqlServer.Dts.Pipeline.Wrapper
Imports Microsoft.SqlServer.Dts.Runtime.Wrapper


Public Class ScriptMain
    Inherits UserComponent

    Dim ContactTable As New Data.DataTable                                      ' Data.DataTable object to hold the reference dataset
    Private normalBuffer As Microsoft.SqlServer.Dts.Pipeline.PipelineBuffer     ' Buffer for valid rows
    Private errorBuffer As Microsoft.SqlServer.Dts.Pipeline.PipelineBuffer      ' Buffer for invalid rows

    ''Override the PreExecute method to intialize the Data.DataTable objects
    Public Overrides Sub PreExecute()
        MyBase.PreExecute()

        ' Initialize the Data.DataTable object to hold the reference dataset only if available.
        If Not Me.Variables.OLEDBRecordset.ToString() = "System.Object" Then
            Dim oledb As New Data.OleDb.OleDbDataAdapter

            ' Fill the Data.DataTable object from the reference dataset populated using the Execute SQL Task
            oledb.Fill(ContactTable, Me.Variables.OLEDBRecordset)

            ' Sort the column to enable Find method to lookup for the values in the reference column
            ContactTable.DefaultView.Sort = "ContactID ASC"

            System.Console.WriteLine("Count of Rows in internal lookup table : {0}", ContactTable.Rows.Count.ToString())
        End If
    End Sub

    ' A generic function to find input value in the reference dataset
    Private Function LookupReferenceValue(ByVal ContactID As Integer) As Boolean
        ' Lookup in reference dataset only if data exists in reference dataset
        If ContactTable.Rows.Count > 0 Then
            ' If the input column value is not found in the reference dataset then return false
            If ContactTable.DefaultView.Find(ContactID) < 0 Then Return False
        Else
            ' If the data does not exists in reference dataset then flag the row as invalid
            Return False
        End If
        Return True
    End Function

    Public Overrides Sub Input0_ProcessInputRow(ByVal Row As Input0Buffer)
        Dim isExisting As Boolean
        Dim comments As String

        If Row.ContactID_IsNull Then        'NOTE 1: The input column value cannot be NULL, this is an error
            isExisting = False
            comments = "Reference for ContactID is NULL"
        ElseIf Row.ContactID = 0 Then       'NOTE 2: If input column value cannot be 0, this is an error
            isExisting = False
            comments = "Reference for ContactID is 0"
        Else                                ' Lookup input column 'ContactID' value.
            isExisting = LookupReferenceValue(Row.ContactID)
            comments = "Reference for ContactID not found. "
        End If

        If isExisting Then                  ' is this an exisitng row?
            ExistingRowsBuffer.AddRow()     ' Add a new row to the Output2Buffer
            With ExistingRowsBuffer
                .ContactID = Row.ContactID
                .FirstName = Row.FirstName
                .Middlename = Row.MiddleName
                .LastName = Row.LastName
                .EmailAddress = Row.EmailAddress
            End With
        Else                                ' No, so use the error Output
            MissingRowsBuffer.AddRow()      ' Add a new row to the Output1Buffer
            With MissingRowsBuffer
                .Comments = comments
                .ContactID = Row.ContactID
            End With

            Dim newRow As DataRow
            newRow = ContactTable.NewRow()

            With newRow
                .Item(0) = Row.ContactID
                .Item(1) = Row.FirstName
                .Item(2) = Row.MiddleName
                .Item(3) = Row.LastName
                .Item(4) = Row.EmailAddress
            End With
            ContactTable.Rows.Add(newRow)
            ContactTable.DefaultView.Sort = "ContactID ASC"

            If LookupReferenceValue(Row.ContactID) = False Then
                MsgBox("Not really existent for " + Row.FirstName + " " + Row.LastName, , "Count = " + ContactTable.Rows.Count.ToString())
            End If
        End If
    End Sub

    Public Overrides Sub PrimeOutput(ByVal Outputs As Integer, ByVal OutputIDs() As Integer, ByVal Buffers() As Microsoft.SqlServer.Dts.Pipeline.PipelineBuffer)
        MyBase.PrimeOutput(Outputs, OutputIDs, Buffers)

        ' save underlying PipelineBuffer to allow for calls to PipelineBuffer methods
        If Outputs < 1 Then MsgBox("There is only one buffer, please define MissingRowsBuffer and ExistingRowsBuffer")
        errorBuffer = Buffers(0)
        normalBuffer = Buffers(1)
    End Sub
End Class



DTSX pacakge

Package.dtsx