jump to navigation

INNER JOIN, LEFT OUTER JOINS and RIGHT OUTER JOIN for DataTables July 11, 2007

Posted by vmaceda in Computers and Internet.
trackback
I've been scavenging the net for resoures on how to do an inner join, right join or left join to a datatable but 
unfortunately dataTable has none native functions for .NET 1.1 and .NET 2.0. So I've manged to get one and tweak it a bit.
Below are the codes:
 
Imports Microsoft.VisualBasic
''' -----------------------------------------------------------------------------
''' Project: Reports.UI
''' Class   : JoinTable
''' 
''' -----------------------------------------------------------------------------
''' <summary>
''' This class will be use to join tables using inner, left and right outer joins and full outer join.
''' </summary>
''' <remarks>
''' </remarks>
''' <history>
'''  [Victor] 10/10/2006 Created
''' </history>
''' -----------------------------------------------------------------------------
Public Class JoinTable

#Region "Private Declarations"
    Private _dataSet As DataSet
    Private _columnList As ArrayList

#End Region

#Region "Properties"

    ''' -----------------------------------------------------------------------------
    ''' <summary>
    ''' This Property is used to get and set the ColumnList Property that
    ''' conatains the list of columns in a DataTable
    ''' </summary>
    ''' <value></value>
    ''' <remarks>
    ''' </remarks>
    ''' <history>
    '''  [Victor] 10/10/2006 Created
    ''' </history>
    ''' -----------------------------------------------------------------------------
    Public Property ColumnList() As ArrayList
        Get
            Return _columnList
        End Get
        Set(ByVal Value As ArrayList)
            _columnList = Value
        End Set
    End Property

    ''' -----------------------------------------------------------------------------
    ''' <summary>
    ''' This Property is used to get and set the DataSet that contains the dataTables
    ''' that will be used in the joining process.
    ''' </summary>
    ''' <value></value>
    ''' <remarks>
    ''' </remarks>
    ''' <history>
    '''  [Victor] 10/10/2006 Created
    ''' </history>
    ''' -----------------------------------------------------------------------------
    Public Property DataSet() As DataSet
        Get
            Return _dataSet
        End Get
        Set(ByVal Value As DataSet)
            _dataSet = Value
        End Set
    End Property

#End Region

#Region "Methods"

    ''' -----------------------------------------------------------------------------
    ''' <summary>
    ''' This Function is used to return a DataTable which is the Result of the following joins:
    ''' INNER, LEFT OUTER JOIN, RIGHT OUTER JOIN and FULL JOIN
    ''' </summary>
    ''' <param name="leftTable">Is the first DataTable also known as the Left Table</param>
    ''' <param name="rightTable">Is the second DataTable also known as the Right Table</param>
    ''' <param name="leftColumn">Is the Column in the Left dataTable that is need for the join operation</param>
    ''' <param name="rightColumn">Is the Column in the Right dataTable that is need for the join operation</param>
    ''' <param name="join">It should be either: "INNER", "LEFT", "RIGHT", "FULL"</param>
    ''' <returns>Returns a DataTable which is the Result of the following joins:
    ''' INNER, LEFT OUTER JOIN, RIGHT OUTER JOIN and FULL JOIN
    ''' </returns>
    ''' <remarks>
    ''' </remarks>
    ''' <history>
    '''  [Victor] 10/9/2006 Created
    ''' </history>
    ''' -----------------------------------------------------------------------------
    Public Function JoinTables(ByVal leftTable As DataTable, _ 
                               ByVal rightTable As DataTable, _ 
                               ByVal leftColumn As String, _ 
                               ByVal rightColumn As String, _ 
                               ByVal join As joinType) As DataTable
        Dim dataRowLeft, dataRowRight, dataRowDestination, dataRowTemp, dataRows() As DataRow
        Dim table, leftDataTable, rightDataTable As DataTable
        Dim rightCol, leftCol, column As String
        Dim rightColumnList, leftColumnList As ArrayList
        Dim columnIndex As Integer

        ' Make sure both tables are in the DataSet.  You can't use Relationships 
        ' unless both tables are in the same DataSet.  We take them out at the end.
        If rightTable.TableName = "" Then
            rightTable.TableName = "__RIGHTTABLE__"
            DataSet.Tables.Add(rightTable)
        End If
        If leftTable.TableName = "" Then
            leftTable.TableName = "__LEFTTABLE__"
            DataSet.Tables.Add(leftTable)
        End If

        ' build the detached TempTable that will be returned
        table = BuildTempTable()
        leftDataTable = leftTable
        rightDataTable = rightTable
        leftCol = leftColumn
        rightCol = rightColumn

        ' what kind of join is this?
        Select Case join
            Case join.inner '"INNER"
            Case join.left  '"LEFT"
            Case join.right '"RIGHT"
                ' We don't really do right joins... we just flip the arguments and
                ' do a left join instead.
                leftDataTable = rightTable
                rightDataTable = leftTable
                leftCol = rightColumn
                rightCol = leftColumn
                'join = "LEFT"
                join = joinType.left
            Case join.full  '"FULL"
                ' A full join is kinda complicated... we do a right join, then a left,
                ' then combine the two together.  The optimum execution plan for a full 
                ' join should be a "merge" operation... but let's not make things any
                ' more complicated than they already are.
                Dim temp As DataTable
                temp = JoinTables(rightTable, leftTable, rightColumn, leftColumn, join.left) '"LEFT"
                table = JoinTables(leftTable, rightTable, leftColumn, rightColumn, join.left) '"LEFT"

                dataRows = temp.Select(TabColName(leftTable.TableName, leftColumn) & " is null")
                For Each dataRowRight In dataRows
                    table.ImportRow(dataRowRight)
                Next
                Return table
            Case Else
                Throw New ApplicationException("SQL syntax error: Unknown Join type '" & join & "'")
        End Select

        Try
            ' create a relationship between the two tables
            DataSet.Relations.Add(New DataRelation("__RELATIONSHIP__", rightDataTable.Columns(rightCol), leftDataTable.Columns(leftCol), False))
        Catch ex As Exception
            Throw
        End Try

        ' let's go!
        dataRowTemp = table.NewRow
        leftColumnList = GetTableColumns(leftDataTable.TableName)
        rightColumnList = GetTableColumns(rightDataTable.TableName)
        For Each dataRowLeft In leftDataTable.Rows

            ' Get the related rows from the "right" table
            dataRows = dataRowLeft.GetParentRows("__RELATIONSHIP__")

            ' For inner joins, we don't record anything unless there is a matching row
            If UBound(dataRows) >= 0 Or join <> joinType.inner Then '"INNER"
                dataRowDestination = table.NewRow

                ' Let's start by just copying the columns from the "left" table
                If leftDataTable.TableName = "__LEFTTABLE__" Then
                    dataRowDestination.ItemArray = dataRowLeft.ItemArray
                Else
                    For Each column In leftColumnList
                        dataRowDestination(TabColName(leftDataTable.TableName, column)) = dataRowLeft(column)
                    Next
                End If

                ' There are three possibilities... there are no matching rows, there is
                ' only one related row, there are many related rows.
                Select Case UBound(dataRows)
                    Case -1
                        ' Just record the row as it is now (with just the columns from
                        ' the left table).
                        table.Rows.Add(dataRowDestination)
                    Case 0
                        dataRowRight = dataRows(0)
                        If rightDataTable.TableName = "__RIGHTTABLE__" Then
                            For columnIndex = 0 To rightDataTable.Columns.Count - 1
                                ' fill in the holes, but do not overwrite the data that
                                ' came from the left table
                                If IsDBNull(dataRowDestination(columnIndex)) Then
                                    dataRowDestination(columnIndex) = dataRowRight(columnIndex)
                                End If
                            Next
                        Else
                            For Each column In rightColumnList
                                dataRowDestination(TabColName(rightDataTable.TableName, column)) = dataRowRight(column)
                            Next
                        End If
                        table.Rows.Add(dataRowDestination)
                    Case Else
                        ' Make a copy of the prototype datarow that we already filled in
                        ' above.  It already has the column data from the left table.
                        dataRowTemp.ItemArray = dataRowDestination.ItemArray
                        For Each dataRowRight In dataRows
                            dataRowDestination = table.NewRow

                            ' Copy prototype row (the left table's data)
                            dataRowDestination.ItemArray = dataRowTemp.ItemArray

                            ' Copy the columns from the related rows in the right table
                            If rightDataTable.TableName = "__RIGHTTABLE__" Then
                                For columnIndex = 0 To rightDataTable.Columns.Count - 1
                                    ' fill in the holes, but do not overwrite the data
                                    ' that came from the left table
                                    If IsDBNull(dataRowDestination(columnIndex)) Then
                                        dataRowDestination(columnIndex) = dataRowRight(columnIndex)
                                    End If
                                Next
                            Else
                                For Each column In rightColumnList
                                    dataRowDestination(TabColName(rightDataTable.TableName, column)) = dataRowRight(column)
                                Next
                            End If
                            table.Rows.Add(dataRowDestination)
                        Next
                End Select
            End If
        Next

        'Clean-Up

        ' delete the temporary relationship we created above
        DataSet.Relations.Remove("__RELATIONSHIP__")

        ' remove the temporary DataTables from the DataSet
        If leftTable.TableName = "__LEFTTABLE__" Then
            DataSet.Tables.Remove(leftTable)
        End If
        If rightTable.TableName = "__RIGHTTABLE__" Then
            DataSet.Tables.Remove(rightTable)
        End If
        Return table
    End Function

    ''' -----------------------------------------------------------------------------
    ''' <summary>
    ''' This Method returns an ArrayList that Contains all the Columns of a DataTable
    ''' </summary>
    ''' <param name="table">A String that represents the name of the DataTable.</param>
    ''' <returns>Returns an ArrayList that Contains all the Columns of a DataTable</returns>
    ''' <remarks>
    ''' </remarks>
    ''' <history>
    '''  [Victor] 10/9/2006 Created
    ''' </history>
    ''' -----------------------------------------------------------------------------
    Private Function GetTableColumns(ByVal table As String) As ArrayList
        Dim ans As New ArrayList
        Dim column, columnName, tableName As String

        For Each column In ColumnList
            tableName = ParseNextToLastDot(column)
            columnName = ParseLastDot(column)

            If tableName = table Then
                ans.Add(columnName)
            End If
        Next
        Return ans
    End Function

    ''' -----------------------------------------------------------------------------
    ''' <summary>
    ''' This Function returns a String object that contains the 
    ''' name of the column in the table.
    ''' </summary>
    ''' <param name="tableName">Name of the Table</param>
    ''' <param name="columnName">Name of the Column</param>
    ''' <returns>returns a String object that contains the 
    ''' name of the column in the table.
    ''' </returns>
    ''' <remarks>
    ''' </remarks>
    ''' <history>
    '''  [Victor] 10/9/2006 Created
    ''' </history>
    ''' -----------------------------------------------------------------------------
    Private Function TabColName(ByVal tableName As String, _ 
                                ByVal columnName As String) As String
        Return (tableName.Replace(" ", "_").Trim("[]".ToCharArray) & "_" & columnName.Replace(" ", "_").Trim("[]".ToCharArray))
    End Function

    ''' -----------------------------------------------------------------------------
    ''' <summary>
    ''' This returns a parsed string given a Server.Database.Table.Column notation to ColumnName format
    ''' on the first to the last dot.
    ''' </summary>
    ''' <param name="buf"></param>
    ''' <returns>returns a parsed string object given a Server.Database.Table.Column notation to ColumnName format</returns>
    ''' <remarks>
    ''' </remarks>
    ''' <history>
    '''  [Victor] 10/9/2006 Created
    ''' </history>
    ''' -----------------------------------------------------------------------------
    Private Function ParseNextToLastDot(ByVal buf As String) As String
        Dim bufs(), ans As String
        If buf.IndexOf(".") > 0 Then
            bufs = buf.Split("."c)
            ans = bufs(UBound(bufs) - 1)
        End If
        Return ans
    End Function

    ''' -----------------------------------------------------------------------------
    ''' <summary>
    ''' This returns a parsed string given a Server.Database.Table.Column notation to ColumnName format on \
    ''' the last dot.
    ''' </summary>
    ''' <param name="buf"></param>
    ''' <returns>returns a parsed string object given a Server.Database.Table.Column notation to ColumnName format</returns>
    ''' <remarks>
    ''' </remarks>
    ''' <history>
    '''  [Victor] 10/9/2006 Created
    ''' </history>
    ''' -----------------------------------------------------------------------------
    Private Function ParseLastDot(ByVal buf As String) As String
        Dim bufs(), ans As String
        If buf.IndexOf(".") > 0 Then
            bufs = buf.Split("."c)
            ans = bufs(UBound(bufs))
        Else
            ans = buf
        End If
        Return ans
    End Function

    ''' -----------------------------------------------------------------------------
    ''' <summary>
    ''' This Function Returns a DataTable that serves as a Data Result from any of the JOIN 
    ''' Query.
    ''' </summary>
    ''' <returns>
    ''' Returns a DataTable that serves as a Data Result from any of the JOIN 
    ''' Query.
    ''' </returns>
    ''' <remarks>
    ''' </remarks>
    ''' <history>
    '''  [Victor] 10/9/2006 Created
    ''' </history>
    ''' -----------------------------------------------------------------------------
    Private Function BuildTempTable() As DataTable
        Dim table As New DataTable
        Dim col, col_in As DataColumn
        Dim column, TableName, ColumnName As String

        For Each column In ColumnList
            TableName = ParseNextToLastDot(column)
            ColumnName = ParseLastDot(column)

            If TableName = "" Then
                col = New DataColumn(ColumnName)
                col.DataType = GetType(Integer)
            Else
                col_in = DataSet.Tables(TableName).Columns(ColumnName)
                ' using full two-part table column name notation
                col = New DataColumn(TabColName(TableName, ColumnName))
                col.DataType = col_in.DataType
                col.MaxLength = col_in.MaxLength
            End If
            table.Columns.Add(col)
        Next
        col.Dispose()
        col_in.Dispose()
        Return table
    End Function
#End Region

#Region "Enumeration"

    ''' -----------------------------------------------------------------------------
    ''' <summary>
    ''' This enumaration contains the joins supported by this class
    ''' </summary>
    ''' <remarks>
    ''' </remarks>
    ''' <history>
    '''  [Victor] 10/10/2006 Created
    ''' </history>
    ''' -----------------------------------------------------------------------------
    Public Enum joinType
        inner
        left
        right
        full
    End Enum

#End Region

End Class

Comments»

1. Binay - February 24, 2012

Life saving code, can you also provide me the same code in C#

vmaceda - February 24, 2012

I’ve converted it to C# before but I lost it already :). You can easily compile this into DLL and let reflector convert this to C#. 🙂


Leave a comment