EDN Admin
Well-known member
Hi Everyone,
I started using parallel programming a couple of months ago to boost up my reporting performance.
And guess what it work ..great.. the results are slight 40% performance boost in generating report time. However i get the feeling that Im only scratching the surface with parallel programming.. hence in my quest to excel in this stuff..Im learning how to run loops in parallel.. and thus far i hit a snag.. my results started to output duplicates as per below:
Looping on a date range...
Process InnerQuery4 ImportRow for the date==> 09/29/2012 12:00:00 AM==>211...30599...Duration(30.599) secs
Process InnerQuery4 ImportRow for the date==> 10/01/2012 12:00:00 AM==>211...30767...Duration(30.767) secs
Process InnerQuery4 ImportRow for the date==> 10/01/2012 12:00:00 AM==>1537...32868...Duration(32.868) secs
Process InnerQuery4 ImportRow for the date==> 10/02/2012 12:00:00 AM==>1537...32944...Duration(32.944) secs
Process InnerQuery4 ImportRow for the date==> 10/02/2012 12:00:00 AM==>1580...35089...Duration(35.089) secs
Process InnerQuery4 ImportRow for the date==> 10/03/2012 12:00:00 AM==>1580...35146...Duration(35.146) secs
Process InnerQuery4 ImportRow for the date==> 10/03/2012 12:00:00 AM==>903...37425...Duration(37.425) secs
...
part of the code is to loop range of dates from 29 Sept 2012 to 30 Nov 2013. The duplicates are underline below. Why do the same dates repeat? My computer is running a 64bit dual core processor. ([01]: Intel64 Family 6 Model 42 Stepping 7 GenuineIntel ~1595 Mhz)
I dont understand why there are repeated dates.
The source code is further below, what am i doing wrong? i suspect it is due to some objects that are not entirely thread-safe, but if Im am right, how do i make it thread safe. and not return duplicates
Dim cores As Integer = Environment.ProcessorCount
Dim counts As Integer() = New Integer(cores - 1) {}
Parallel.For(0, cores, Function(i)
For Parallel Processing, please define the variables as local as possible
Dim dtHierarchyLocal As New DataTable("CallingResultOutCodeCodeWithHierarchy")
Dim objAccountIDL As New DataColumn("AccountID", GetType(System.Int32))
Dim objStaffCodeL As New DataColumn("StaffCode", GetType(System.String))
Dim objStatusL As New DataColumn("Status", GetType(System.String))
Dim objCallingResultOutCodeL As New DataColumn("CallingResultOutCode", GetType(System.String))
Dim objDateCreatedL As New DataColumn("DateCreated", GetType(System.DateTime))
Dim objListBatchIDL As New DataColumn("ListBatchID", GetType(System.Int32))
Dim objListBatchNameL As New DataColumn("ListBatchName", GetType(System.String))
Dim objListBatchBatchL As New DataColumn("ListPGBatch", GetType(System.String))
Dim objROW_IDL As New DataColumn("ROW_ID", GetType(System.Int64))
dtHierarchyLocal.Columns.Add(objAccountIDL)
dtHierarchyLocal.Columns.Add(objStaffCodeL)
dtHierarchyLocal.Columns.Add(objStatusL)
dtHierarchyLocal.Columns.Add(objCallingResultOutCodeL)
dtHierarchyLocal.Columns.Add(objDateCreatedL)
dtHierarchyLocal.Columns.Add(objListBatchIDL)
dtHierarchyLocal.Columns.Add(objListBatchNameL)
dtHierarchyLocal.Columns.Add(objListBatchBatchL)
dtHierarchyLocal.Columns.Add(objROW_IDL)
Dim dtCallingResultOutCodeCodeClone As DataTable = dtCallingResultOutCodeCode.Copy
Dim dtStrMonNmCallingResultOutCodeCampClone As DataTable = dtStrMonNmCallingResultOutCodeCamp.Copy
Dim dtV2MNClone As DataTable = dtV2MN.Copy
For j As Integer = 0 To obc.Count - 1
dateEndDate = obc.Item(j).Field(Of DateTime)("Date")
strError = "Process InnerQuery1"
Dim InnerQuery1 =
From v In dtV2MNClone.AsEnumerable()
Where v("DateCreated") <= dateEndDate
Select New With {.AccountID = v.Field(Of Integer)("AccountID")}
Dim dtInnerQuery1 As New DataTable("dtInnerQuery1")
dtInnerQuery1 = InnerQuery1.CopyToDataTable
Dim InnerQuery2 =
From v In dtStrMonNmCallingResultOutCodeCampClone.AsEnumerable()
Join r In dtCallingResultOutCodeCodeClone.AsEnumerable() On v("ROW_ID") Equals r("ROW_ID")
Join q1 In dtInnerQuery1.AsEnumerable() On q1.Field(Of Integer)("AccountID") Equals v.Field(Of Integer)("AccountID")
Where (v("DateCreated") = dateEndDate And v("Status") = "IN PROGRESS")
Select New With {
.AccountID = v.Field(Of Integer)("AccountID"),
.DateCreated = dateEndDate,
.RSHierarchy = r.Field(Of Integer)("RSHierarchy"),
.CallingResultOutCode = v.Field(Of String)("CallingResultOutCode"),
.V2MID = v.Field(Of Integer)("V2MID")
}
Dim dtInnerQuery2 As New DataTable("dtInnerQuery2")
dtInnerQuery2 = InnerQuery2.CopyToDataTable
Dim InnerQuery3 =
From q2 In dtInnerQuery2.AsEnumerable()
Group q2 By AccountID = q2.Field(Of Integer)("AccountID"),
DateCreated = q2.Field(Of DateTime)("DateCreated") Into g = Group _
Select New With {
.AccountID = AccountID,
.DateCreated = DateCreated,
.RSHierarchy = g.Max(Function(q2) q2.Field(Of Integer)("RsHierarchy")),
.V2MID = g.Max(Function(q2) q2.Field(Of Integer)("V2MID"))
}
Dim dtInnerQuery3 As New DataTable("InnerQuery3")
dtInnerQuery3 = InnerQuery3.CopyToDataTable
Dim InnerQuery4 =
From v In dtStrMonNmCallingResultOutCodeCampClone.AsEnumerable
Join r In dtCallingResultOutCodeCodeClone.AsEnumerable On v.Field(Of Long)("ROW_ID") Equals r.Field(Of Long)("ROW_ID")
Join q3 In dtInnerQuery3.AsEnumerable
On q3("AccountID") Equals v.Field(Of Integer)("AccountID") _
And q3("RSHierarchy") Equals r.Field(Of Integer)("RsHierarchy")
Join c In dtV2ListBatch.AsEnumerable On c.Field(Of Integer)("ListBatchID") Equals v.Field(Of Integer)("ListBatchID")
Let StaffCode = (From v2 In dtV2MN.AsEnumerable
Where v2.Field(Of Integer)("V2MID") = v.Field(Of Integer)("V2MID")
Select v2.Field(Of String)("StaffCode"))
Select New With {
.AccountID = v.Field(Of Integer)("AccountID"),
.StaffCode = v.Field(Of String)("StaffCode"),
.Status = v.Field(Of String)("Status"),
.CallingResultOutCode = r.Field(Of String)("CallingResultOutCodeCode"),
.DateCreated = v.Field(Of DateTime)("DateCreated"),
.ListBatchID = c.Field(Of Integer)("ListBatchID"),
.ListBatchName = c.Field(Of String)("ListBatchName"),
.ListPGBatch = c.Field(Of String)("ListPGBatch")
}
Dim dtHieLoopResult As DataTable = InnerQuery4.CopyToDataTable
For Each drSource As DataRow In dtHieLoopResult.AsEnumerable
strError = "Process InnerQuery4 ImportRow for the date==> " + dateEndDate.ToString
dtHierarchyLocal.ImportRow(drSource)
Next
oTime.Stop()
Console.WriteLine(strError + "==>" + dtHieLoopResult.Rows.Count.ToString + "..." + oTime.ElapsedMilliseconds.ToString + "...Duration(" + (oTime.ElapsedMilliseconds / 1000).ToString + ") secs")
oTime.Start()
Next
dtHierarchy.Merge(dtHierarchyLocal)
Return (dtHierarchy)
End Function)
View the full article
I started using parallel programming a couple of months ago to boost up my reporting performance.
And guess what it work ..great.. the results are slight 40% performance boost in generating report time. However i get the feeling that Im only scratching the surface with parallel programming.. hence in my quest to excel in this stuff..Im learning how to run loops in parallel.. and thus far i hit a snag.. my results started to output duplicates as per below:
Looping on a date range...
Process InnerQuery4 ImportRow for the date==> 09/29/2012 12:00:00 AM==>211...30599...Duration(30.599) secs
Process InnerQuery4 ImportRow for the date==> 10/01/2012 12:00:00 AM==>211...30767...Duration(30.767) secs
Process InnerQuery4 ImportRow for the date==> 10/01/2012 12:00:00 AM==>1537...32868...Duration(32.868) secs
Process InnerQuery4 ImportRow for the date==> 10/02/2012 12:00:00 AM==>1537...32944...Duration(32.944) secs
Process InnerQuery4 ImportRow for the date==> 10/02/2012 12:00:00 AM==>1580...35089...Duration(35.089) secs
Process InnerQuery4 ImportRow for the date==> 10/03/2012 12:00:00 AM==>1580...35146...Duration(35.146) secs
Process InnerQuery4 ImportRow for the date==> 10/03/2012 12:00:00 AM==>903...37425...Duration(37.425) secs
...
part of the code is to loop range of dates from 29 Sept 2012 to 30 Nov 2013. The duplicates are underline below. Why do the same dates repeat? My computer is running a 64bit dual core processor. ([01]: Intel64 Family 6 Model 42 Stepping 7 GenuineIntel ~1595 Mhz)
I dont understand why there are repeated dates.
The source code is further below, what am i doing wrong? i suspect it is due to some objects that are not entirely thread-safe, but if Im am right, how do i make it thread safe. and not return duplicates
Dim cores As Integer = Environment.ProcessorCount
Dim counts As Integer() = New Integer(cores - 1) {}
Parallel.For(0, cores, Function(i)
For Parallel Processing, please define the variables as local as possible
Dim dtHierarchyLocal As New DataTable("CallingResultOutCodeCodeWithHierarchy")
Dim objAccountIDL As New DataColumn("AccountID", GetType(System.Int32))
Dim objStaffCodeL As New DataColumn("StaffCode", GetType(System.String))
Dim objStatusL As New DataColumn("Status", GetType(System.String))
Dim objCallingResultOutCodeL As New DataColumn("CallingResultOutCode", GetType(System.String))
Dim objDateCreatedL As New DataColumn("DateCreated", GetType(System.DateTime))
Dim objListBatchIDL As New DataColumn("ListBatchID", GetType(System.Int32))
Dim objListBatchNameL As New DataColumn("ListBatchName", GetType(System.String))
Dim objListBatchBatchL As New DataColumn("ListPGBatch", GetType(System.String))
Dim objROW_IDL As New DataColumn("ROW_ID", GetType(System.Int64))
dtHierarchyLocal.Columns.Add(objAccountIDL)
dtHierarchyLocal.Columns.Add(objStaffCodeL)
dtHierarchyLocal.Columns.Add(objStatusL)
dtHierarchyLocal.Columns.Add(objCallingResultOutCodeL)
dtHierarchyLocal.Columns.Add(objDateCreatedL)
dtHierarchyLocal.Columns.Add(objListBatchIDL)
dtHierarchyLocal.Columns.Add(objListBatchNameL)
dtHierarchyLocal.Columns.Add(objListBatchBatchL)
dtHierarchyLocal.Columns.Add(objROW_IDL)
Dim dtCallingResultOutCodeCodeClone As DataTable = dtCallingResultOutCodeCode.Copy
Dim dtStrMonNmCallingResultOutCodeCampClone As DataTable = dtStrMonNmCallingResultOutCodeCamp.Copy
Dim dtV2MNClone As DataTable = dtV2MN.Copy
For j As Integer = 0 To obc.Count - 1
dateEndDate = obc.Item(j).Field(Of DateTime)("Date")
strError = "Process InnerQuery1"
Dim InnerQuery1 =
From v In dtV2MNClone.AsEnumerable()
Where v("DateCreated") <= dateEndDate
Select New With {.AccountID = v.Field(Of Integer)("AccountID")}
Dim dtInnerQuery1 As New DataTable("dtInnerQuery1")
dtInnerQuery1 = InnerQuery1.CopyToDataTable
Dim InnerQuery2 =
From v In dtStrMonNmCallingResultOutCodeCampClone.AsEnumerable()
Join r In dtCallingResultOutCodeCodeClone.AsEnumerable() On v("ROW_ID") Equals r("ROW_ID")
Join q1 In dtInnerQuery1.AsEnumerable() On q1.Field(Of Integer)("AccountID") Equals v.Field(Of Integer)("AccountID")
Where (v("DateCreated") = dateEndDate And v("Status") = "IN PROGRESS")
Select New With {
.AccountID = v.Field(Of Integer)("AccountID"),
.DateCreated = dateEndDate,
.RSHierarchy = r.Field(Of Integer)("RSHierarchy"),
.CallingResultOutCode = v.Field(Of String)("CallingResultOutCode"),
.V2MID = v.Field(Of Integer)("V2MID")
}
Dim dtInnerQuery2 As New DataTable("dtInnerQuery2")
dtInnerQuery2 = InnerQuery2.CopyToDataTable
Dim InnerQuery3 =
From q2 In dtInnerQuery2.AsEnumerable()
Group q2 By AccountID = q2.Field(Of Integer)("AccountID"),
DateCreated = q2.Field(Of DateTime)("DateCreated") Into g = Group _
Select New With {
.AccountID = AccountID,
.DateCreated = DateCreated,
.RSHierarchy = g.Max(Function(q2) q2.Field(Of Integer)("RsHierarchy")),
.V2MID = g.Max(Function(q2) q2.Field(Of Integer)("V2MID"))
}
Dim dtInnerQuery3 As New DataTable("InnerQuery3")
dtInnerQuery3 = InnerQuery3.CopyToDataTable
Dim InnerQuery4 =
From v In dtStrMonNmCallingResultOutCodeCampClone.AsEnumerable
Join r In dtCallingResultOutCodeCodeClone.AsEnumerable On v.Field(Of Long)("ROW_ID") Equals r.Field(Of Long)("ROW_ID")
Join q3 In dtInnerQuery3.AsEnumerable
On q3("AccountID") Equals v.Field(Of Integer)("AccountID") _
And q3("RSHierarchy") Equals r.Field(Of Integer)("RsHierarchy")
Join c In dtV2ListBatch.AsEnumerable On c.Field(Of Integer)("ListBatchID") Equals v.Field(Of Integer)("ListBatchID")
Let StaffCode = (From v2 In dtV2MN.AsEnumerable
Where v2.Field(Of Integer)("V2MID") = v.Field(Of Integer)("V2MID")
Select v2.Field(Of String)("StaffCode"))
Select New With {
.AccountID = v.Field(Of Integer)("AccountID"),
.StaffCode = v.Field(Of String)("StaffCode"),
.Status = v.Field(Of String)("Status"),
.CallingResultOutCode = r.Field(Of String)("CallingResultOutCodeCode"),
.DateCreated = v.Field(Of DateTime)("DateCreated"),
.ListBatchID = c.Field(Of Integer)("ListBatchID"),
.ListBatchName = c.Field(Of String)("ListBatchName"),
.ListPGBatch = c.Field(Of String)("ListPGBatch")
}
Dim dtHieLoopResult As DataTable = InnerQuery4.CopyToDataTable
For Each drSource As DataRow In dtHieLoopResult.AsEnumerable
strError = "Process InnerQuery4 ImportRow for the date==> " + dateEndDate.ToString
dtHierarchyLocal.ImportRow(drSource)
Next
oTime.Stop()
Console.WriteLine(strError + "==>" + dtHieLoopResult.Rows.Count.ToString + "..." + oTime.ElapsedMilliseconds.ToString + "...Duration(" + (oTime.ElapsedMilliseconds / 1000).ToString + ") secs")
oTime.Start()
Next
dtHierarchy.Merge(dtHierarchyLocal)
Return (dtHierarchy)
End Function)
View the full article