E
etl2016
Guest
Hello,
Is DataTable a good fit for large-scale data processing, on the order of a hundred thousand rows?
I have a scenario, prototyped as below, effectively to achieve two requirements, illustrated in if-else.
Scenario:
There is a DataTable, and a one-dimensional list/array of the same count. The values held in a particular column of the DataTable are to be replaced with the corresponding positional values from the list. Where nulls are found, those positional values need on-the-fly computation, resulting in a new list of {key, value} pairs, set aside for later use.
The scanning of DataTable row by row is proving to be very slow. Is DataTable a good fit for such purposes? If yes, is there a more performant manner to achieve the same objective, to read/update the DataTable? If no, could you please advise about what alternate .net programming features do the trick here, thank you.
using System;
using System.Data;
using System.Collections.Generic;
namespace UpdateDataTable
{
class Program
{
    // Entry point: builds a sample table, applies positional replacement values
    // to the "Car" column, then prints the resulting column.
    static void Main(string[] args)
    {
        DataTable table = GetTable();
        // Positional replacement values; nulls mark rows whose value
        // must be computed on the fly inside UpdateTable.
        string[] carNames = { "Volvo", "Tesla", null, null, "Ford" };
        DataTable returnedTable = UpdateTable(table, carNames);
        // NOTE: original printed from `table` while iterating `returnedTable`'s
        // count; use the returned table consistently.
        foreach (DataRow row in returnedTable.Rows)
        {
            Console.WriteLine(row["Car"]);
        }
    }

    /// <summary>
    /// Builds a small sample table with ID, Car, Name and Date columns
    /// and five seed rows.
    /// </summary>
    /// <returns>A populated <see cref="DataTable"/>.</returns>
    public static DataTable GetTable()
    {
        DataTable table = new DataTable();
        table.Columns.Add("ID", typeof(int));
        table.Columns.Add("Car", typeof(string));
        table.Columns.Add("Name", typeof(string));
        table.Columns.Add("Date", typeof(DateTime));
        table.Rows.Add(25, "Car A", "A", DateTime.Now);
        table.Rows.Add(50, "Car B", "B", DateTime.Now);
        table.Rows.Add(10, "Car C", "C", DateTime.Now);
        table.Rows.Add(21, "Car D", "D", DateTime.Now);
        table.Rows.Add(100, "Car E", "E", DateTime.Now);
        return table;
    }

    /// <summary>
    /// Replaces the "Car" column of <paramref name="table"/> with the
    /// positional values from <paramref name="CarNames"/>. Null/empty entries
    /// get a computed placeholder and are recorded as (rowIndex, value) pairs.
    /// </summary>
    /// <param name="table">Table to update in place (also returned).</param>
    /// <param name="CarNames">Replacement values, one per row.</param>
    /// <returns>The same <paramref name="table"/> instance, updated.</returns>
    /// <exception cref="ArgumentException">
    /// Thrown when the array length does not match the row count.
    /// </exception>
    public static DataTable UpdateTable(DataTable table, string[] CarNames)
    {
        if (CarNames.Length != table.Rows.Count)
        {
            throw new ArgumentException("CarNames must have one entry per row.", nameof(CarNames));
        }
        var list = new List<KeyValuePair<int, string>>();
        // BeginLoadData/EndLoadData suspend notifications, index maintenance and
        // constraint checking — the dominant per-row cost of bulk DataTable
        // updates, which is what made the original slow at scale.
        table.BeginLoadData();
        try
        {
            for (int i = 0; i < table.Rows.Count; i++)
            {
                DataRow row = table.Rows[i];
                if (!string.IsNullOrEmpty(CarNames[i]))
                {
                    row["Car"] = CarNames[i]; // Requirement-1: update table with non-null positional value
                }
                else
                {
                    row["Car"] = "hello"; // Requirement-2: computed value, recorded as a (key, value) pair
                    list.Add(new KeyValuePair<int, string>(i, "hello"));
                }
            }
        }
        finally
        {
            table.EndLoadData();
        }
        return table;
    } // end of UpdateTable
} // end of class
}
thank you
Continue reading...
Is DataTable a good fit for large-scale data processing, on the order of a hundred thousand rows?
I have a scenario, prototyped as below, effectively to achieve two requirements, illustrated in if-else.
Scenario:
There is a DataTable, and a one-dimensional list/array of the same count. The values held in a particular column of the DataTable are to be replaced with the corresponding positional values from the list. Where nulls are found, those positional values need on-the-fly computation, resulting in a new list of {key, value} pairs, set aside for later use.
The scanning of DataTable row by row is proving to be very slow. Is DataTable a good fit for such purposes? If yes, is there a more performant manner to achieve the same objective, to read/update the DataTable? If no, could you please advise about what alternate .net programming features do the trick here, thank you.
using System;
using System.Data;
using System.Collections.Generic;
namespace UpdateDataTable
{
class Program
{
    // Entry point: builds a sample table, applies positional replacement values
    // to the "Car" column, then prints the resulting column.
    static void Main(string[] args)
    {
        DataTable table = GetTable();
        // Positional replacement values; nulls mark rows whose value
        // must be computed on the fly inside UpdateTable.
        string[] carNames = { "Volvo", "Tesla", null, null, "Ford" };
        DataTable returnedTable = UpdateTable(table, carNames);
        // NOTE: original printed from `table` while iterating `returnedTable`'s
        // count; use the returned table consistently.
        foreach (DataRow row in returnedTable.Rows)
        {
            Console.WriteLine(row["Car"]);
        }
    }

    /// <summary>
    /// Builds a small sample table with ID, Car, Name and Date columns
    /// and five seed rows.
    /// </summary>
    /// <returns>A populated <see cref="DataTable"/>.</returns>
    public static DataTable GetTable()
    {
        DataTable table = new DataTable();
        table.Columns.Add("ID", typeof(int));
        table.Columns.Add("Car", typeof(string));
        table.Columns.Add("Name", typeof(string));
        table.Columns.Add("Date", typeof(DateTime));
        table.Rows.Add(25, "Car A", "A", DateTime.Now);
        table.Rows.Add(50, "Car B", "B", DateTime.Now);
        table.Rows.Add(10, "Car C", "C", DateTime.Now);
        table.Rows.Add(21, "Car D", "D", DateTime.Now);
        table.Rows.Add(100, "Car E", "E", DateTime.Now);
        return table;
    }

    /// <summary>
    /// Replaces the "Car" column of <paramref name="table"/> with the
    /// positional values from <paramref name="CarNames"/>. Null/empty entries
    /// get a computed placeholder and are recorded as (rowIndex, value) pairs.
    /// </summary>
    /// <param name="table">Table to update in place (also returned).</param>
    /// <param name="CarNames">Replacement values, one per row.</param>
    /// <returns>The same <paramref name="table"/> instance, updated.</returns>
    /// <exception cref="ArgumentException">
    /// Thrown when the array length does not match the row count.
    /// </exception>
    public static DataTable UpdateTable(DataTable table, string[] CarNames)
    {
        if (CarNames.Length != table.Rows.Count)
        {
            throw new ArgumentException("CarNames must have one entry per row.", nameof(CarNames));
        }
        var list = new List<KeyValuePair<int, string>>();
        // BeginLoadData/EndLoadData suspend notifications, index maintenance and
        // constraint checking — the dominant per-row cost of bulk DataTable
        // updates, which is what made the original slow at scale.
        table.BeginLoadData();
        try
        {
            for (int i = 0; i < table.Rows.Count; i++)
            {
                DataRow row = table.Rows[i];
                if (!string.IsNullOrEmpty(CarNames[i]))
                {
                    row["Car"] = CarNames[i]; // Requirement-1: update table with non-null positional value
                }
                else
                {
                    row["Car"] = "hello"; // Requirement-2: computed value, recorded as a (key, value) pair
                    list.Add(new KeyValuePair<int, string>(i, "hello"));
                }
            }
        }
        finally
        {
            table.EndLoadData();
        }
        return table;
    } // end of UpdateTable
} // end of class
}
thank you
Continue reading...