patterncsharpMinor
Reading a text file that contains several CSV-like tables
Viewed 0 times
reading, file, tables, text, csv, like, several, contains, that
Problem
I wrote some extension methods that read CSV-styled text directly into a `DataTable` or `DataSet`, or write them out in this format. Is it right to implement these methods as extensions, or should I create a separate class that contains this functionality?
Formats explained:
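The `ReadFromCsv` and `WriteToCsv` methods work with a normal CSV layout: one line of `;`-separated column names followed by lines of `;`-separated values.
The `ReadFromSectionedCsv` and `WriteToSectionedCsv` methods use a sectioned layout in which each table starts with a `[TableName]` line followed by its CSV content,
where each table is read to a separate `DataTable` in the `DataSet`.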
`using System;
using System.Data;
using System.IO;
using System.Linq;
using System.Text;
using System.Text.RegularExpressions;
namespace CsvExtensions
{
///
/// Erweiterungen für die Klassen System.Data.DataTable und System.Data.DataSet
/// zum einlesen von an CSV angelehnten Daten direkt in eine Instanz dieser Typen
///
public static class DataCsvExtension
{
//Trennzeichen der einzelnen Spalten
private const char SEPERATOR = ';';
///
/// Liest die Daten einer CSV Datei ein
///
/// DataTable object
/// Pfad zur CSV Datei
public static void ReadFromCsv(this DataTable table, string filepath)
{
using (Stream filestream = File.Open(filepath, FileMode.Open))
{
table.ReadFromCsv(filestream);
}
}
///
/// Liest die Daten einer CSV Datei ein
///
/// DataTable object
/// Stream der CSV Datei
public static void ReadFromCsv(this DataTable table, Stream filestream)
{
table.Clear();
Encoding encoding = Encoding.UTF8; //Encoding.Default;
//if (Utf8Checker.IsUtf8(
Formats explained:
The `ReadFromCsv` and `WriteToCsv` methods will work with a normal CSV style like:
Column1;Column2;Column3;...
Value11;Value12;Value13;...
Value21;Value22;Value23;...
The `ReadFromSectionedCsv` and `WriteToSectionedCsv` methods use a format like this:
[Table1]
Column1;Column2;Column3;...
Value11;Value12;Value13;...
Value21;Value22;Value23;...
[Table2]
Column1;Column2;Column3;...
Value11;Value12;Value13;...
Value21;Value22;Value23;...
where each table is read to a separate `DataTable` in the `DataSet`.
`using System;
using System.Data;
using System.IO;
using System.Linq;
using System.Text;
using System.Text.RegularExpressions;
namespace CsvExtensions
{
///
/// Erweiterungen für die Klassen System.Data.DataTable und System.Data.DataSet
/// zum einlesen von an CSV angelehnten Daten direkt in eine Instanz dieser Typen
///
public static class DataCsvExtension
{
//Trennzeichen der einzelnen Spalten
private const char SEPERATOR = ';';
///
/// Liest die Daten einer CSV Datei ein
///
/// DataTable object
/// Pfad zur CSV Datei
public static void ReadFromCsv(this DataTable table, string filepath)
{
using (Stream filestream = File.Open(filepath, FileMode.Open))
{
table.ReadFromCsv(filestream);
}
}
///
/// Liest die Daten einer CSV Datei ein
///
/// DataTable object
/// Stream der CSV Datei
public static void ReadFromCsv(this DataTable table, Stream filestream)
{
table.Clear();
Encoding encoding = Encoding.UTF8; //Encoding.Default;
//if (Utf8Checker.IsUtf8(
Solution
if (array.Length < count)
{
string[] newArray = new string[count];
for (int s = 0; s<array.Length;s++)
{
newArray[s] = array[s];
}
for( int s = array.Length; s<count;s++)
{
newArray[s] = "";
}
array = newArray;
}
You can use `Array.Resize` to simplify this.
if (array.Length < count)
{
var length = array.Length;
Array.Resize(ref array, count);
for (var i = length; i < array.Length; i++)
{
array[i] = string.Empty;
}
}
if (line == null
|| (line.Trim() == "" || !line.Contains(SEPERATOR)
|| String.IsNullOrEmpty(line.Replace(';', ' ').Trim())))
I think you want to be using `SEPERATOR` instead of the hard-coded `';'` here.
sw.WriteLine("[{0}]", table.TableName);
int numberOfColumns = table.Columns.Count;
for (int i = 0; i < numberOfColumns; i++)
{
sw.Write(table.Columns[i]);
if (i < numberOfColumns - 1)
sw.Write(SEPERATOR);
}
foreach (DataRow dr in table.Rows)
{
sw.WriteLine();
for (int i = 0; i < numberOfColumns; i++)
{
sw.Write(dr[i].ToString());
if (i < numberOfColumns - 1)
sw.Write(SEPERATOR);
}
}
sw.WriteLine();
This can be simplified.
writer.WriteLine("[{0}]", table.TableName);
writer.WriteLine(string.Join(SEPERATOR.ToString(), table.Columns.Cast<DataColumn>()));
foreach (DataRow row in table.Rows)
{
writer.WriteLine(string.Join(SEPERATOR.ToString(), row.ItemArray));
}
How can I organize the code with respect to easy unit testing?
I would consider making the methods take a `TextReader` (`TextWriter`) instead of a `Stream`. You can then pass a `StringReader` (`StringWriter`) from your unit tests, while client code will normally pass a `StreamReader` (`StreamWriter`). This will also allow client code to choose the encoding (which they really should be doing) instead of being forced to use UTF-8.
Another reason to consider doing this is that someone calling `WriteToCsv` might want to write to the stream after the call returns. But they will get an exception, since `StreamWriter` disposes of the underlying stream when it is itself disposed. For example, we get an `ObjectDisposedException` when we call `WriteByte` here:
using (var stream = new MemoryStream())
{
using (var writer = new StreamWriter(stream))
{
}
stream.WriteByte(0);
}
Finally, it makes code re-use a bit easier. For instance, `WriteToSectionedCsv` can be written in terms of `WriteToCsv`.
public static void WriteToCsv(this DataTable table, TextWriter writer)
{
writer.WriteLine(string.Join(SEPERATOR.ToString(), table.Columns.Cast<DataColumn>()));
foreach (DataRow row in table.Rows)
{
writer.WriteLine(string.Join(SEPERATOR.ToString(), row.ItemArray));
}
}
public static void WriteToSectionedCsv(this DataSet dataSet, TextWriter writer)
{
foreach (DataTable table in dataSet.Tables)
{
writer.WriteLine("[{0}]", table.TableName);
table.WriteToCsv(writer);
}
}
Code Snippets
if (array.Length < count)
{
string[] newArray = new string[count];
for (int s = 0; s<array.Length;s++)
{
newArray[s] = array[s];
}
for( int s = array.Length; s<count;s++)
{
newArray[s] = "";
}
array = newArray;
}
if (array.Length < count)
{
var length = array.Length;
Array.Resize(ref array, count);
for (var i = length; i < array.Length; i++)
{
array[i] = string.Empty;
}
}
if (line == null
|| (line.Trim() == "" || !line.Contains(SEPERATOR)
|| String.IsNullOrEmpty(line.Replace(';', ' ').Trim())))
sw.WriteLine("[{0}]", table.TableName);
int numberOfColumns = table.Columns.Count;
for (int i = 0; i < numberOfColumns; i++)
{
sw.Write(table.Columns[i]);
if (i < numberOfColumns - 1)
sw.Write(SEPERATOR);
}
foreach (DataRow dr in table.Rows)
{
sw.WriteLine();
for (int i = 0; i < numberOfColumns; i++)
{
sw.Write(dr[i].ToString());
if (i < numberOfColumns - 1)
sw.Write(SEPERATOR);
}
}
sw.WriteLine();
writer.WriteLine("[{0}]", table.TableName);
writer.WriteLine(string.Join(SEPERATOR.ToString(), table.Columns.Cast<DataColumn>()));
foreach (DataRow row in table.Rows)
{
writer.WriteLine(string.Join(SEPERATOR.ToString(), row.ItemArray));
}
Context
StackExchange Code Review Q#83176, answer score: 8
Revisions (0)
No revisions yet.