patterncsharpModerate
Processing a large number of files in a folder
Viewed 0 times
numberlargefilesfolderprocessing
Problem
I have a folder that contains a large number of files. I need to write C# code that opens each file, reads it, and displays its contents. Is this code efficient, or should something be changed?
```
using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.IO;
using System.Security;
using System.Threading;
using System.Threading.Tasks;
using System.Text;
using System.Collections;
class Program
{
/// <summary>
/// Entry point: runs the folder read to completion and prints the elapsed time.
/// </summary>
static void Main()
{
    Stopwatch timer = Stopwatch.StartNew();

    // Block the main thread until every file has been read and printed.
    Task readAll = ProcessRead();
    readAll.Wait();

    Console.Write("Done ");
    Console.WriteLine("Elapsed Time" + timer.ElapsedMilliseconds + "and" + timer.ElapsedTicks);

    // Wait for a key press before the program exits.
    Console.ReadKey();
}
/// <summary>
/// Reads each file returned by <c>Directory.GetFiles</c> for the folder, writes its text
/// to the console, and finally writes the number of files for which a read was attempted.
/// </summary>
static async Task ProcessRead()
{
    string directoryPath = @"Directory";
    string[] paths = Directory.GetFiles(directoryPath);
    int attempted = 0;

    foreach (string path in paths)
    {
        // Guard clause: report a path that is no longer on disk and move on.
        if (!File.Exists(path))
        {
            Console.WriteLine("file not found: " + path);
            continue;
        }

        try
        {
            // Counted before the read, so files that fail to read are included in the total.
            attempted++;
            string contents = await ReadTextAsync(path);
            Console.WriteLine(contents);
        }
        catch (Exception error)
        {
            // A failure on one file is reported and the loop continues with the next.
            Console.WriteLine(error.Message);
        }
    }

    Console.WriteLine(attempted);
}
static async Task ReadTextAsync(string filePath)
{
using (FileStream sourceStream = new FileStream(filePath,
FileMode.Open, FileAccess.Read, FileShare.Read,
bufferSize: 4096, useAsync: true))
{
StringBuilder sb = new StringBuilder();
byte[] buffer = new byte[0x1000];
int numRead;
while ((numRead = await sourceStream.ReadAsync(buffer, 0, buffer.Length)) != 0)
{
string text = Encoding.UTF8.GetString(buffer, 0, numRead);
```
using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.IO;
using System.Security;
using System.Threading;
using System.Threading.Tasks;
using System.Text;
using System.Collections;
class Program
{
/// <summary>
/// Entry point: runs the folder read to completion and prints the elapsed time.
/// </summary>
static void Main()
{
    Stopwatch timer = Stopwatch.StartNew();

    // Block the main thread until every file has been read and printed.
    Task readAll = ProcessRead();
    readAll.Wait();

    Console.Write("Done ");
    Console.WriteLine("Elapsed Time" + timer.ElapsedMilliseconds + "and" + timer.ElapsedTicks);

    // Wait for a key press before the program exits.
    Console.ReadKey();
}
/// <summary>
/// Reads each file returned by <c>Directory.GetFiles</c> for the folder, writes its text
/// to the console, and finally writes the number of files for which a read was attempted.
/// </summary>
static async Task ProcessRead()
{
    string directoryPath = @"Directory";
    string[] paths = Directory.GetFiles(directoryPath);
    int attempted = 0;

    foreach (string path in paths)
    {
        // Guard clause: report a path that is no longer on disk and move on.
        if (!File.Exists(path))
        {
            Console.WriteLine("file not found: " + path);
            continue;
        }

        try
        {
            // Counted before the read, so files that fail to read are included in the total.
            attempted++;
            string contents = await ReadTextAsync(path);
            Console.WriteLine(contents);
        }
        catch (Exception error)
        {
            // A failure on one file is reported and the loop continues with the next.
            Console.WriteLine(error.Message);
        }
    }

    Console.WriteLine(attempted);
}
static async Task ReadTextAsync(string filePath)
{
using (FileStream sourceStream = new FileStream(filePath,
FileMode.Open, FileAccess.Read, FileShare.Read,
bufferSize: 4096, useAsync: true))
{
StringBuilder sb = new StringBuilder();
byte[] buffer = new byte[0x1000];
int numRead;
while ((numRead = await sourceStream.ReadAsync(buffer, 0, buffer.Length)) != 0)
{
string text = Encoding.UTF8.GetString(buffer, 0, numRead);
Solution
- You could use `Directory.EnumerateFiles` to process each path as it is found, without first loading all the paths into memory.
- There is no need to check whether each file exists, because you have only just obtained its path from `GetFiles`.
- Instead of implementing your own `ReadTextAsync`, just use `File.ReadAllText`.
- There is no need to use a new synchronization context (async call) for each file. If you want to process the files in the background, it is better to process all files in one single Task than to use one task per file. Remember that each context switch adds a little overhead.
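- If you want to use an async API that way, consider using `ConfigureAwait(false)` so that the continuation does not have to be marshalled back to the captured context (`ConfigureAwait(true)` is the default behaviour and changes nothing): `await sourceStream.ReadAsync(buffer, 0, buffer.Length).ConfigureAwait(false)`

I suppose the following code will do the same faster and with less memory consumption: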
/// <summary>
/// Reads every file in the folder on a single background task, writes each file's text
/// to the console, and finally writes the number of files for which a read was attempted.
/// </summary>
static async Task ProcessRead()
{
    // One Task.Run for the whole batch rather than one asynchronous operation per file.
    await Task.Run(() =>
    {
        // EnumerateFiles yields paths lazily, so the complete list is never held in memory.
        IEnumerable<string> fileEntries = Directory.EnumerateFiles(@"Directory");
        int count = 0;
        foreach (string fname in fileEntries)
        {
            try
            {
                // Counted before the read, so files that fail to read are included in the total.
                count++;
                string text = File.ReadAllText(fname);
                Console.WriteLine(text);
            }
            catch (Exception ex)
            {
                // Report the failure and continue with the next file.
                Console.WriteLine(ex.Message);
            }
        }
        Console.WriteLine(count);
    });
}
Code Snippets
await sourceStream.ReadAsync(buffer, 0, buffer.Length).ConfigureAwait(false)
/// <summary>
/// Reads every file in the folder on a single background task, writes each file's text
/// to the console, and finally writes the number of files for which a read was attempted.
/// </summary>
static async Task ProcessRead()
{
    // One Task.Run for the whole batch rather than one asynchronous operation per file.
    await Task.Run(() =>
    {
        // EnumerateFiles yields paths lazily, so the complete list is never held in memory.
        IEnumerable<string> fileEntries = Directory.EnumerateFiles(@"Directory");
        int count = 0;
        foreach (string fname in fileEntries)
        {
            try
            {
                // Counted before the read, so files that fail to read are included in the total.
                count++;
                string text = File.ReadAllText(fname);
                Console.WriteLine(text);
            }
            catch (Exception ex)
            {
                // Report the failure and continue with the next file.
                Console.WriteLine(ex.Message);
            }
        }
        Console.WriteLine(count);
    });
}
Context
StackExchange Code Review Q#152426, answer score: 14
Revisions (0)
No revisions yet.