patternjavaMinor
Java app for getting metadata of millions of files in a directory
Viewed 0 times
directoryappmillionsgettingjavafilesformetadata
Problem
I am writing a Java app that gets the metadata of the files in a directory and exports it to a .csv file. The app works fine if the number of files is below a million. But if I feed in a path that has about 3,200,000 files across all of its directories and sub-directories, it takes forever. Is there a way I can speed things up here?
```
private void extractDetailsCSV(File libSourcePath, String extractFile) throws ScraperException {
log.info("Inside extract details csv");
try{
FileMetadataUtil fileUtil = new FileMetadataUtil();
File[] listOfFiles = libSourcePath.listFiles();
for(int i = 0; i < listOfFiles.length; i++) {
if(listOfFiles[i].isDirectory()) {
extractDetailsCSV(listOfFiles[i],extractFile);
}
if(listOfFiles[i].isFile()){
ScraperOutputVO so = new ScraperOutputVO();
Path path = Paths.get(listOfFiles[i].getAbsolutePath());
so.setFilePath(listOfFiles[i].getParent());
so.setFileName(listOfFiles[i].getName());
so.setFileType(getFileType(listOfFiles[i].getAbsolutePath()));
BasicFileAttributes basicAttribs = fileUtil.getBasicFileAttributes(path);
if(basicAttribs != null) {
so.setDateCreated(basicAttribs.creationTime().toString().substring(0, 10) + " " + basicAttribs.creationTime().toString().substring(11, 16));
so.setDateLastModified(basicAttribs.lastModifiedTime().toString().substring(0, 10) + " " + basicAttribs.lastModifiedTime().toString().substring(11, 16));
so.setDateLastAccessed(basicAttribs.lastAccessTime().toString().substring(0, 10) + " " + basicAttribs.lastAccessTime().toString().substring(11, 16));
}
so.setFileSize(String.valueOf(listOfFiles[i].length()));
so.setAuthors(fileUtil.getOwner(path));
so.setFolderLink(listOfFiles[i].getAbsol
```
private void extractDetailsCSV(File libSourcePath, String extractFile) throws ScraperException {
log.info("Inside extract details csv");
try{
FileMetadataUtil fileUtil = new FileMetadataUtil();
File[] listOfFiles = libSourcePath.listFiles();
for(int i = 0; i < listOfFiles.length; i++) {
if(listOfFiles[i].isDirectory()) {
extractDetailsCSV(listOfFiles[i],extractFile);
}
if(listOfFiles[i].isFile()){
ScraperOutputVO so = new ScraperOutputVO();
Path path = Paths.get(listOfFiles[i].getAbsolutePath());
so.setFilePath(listOfFiles[i].getParent());
so.setFileName(listOfFiles[i].getName());
so.setFileType(getFileType(listOfFiles[i].getAbsolutePath()));
BasicFileAttributes basicAttribs = fileUtil.getBasicFileAttributes(path);
if(basicAttribs != null) {
so.setDateCreated(basicAttribs.creationTime().toString().substring(0, 10) + " " + basicAttribs.creationTime().toString().substring(11, 16));
so.setDateLastModified(basicAttribs.lastModifiedTime().toString().substring(0, 10) + " " + basicAttribs.lastModifiedTime().toString().substring(11, 16));
so.setDateLastAccessed(basicAttribs.lastAccessTime().toString().substring(0, 10) + " " + basicAttribs.lastAccessTime().toString().substring(11, 16));
}
so.setFileSize(String.valueOf(listOfFiles[i].length()));
so.setAuthors(fileUtil.getOwner(path));
so.setFolderLink(listOfFiles[i].getAbsol
Solution
I suspect that using the old
So I've created a class
So the class should look like that :
The method
File class of Java is the likely root cause of your problem right now. Since you're using Java 7, you should use the new java.nio.file classes. I've seen that you already use some of them, like Path, so it shouldn't be too difficult. I don't know what your class looks like at the moment, so I've changed some methods based on what I'm used to doing. The class I will be using is SimpleFileVisitor, since it is the basic implementation of FileVisitor. So I've created a class
Walker (this is a very bad name; you should change it to something clearer for you, since I have no good idea right now) that extends SimpleFileVisitor. The class has an attribute extractFile that corresponds to the filename of the CSV. This class will have the preVisitDirectory, visitFile and visitFileFailed methods that we will override from FileVisitor. I've also added your method writeCsvFileDtl, and createDate (thanks to @unholysampler; you should read his answer too). So the class should look like this:
import static java.nio.file.FileVisitResult.CONTINUE;
import java.io.FileWriter;
import java.io.IOException;
import java.nio.file.FileVisitResult;
import java.nio.file.Path;
import java.nio.file.SimpleFileVisitor;
import java.nio.file.attribute.BasicFileAttributes;
import java.nio.file.attribute.FileTime;
public class Walker extends SimpleFileVisitor {
private String extractFile;
/**
 * Creates a visitor that appends its output to the given CSV file.
 *
 * @param extractFile name of the CSV file; stored in the {@code extractFile} field and
 *                    later passed to {@code new FileWriter(extractFile, true)} when a
 *                    row is written
 */
public Walker(String extractFile) {
this.extractFile = extractFile;
}
/**
 * Writes a CSV row for the directory about to be entered, then lets the walk descend
 * into it.
 *
 * @param dir  the directory about to be visited
 * @param attr the directory's basic attributes
 * @return {@link FileVisitResult#CONTINUE} in every case
 * @throws IOException declared to match the overridden method's signature
 */
@Override
public FileVisitResult preVisitDirectory(Path dir, BasicFileAttributes attr)
        throws IOException {
    populateAndWrite(dir, attr);
    return FileVisitResult.CONTINUE;
}
/**
 * Writes a CSV row for the visited file via {@code populateAndWrite}, then continues
 * the walk.
 *
 * @param file the file being visited
 * @param attr the file's basic attributes
 * @return {@link FileVisitResult#CONTINUE} in every case
 */
@Override
public FileVisitResult visitFile(Path file, BasicFileAttributes attr) {
    populateAndWrite(file, attr);
    return FileVisitResult.CONTINUE;
}
/**
 * Called when a file could not be visited. The failure is neither inspected nor
 * reported here; the walk simply moves on to the next entry.
 *
 * @param file the file that could not be visited
 * @param exc  the I/O error that prevented the visit (unused)
 * @return {@link FileVisitResult#CONTINUE} in every case
 */
@Override
public FileVisitResult visitFileFailed(Path file, IOException exc) {
    // You should determine if you need this method or not
    return FileVisitResult.CONTINUE;
}
/**
 * Builds one {@code ScraperOutputVO} row for a visited path and appends it to the CSV
 * file through {@code writeCsvFileDtl}.
 *
 * @param file the file or directory being visited
 * @param attr the attributes supplied by the walk; when {@code null}, the date and
 *             size fields of the row are left unset
 */
private void populateAndWrite(Path file, BasicFileAttributes attr) {
    ScraperOutputVO so = new ScraperOutputVO();

    Path parent = file.getParent();
    if (parent != null) {
        so.setFilePath(parent.toString());
    }
    Path fileName = file.getFileName();
    if (fileName != null) {
        so.setFileName(fileName.toString());
    }

    // Resolve the absolute path once; it feeds both the type lookup and the link column.
    String absolutePath = file.toAbsolutePath().toString();
    so.setFileType(getFileType(absolutePath));

    if (attr != null) {
        so.setDateCreated(createDate(attr.creationTime()));
        so.setDateLastModified(createDate(attr.lastModifiedTime()));
        so.setDateLastAccessed(createDate(attr.lastAccessTime()));
        // The size is only recorded for non-directories. This check lives inside the
        // null guard so that a null attr cannot cause a NullPointerException.
        if (!attr.isDirectory()) {
            so.setFileSize(String.valueOf(attr.size()));
        }
    }

    so.setAuthors(fileUtil.getOwner(file));
    so.setFolderLink(absolutePath);

    // writeCsvFileDtl takes only the row (it reads the extractFile field itself) and
    // already logs and converts any IOException into a ScraperException, so no
    // additional catch block is needed here.
    writeCsvFileDtl(so);
}
/**
 * Renders a file timestamp as two slices of {@code time.toString()} joined by a
 * space: characters 0-9 (the date portion) and characters 11-15 (hours and minutes).
 *
 * @param time the timestamp to format
 * @return the date slice, a single space, then the hour/minute slice
 */
private String createDate(FileTime time) {
    final String text = time.toString();
    final String datePart = text.substring(0, 10);
    final String hourMinutePart = text.substring(11, 16);
    return new StringBuilder(datePart)
            .append(' ')
            .append(hourMinutePart)
            .toString();
}
private void writeCsvFileDtl(ScraperOutputVO scraperOutputVO)
throws ScraperException {
try {
FileWriter writer = new FileWriter(extractFile, true);
writer.append(scraperOutputVO.getFilePath());
writer.append(',');
writer.append(scraperOutputVO.getFileName());
writer.append(',');
writer.append(scraperOutputVO.getFileType());
writer.append(',');
writer.append(scraperOutputVO.getDateCreated());
writer.append(',');
writer.append(scraperOutputVO.getDateLastModified());
writer.append(',');
writer.append(scraperOutputVO.getDateLastAccessed());
writer.append(',');
writer.append(scraperOutputVO.getFileSize());
writer.append(',');
writer.append(scraperOutputVO.getAuthors());
writer.append(',');
writer.append(scraperOutputVO.getFolderLink());
writer.append('\n');
writer.flush();
writer.close();
} catch (IOException e) {
log.info("IOException while writing to csv file" +
e.fillStackTrace();
throw new ScraperException("IOException while writing to csv file",
e.fillInStackTrace());
}
}
}The method
populateAndWrite is used in preVisitDirectory and visitFile; basically, it will populate each attribute of your ScraperOutputVO object and then send it to the write method. I'm not sure if you want to list directories, so if you Code Snippets
import static java.nio.file.FileVisitResult.CONTINUE;
import java.io.FileWriter;
import java.io.IOException;
import java.nio.file.FileVisitResult;
import java.nio.file.Path;
import java.nio.file.SimpleFileVisitor;
import java.nio.file.attribute.BasicFileAttributes;
import java.nio.file.attribute.FileTime;
public class Walker extends SimpleFileVisitor<Path> {
private String extractFile;
public Walker(String extractFile) {
this.extractFile = extractFile;
}
@Override
public FileVisitResult preVisitDirectory(Path dir, BasicFileAttributes attr)
throws IOException {
populateAndWrite(dir, attr);
return CONTINUE;
}
@Override
public FileVisitResult visitFile(Path file, BasicFileAttributes attr) {
populateAndWrite(file, attr);
return CONTINUE;
}
@Override
public FileVisitResult visitFileFailed(Path file, IOException exc) {
//You should determine if you need this method or not
return CONTINUE;
}
private void populateAndWrite(Path file, BasicFileAttributes attr) {
ScraperOutputVO so = new ScraperOutputVO();
if (file.getParent() != null) {
so.setFilePath(file.getParent().toString());
}
if (file.getFileName() != null) {
so.setFileName(file.getFileName().toString());
}
so.setFileType(getFileType(file.toAbsolutePath().toString()));
if (attr != null) {
so.setDateCreated(createDate(attr.creationTime()));
so.setDateLastModified(createDate(attr.lastModifiedTime()));
so.setDateLastAccessed(createDate(attr.lastAccessTime()));
}
if (!attr.isDirectory()) {
so.setFileSize(String.valueOf(attr.size()));
}
so.setAuthors(fileUtil.getOwner(file));
so.setFolderLink(file.toAbsolutePath().toString());
try {
writeCsvFileDtl(extractFile, so);
} catch (IOException e) {
log.info("IOException while writing to csv file" +
e.fillInStackTrace());
throw new
ScraperException("IOException while writing to csv file" ,
e.fillInStackTrace());
}
}
private String createDate(FileTime time) {
String timeStr = time.toString();
return timeStr.substring(0, 10) + " " + timeStr.substring(11, 16);
}
private void writeCsvFileDtl(ScraperOutputVO scraperOutputVO)
throws ScraperException {
try {
FileWriter writer = new FileWriter(extractFile, true);
writer.append(scraperOutputVO.getFilePath());
writer.append(',');
writer.append(scraperOutputVO.getFileName());
writer.append(',');
writer.append(scraperOutputVO.getFileType());
writer.append(',');
writer.append(scraperOutputVO.getDateCreated());
writer.append(',');
writer.append(scraperOutputVO.getDateLastMpublic static void main(String args[]){
Path root = Paths.get("Path to your directory");
Walker walker = new Walker("Name of your csv file");
try {
Files.walkFileTree(root, walker);
} catch (IOException e) {
//you should handle exception here
//log.info("Problem walking the directory")
e.printStackTrace();
}
}import java.io.FileWriter;
import java.io.IOException;
public class Writer {
static FileWriter writer = null;
/**
 * Opens the shared static {@code writer} on a fixed path in append mode. If the file
 * cannot be opened, the stack trace is printed and {@code writer} is left unchanged.
 */
public static void openFileWriter() {
    final boolean appendMode = true;
    try {
        writer = new FileWriter("C:/dev/file.txt", appendMode);
    } catch (IOException openFailure) {
        openFailure.printStackTrace();
    }
}
/**
 * Appends one fixed nine-column sample row to the shared static {@code writer} and
 * flushes it. The writer is not opened here; it is expected to be open already.
 * An {@link IOException} is printed and otherwise ignored.
 */
public static void writeToTheFileWithOpenEachTime(){
    final String[] columns = {
        "firstColumn",
        "secondColumn",
        "thirdColumn",
        "fourthColumn",
        "fifthColumn",
        "sixthColumn",
        "sevenColumn",
        "eigthColumn",
        "ninethColumn"
    };
    final int lastIndex = columns.length - 1;
    try {
        for (int i = 0; i <= lastIndex; i++) {
            writer.append(columns[i]);
            // A comma follows every column except the last, which ends the row.
            writer.append(i < lastIndex ? ',' : '\n');
        }
        writer.flush();
    } catch (IOException writeFailure) {
        writeFailure.printStackTrace();
    }
}
/**
 * Closes the shared static {@code writer}.
 *
 * <p>Does nothing when the writer is {@code null}, which is the case if it was never
 * opened or if opening it failed. Previously that situation raised a
 * {@link NullPointerException}. An {@link IOException} from {@code close()} is
 * printed and otherwise ignored.
 */
public static void closeWriter() {
    if (writer == null) {
        return;
    }
    try {
        writer.close();
    } catch (IOException e) {
        e.printStackTrace();
    }
}
}Context
StackExchange Code Review Q#29952, answer score: 6
Revisions (0)
No revisions yet.