HiveBrain v1.2.0
Get Started
← Back to all entries
patternjavaMinor

Java app for getting metadata of millions of files in a directory

Submitted by: @import:stackexchange-codereview··
0
Viewed 0 times
directoryappmillionsgettingjavafilesformetadata

Problem

I am writing a Java app that gets the metadata of the files in a directory and exports it to a .csv file. The app works fine if the number of files is fewer than a million. But if I feed in a path that has about 3,200,000 files across all of its directories and sub-directories, it takes forever. Is there a way I can speed things up here?

```
private void extractDetailsCSV(File libSourcePath, String extractFile) throws ScraperException {

log.info("Inside extract details csv");

try{
FileMetadataUtil fileUtil = new FileMetadataUtil();

File[] listOfFiles = libSourcePath.listFiles();

for(int i = 0; i < listOfFiles.length; i++) {

if(listOfFiles[i].isDirectory()) {
extractDetailsCSV(listOfFiles[i],extractFile);
}

if(listOfFiles[i].isFile()){

ScraperOutputVO so = new ScraperOutputVO();

Path path = Paths.get(listOfFiles[i].getAbsolutePath());

so.setFilePath(listOfFiles[i].getParent());
so.setFileName(listOfFiles[i].getName());

so.setFileType(getFileType(listOfFiles[i].getAbsolutePath()));

BasicFileAttributes basicAttribs = fileUtil.getBasicFileAttributes(path);
if(basicAttribs != null) {
so.setDateCreated(basicAttribs.creationTime().toString().substring(0, 10) + " " + basicAttribs.creationTime().toString().substring(11, 16));
so.setDateLastModified(basicAttribs.lastModifiedTime().toString().substring(0, 10) + " " + basicAttribs.lastModifiedTime().toString().substring(11, 16));
so.setDateLastAccessed(basicAttribs.lastAccessTime().toString().substring(0, 10) + " " + basicAttribs.lastAccessTime().toString().substring(11, 16));
}

so.setFileSize(String.valueOf(listOfFiles[i].length()));
so.setAuthors(fileUtil.getOwner(path));

so.setFolderLink(listOfFiles[i].getAbsol

Solution

I suspect that using Java's old File class is the root cause of the slowdown in your solution right now. Since you're using Java 7, you should use the new java.nio.file classes instead. I've seen that you already use some of them, such as Path, so it shouldn't be too difficult. I don't know what your class looks like at the moment, so I've changed some methods based on what I'm used to doing. The class I will be using is SimpleFileVisitor, since it is the basic implementation of FileVisitor.

So I've created a class Walker (this is a very bad name; you should change it to something clearer for you, since I have no good idea right now) that extends SimpleFileVisitor. The class has an attribute extractFile that corresponds to the filename of the CSV. This class overrides the preVisitDirectory, visitFile and visitFileFailed methods from FileVisitor. I've also added your method writeCsvFileDtl, and a createDate method (thanks to @unholysampler; you should read his answer too).

So the class should look like that :

import static java.nio.file.FileVisitResult.CONTINUE;
import java.io.FileWriter;
import java.io.IOException;
import java.nio.file.FileVisitResult;
import java.nio.file.Path;
import java.nio.file.SimpleFileVisitor;
import java.nio.file.attribute.BasicFileAttributes;
import java.nio.file.attribute.FileTime;

public class Walker extends SimpleFileVisitor {

    private String extractFile;

    public Walker(String extractFile) {
        this.extractFile = extractFile;
    }

    @Override
    public FileVisitResult preVisitDirectory(Path dir, BasicFileAttributes attr)
            throws IOException {
        populateAndWrite(dir, attr);
        return CONTINUE;
    }

    @Override
    public FileVisitResult visitFile(Path file, BasicFileAttributes attr) {
        populateAndWrite(file, attr);
        return CONTINUE;
    }

    @Override
    public FileVisitResult visitFileFailed(Path file, IOException exc) {
        //You should determine if you need this method or not
        return CONTINUE;
    }

    private void populateAndWrite(Path file, BasicFileAttributes attr) {
        ScraperOutputVO so = new ScraperOutputVO();
        if (file.getParent() != null) {
            so.setFilePath(file.getParent().toString());
        }

        if (file.getFileName() != null) {
            so.setFileName(file.getFileName().toString());
        }

         so.setFileType(getFileType(file.toAbsolutePath().toString()));

        if (attr != null) {
            so.setDateCreated(createDate(attr.creationTime()));
            so.setDateLastModified(createDate(attr.lastModifiedTime()));
            so.setDateLastAccessed(createDate(attr.lastAccessTime()));
        }
        if (!attr.isDirectory()) {
            so.setFileSize(String.valueOf(attr.size()));
        }

        so.setAuthors(fileUtil.getOwner(file));

        so.setFolderLink(file.toAbsolutePath().toString());
        try {
            writeCsvFileDtl(extractFile, so);
        } catch (IOException e) {
            log.info("IOException while writing to csv file" +
            e.fillInStackTrace());
            throw new
            ScraperException("IOException while writing to csv file" ,
            e.fillInStackTrace());
        }
    }

    private String createDate(FileTime time) {
        String timeStr = time.toString();
        return timeStr.substring(0, 10) + " " + timeStr.substring(11, 16);
    }

    private void writeCsvFileDtl(ScraperOutputVO scraperOutputVO) 
    throws ScraperException {
        try {
            FileWriter writer = new FileWriter(extractFile, true);
            writer.append(scraperOutputVO.getFilePath());
            writer.append(',');
            writer.append(scraperOutputVO.getFileName());
            writer.append(',');
            writer.append(scraperOutputVO.getFileType());
            writer.append(',');
            writer.append(scraperOutputVO.getDateCreated());
            writer.append(',');
            writer.append(scraperOutputVO.getDateLastModified());
            writer.append(',');
            writer.append(scraperOutputVO.getDateLastAccessed());
            writer.append(',');
            writer.append(scraperOutputVO.getFileSize());
            writer.append(',');
            writer.append(scraperOutputVO.getAuthors());
            writer.append(',');
            writer.append(scraperOutputVO.getFolderLink());
            writer.append('\n');
            writer.flush();
            writer.close();
        } catch (IOException e) {
            log.info("IOException while writing to csv file" +
             e.fillStackTrace();
            throw new ScraperException("IOException while writing to csv file",
                    e.fillInStackTrace());

        }
    }
}


The method populateAndWrite is used in preVisitDirectory and visitFile; basically, it populates each attribute of your ScraperOutputVO object and then sends it to the write method. I'm not sure if you want to list directories, so if you

Code Snippets

import static java.nio.file.FileVisitResult.CONTINUE;
import java.io.FileWriter;
import java.io.IOException;
import java.nio.file.FileVisitResult;
import java.nio.file.Path;
import java.nio.file.SimpleFileVisitor;
import java.nio.file.attribute.BasicFileAttributes;
import java.nio.file.attribute.FileTime;

public class Walker extends SimpleFileVisitor<Path> {

    private String extractFile;

    public Walker(String extractFile) {
        this.extractFile = extractFile;
    }

    @Override
    public FileVisitResult preVisitDirectory(Path dir, BasicFileAttributes attr)
            throws IOException {
        populateAndWrite(dir, attr);
        return CONTINUE;
    }

    @Override
    public FileVisitResult visitFile(Path file, BasicFileAttributes attr) {
        populateAndWrite(file, attr);
        return CONTINUE;
    }

    @Override
    public FileVisitResult visitFileFailed(Path file, IOException exc) {
        //You should determine if you need this method or not
        return CONTINUE;
    }

    private void populateAndWrite(Path file, BasicFileAttributes attr) {
        ScraperOutputVO so = new ScraperOutputVO();
        if (file.getParent() != null) {
            so.setFilePath(file.getParent().toString());
        }

        if (file.getFileName() != null) {
            so.setFileName(file.getFileName().toString());
        }

         so.setFileType(getFileType(file.toAbsolutePath().toString()));

        if (attr != null) {
            so.setDateCreated(createDate(attr.creationTime()));
            so.setDateLastModified(createDate(attr.lastModifiedTime()));
            so.setDateLastAccessed(createDate(attr.lastAccessTime()));
        }
        if (!attr.isDirectory()) {
            so.setFileSize(String.valueOf(attr.size()));
        }

        so.setAuthors(fileUtil.getOwner(file));

        so.setFolderLink(file.toAbsolutePath().toString());
        try {
            writeCsvFileDtl(extractFile, so);
        } catch (IOException e) {
            log.info("IOException while writing to csv file" +
            e.fillInStackTrace());
            throw new
            ScraperException("IOException while writing to csv file" ,
            e.fillInStackTrace());
        }
    }

    private String createDate(FileTime time) {
        String timeStr = time.toString();
        return timeStr.substring(0, 10) + " " + timeStr.substring(11, 16);
    }

    private void writeCsvFileDtl(ScraperOutputVO scraperOutputVO) 
    throws ScraperException {
        try {
            FileWriter writer = new FileWriter(extractFile, true);
            writer.append(scraperOutputVO.getFilePath());
            writer.append(',');
            writer.append(scraperOutputVO.getFileName());
            writer.append(',');
            writer.append(scraperOutputVO.getFileType());
            writer.append(',');
            writer.append(scraperOutputVO.getDateCreated());
            writer.append(',');
            writer.append(scraperOutputVO.getDateLastM
/**
 * Example driver: walks the directory tree rooted at the given placeholder
 * path, handing every entry to a {@code Walker} that appends to the given CSV.
 * An {@code IOException} from the walk is only printed to standard error.
 */
public static void main(String args[]){
    final Path startDirectory = Paths.get("Path to your directory");
    final Walker csvVisitor = new Walker("Name of your csv file");
    try {
        Files.walkFileTree(startDirectory, csvVisitor);
    } catch (IOException walkFailure) {
        // Placeholder handling: replace with real error handling,
        // e.g. log.info("Problem walking the directory")
        walkFailure.printStackTrace();
    }
}
import java.io.FileWriter;
import java.io.IOException;


/**
 * Demonstration helper that keeps one shared {@link FileWriter} open across
 * many writes instead of reopening the file for every row.
 *
 * <p>Usage: call {@link #openFileWriter()} (or {@link #openFileWriter(String)})
 * once, call {@link #writeToTheFileWithOpenEachTime()} as often as needed, then
 * call {@link #closeWriter()}.
 *
 * <p>I/O failures are reported with {@code printStackTrace()} and otherwise
 * ignored, so callers are never forced to handle {@code IOException}.
 */
public class Writer {
    /** Shared writer; {@code null} until one of the open methods succeeds. */
    static FileWriter writer = null;

    /**
     * Opens the default output file {@code C:/dev/file.txt} in append mode.
     */
    public static void openFileWriter() {
        openFileWriter("C:/dev/file.txt");
    }

    /**
     * Opens {@code path} in append mode and makes it the shared writer.
     * If the file cannot be opened, the stack trace is printed and the
     * shared writer is left as it was.
     *
     * @param path file to append to
     */
    public static void openFileWriter(String path) {
        try {
            writer  = new FileWriter(path, true);
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Appends one fixed nine-column sample row to the shared writer and
     * flushes it. Despite its name, this method does not open the file.
     *
     * @throws IllegalStateException if no writer has been opened, instead of
     *         the bare {@code NullPointerException} this used to produce
     */
    public static void writeToTheFileWithOpenEachTime(){
        if (writer == null) {
            throw new IllegalStateException(
                    "Writer is not open; call openFileWriter() first");
        }
        try {
            writer.append("firstColumn");
            writer.append(',');
            writer.append("secondColumn");
            writer.append(',');
            writer.append("thirdColumn");
            writer.append(',');
            writer.append("fourthColumn");
            writer.append(',');
            writer.append("fifthColumn");
            writer.append(',');
            writer.append("sixthColumn");
            writer.append(',');
            writer.append("sevenColumn");
            writer.append(',');
            writer.append("eigthColumn");
            writer.append(',');
            writer.append("ninethColumn");
            writer.append('\n');
            writer.flush();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Closes the shared writer. Does nothing when no writer was ever opened,
     * so it is safe to call from cleanup code after a failed open.
     */
    public static void closeWriter() {
        if (writer == null) {
            return;
        }
        try {
            writer.close();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

}

Context

StackExchange Code Review Q#29952, answer score: 6

Revisions (0)

No revisions yet.