HiveBrain v1.2.0
Get Started
← Back to all entries
patternjavascriptMinor

Recursively reading a directory in node.js

Submitted by: @import:stackexchange-codereview··
0
Viewed 0 times
directorynodereadingrecursively

Problem

I made a function for recursively reading a directory and it just seems kind of long and complicated. If you could take a look and tell me what's good or bad about it, that would be great.

Reading the directory synchronously is not an option.

var dutils = require('./dutils');
module.exports = {
    getIndex: function(ignore, cb) {
        var root = this.root;
        var cbs = 1;
        var index = {};

        // recursive function for walking directory
        (function walk(dir) {
            fs.readdir(dir, function(err, files) {
                if (err) return cb(err);

                files.forEach(function(fileName) {
                    // prepare
                    var filePath = path.join(dir, fileName); // filePath with root
                    var relFilePath = path.relative(root, filePath); // filePath without root

                    // if the file is on our ignore list don't index it
                    if (dutils.globmatch(filePath, ignore)) return;

                    var stats = fs.statSync(filePath);
                    var isDir = stats.isDirectory();

                    index[relFilePath] = {
                        isDir: isDir,
                        mtime: stats.mtime
                    };

                    // if the file is a directory, walk it next
                    if (isDir) {
                        // count the number of callbacks that are running concurrently
                        cbs++;
                        walk(filePath);
                    }
                });

                // our callback was completed
                cbs--;

                // if all callbacks were completed, we've completely walked our directory
                if (cbs === 0) cb(null, index);
            });
        })(root);
    }
}

Solution

My comments are inline starting with the //*. I also very much encourage you to look at async libraries such as Q.

var dutils = require('./dutils');

//* Why export an object with a single function? Why not just export the function itself?
module.exports = {
    //* I don't like intermixing method implementation with exports declaration because you tend to be 
    //* looking for very different things when looking at each. Hoisting to the rescure!
    //*     module.exports = { getIndex: getIndex };
    //*     ... later ...
    //*     function getIndex(ignore, cb) { 
    //*         ...
    getIndex: function(ignore, cb) {
        //* What if they didn't pass a  callback? Might as well check here and throw an error. Otherwise
        //* there will be a confusing error later on

        //* What is `this`? I **highly** recommend not using the `this` parameter unless you 
        //* understand _very_ well what it does and how it works. You can almost always achieve the same
        //* thing using simpler techniques
        var root = this.root;
        //* cds is not a good variable name - I have no idea what this represents reading it
        var cbs = 1;
        var index = {};

        // recursive function for walking directory
        (function walk(dir) {

            //* I usually prefer naming callback functions. Not only does it allow you to give a good
            //* label to the code but the function will show up labeled in stack traces
            fs.readdir(dir, function(err, files) {
                //* Be aware that it is possible for the callback to be triggered multiple times when there are 
                //* errors. This is an unusual interface and just be certain that you do this on purpose
                if (err) return cb(err);

                //* Again, I would recommend labeling this function.
                //* Also I'm not sure if this will follow symlinks or not but if does then this might end up in
                //* an endless loop.
                files.forEach(function(fileName) {
                    // prepare
                    var filePath = path.join(dir, fileName); // filePath with root
                    var relFilePath = path.relative(root, filePath); // filePath without root

                    //* I don't think this is really a useful comment as it doesn't say much more than
                    //* the code itself does. I also usually prefer putting the `return` statement for early returns
                    //* on its own line to make the early return stand out visually.
                    // if the file is on our ignore list don't index it
                    if (dutils.globmatch(filePath, ignore)) return;

                    var stats = fs.statSync(filePath);
                    var isDir = stats.isDirectory();

                    index[relFilePath] = {
                        isDir: isDir,
                        mtime: stats.mtime
                    };

                    //* This comment is not very useful as it - again - just restates what the code does
                    // if the file is a directory, walk it next
                    if (isDir) {
                        // count the number of callbacks that are running concurrently
                        cbs++;
                        walk(filePath);
                    }
                });

                // our callback was completed
                cbs--;

                // if all callbacks were completed, we've completely walked our directory
                if (cbs === 0) cb(null, index);

                //* Is the entire point of the `cbs` counter to see if any `readdir` operation resulted in an error?
                //* that seems to be all that it does. Why not just have a `var lastError = null` in `getIndex` and
                //* rather than checking for err do `if(lastError = err) return`. That way if `err` is truthy you have
                //* an error, otherwise you do not
                //*
                //* Also keep in mind that cb will be called multiple times but each time with the 
                //* same instance of the index object. That means that previously invoked callbacks will be
                //* having this object mutate underneat them. This might be exactly what you want but it is unuaual
                //* and should definitely be documented clearly. At the very least you want a big bold doc comment
                //* on this function
            });
        })(root);
    }
}


Here is an article I wrote on this parameters

Code Snippets

var dutils = require('./dutils');

//* Why export an object with a single function? Why not just export the function itself?
module.exports = {
    //* I don't like intermixing method implementation with exports declaration because you tend to be 
    //* looking for very different things when looking at each. Hoisting to the rescure!
    //*     module.exports = { getIndex: getIndex };
    //*     ... later ...
    //*     function getIndex(ignore, cb) { 
    //*         ...
    getIndex: function(ignore, cb) {
        //* What if they didn't pass a  callback? Might as well check here and throw an error. Otherwise
        //* there will be a confusing error later on

        //* What is `this`? I **highly** recommend not using the `this` parameter unless you 
        //* understand _very_ well what it does and how it works. You can almost always achieve the same
        //* thing using simpler techniques
        var root = this.root;
        //* cds is not a good variable name - I have no idea what this represents reading it
        var cbs = 1;
        var index = {};

        // recursive function for walking directory
        (function walk(dir) {

            //* I usually prefer naming callback functions. Not only does it allow you to give a good
            //* label to the code but the function will show up labeled in stack traces
            fs.readdir(dir, function(err, files) {
                //* Be aware that it is possible for the callback to be triggered multiple times when there are 
                //* errors. This is an unusual interface and just be certain that you do this on purpose
                if (err) return cb(err);

                //* Again, I would recommend labeling this function.
                //* Also I'm not sure if this will follow symlinks or not but if does then this might end up in
                //* an endless loop.
                files.forEach(function(fileName) {
                    // prepare
                    var filePath = path.join(dir, fileName); // filePath with root
                    var relFilePath = path.relative(root, filePath); // filePath without root

                    //* I don't think this is really a useful comment as it doesn't say much more than
                    //* the code itself does. I also usually prefer putting the `return` statement for early returns
                    //* on its own line to make the early return stand out visually.
                    // if the file is on our ignore list don't index it
                    if (dutils.globmatch(filePath, ignore)) return;

                    var stats = fs.statSync(filePath);
                    var isDir = stats.isDirectory();

                    index[relFilePath] = {
                        isDir: isDir,
                        mtime: stats.mtime
                    };

                    //* This comment is not very useful as it - again - just restates what the code does
                    // if the file is a di

Context

StackExchange Code Review Q#46998, answer score: 2

Revisions (0)

No revisions yet.