HiveBrain v1.2.0
Get Started
← Back to all entries
patternjavaMinor

Extracting the five most frequent queries from a log file

Submitted by: @import:stackexchange-codereview··
0
Viewed 0 times
thefilefivelogextractingqueriesfromfrequentmost

Problem

I'm trying to print the top 5 most queried strings from a text file. I can't use other third-party libraries to make it easier w.r.t hashmap implementations.

I need to improve on these if possible:

  • cyclomatic complexity



  • Memory usage



  • Execution time



  • Page faults



```
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.util.Comparator;
import java.util.HashMap;
import java.util.Map;
import java.util.Scanner;
import java.util.TreeMap;

class Topfive {

private HashMap hashmap = new HashMap();

public HashMap getHashmap() {
return hashmap;
}

public void putWord(String main) {
Integer frequency = getHashmap().get(main);
if (frequency == null) {
frequency = 0;
}
hashmap.put(main, frequency + 1);
}

public TreeMap process(File fFile)
throws FileNotFoundException {
Scanner scanner = new Scanner(new FileReader(fFile));
while (scanner.hasNextLine()) {
String wordp = scanner.nextLine();
int j = wordp.indexOf("query=") + 6;
int k = wordp.length() - 1;
String fut = wordp.substring(j, k).trim();
this.putWord(fut);
}
scanner.close();
ValueComparator bvc = new ValueComparator(getHashmap());
TreeMap sorted_map = new TreeMap(bvc);
sorted_map.putAll(getHashmap());
return sorted_map;
}
public static void main(String[] args) {
if (args.length > 0) {
File fFile = new File(args[0]);
Topfive topfive = new Topfive();
try {
TreeMap sorted_map = topfive.process(fFile);
int count = 0;
for (String key : sorted_map.keySet()) {
System.out.println(key);
count++;
if (count >= 5) {
break;
}
}
} catch (FileNotFound

Solution

Your names could be improved, e.g. hm isn't a good name for a member variable.

You could replace the HashMap by a multiset implementation, e.g. HashMultiset from Google Guava. Then you get putWord for free.

The Comparator (and its Map) should be generified. Further, it seems to be plain wrong, as it never gives 0 as result of compare, but that is the expected outcome if two values are "equal" (whatever this means in the actual context). Without looking too deep in the code, it might even be that a simple priority queue could replace all the Comparator and TreeMap stuff.

The main method is too long and should be split in logical parts. Probably it would be better to avoid static methods, but to create a Topfive instance which does most of the work.

If you can use Java 7, try out the ARM block feature for your file access.

For your question you used the tag "clean code", but it looks like you didn't read the book by Uncle Bob. Check it out!

[Edit]

Based on your clarification I would write the class as follows:

import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.SortedSet;
import java.util.TreeSet;

public class TopWords {
    private final SortedSet frequencies = new TreeSet();

    public TopWords(File file)
            throws IOException {
        List wordList = readWords(file);
        Map freqMap = getFrequencies(wordList);
        sortFrequencies(freqMap);
    }

    public SortedSet getFrequencies() {
        return frequencies;
    }

    private List readWords(File file) throws IOException {
        List result = new ArrayList();
        BufferedReader br = new BufferedReader(new FileReader(file));
        for (String line = br.readLine(); line != null; line = br.readLine()) {
            result.add(line);
        }
        return result;
    }

    private Map getFrequencies(List wordList) throws IOException {
        Map freqMap = new HashMap();
        for (String line : wordList) {
            int start = line.indexOf("query=") + 6;
            int end = line.length() - 1;
            String word = line.substring(start, end).trim();
            Integer frequency = freqMap.get(word);
            freqMap.put(word, frequency == null ? 1 : frequency + 1);
        }
        return freqMap;
    }

    private void sortFrequencies(Map freqMap) {
        for (Entry entry : freqMap.entrySet()) {
            frequencies.add(new Freq(entry.getKey(), entry.getValue()));
        }
    }

    public List getTop(int count) {
        List result = new ArrayList(count);
        for (Freq freq : frequencies) {
            if (count-- == 0) {
                break;
            }
            result.add(freq.word);
        }
        return result;
    }

    public static void main(String[] args) {
        if (args.length > 0) {
            try {
                TopWords topFive = new TopWords(new File(args[0]));
                List topList = topFive.getTop(5);
                for (String word : topList) {
                    System.out.println(word);
                }
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }

    public class Freq implements Comparable {
        public final String word;
        public final int frequency;

        private Freq(String word, int frequency) {
            this.word = word;
            this.frequency = frequency;
        }

        public int compareTo(Freq that) {
            int result = that.frequency - this.frequency;
            return result != 0 ? result : that.word.compareTo(this.word);
        }
    }
}


Of course this might be suboptimal depending on the intended use, but I think you get the general idea.

[Update]

After some meditation I came to the conclusion that I thought way too complicated. Try the following version:

```
import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;

public class TopWords {

private final List frequencies = new ArrayList();

public TopWords(File file)
throws IOException {
List wordList = getSortedWordList(file);
sortFrequencies(wordList);
}

public List getFrequencies() {
return frequencies;
}

private List getSortedWordList(File file) throws IOException {
List result = new ArrayList();
BufferedReader br = new BufferedReader(new FileReader(file));
for (String line = br.readLine(); line != null; line = br.readLine()) {
int start = line.indexOf("query=") + 6;
int end = line.length() - 1;
String word = line.substring(start, end).trim();
result.add(word);
}
br.close();

Code Snippets

import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.SortedSet;
import java.util.TreeSet;

public class TopWords {
    private final SortedSet<Freq> frequencies = new TreeSet<Freq>();

    public TopWords(File file)
            throws IOException {
        List<String> wordList = readWords(file);
        Map<String, Integer> freqMap = getFrequencies(wordList);
        sortFrequencies(freqMap);
    }

    public SortedSet<Freq> getFrequencies() {
        return frequencies;
    }

    private List<String> readWords(File file) throws IOException {
        List<String> result = new ArrayList<String>();
        BufferedReader br = new BufferedReader(new FileReader(file));
        for (String line = br.readLine(); line != null; line = br.readLine()) {
            result.add(line);
        }
        return result;
    }

    private Map<String, Integer> getFrequencies(List<String> wordList) throws IOException {
        Map<String, Integer> freqMap = new HashMap<String, Integer>();
        for (String line : wordList) {
            int start = line.indexOf("query=") + 6;
            int end = line.length() - 1;
            String word = line.substring(start, end).trim();
            Integer frequency = freqMap.get(word);
            freqMap.put(word, frequency == null ? 1 : frequency + 1);
        }
        return freqMap;
    }

    private void sortFrequencies(Map<String, Integer> freqMap) {
        for (Entry<String, Integer> entry : freqMap.entrySet()) {
            frequencies.add(new Freq(entry.getKey(), entry.getValue()));
        }
    }

    public List<String> getTop(int count) {
        List<String> result = new ArrayList<String>(count);
        for (Freq freq : frequencies) {
            if (count-- == 0) {
                break;
            }
            result.add(freq.word);
        }
        return result;
    }

    public static void main(String[] args) {
        if (args.length > 0) {
            try {
                TopWords topFive = new TopWords(new File(args[0]));
                List<String> topList = topFive.getTop(5);
                for (String word : topList) {
                    System.out.println(word);
                }
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }

    public class Freq implements Comparable<Freq> {
        public final String word;
        public final int frequency;

        private Freq(String word, int frequency) {
            this.word = word;
            this.frequency = frequency;
        }

        public int compareTo(Freq that) {
            int result = that.frequency - this.frequency;
            return result != 0 ? result : that.word.compareTo(this.word);
        }
    }
}
import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;

public class TopWords {

    private final List<Freq> frequencies = new ArrayList<Freq>();

    public TopWords(File file)
            throws IOException {
        List<String> wordList = getSortedWordList(file);
        sortFrequencies(wordList);
    }

    public List<Freq> getFrequencies() {
        return frequencies;
    }

    private List<String> getSortedWordList(File file) throws IOException {
        List<String> result = new ArrayList<String>();
        BufferedReader br = new BufferedReader(new FileReader(file));
        for (String line = br.readLine(); line != null; line = br.readLine()) {
            int start = line.indexOf("query=") + 6;
            int end = line.length() - 1;
            String word = line.substring(start, end).trim();
            result.add(word);
        }
        br.close();
        Collections.sort(result);
        return result;
    }

    private void sortFrequencies(List<String> wordList) {
        String lastWord = null;
        int count = 0;
        for (String word : wordList) {
            if (word.equals(lastWord)) {
                count++;
            } else {
                if (lastWord != null) {
                    frequencies.add(new Freq(lastWord, count));
                }
                lastWord = word;
                count = 1;
            }
        }
        if (lastWord != null) {
           frequencies.add(new Freq(lastWord, count));
        } 
        Collections.sort(frequencies);
    }

    public List<Freq> getTop(int count) {
        return frequencies.subList(0, Math.min(frequencies.size(), count));
    }

    public static void main(String[] args) {
        if (args.length > 0) {
            try {
                TopWords topFive = new TopWords(new File(args[0]));
                List<Freq> topList = topFive.getTop(5);
                for (Freq freq : topList) {
                    System.out.println(freq.word + " " + freq.frequency);
                }
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }

    public class Freq implements Comparable<Freq> {

        public final String word;
        public final int frequency;

        private Freq(String word, int frequency) {
            this.word = word;
            this.frequency = frequency;
        }

        public int compareTo(Freq that) {
            int result = that.frequency - this.frequency;
            return result != 0 ? result : that.word.compareTo(this.word);
        }
    }
}

Context

StackExchange Code Review Q#7120, answer score: 8

Revisions (0)

No revisions yet.