patternjavaMinor
Huffman code implementation
Viewed 0 times
codeimplementationhuffman
Problem
I'm looking for code review, optimizations, best practices.
```
/**
* Huffman encoding obeys the huffman algorithm.
* It compresses the input sentence and serializes the "huffman code"
* and the "tree" used to generate the huffman code
* Both the serialized files are intended to be sent to client.
*
*/
public final class Huffman {
private Huffman() {};
private static class HuffmanNode {
char ch;
int frequency;
HuffmanNode left;
HuffmanNode right;
HuffmanNode(char ch, int frequency, HuffmanNode left, HuffmanNode right) {
this.ch = ch;
this.frequency = frequency;
this.left = left;
this.right = right;
}
}
private static class HuffManComparator implements Comparator {
@Override
public int compare(HuffmanNode node1, HuffmanNode node2) {
return node1.frequency - node2.frequency;
}
}
/**
* Compresses the string using huffman algorithm.
* The huffman tree and the huffman code are serialized to disk
*
* @param sentence The sentence to be serialized
* @throws FileNotFoundException If file is not found
* @throws IOException If IO exception occurs.
*/
public static void compress(String sentence) throws FileNotFoundException, IOException {
if (sentence == null) {
throw new NullPointerException("Input sentence cannot be null.");
}
if (sentence.length() == 0) {
throw new IllegalArgumentException("The string should atleast have 1 character.");
}
final Map charFreq = getCharFrequency(sentence);
final HuffmanNode root = buildTree(charFreq);
final Map charCode = generateCodes(charFreq.keySet(), root);
final String encodedMessage = encodeMessage(charCode, sentence);
serializeTree(root);
serializeMessage(encodedMessage);
}
private s
```
/**
* Huffman encoding obeys the huffman algorithm.
* It compresses the input sentence and serializes the "huffman code"
* and the "tree" used to generate the huffman code
* Both the serialized files are intended to be sent to client.
*
*/
public final class Huffman {
private Huffman() {};
private static class HuffmanNode {
char ch;
int frequency;
HuffmanNode left;
HuffmanNode right;
HuffmanNode(char ch, int frequency, HuffmanNode left, HuffmanNode right) {
this.ch = ch;
this.frequency = frequency;
this.left = left;
this.right = right;
}
}
private static class HuffManComparator implements Comparator {
@Override
public int compare(HuffmanNode node1, HuffmanNode node2) {
return node1.frequency - node2.frequency;
}
}
/**
* Compresses the string using huffman algorithm.
* The huffman tree and the huffman code are serialized to disk
*
* @param sentence The sentence to be serialized
* @throws FileNotFoundException If file is not found
* @throws IOException If IO exception occurs.
*/
public static void compress(String sentence) throws FileNotFoundException, IOException {
if (sentence == null) {
throw new NullPointerException("Input sentence cannot be null.");
}
if (sentence.length() == 0) {
throw new IllegalArgumentException("The string should atleast have 1 character.");
}
final Map charFreq = getCharFrequency(sentence);
final HuffmanNode root = buildTree(charFreq);
final Map charCode = generateCodes(charFreq.keySet(), root);
final String encodedMessage = encodeMessage(charCode, sentence);
serializeTree(root);
serializeMessage(encodedMessage);
}
private s
Solution
Great Start
Encapsulate
Separate and encapsulate some of the logic into custom classes rather than passing around collections directly. Doing so improves testability and reusability. One prime example is the character frequency map which could easily be reused in many other programs.
Here's a simple implementation that provides only what's necessary for this program, but it could certainly benefit from methods such as
To avoid exposing the underlying map you could go further by using a closure (Java 8) or anonymous visitor class:
I would perform the same encapsulation with
Miscellaneous
-
Provide a two-argument constructor for
-
Implement
-
Be careful not to include too much code detail in the comments. As the code changes, the comments become incorrect. Case in point:
The first line is unnecessary since it's in the method signature, and the second line mentions the parameter should be a
-
Thanks to the garbage collector this is totally unnecessary:
Since
- Clear code
- Short methods
- JavaDoc
- Tests (though they need reorganizing as palacsint noted)
Encapsulate
Separate and encapsulate some of the logic into custom classes rather than passing around collections directly. Doing so improves testability and reusability. One prime example is the character frequency map which could easily be reused in many other programs.
Here's a simple implementation that provides only what's necessary for this program, but it could certainly benefit from methods such as
size(), getCount(Character c), etc. I also haven't bothered wrapping the exposed map/set in unmodifiable collections which would be advisable (though see below for another option)./**
* Keeps a running count of how many times each unique character is seen.
*/
public class CharacterCounter
{
private final HashMap counts = new HashMap<>();
/**
* Increments the count of the given character,
* setting it to one on first appearance.
*
* @param c the character to count
*/
public void increment(Character c) {
Integer freq = counts.get(c);
if (freq == null) {
counts.put(c, 1);
} else {
counts.put(c, freq + 1);
}
}
/**
* Increments the count of each character in the given text.
*
* @param text contains the characters to count
*/
public void increment(String text) {
for (char c : text) {
increment(c);
}
}
/**
* Returns the set of characters seen.
*
* @return set containing each character seen while counting
*/
public Set getCharacters() {
return counts.keySet();
}
/**
* Returns the set of characters seen along with their counts.
*
* @return set containing each character seen while counting and its count
*/
public Set> getCharacterCounts() {
return counts.entrySet();
}
}To avoid exposing the underlying map you could go further by using a closure (Java 8) or anonymous visitor class:
public class CharacterCounter
{
...
public interface Visitor {
void visit(Character c, Integer count);
}
/**
* Passes each character and its count to the given visitor.
*
* @param visitor receives characters and counts in an undefined order
*/
public void apply(Visitor visitor) {
for (Map.Entry entry : counts.entrySet()) {
visitor.visit(entry.getKey(), entry.getValue());
}
}
}
private static Queue createNodeQueue(CharacterCounter counter) {
final Queue pq = new PriorityQueue<>(11, new HuffManComparator());
counter.apply(new CharacterCounter.Visitor() {
public void visit(Character c, Integer count) {
pq.add(new HuffmanNode(c, count, null, null));
}
});
return pq;
}I would perform the same encapsulation with
HuffmanTree building and managing the nodes from the CharacterCounter.Miscellaneous
-
Provide a two-argument constructor for
HuffmanNode that defaults the other parameters to null.-
Implement
Comparable directly for natural ordering instead of supplying a separate Comparator to the priority queue.-
Be careful not to include too much code detail in the comments. As the code changes, the comments become incorrect. Case in point:
/**
* Map map
* Some implementation of that treeSet is passed as parameter.
* @param map
*/
private static HuffmanNode buildTree(Map map) {The first line is unnecessary since it's in the method signature, and the second line mentions the parameter should be a
TreeSet which is not correct.-
Thanks to the garbage collector this is totally unnecessary:
// remove it to prevent object leak.
return nodeQueue.remove();Since
nodeQueue is local and never exposed externally, it will be collected along with its reference to the root node.Code Snippets
/**
* Keeps a running count of how many times each unique character is seen.
*/
public class CharacterCounter
{
private final HashMap<Character, Integer> counts = new HashMap<>();
/**
* Increments the count of the given character,
* setting it to one on first appearance.
*
* @param c the character to count
*/
public void increment(Character c) {
Integer freq = counts.get(c);
if (freq == null) {
counts.put(c, 1);
} else {
counts.put(c, freq + 1);
}
}
/**
* Increments the count of each character in the given text.
*
* @param text contains the characters to count
*/
public void increment(String text) {
for (char c : text) {
increment(c);
}
}
/**
* Returns the set of characters seen.
*
* @return set containing each character seen while counting
*/
public Set<Character> getCharacters() {
return counts.keySet();
}
/**
* Returns the set of characters seen along with their counts.
*
* @return set containing each character seen while counting and its count
*/
public Set<Map.Entry<Character, Integer>> getCharacterCounts() {
return counts.entrySet();
}
}public class CharacterCounter
{
...
public interface Visitor {
void visit(Character c, Integer count);
}
/**
* Passes each character and its count to the given visitor.
*
* @param visitor receives characters and counts in an undefined order
*/
public void apply(Visitor visitor) {
for (Map.Entry<Character, Integer> entry : counts.entrySet()) {
visitor.visit(entry.getKey(), entry.getValue());
}
}
}
private static Queue<HuffmanNode> createNodeQueue(CharacterCounter counter) {
final Queue<HuffmanNode> pq = new PriorityQueue<>(11, new HuffManComparator());
counter.apply(new CharacterCounter.Visitor() {
public void visit(Character c, Integer count) {
pq.add(new HuffmanNode(c, count, null, null));
}
});
return pq;
}/**
* Map<Character, Integer> map
* Some implementation of that treeSet is passed as parameter.
* @param map
*/
private static HuffmanNode buildTree(Map<Character, Integer> map) {// remove it to prevent object leak.
return nodeQueue.remove();Context
StackExchange Code Review Q#44473, answer score: 8
Revisions (0)
No revisions yet.