patternjavaMinor
Intersection of words
Viewed 0 times
wordsintersectionstackoverflow
Problem
I get a
This code consumes too much time:
```
public void setDocuments() {
// Neighbor contain the arrayList of Documents
neighbors.add(new Neighbor("sky"));
neighbors.add(new Neighbor("earth"));
Map docInfo;
// Document is a Map
Document doc1 = new Document();
Document doc2;
Map sorted;
ArrayList intersectionWords;
HashSet different;
try {
docInfo = dbDocMeta.getDocInfo();
String family;
int index = 0;
for (Long id : docInfo.keySet()) {
family = docInfo.get(id).contains("sky") ? "sky" : "earth";
for (Neighbor neighbor : neighbors) {
// check if the document in the Neighbour family
if (neighbor.getFamily().equalsIgnoreCase(family)) {
different = new HashSet();
different.addAll(someWord);
doc1 = dbWords.getNeighbors(id);
doc2 = new Document();
intersectionWords = new ArrayList();
for (String word : doc1.getMap().keySet()) {
index = dbWords.getIndex(word);
if (selectedFeatures.contains(word)) {
doc2.add(index, doc1.getAttributes().get(word));
intersectionWords.add(word);
}
}
if (!doc2.getMap().isEmpty()) {
different.removeAll(intersectionWords);
Iterator iterator = different.iterator();
while (iterator.hasNext()) {
String s = iterator.next();
index = dbWords.getIndex(s);
doc2.a
map from DB called doc1 and I also have the arrayList called someWord. I will find the subscription of doc1 and someWord and store it in doc2. I will also add the intersecting words with zero index in doc2.This code consumes too much time:
```
public void setDocuments() {
// Neighbor contain the arrayList of Documents
neighbors.add(new Neighbor("sky"));
neighbors.add(new Neighbor("earth"));
Map docInfo;
// Document is a Map
Document doc1 = new Document();
Document doc2;
Map sorted;
ArrayList intersectionWords;
HashSet different;
try {
docInfo = dbDocMeta.getDocInfo();
String family;
int index = 0;
for (Long id : docInfo.keySet()) {
family = docInfo.get(id).contains("sky") ? "sky" : "earth";
for (Neighbor neighbor : neighbors) {
// check if the document in the Neighbour family
if (neighbor.getFamily().equalsIgnoreCase(family)) {
different = new HashSet();
different.addAll(someWord);
doc1 = dbWords.getNeighbors(id);
doc2 = new Document();
intersectionWords = new ArrayList();
for (String word : doc1.getMap().keySet()) {
index = dbWords.getIndex(word);
if (selectedFeatures.contains(word)) {
doc2.add(index, doc1.getAttributes().get(word));
intersectionWords.add(word);
}
}
if (!doc2.getMap().isEmpty()) {
different.removeAll(intersectionWords);
Iterator iterator = different.iterator();
while (iterator.hasNext()) {
String s = iterator.next();
index = dbWords.getIndex(s);
doc2.a
Solution
You should declare variables in the smallest possible scope. E.g. instead of
we could do
The same applies to
You have this snippet:
which could be abbreviated to
If we clean up these two points, we end up with this:
In a next step, we remove calculations from inner loops that don't have to be recalculated each time. E.g.
Furthermore,
We can further remove unnecessary variables – their types just add clutter to the code.
A bare
The
If the whole body of a loop us subject to an
Unrelated implementation details like calculating the intersection of two sets should be factored out into helper functions.
```
public void setDocunents() {
// Neighbor contain the arrayList of Documents
neighbors.add(new Neighbor("sky"));
neighbors.add(new Neighbor("earth"));
try {
Map docInfo = dbDocMeta.getDocInfo();
for (Long id : docInfo.keySet()) {
String family = docInfo.get(id).contains("sky") ? "sky" : "earth";
Document doc1 = dbWords.getNeighbors(id);
neighbors:
for (Neighbor neighbor : neighbors) {
// check if the document in the Neighbour family
if (!neighbor.getFamily().equalsIgnoreCase(family)) {
continue neighbors;
}
Set intersectionWords = intersection(doc1.getMap().keySet(), selectedFeatures);
if (intersectionWords.isEmpty()) {
continue neighbors;
}
HashSet different = new HashSet(someWord);
Document doc2 = new Document();
for (String s : intersectionWords) {
doc2.add(dbWords.getIndex(word), doc1.getAttributes().get(word));
different.remove(s);
}
for (String s : different) {
doc2.add(dbWords.getIndex(s), 0);
}
String family;
for (...) {
family = ...;
...
}we could do
for (...) {
String family = ...;
...
}The same applies to
docInfo, doc1, doc2, sorted, intersectionWords, different, and index.You have this snippet:
Iterator iterator = different.iterator();
while (iterator.hasNext()) {
String s = iterator.next();
...
}which could be abbreviated to
for (String s : different) { ... }.If we clean up these two points, we end up with this:
public void setDocunents() {
// Neighbor contain the arrayList of Documents
neighbors.add(new Neighbor("sky"));
neighbors.add(new Neighbor("earth"));
try {
Map docInfo = dbDocMeta.getDocInfo();
for (Long id : docInfo.keySet()) {
String family = docInfo.get(id).contains("sky") ? "sky" : "earth";
for (Neighbor neighbor : neighbors) {
// check if the document in the Neighbour family
if (neighbor.getFamily().equalsIgnoreCase(family)) {
HashSet different = new HashSet();
different.addAll(someWord);
Document doc1 = dbWords.getNeighbors(id);
Document doc2 = new Document();
ArrayList intersectionWords = new ArrayList();
for (String word : doc1.getMap().keySet()) {
int index = dbWords.getIndex(word);
if (selectedFeatures.contains(word)) {
doc2.add(index, doc1.getAttributes().get(word));
intersectionWords.add(word);
}
}
if (!doc2.getMap().isEmpty()) {
different.removeAll(intersectionWords);
for (String s : different) {
int index = dbWords.getIndex(s);
doc2.add(index, 0);
}
Map sorted = new TreeMap(doc2.getAttributes());
doc2.setMap(sorted);
neighbor.addDocument(doc2);
break;
}
}
}
}
} catch (InterruptedException e) {
logger.error("InterruptedException msg : {} \n {}", e.getMessage(), e.getStackTrace());
} catch (ClassNotFoundException e) {
logger.error("ClassNotFoundException msg : {} \n {}", e.getMessage(), e.getStackTrace());
}
}In a next step, we remove calculations from inner loops that don't have to be recalculated each time. E.g.
doc1 = dbWords.getNeighbors(id) depends only on id, and probably does not have to be called for each neighbor. On the other hand, different is only needed inside a specific if branch.Furthermore,
doc2 will be discarded unless that branch is entered, so we will build it only in that case (assuming doc2.getMap().isEmpty() is equivalent to intersectionWords.isEmpty().We can further remove unnecessary variables – their types just add clutter to the code.
A bare
break inside nested loops can be a bit confusing, so we'll use the labelled for: break neighbors.The
HashSet constructor can directly take a collection, there is no need to separately call addAll.If the whole body of a loop us subject to an
if (cond), it is often easier to read when at the beginning of a loop, we if (!cond) continue.Unrelated implementation details like calculating the intersection of two sets should be factored out into helper functions.
```
public void setDocunents() {
// Neighbor contain the arrayList of Documents
neighbors.add(new Neighbor("sky"));
neighbors.add(new Neighbor("earth"));
try {
Map docInfo = dbDocMeta.getDocInfo();
for (Long id : docInfo.keySet()) {
String family = docInfo.get(id).contains("sky") ? "sky" : "earth";
Document doc1 = dbWords.getNeighbors(id);
neighbors:
for (Neighbor neighbor : neighbors) {
// check if the document in the Neighbour family
if (!neighbor.getFamily().equalsIgnoreCase(family)) {
continue neighbors;
}
Set intersectionWords = intersection(doc1.getMap().keySet(), selectedFeatures);
if (intersectionWords.isEmpty()) {
continue neighbors;
}
HashSet different = new HashSet(someWord);
Document doc2 = new Document();
for (String s : intersectionWords) {
doc2.add(dbWords.getIndex(word), doc1.getAttributes().get(word));
different.remove(s);
}
for (String s : different) {
doc2.add(dbWords.getIndex(s), 0);
}
Code Snippets
String family;
for (...) {
family = ...;
...
}for (...) {
String family = ...;
...
}Iterator<String> iterator = different.iterator();
while (iterator.hasNext()) {
String s = iterator.next();
...
}public void setDocunents() {
// Neighbor contain the arrayList of Documents
neighbors.add(new Neighbor<Integer>("sky"));
neighbors.add(new Neighbor<Integer>("earth"));
try {
Map<Long, String> docInfo = dbDocMeta.getDocInfo();
for (Long id : docInfo.keySet()) {
String family = docInfo.get(id).contains("sky") ? "sky" : "earth";
for (Neighbor<Integer> neighbor : neighbors) {
// check if the document in the Neighbour family
if (neighbor.getFamily().equalsIgnoreCase(family)) {
HashSet<String> different = new HashSet<String>();
different.addAll(someWord);
Document<String> doc1 = dbWords.getNeighbors(id);
Document<Integer> doc2 = new Document<Integer>();
ArrayList<String> intersectionWords = new ArrayList<String>();
for (String word : doc1.getMap().keySet()) {
int index = dbWords.getIndex(word);
if (selectedFeatures.contains(word)) {
doc2.add(index, doc1.getAttributes().get(word));
intersectionWords.add(word);
}
}
if (!doc2.getMap().isEmpty()) {
different.removeAll(intersectionWords);
for (String s : different) {
int index = dbWords.getIndex(s);
doc2.add(index, 0);
}
Map<Integer, Long> sorted = new TreeMap<Integer, Long>(doc2.getAttributes());
doc2.setMap(sorted);
neighbor.addDocument(doc2);
break;
}
}
}
}
} catch (InterruptedException e) {
logger.error("InterruptedException msg : {} \n {}", e.getMessage(), e.getStackTrace());
} catch (ClassNotFoundException e) {
logger.error("ClassNotFoundException msg : {} \n {}", e.getMessage(), e.getStackTrace());
}
}public void setDocunents() {
// Neighbor contain the arrayList of Documents
neighbors.add(new Neighbor<Integer>("sky"));
neighbors.add(new Neighbor<Integer>("earth"));
try {
Map<Long, String> docInfo = dbDocMeta.getDocInfo();
for (Long id : docInfo.keySet()) {
String family = docInfo.get(id).contains("sky") ? "sky" : "earth";
Document<String> doc1 = dbWords.getNeighbors(id);
neighbors:
for (Neighbor<Integer> neighbor : neighbors) {
// check if the document in the Neighbour family
if (!neighbor.getFamily().equalsIgnoreCase(family)) {
continue neighbors;
}
Set<String> intersectionWords = intersection<String>(doc1.getMap().keySet(), selectedFeatures);
if (intersectionWords.isEmpty()) {
continue neighbors;
}
HashSet<String> different = new HashSet<String>(someWord);
Document<Integer> doc2 = new Document<Integer>();
for (String s : intersectionWords) {
doc2.add(dbWords.getIndex(word), doc1.getAttributes().get(word));
different.remove(s);
}
for (String s : different) {
doc2.add(dbWords.getIndex(s), 0);
}
// due to the choice of a TreeMap, the elements are sorted
doc2.setMap(new TreeMap<Integer, Long>(doc2.getAttributes()));
neighbor.addDocument(doc2);
break neighbors;
}
}
} catch (InterruptedException e) {
logger.error("InterruptedException msg : {} \n {}", e.getMessage(), e.getStackTrace());
} catch (ClassNotFoundException e) {
logger.error("ClassNotFoundException msg : {} \n {}", e.getMessage(), e.getStackTrace());
}
}
private static <A> Set<A> intersection(final Set<A> xs, final Set<A> ys) {
// make sure that xs is the smaller set
if (ys.size() < xs.size()) {
return intersection<A>(ys, xs);
}
final HashSet<A> result = new HashSet<>();
for (A x : xs) {
if (ys.contains(x)) {
result.add(x)
}
}
return result;
}Context
StackExchange Code Review Q#44150, answer score: 4
Revisions (0)
No revisions yet.