patternjavaModerate
Improving the Java UUID class performance
Viewed 0 times
thejavaperformanceuuidclassimproving
Problem
I am looking to submit the following code (adapted to fit into
FromString gets a 3x speedup and 1/25th the object allocations. ToString gets a 6x speedup and 1/10th of the object allocations.
And here's the code:
```
/**
* A class that provides an alternate implementation of {@link
* UUID#fromString(String)} and {@link UUID#toString()}.
*
* The version in the JDK uses {@link String#split(String)}
* which does not compile the regular expression that is used for splitting
* the UUID string and results in the allocation of multiple strings in a
* string array. We decided to write {@link NessUUID} when we ran into
* performance issues with the garbage produced by the JDK class.
*
*/
public class NessUUID {
private NessUUID() {}
private static final int NUM_ALPHA_DIFF = 'A' - '9' - 1;
private static final int LOWER_UPPER_DIFF = 'a' - 'A';
// FROM STRING
public static UUID fromString(String str) {
int dashCount = 4;
int [] dashPos = new int [6];
dashPos[0] = -1;
dashPos[5] = str.length();
for (int i = str.length()-1; i >= 0; i--) {
if (str.charAt(i) == '-') {
if (dashCount == 0) {
throw new IllegalArgumentException("Invalid UUID string: " + str);
}
dashPos[dashCount--] = i;
}
}
if (dashCount > 0) {
throw new IllegalArgumentException("Invalid UUID string: " + str);
}
java.util obviously) which significantly improves performance and reduces useless allocations of java.util.UUID. Help me find the bugs and stupidity before I submit myself to the judgment of the JDK maintainers! :-)benchmark instances Bytes ns linear runtime
JdkUuidFromString 51.00 1544.0 608.2 ==============================
NessUuidFromString 2.00 72.0 179.1 ========
JdkUuidToString 31.00 1720.0 321.4 ===============
NessUuidToString 3.00 200.0 51.5 ==FromString gets a 3x speedup and 1/25th the object allocations. ToString gets a 6x speedup and 1/10th of the object allocations.
And here's the code:
```
/**
* A class that provides an alternate implementation of {@link
* UUID#fromString(String)} and {@link UUID#toString()}.
*
* The version in the JDK uses {@link String#split(String)}
* which does not compile the regular expression that is used for splitting
* the UUID string and results in the allocation of multiple strings in a
* string array. We decided to write {@link NessUUID} when we ran into
* performance issues with the garbage produced by the JDK class.
*
*/
public class NessUUID {
private NessUUID() {}
private static final int NUM_ALPHA_DIFF = 'A' - '9' - 1;
private static final int LOWER_UPPER_DIFF = 'a' - 'A';
// FROM STRING
public static UUID fromString(String str) {
int dashCount = 4;
int [] dashPos = new int [6];
dashPos[0] = -1;
dashPos[5] = str.length();
for (int i = str.length()-1; i >= 0; i--) {
if (str.charAt(i) == '-') {
if (dashCount == 0) {
throw new IllegalArgumentException("Invalid UUID string: " + str);
}
dashPos[dashCount--] = i;
}
}
if (dashCount > 0) {
throw new IllegalArgumentException("Invalid UUID string: " + str);
}
Solution
I could not get Caliper running, but hacked my own test:
My initial results were:
So, at face value, yes, your algorithm is nicely faster.
As for the code review, I have some comments:
fromString()
toString()
Consider:
I had a hack at this and decided I could do better.... consider the following results:
This is almost three times faster than your version for the fromString, and another 0.2-times faster than your toString.
Here is the code that is (in my experience) about as fast as you can get with Java:
```
package uuid;
import java.util.Arrays;
import java.util.UUID;
/**
* A class that provides an alternate implementation of {@link
* UUID#fromString(String)} and {@link UUID#toString()}.
*
* The version in the JDK uses {@link String#split(String)}
* which does not compile the regular expression that is used for splitting
* the UUID string and results in the allocation of multiple strings in a
* string array. We decided to write {@link NessUUID} when we ran into
* performance issues with the garbage produced by the JDK class.
*
*/
public class NessUUID {
private NessUUID() {}
// lookup is an array indexed by the char, and it has
// valid values set with the decimal value of the hex char.
private static final long[] lookup = buildLookup();
private static final int DASH = -1;
private static final int ERROR = -2;
private static final long[] buildLookup() {
long [] lu = new long[128];
Arrays.fill(lu, ERROR);
lu['0'] = 0;
lu['1'] = 1;
lu['2'] = 2;
lu['3'] = 3;
lu['4'] = 4;
lu['5'] = 5;
lu['6'] = 6;
lu['7'] = 7;
lu['8'] = 8;
lu['9'] = 9;
lu['a'] = 10;
lu['b'] = 11;
lu['c'] = 12;
lu['d'] = 13;
lu['e'] = 14;
lu['f'] = 15;
lu['A'] = 10;
lu['B'] = 11;
lu['C'] = 12;
lu['D'] = 13;
lu['E'] = 14;
lu['F'] = 15;
lu['-'] = DASH;
return lu;
}
// FROM STRING
public static UUID fromString(final String str) {
final int len = str.length();
if (len != 36) {
throw new IllegalArgumentException("Invalid UUID string (expected to be 36 characters long)");
}
final long[] vals = new long[2];
int shift = 60;
int index = 0;
for (int i = 0; i = lookup.length || lookup[c] == ERROR) {
throw new IllegalArgumentException("Invalid UUID string (unexpected '" + str.charAt(i) + "' at position " + i + " -> " + str + " )");
}
if (lookup[c] == DASH) {
if ((i - 8) % 5 != 0) {
throw new IllegalArgumentException("Invalid UUID string (unexpected '-' at position " + i + " -> " + str + "
My initial results were:
warmup
JdkFrom: 1787.38 JdkTo: 635.12 NessFrom: 460.15 NessTo: 183.67 [-4552303853801426784, 69220000, -4552303853801426784, 69220000]
Real Run
JdkFrom: 1415.68 JdkTo: 553.28 NessFrom: 426.29 NessTo: 94.69 [-4552303853801426784, 69220000, -4552303853801426784, 69220000]
JdkFrom: 1394.24 JdkTo: 387.14 NessFrom: 340.78 NessTo: 59.33 [-4552303853801426784, 69220000, -4552303853801426784, 69220000]
JdkFrom: 1378.38 JdkTo: 339.20 NessFrom: 325.73 NessTo: 59.20 [-4552303853801426784, 69220000, -4552303853801426784, 69220000]
JdkFrom: 1381.61 JdkTo: 334.28 NessFrom: 389.30 NessTo: 59.09 [-4552303853801426784, 69220000, -4552303853801426784, 69220000]So, at face value, yes, your algorithm is nicely faster.
As for the code review, I have some comments:
fromString()
- I don't like that you ignore the required format for UUID's, essentially you say if it has 4 dashes it's cool, but, really, the number of digites between dashes is significant, and you ignore that.
- I feel that you should be calculating the long bits at the same time as you are validating and counting the dashes. Repeating the loops afterwards seems redundant.
- If you are looking for raw performance, a trick I have found out is that lookup tables make a big difference... I will show an example in a bit.
toString()
- I don't like the public
toString(long,long)method. This is not 'symmetrical'. Only the toString(UUID) should be public.
- The
DIGITScode appears to be designed to satisfy many different radices (radixes, what's the plural?). This makes it a little bulky for this special case.
- There are too many levels of method calls. It can be much shallower.
Consider:
I had a hack at this and decided I could do better.... consider the following results:
warmup
JdkFrom: 1929.14 JdkTo: 542.10 NessFrom: 270.43 NessTo: 175.71 [2254274162472357232, 70459000, 2254274162472357232, 70459000]
Real Run
JdkFrom: 1569.85 JdkTo: 404.93 NessFrom: 249.37 NessTo: 45.94 [2254274162472357232, 70459000, 2254274162472357232, 70459000]
JdkFrom: 1528.79 JdkTo: 279.55 NessFrom: 114.74 NessTo: 44.71 [2254274162472357232, 70459000, 2254274162472357232, 70459000]
JdkFrom: 1657.85 JdkTo: 271.24 NessFrom: 118.20 NessTo: 44.43 [2254274162472357232, 70459000, 2254274162472357232, 70459000]
JdkFrom: 1563.52 JdkTo: 273.69 NessFrom: 140.96 NessTo: 46.46 [2254274162472357232, 70459000, 2254274162472357232, 70459000]This is almost three times faster than your version for the fromString, and another 0.2-times faster than your toString.
Here is the code that is (in my experience) about as fast as you can get with Java:
```
package uuid;
import java.util.Arrays;
import java.util.UUID;
/**
* A class that provides an alternate implementation of {@link
* UUID#fromString(String)} and {@link UUID#toString()}.
*
* The version in the JDK uses {@link String#split(String)}
* which does not compile the regular expression that is used for splitting
* the UUID string and results in the allocation of multiple strings in a
* string array. We decided to write {@link NessUUID} when we ran into
* performance issues with the garbage produced by the JDK class.
*
*/
public class NessUUID {
private NessUUID() {}
// lookup is an array indexed by the char, and it has
// valid values set with the decimal value of the hex char.
private static final long[] lookup = buildLookup();
private static final int DASH = -1;
private static final int ERROR = -2;
private static final long[] buildLookup() {
long [] lu = new long[128];
Arrays.fill(lu, ERROR);
lu['0'] = 0;
lu['1'] = 1;
lu['2'] = 2;
lu['3'] = 3;
lu['4'] = 4;
lu['5'] = 5;
lu['6'] = 6;
lu['7'] = 7;
lu['8'] = 8;
lu['9'] = 9;
lu['a'] = 10;
lu['b'] = 11;
lu['c'] = 12;
lu['d'] = 13;
lu['e'] = 14;
lu['f'] = 15;
lu['A'] = 10;
lu['B'] = 11;
lu['C'] = 12;
lu['D'] = 13;
lu['E'] = 14;
lu['F'] = 15;
lu['-'] = DASH;
return lu;
}
// FROM STRING
public static UUID fromString(final String str) {
final int len = str.length();
if (len != 36) {
throw new IllegalArgumentException("Invalid UUID string (expected to be 36 characters long)");
}
final long[] vals = new long[2];
int shift = 60;
int index = 0;
for (int i = 0; i = lookup.length || lookup[c] == ERROR) {
throw new IllegalArgumentException("Invalid UUID string (unexpected '" + str.charAt(i) + "' at position " + i + " -> " + str + " )");
}
if (lookup[c] == DASH) {
if ((i - 8) % 5 != 0) {
throw new IllegalArgumentException("Invalid UUID string (unexpected '-' at position " + i + " -> " + str + "
Code Snippets
warmup
JdkFrom: 1787.38 JdkTo: 635.12 NessFrom: 460.15 NessTo: 183.67 [-4552303853801426784, 69220000, -4552303853801426784, 69220000]
Real Run
JdkFrom: 1415.68 JdkTo: 553.28 NessFrom: 426.29 NessTo: 94.69 [-4552303853801426784, 69220000, -4552303853801426784, 69220000]
JdkFrom: 1394.24 JdkTo: 387.14 NessFrom: 340.78 NessTo: 59.33 [-4552303853801426784, 69220000, -4552303853801426784, 69220000]
JdkFrom: 1378.38 JdkTo: 339.20 NessFrom: 325.73 NessTo: 59.20 [-4552303853801426784, 69220000, -4552303853801426784, 69220000]
JdkFrom: 1381.61 JdkTo: 334.28 NessFrom: 389.30 NessTo: 59.09 [-4552303853801426784, 69220000, -4552303853801426784, 69220000]warmup
JdkFrom: 1929.14 JdkTo: 542.10 NessFrom: 270.43 NessTo: 175.71 [2254274162472357232, 70459000, 2254274162472357232, 70459000]
Real Run
JdkFrom: 1569.85 JdkTo: 404.93 NessFrom: 249.37 NessTo: 45.94 [2254274162472357232, 70459000, 2254274162472357232, 70459000]
JdkFrom: 1528.79 JdkTo: 279.55 NessFrom: 114.74 NessTo: 44.71 [2254274162472357232, 70459000, 2254274162472357232, 70459000]
JdkFrom: 1657.85 JdkTo: 271.24 NessFrom: 118.20 NessTo: 44.43 [2254274162472357232, 70459000, 2254274162472357232, 70459000]
JdkFrom: 1563.52 JdkTo: 273.69 NessFrom: 140.96 NessTo: 46.46 [2254274162472357232, 70459000, 2254274162472357232, 70459000]package uuid;
import java.util.Arrays;
import java.util.UUID;
/**
* A class that provides an alternate implementation of {@link
* UUID#fromString(String)} and {@link UUID#toString()}.
*
* <p> The version in the JDK uses {@link String#split(String)}
* which does not compile the regular expression that is used for splitting
* the UUID string and results in the allocation of multiple strings in a
* string array. We decided to write {@link NessUUID} when we ran into
* performance issues with the garbage produced by the JDK class.
*
*/
public class NessUUID {
private NessUUID() {}
// lookup is an array indexed by the **char**, and it has
// valid values set with the decimal value of the hex char.
private static final long[] lookup = buildLookup();
private static final int DASH = -1;
private static final int ERROR = -2;
private static final long[] buildLookup() {
long [] lu = new long[128];
Arrays.fill(lu, ERROR);
lu['0'] = 0;
lu['1'] = 1;
lu['2'] = 2;
lu['3'] = 3;
lu['4'] = 4;
lu['5'] = 5;
lu['6'] = 6;
lu['7'] = 7;
lu['8'] = 8;
lu['9'] = 9;
lu['a'] = 10;
lu['b'] = 11;
lu['c'] = 12;
lu['d'] = 13;
lu['e'] = 14;
lu['f'] = 15;
lu['A'] = 10;
lu['B'] = 11;
lu['C'] = 12;
lu['D'] = 13;
lu['E'] = 14;
lu['F'] = 15;
lu['-'] = DASH;
return lu;
}
// FROM STRING
public static UUID fromString(final String str) {
final int len = str.length();
if (len != 36) {
throw new IllegalArgumentException("Invalid UUID string (expected to be 36 characters long)");
}
final long[] vals = new long[2];
int shift = 60;
int index = 0;
for (int i = 0; i < len; i++) {
final int c = str.charAt(i);
if (c >= lookup.length || lookup[c] == ERROR) {
throw new IllegalArgumentException("Invalid UUID string (unexpected '" + str.charAt(i) + "' at position " + i + " -> " + str + " )");
}
if (lookup[c] == DASH) {
if ((i - 8) % 5 != 0) {
throw new IllegalArgumentException("Invalid UUID string (unexpected '-' at position " + i + " -> " + str + " )");
}
continue;
}
vals[index] |= lookup[c] << shift;
shift -= 4;
if (shift < 0) {
shift = 60;
index++;
}
}
return new UUID(vals[0], vals[1]);
}
// TO STRING
// recode is 2-byte arrays representing the hex representation of every byte value (all 256)
private static final char[][] recode = buildByteBlocks();
private static char[][] buildByteBlocks() {
final char[][] ret = new char[256][];
for (int i = 0; i < ret.length; i++) {
ret[i] = String.format(package uuid;
import java.util.Arrays;
import java.util.UUID;
public class PerformanceComparison
{
private final int N_UUIDS = 1000;
private final UUID[] testUuids = new UUID[N_UUIDS];
private final String[] testStrings = new String[N_UUIDS];
public void setup () {
for (int i = 0; i < N_UUIDS; i++)
{
testUuids[i] = UUID.randomUUID();
testStrings[i] = testUuids[i].toString();
}
}
public static void main(String[] args) {
PerformanceComparison pc = new PerformanceComparison();
final UUID uuidj = UUID.randomUUID();
String valj = uuidj.toString();
String valn = NessUUID.toString(uuidj);
UUID uuidn = NessUUID.fromString(valn);
if (!valj.equals(valn)) {
throw new IllegalStateException("Illegal conversion");
}
if (!uuidj.equals(uuidn)) {
throw new IllegalStateException("Illegal conversion");
}
pc.setup();
final int reps = 1000000;
System.out.println(" warmup");
pc.runAll(reps);
System.out.println(" Real Run");
pc.runAll(reps);
pc.runAll(reps);
pc.runAll(reps);
pc.runAll(reps);
}
private final void runAll(final int reps) {
long[] accum = new long[4];
System.out.printf(" JdkFrom: %6.2f JdkTo: %6.2f NessFrom: %6.2f NessTo: %6.2f %s\n",
timeJdkUuidFromString(reps, accum, 0) / 1000000.0,
timeJdkUuidToString(reps, accum, 1) / 1000000.0,
timeNessUuidFromString(reps, accum, 2) / 1000000.0,
timeNessUuidToString(reps, accum, 3) / 1000000.0,
Arrays.toString(accum));
}
public long timeJdkUuidFromString(int reps, long[] accum2, int j)
{
long accum = 0;
long start = System.nanoTime();
for (int i = 0; i < reps; i++)
{
accum += UUID.fromString(testStrings[i % N_UUIDS]).getMostSignificantBits();
}
accum2[j] = accum;
return System.nanoTime() - start;
}
public long timeJdkUuidToString(int reps, long[] accum2, int j)
{
long accum = 0;
long start = System.nanoTime();
for (int i = 0; i < reps; i++)
{
accum += testUuids[i % N_UUIDS].toString().charAt(0);
}
accum2[j] = accum;
return System.nanoTime() - start;
}
public long timeNessUuidFromString(int reps, long[] accum2, int j)
{
long accum = 0;
long start = System.nanoTime();
for (int i = 0; i < reps; i++)
{
accum += NessUUID.fromString(testStrings[i % N_UUIDS]).getMostSignificantBits();
}
accum2[j] = accum;
return System.nanoTime() - start;
}
public long timeNessUuidToString(int reps, long[] accum2, int j)
{
long accum = 0;
long start = System.nanoTime();
for (int i = 0; i < reps; i++)
Context
StackExchange Code Review Q#19860, answer score: 13
Revisions (0)
No revisions yet.