patternjavaMinor
Encoding and decoding an array of digits
Viewed 0 times
arraydigitsdecodingencodingand
Problem
I have two simple functions where performance is critical, one for encoding an array of
The solutions I have come up with below are fairly fast. Can these be made even faster?
Please note: I am constrained by using the same function signatures.
These are used for persisting a Chess move in a simple engine, for example:
```
//=========================================================================
// MOVE INFO
//=========================================================================
public static final int MOVE_INFO_COUNT = 19;
public static final int MOVE_INFO_EXTRA_1 = 0; // -> ACTIVE DIGI
ints to a long, another for doing the opposite (decoding the long back to an array of ints). The solutions I have come up with below are fairly fast. Can these be made even faster?
Please note: I am constrained by using the same function signatures.
public static final long encode(int[] digits) {
return
1000000000000000000L * digits[0]
+ 100000000000000000L * digits[1]
+ 10000000000000000L * digits[2]
+ 1000000000000000L * digits[3]
+ 100000000000000L * digits[4]
+ 10000000000000L * digits[5]
+ 1000000000000L * digits[6]
+ 100000000000L * digits[7]
+ 10000000000L * digits[8]
+ 1000000000L * digits[9]
+ 100000000L * digits[10]
+ 10000000L * digits[11]
+ 1000000L * digits[12]
+ 100000L * digits[13]
+ 10000L * digits[14]
+ 1000L * digits[15]
+ 100L * digits[16]
+ 10L * digits[17]
+ 1L * digits[18];
}
public static final int[] decode(long move) {
int[] digits = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
int index = digits.length - 1;
while(move > 0) {
digits[index--] = (int)(move % 10);
move = move / 10;
}
return digits;
}These are used for persisting a Chess move in a simple engine, for example:
```
//=========================================================================
// MOVE INFO
//=========================================================================
public static final int MOVE_INFO_COUNT = 19;
public static final int MOVE_INFO_EXTRA_1 = 0; // -> ACTIVE DIGI
Solution
By measuring with JMH I found your decoder being much slower than the encoder. Contrary to your conclusion that the bottleneck is array allocation, I find that the difference between one static array and a new array each time has secondary effect. Compare these results (single static array):
with these (new array each time):
Your method comes out behind by a significant factor in both cases.
I conjecture that this is due to the involvement of the slow divison operators (
If you used bitfield-based encoding, performance would be better and more robust (not sensitive to special-cased JIT optimizations). Here is how it would look, not much different than your code:
For those interested, this is the full JMH code.
MeasureEncoding.decodeBitfield avgt 5 5.896 ± 0.091 ns/op
MeasureEncoding.decodeConstantin avgt 5 44.287 ± 1.217 ns/op
MeasureEncoding.decodeMarco13 avgt 5 7.256 ± 0.240 ns/op
MeasureEncoding.encodeBitfield avgt 5 8.637 ± 0.279 ns/op
MeasureEncoding.encodeConstantin avgt 5 8.942 ± 0.207 ns/opwith these (new array each time):
Benchmark Mode Cnt Score Error Units
MeasureEncoding.decodeBitfield avgt 5 11.299 ± 0.360 ns/op
MeasureEncoding.decodeConstantin avgt 5 50.503 ± 2.331 ns/op
MeasureEncoding.decodeMarco13 avgt 5 17.295 ± 0.120 ns/op
MeasureEncoding.encodeBitfield avgt 5 8.560 ± 0.130 ns/op
MeasureEncoding.encodeConstantin avgt 5 8.966 ± 0.161 ns/opYour method comes out behind by a significant factor in both cases.
I conjecture that this is due to the involvement of the slow divison operators (
/ and %). The surprising finding is that just unrolling your decoding loop (as in Marco13's proposal) makes a huge difference. It must be triggering some optimization of the division in the JIT compiler.If you used bitfield-based encoding, performance would be better and more robust (not sensitive to special-cased JIT optimizations). Here is how it would look, not much different than your code:
public static long encodeBitfield(int[] move) {
if (move.length != ARRAY_SIZE) {
throw new IllegalArgumentException("move must have 19 elements");
}
long enc = 0;
int bitsUsed = 0;
enc |= move[3];
enc |= (long) move[4] >= 1;
for (; ind >= 4) {
dec[ind] = (int) move & 15;
}
for (; ind >= 2) {
dec[ind] = (int) move & 3;
}
return dec;
}For those interested, this is the full JMH code.
@BenchmarkMode(Mode.AverageTime)
@OutputTimeUnit(TimeUnit.NANOSECONDS)
@OperationsPerInvocation(1)
@Warmup(iterations = 8, time = 500, timeUnit = TimeUnit.MILLISECONDS)
@Measurement(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS)
@State(Scope.Thread)
@Fork(1)
public class MeasureEncoding
{
public static final int ARRAY_SIZE = 19;
final int[] digits = new int[ARRAY_SIZE];
@Setup(Level.Iteration) public void setup() {
for (int i = 0; i 0) {
digits[index--] = (int)(move % 10);
move = move / 10;
}
return digits;
}
static long encodeBitfield(int[] move) {
if (move.length != ARRAY_SIZE) {
throw new IllegalArgumentException("move must have 19 elements");
}
long enc = 0;
int bitsUsed = 0;
enc |= move[3];
enc |= (long) move[4] >= 1;
for (; ind >= 4) {
dec[ind] = (int) move & 15;
}
for (; ind >= 2) {
dec[ind] = (int) move & 3;
}
return dec;
}
static int[] decodeMarco13(long move)
{
int[] digits = new int[19];
digits[18] = (int)(move % 10); move /= 10;
digits[17] = (int)(move % 10); move /= 10;
digits[16] = (int)(move % 10); move /= 10;
digits[15] = (int)(move % 10); move /= 10;
digits[14] = (int)(move % 10); move /= 10;
digits[13] = (int)(move % 10); move /= 10;
digits[12] = (int)(move % 10); move /= 10;
digits[11] = (int)(move % 10); move /= 10;
digits[10] = (int)(move % 10); move /= 10;
digits[ 9] = (int)(move % 10); move /= 10;
digits[ 8] = (int)(move % 10); move /= 10;
digits[ 7] = (int)(move % 10); move /= 10;
digits[ 6] = (int)(move % 10); move /= 10;
digits[ 5] = (int)(move % 10); move /= 10;
digits[ 4] = (int)(move % 10); move /= 10;
digits[ 3] = (int)(move % 10); move /= 10;
digits[ 2] = (int)(move % 10); move /= 10;
digits[ 1] = (int)(move % 10); move /= 10;
digits[ 0] = (int)(move % 10); move /= 10;
return digits;
}
public static void main(String[] args) {
final int[] move = {1, 0, 0, 1, 8, 6, 9, 9, 9, 9, 6, 9, 9, 9, 9, 3, 3, 3, 3};
System.out.println(move.length);
long encoded = encodeBitfield(move);
System.out.println(Long.toString(encoded, 2));
System.out.println(Arrays.toString(move));
System.out.println(Arrays.toString(decodeBitfield(encoded)));
}
}Code Snippets
MeasureEncoding.decodeBitfield avgt 5 5.896 ± 0.091 ns/op
MeasureEncoding.decodeConstantin avgt 5 44.287 ± 1.217 ns/op
MeasureEncoding.decodeMarco13 avgt 5 7.256 ± 0.240 ns/op
MeasureEncoding.encodeBitfield avgt 5 8.637 ± 0.279 ns/op
MeasureEncoding.encodeConstantin avgt 5 8.942 ± 0.207 ns/opBenchmark Mode Cnt Score Error Units
MeasureEncoding.decodeBitfield avgt 5 11.299 ± 0.360 ns/op
MeasureEncoding.decodeConstantin avgt 5 50.503 ± 2.331 ns/op
MeasureEncoding.decodeMarco13 avgt 5 17.295 ± 0.120 ns/op
MeasureEncoding.encodeBitfield avgt 5 8.560 ± 0.130 ns/op
MeasureEncoding.encodeConstantin avgt 5 8.966 ± 0.161 ns/oppublic static long encodeBitfield(int[] move) {
if (move.length != ARRAY_SIZE) {
throw new IllegalArgumentException("move must have 19 elements");
}
long enc = 0;
int bitsUsed = 0;
enc |= move[3];
enc |= (long) move[4] << (bitsUsed += 1);
enc |= (long) move[5] << (bitsUsed += 4);
enc |= (long) move[6] << (bitsUsed += 4);
enc |= (long) move[7] << (bitsUsed += 4);
enc |= (long) move[8] << (bitsUsed += 4);
enc |= (long) move[9] << (bitsUsed += 4);
enc |= (long) move[10] << (bitsUsed += 4);
enc |= (long) move[11] << (bitsUsed += 4);
enc |= (long) move[12] << (bitsUsed += 4);
enc |= (long) move[13] << (bitsUsed += 4);
enc |= (long) move[14] << (bitsUsed += 4);
enc |= (long) move[15] << (bitsUsed += 4);
enc |= (long) move[16] << (bitsUsed += 2);
enc |= (long) move[17] << (bitsUsed += 2);
enc |= (long) move[18] << (bitsUsed + 2);
return enc;
}
public static int[] decodeBitfield(long move) {
final int[] dec = new int[ARRAY_SIZE];
dec[0] = 1;
int ind = 3;
dec[ind++] = (int) move & 1;
move >>= 1;
for (; ind < 15; ind++, move >>= 4) {
dec[ind] = (int) move & 15;
}
for (; ind < ARRAY_SIZE; ind++, move >>= 2) {
dec[ind] = (int) move & 3;
}
return dec;
}@BenchmarkMode(Mode.AverageTime)
@OutputTimeUnit(TimeUnit.NANOSECONDS)
@OperationsPerInvocation(1)
@Warmup(iterations = 8, time = 500, timeUnit = TimeUnit.MILLISECONDS)
@Measurement(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS)
@State(Scope.Thread)
@Fork(1)
public class MeasureEncoding
{
public static final int ARRAY_SIZE = 19;
final int[] digits = new int[ARRAY_SIZE];
@Setup(Level.Iteration) public void setup() {
for (int i = 0; i < digits.length; i++) {
digits[i] = i % 10;
}
}
@Benchmark public long encodeConstantin() {
return encode(digits);
}
@Benchmark public long encodeBitfield() {
return encodeBitfield(digits);
}
@Benchmark public int[] decodeConstantin() {
return decode(1234567890123456789L);
}
@Benchmark public int[] decodeBitfield() {
return decodeBitfield(0b11111111100110011001100101101001100110011001011010001L);
}
@Benchmark public int[] decodeMarco13() {
return decodeMarco13(1234567890123456789L);
}
static long encode(int[] digits) {
return
1000000000000000000L * digits[0]
+ 100000000000000000L * digits[1]
+ 10000000000000000L * digits[2]
+ 1000000000000000L * digits[3]
+ 100000000000000L * digits[4]
+ 10000000000000L * digits[5]
+ 1000000000000L * digits[6]
+ 100000000000L * digits[7]
+ 10000000000L * digits[8]
+ 1000000000L * digits[9]
+ 100000000L * digits[10]
+ 10000000L * digits[11]
+ 1000000L * digits[12]
+ 100000L * digits[13]
+ 10000L * digits[14]
+ 1000L * digits[15]
+ 100L * digits[16]
+ 10L * digits[17]
+ 1L * digits[18];
}
static int[] decode(long move) {
int[] digits = new int[19];
int index = digits.length - 1;
while(move > 0) {
digits[index--] = (int)(move % 10);
move = move / 10;
}
return digits;
}
static long encodeBitfield(int[] move) {
if (move.length != ARRAY_SIZE) {
throw new IllegalArgumentException("move must have 19 elements");
}
long enc = 0;
int bitsUsed = 0;
enc |= move[3];
enc |= (long) move[4] << (bitsUsed += 1);
enc |= (long) move[5] << (bitsUsed += 4);
enc |= (long) move[6] << (bitsUsed += 4);
enc |= (long) move[7] << (bitsUsed += 4);
enc |= (long) move[8] << (bitsUsed += 4);
enc |= (long) move[9] << (bitsUsed += 4);
enc |= (long) move[10] << (bitsUsed += 4);
enc |= (long) move[11] << (bitsUsed += 4);
enc |= (long) move[12] << (bitsUsed += 4);
enc |= (long) move[13] << (bitsUsed += 4);
enc |= (long) move[14] << (bitsUsed += 4);
enc |= (long) move[15] << (bitsUsed += 4);
enc |= (long) move[16] << (bitsUsed += 2);
enc |= (long) move[17] << (bitsUsed += 2);
enc |= (long) move[18] << (bitsUsed + 2);
return enc;
}
static int[] decodeBitfield(long move) {
final int[] dContext
StackExchange Code Review Q#96516, answer score: 8
Revisions (0)
No revisions yet.