patterncppMinor
Mostly portable 128 by 64 bit division
Viewed 0 times
128bitdivisionmostlyportable
Problem
I wrote this out of curiosity. It is based on the theory behind Knuth's Algorithm D and is intended to emulate the behavior of the x86
I am primarily interested in correctness (it seems to work correctly), and have not yet compared performance with anything simpler.
Two's complement hardware is assumed.
```
uint64_t div(uint64_t a_lo, uint64_t a_hi, uint64_t b, uint64_t &r)
{
uint64_t p_lo;
uint64_t p_hi;
uint64_t q = 0;
auto r_hi = a_hi;
auto r_lo = a_lo;
int s = 0;
if(0 == (b >> 63)){
// Normalize so quotient estimates are
// no more than 2 in error.
// Note: If any bits get shifted out of
// r_hi at this point, the result would
// overflow.
s = 63 - bsr(b);
const auto t = 64 - s;
b > t);
r_lo > 32;
/*
The first full-by-half division places b
across r_hi and r_lo, making the reduction
step a little complicated.
To make this easier, u_hi and u_lo will hold
a shifted image of the remainder.
[u_hi|| ][u_lo|| ]
[r_hi|| ][r_lo|| ]
[ b || ]
[p_hi|| ][p_lo|| ]
|
V
[q_hi|| ]
*/
auto q_hat = r_hi / b_hi;
p_lo = mul(b, q_hat, p_hi);
const auto u_hi = r_hi >> 32;
const auto u_lo = (r_hi > 32);
// r -= b*q_hat
//
// At most 2 iterations of this...
while(
(p_hi > u_hi) ||
((p_hi == u_hi) && (p_lo > u_lo))
)
{
if(p_lo > 32);
if(w_lo > r_lo){
++w_hi;
}
r_lo -= w_lo;
r_hi -= w_hi;
q = q_hat > 32)) / b_hi;
p_lo = mul(b, q_hat, p_hi);
// r -= b*q_hat
//
// ...and at most 2 iterations of this.
while(
(p_hi > r_hi) ||
((p_hi == r_hi) && (p_lo > r_lo))
)
{
if(p_lo > s;
return q;
}
div instruction (though the result is truncated instead of raising an exception for overflow).I am primarily interested in correctness (it seems to work correctly), and have not yet compared performance with anything simpler.
Two's complement hardware is assumed.
```
uint64_t div(uint64_t a_lo, uint64_t a_hi, uint64_t b, uint64_t &r)
{
uint64_t p_lo;
uint64_t p_hi;
uint64_t q = 0;
auto r_hi = a_hi;
auto r_lo = a_lo;
int s = 0;
if(0 == (b >> 63)){
// Normalize so quotient estimates are
// no more than 2 in error.
// Note: If any bits get shifted out of
// r_hi at this point, the result would
// overflow.
s = 63 - bsr(b);
const auto t = 64 - s;
b > t);
r_lo > 32;
/*
The first full-by-half division places b
across r_hi and r_lo, making the reduction
step a little complicated.
To make this easier, u_hi and u_lo will hold
a shifted image of the remainder.
[u_hi|| ][u_lo|| ]
[r_hi|| ][r_lo|| ]
[ b || ]
[p_hi|| ][p_lo|| ]
|
V
[q_hi|| ]
*/
auto q_hat = r_hi / b_hi;
p_lo = mul(b, q_hat, p_hi);
const auto u_hi = r_hi >> 32;
const auto u_lo = (r_hi > 32);
// r -= b*q_hat
//
// At most 2 iterations of this...
while(
(p_hi > u_hi) ||
((p_hi == u_hi) && (p_lo > u_lo))
)
{
if(p_lo > 32);
if(w_lo > r_lo){
++w_hi;
}
r_lo -= w_lo;
r_hi -= w_hi;
q = q_hat > 32)) / b_hi;
p_lo = mul(b, q_hat, p_hi);
// r -= b*q_hat
//
// ...and at most 2 iterations of this.
while(
(p_hi > r_hi) ||
((p_hi == r_hi) && (p_lo > r_lo))
)
{
if(p_lo > s;
return q;
}
Solution
The original code seemed too complicated for this so I set out to write a simpler version. Should be fairly portable too, and fast as it is just bit shifts and subtraction.
(It works similarly to how one would accomplish division in assembly language on a processor without hardware division.)
(It works similarly to how one would accomplish division in assembly language on a processor without hardware division.)
// 128-bit / 64-bit unsigned divide
#include
#include
int main(void)
{
// numerator
uint64_t a_lo = 1234567;
uint64_t a_hi = 0;
// denominator
uint64_t b = 10;
// quotient
uint64_t q = a_lo > 63;
uint64_t temp_carry = 0;
int i;
for(i = 0; i > 63;
rem = b)
{
carry = 1;
}
else
{
temp_carry = q >> 63;
q > 63;
q <<= 1;
q |= carry;
carry = temp_carry;
}
printf("quotient = %llu\n", (long long unsigned int)q);
printf("remainder = %llu\n", (long long unsigned int)rem);
return 0;
}Code Snippets
// 128-bit / 64-bit unsigned divide
#include <stdio.h>
#include <stdint.h>
int main(void)
{
// numerator
uint64_t a_lo = 1234567;
uint64_t a_hi = 0;
// denominator
uint64_t b = 10;
// quotient
uint64_t q = a_lo << 1;
// remainder
uint64_t rem = a_hi;
uint64_t carry = a_lo >> 63;
uint64_t temp_carry = 0;
int i;
for(i = 0; i < 64; i++)
{
temp_carry = rem >> 63;
rem <<= 1;
rem |= carry;
carry = temp_carry;
if(carry == 0)
{
if(rem >= b)
{
carry = 1;
}
else
{
temp_carry = q >> 63;
q <<= 1;
q |= carry;
carry = temp_carry;
continue;
}
}
rem -= b;
rem -= (1 - carry);
carry = 1;
temp_carry = q >> 63;
q <<= 1;
q |= carry;
carry = temp_carry;
}
printf("quotient = %llu\n", (long long unsigned int)q);
printf("remainder = %llu\n", (long long unsigned int)rem);
return 0;
}Context
StackExchange Code Review Q#67962, answer score: 3
Revisions (0)
No revisions yet.