libpixelflinger/fixed.cpp - android-core - Git at Google

 /* libs/pixelflinger/fixed.cpp
 **
 ** Copyright 2006, The Android Open Source Project
 **
 ** Licensed under the Apache License, Version 2.0 (the "License");
 ** you may not use this file except in compliance with the License.
 ** You may obtain a copy of the License at
 **
 **     http://www.apache.org/licenses/LICENSE-2.0
 **
 ** Unless required by applicable law or agreed to in writing, software
 ** distributed under the License is distributed on an "AS IS" BASIS,
 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 ** See the License for the specific language governing permissions and
 ** limitations under the License.
 */

 #include <stdio.h>

 #include <private/pixelflinger/ggl_context.h>
 #include <private/pixelflinger/ggl_fixed.h>


 // ------------------------------------------------------------------------

 int32_t gglRecipQNormalized(int32_t x, int* exponent)
 {
     const int32_t s = x>>31;
     uint32_t a = s ? -x : x;

     // the result will overflow, so just set it to the biggest/inf value
     if (ggl_unlikely(a <= 2LU)) {
         *exponent = 0;
         return s ? FIXED_MIN : FIXED_MAX;
     }

     // Newton-Raphson iteration:
     // x = r*(2 - a*r)

     const int32_t lz = gglClz(a);
     a <<= lz;  // 0.32
     uint32_t r = a;
     // note: if a == 0x80000000, this means x was a power-of-2, in this
     // case we don't need to compute anything. We get the reciprocal for
     // (almost) free.
     if (a != 0x80000000) {
         r = (0x2E800 << (30-16)) - (r>>(2-1)); // 2.30, r = 2.90625 - 2*a
         // 0.32 + 2.30 = 2.62 -> 2.30
         // 2.30 + 2.30 = 4.60 -> 2.30
         r = (((2LU<<30) - uint32_t((uint64_t(a)*r) >> 32)) * uint64_t(r)) >> 30;
         r = (((2LU<<30) - uint32_t((uint64_t(a)*r) >> 32)) * uint64_t(r)) >> 30;
     }

     // shift right 1-bit to make room for the sign bit
     *exponent = 30-lz-1;
     r >>= 1;
     return s ? -r : r;
 }

 int32_t gglRecipQ(GGLfixed x, int q)
 {
     int shift;
     x = gglRecipQNormalized(x, &shift);
     shift += 16-q;
     if (shift > 0)
         x += 1L << (shift-1);   // rounding
     x >>= shift;
     return x;
 }

 // ------------------------------------------------------------------------

 GGLfixed gglFastDivx(GGLfixed n, GGLfixed d)
 {
     if ((d>>24) && ((d>>24)+1)) {
         n >>= 8;
         d >>= 8;
     }
     return gglMulx(n, gglRecip(d));
 }

 // ------------------------------------------------------------------------

 static const GGLfixed ggl_sqrt_reciproc_approx_tab[8] = {
     // 1/sqrt(x) with x = 1-N/16, N=[8...1]
     0x16A09, 0x15555, 0x143D1, 0x134BF, 0x1279A, 0x11C01, 0x111AC, 0x10865
 };

 GGLfixed gglSqrtRecipx(GGLfixed x)
 {
     if (x == 0)         return FIXED_MAX;
     if (x == FIXED_ONE) return x;
     const GGLfixed a = x;
     const int32_t lz = gglClz(x);
     x = ggl_sqrt_reciproc_approx_tab[(a>>(28-lz))&0x7];
     const int32_t exp = lz - 16;
     if (exp <= 0)   x >>= -exp>>1;
     else            x <<= (exp>>1) + (exp & 1);
     if (exp & 1) {
         x = gglMulx(x, ggl_sqrt_reciproc_approx_tab[0])>>1;
     }
     // 2 Newton-Raphson iterations: x = x/2*(3-(a*x)*x)
     x = gglMulx((x>>1),(0x30000 - gglMulx(gglMulx(a,x),x)));
     x = gglMulx((x>>1),(0x30000 - gglMulx(gglMulx(a,x),x)));
     return x;
 }

 GGLfixed gglSqrtx(GGLfixed a)
 {
     // Compute a full precision square-root (24 bits accuracy)
     GGLfixed r = 0;
     GGLfixed bit = 0x800000;
     int32_t bshift = 15;
     do {
         GGLfixed temp = bit + (r<<1);
         if (bshift >= 8)    temp <<= (bshift-8);
         else                temp >>= (8-bshift);
         if (a >= temp) {
             r += bit;
             a -= temp;
         }
         bshift--;
     } while (bit>>=1);
     return r;
 }

 // ------------------------------------------------------------------------

 static const GGLfixed ggl_log_approx_tab[] = {
     // -ln(x)/ln(2) with x = N/16, N=[8...16]
     0xFFFF, 0xd47f, 0xad96, 0x8a62, 0x6a3f, 0x4caf, 0x3151, 0x17d6, 0x0000
 };

 static const GGLfixed ggl_alog_approx_tab[] = { // domain [0 - 1.0]
 	0xffff, 0xeac0, 0xd744, 0xc567, 0xb504, 0xa5fe, 0x9837, 0x8b95, 0x8000
 };

 GGLfixed gglPowx(GGLfixed x, GGLfixed y)
 {
     // prerequisite: 0 <= x <= 1, and y >=0

     // pow(x,y) = 2^(y*log2(x))
     // =  2^(y*log2(x*(2^exp)*(2^-exp))))
     // =  2^(y*(log2(X)-exp))
     // =  2^(log2(X)*y - y*exp)
     // =  2^( - (-log2(X)*y + y*exp) )

     int32_t exp = gglClz(x) - 16;
     GGLfixed f = x << exp;
     x = (f & 0x0FFF)<<4;
     f = (f >> 12) & 0x7;
     GGLfixed p = gglMulAddx(
             ggl_log_approx_tab[f+1] - ggl_log_approx_tab[f], x,
             ggl_log_approx_tab[f]);
     p = gglMulAddx(p, y, y*exp);
     exp = gglFixedToIntFloor(p);
     if (exp < 31) {
         p = gglFracx(p);
         x = (p & 0x1FFF)<<3;
         p >>= 13;
         p = gglMulAddx(
                 ggl_alog_approx_tab[p+1] - ggl_alog_approx_tab[p], x,
                 ggl_alog_approx_tab[p]);
         p >>= exp;
     } else {
         p = 0;
     }
     return p;
         // ( powf((a*65536.0f), (b*65536.0f)) ) * 65536.0f;
 }

 // ------------------------------------------------------------------------

 int32_t gglDivQ(GGLfixed n, GGLfixed d, int32_t i)
 {
     //int32_t r =int32_t((int64_t(n)<<i)/d);
     const int32_t ds = n^d;
     if (n<0) n = -n;
     if (d<0) d = -d;
     int nd = gglClz(d) - gglClz(n);
     i += nd + 1;
     if (nd > 0) d <<= nd;
     else        n <<= -nd;
     uint32_t q = 0;

     int j = i & 7;
     i >>= 3;

     // gcc deals with the code below pretty well.
     // we get 3.75 cycles per bit in the main loop
     // and 8 cycles per bit in the termination loop
     if (ggl_likely(i)) {
         n -= d;
         do {
             q <<= 8;
             if (n>=0)   q |= 128;
             else        n += d;
             n = n*2 - d;
             if (n>=0)   q |= 64;
             else        n += d;
             n = n*2 - d;
             if (n>=0)   q |= 32;
             else        n += d;
             n = n*2 - d;
             if (n>=0)   q |= 16;
             else        n += d;
             n = n*2 - d;
             if (n>=0)   q |= 8;
             else        n += d;
             n = n*2 - d;
             if (n>=0)   q |= 4;
             else        n += d;
             n = n*2 - d;
             if (n>=0)   q |= 2;
             else        n += d;
             n = n*2 - d;
             if (n>=0)   q |= 1;
             else        n += d;

             if (--i == 0)
                 goto finish;

             n = n*2 - d;
         } while(true);
         do {
             q <<= 1;
             n = n*2 - d;
             if (n>=0)   q |= 1;
             else        n += d;
         finish: ;
         } while (j--);
         return (ds<0) ? -q : q;
     }

     n -= d;
     if (n>=0)   q |= 1;
     else        n += d;
     j--;
     goto finish;
 }

 // ------------------------------------------------------------------------

 // assumes that the int32_t values of a, b, and c are all positive
 // use when both a and b are larger than c

 template <typename T>
 static inline void swap(T& a, T& b) {
     T t(a);
     a = b;
     b = t;
 }

 static __attribute__((noinline))
 int32_t slow_muldiv(uint32_t a, uint32_t b, uint32_t c)
 {
 	// first we compute a*b as a 64-bit integer
     // (GCC generates umull with the code below)
     uint64_t ab = uint64_t(a)*b;
     uint32_t hi = ab>>32;
     uint32_t lo = ab;
     uint32_t result;

 	// now perform the division
 	if (hi >= c) {
 	overflow:
 		result = 0x7fffffff;  // basic overflow
 	} else if (hi == 0) {
 		result = lo/c;  // note: c can't be 0
 		if ((result >> 31) != 0)  // result must fit in 31 bits
 			goto overflow;
 	} else {
 		uint32_t r = hi;
 		int bits = 31;
 	    result = 0;
 		do {
 			r = (r << 1) | (lo >> 31);
 			lo <<= 1;
 			result <<= 1;
 			if (r >= c) {
 				r -= c;
 				result |= 1;
 			}
 		} while (bits--);
 	}
 	return int32_t(result);
 }

 // assumes a >= 0 and c >= b >= 0
 static inline
 int32_t quick_muldiv(int32_t a, int32_t b, int32_t c)
 {
     int32_t r = 0, q = 0, i;
     int leading = gglClz(a);
     i = 32 - leading;
     a <<= leading;
     do {
         r <<= 1;
         if (a < 0)
             r += b;
         a <<= 1;
         q <<= 1;
         if (r >= c) {
             r -= c;
             q++;
         }
         asm(""::); // gcc generates better code this way
         if (r >= c) {
             r -= c;
             q++;
         }
     }
     while (--i);
     return q;
 }

 // this function computes a*b/c with 64-bit intermediate accuracy
 // overflows (e.g. division by 0) are handled and return INT_MAX

 int32_t gglMulDivi(int32_t a, int32_t b, int32_t c)
 {
 	int32_t result;
 	int32_t sign = a^b^c;

 	if (a < 0) a = -a;
 	if (b < 0) b = -b;
 	if (c < 0) c = -c;

     if (a < b) {
         swap(a, b);
     }

 	if (b <= c) result = quick_muldiv(a, b, c);
 	else        result = slow_muldiv((uint32_t)a, (uint32_t)b, (uint32_t)c);

 	if (sign < 0)
 		result = -result;

     return result;
 }
	/* libs/pixelflinger/fixed.cpp
	**
	** Copyright 2006, The Android Open Source Project
	**
	** Licensed under the Apache License, Version 2.0 (the "License");
	** you may not use this file except in compliance with the License.
	** You may obtain a copy of the License at
	**
	** http://www.apache.org/licenses/LICENSE-2.0
	**
	** Unless required by applicable law or agreed to in writing, software
	** distributed under the License is distributed on an "AS IS" BASIS,
	** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	** See the License for the specific language governing permissions and
	** limitations under the License.
	*/

	#include <stdio.h>

	#include <private/pixelflinger/ggl_context.h>
	#include <private/pixelflinger/ggl_fixed.h>


	// ------------------------------------------------------------------------

	int32_t gglRecipQNormalized(int32_t x, int* exponent)
	{
	const int32_t s = x>>31;
	uint32_t a = s ? -x : x;

	// the result will overflow, so just set it to the biggest/inf value
	if (ggl_unlikely(a <= 2LU)) {
	*exponent = 0;
	return s ? FIXED_MIN : FIXED_MAX;
	}

	// Newton-Raphson iteration:
	// x = r(2 - ar)

	const int32_t lz = gglClz(a);
	a <<= lz; // 0.32
	uint32_t r = a;
	// note: if a == 0x80000000, this means x was a power-of-2, in this
	// case we don't need to compute anything. We get the reciprocal for
	// (almost) free.
	if (a != 0x80000000) {
	r = (0x2E800 << (30-16)) - (r>>(2-1)); // 2.30, r = 2.90625 - 2*a
	// 0.32 + 2.30 = 2.62 -> 2.30
	// 2.30 + 2.30 = 4.60 -> 2.30
	r = (((2LU<<30) - uint32_t((uint64_t(a)r) >> 32)) uint64_t(r)) >> 30;
	r = (((2LU<<30) - uint32_t((uint64_t(a)r) >> 32)) uint64_t(r)) >> 30;
	}

	// shift right 1-bit to make room for the sign bit
	*exponent = 30-lz-1;
	r >>= 1;
	return s ? -r : r;
	}

	int32_t gglRecipQ(GGLfixed x, int q)
	{
	int shift;
	x = gglRecipQNormalized(x, &shift);
	shift += 16-q;
	if (shift > 0)
	x += 1L << (shift-1); // rounding
	x >>= shift;
	return x;
	}

	// ------------------------------------------------------------------------

	GGLfixed gglFastDivx(GGLfixed n, GGLfixed d)
	{
	if ((d>>24) && ((d>>24)+1)) {
	n >>= 8;
	d >>= 8;
	}
	return gglMulx(n, gglRecip(d));
	}

	// ------------------------------------------------------------------------

	static const GGLfixed ggl_sqrt_reciproc_approx_tab[8] = {
	// 1/sqrt(x) with x = 1-N/16, N=[8...1]
	0x16A09, 0x15555, 0x143D1, 0x134BF, 0x1279A, 0x11C01, 0x111AC, 0x10865
	};

	GGLfixed gglSqrtRecipx(GGLfixed x)
	{
	if (x == 0) return FIXED_MAX;
	if (x == FIXED_ONE) return x;
	const GGLfixed a = x;
	const int32_t lz = gglClz(x);
	x = ggl_sqrt_reciproc_approx_tab[(a>>(28-lz))&0x7];
	const int32_t exp = lz - 16;
	if (exp <= 0) x >>= -exp>>1;
	else x <<= (exp>>1) + (exp & 1);
	if (exp & 1) {
	x = gglMulx(x, ggl_sqrt_reciproc_approx_tab[0])>>1;
	}
	// 2 Newton-Raphson iterations: x = x/2(3-(ax)*x)
	x = gglMulx((x>>1),(0x30000 - gglMulx(gglMulx(a,x),x)));
	x = gglMulx((x>>1),(0x30000 - gglMulx(gglMulx(a,x),x)));
	return x;
	}

	GGLfixed gglSqrtx(GGLfixed a)
	{
	// Compute a full precision square-root (24 bits accuracy)
	GGLfixed r = 0;
	GGLfixed bit = 0x800000;
	int32_t bshift = 15;
	do {
	GGLfixed temp = bit + (r<<1);
	if (bshift >= 8) temp <<= (bshift-8);
	else temp >>= (8-bshift);
	if (a >= temp) {
	r += bit;
	a -= temp;
	}
	bshift--;
	} while (bit>>=1);
	return r;
	}

	// ------------------------------------------------------------------------

	static const GGLfixed ggl_log_approx_tab[] = {
	// -ln(x)/ln(2) with x = N/16, N=[8...16]
	0xFFFF, 0xd47f, 0xad96, 0x8a62, 0x6a3f, 0x4caf, 0x3151, 0x17d6, 0x0000
	};

	static const GGLfixed ggl_alog_approx_tab[] = { // domain [0 - 1.0]
	0xffff, 0xeac0, 0xd744, 0xc567, 0xb504, 0xa5fe, 0x9837, 0x8b95, 0x8000
	};

	GGLfixed gglPowx(GGLfixed x, GGLfixed y)
	{
	// prerequisite: 0 <= x <= 1, and y >=0

	// pow(x,y) = 2^(y*log2(x))
	// = 2^(ylog2(x(2^exp)*(2^-exp))))
	// = 2^(y*(log2(X)-exp))
	// = 2^(log2(X)y - yexp)
	// = 2^( - (-log2(X)y + yexp) )

	int32_t exp = gglClz(x) - 16;
	GGLfixed f = x << exp;
	x = (f & 0x0FFF)<<4;
	f = (f >> 12) & 0x7;
	GGLfixed p = gglMulAddx(
	ggl_log_approx_tab[f+1] - ggl_log_approx_tab[f], x,
	ggl_log_approx_tab[f]);
	p = gglMulAddx(p, y, y*exp);
	exp = gglFixedToIntFloor(p);
	if (exp < 31) {
	p = gglFracx(p);
	x = (p & 0x1FFF)<<3;
	p >>= 13;
	p = gglMulAddx(
	ggl_alog_approx_tab[p+1] - ggl_alog_approx_tab[p], x,
	ggl_alog_approx_tab[p]);
	p >>= exp;
	} else {
	p = 0;
	}
	return p;
	// ( powf((a65536.0f), (b65536.0f)) ) * 65536.0f;
	}

	// ------------------------------------------------------------------------

	int32_t gglDivQ(GGLfixed n, GGLfixed d, int32_t i)
	{
	//int32_t r =int32_t((int64_t(n)<<i)/d);
	const int32_t ds = n^d;
	if (n<0) n = -n;
	if (d<0) d = -d;
	int nd = gglClz(d) - gglClz(n);
	i += nd + 1;
	if (nd > 0) d <<= nd;
	else n <<= -nd;
	uint32_t q = 0;

	int j = i & 7;
	i >>= 3;

	// gcc deals with the code below pretty well.
	// we get 3.75 cycles per bit in the main loop
	// and 8 cycles per bit in the termination loop
	if (ggl_likely(i)) {
	n -= d;
	do {
	q <<= 8;
	if (n>=0) q \|= 128;
	else n += d;
	n = n*2 - d;
	if (n>=0) q \|= 64;
	else n += d;
	n = n*2 - d;
	if (n>=0) q \|= 32;
	else n += d;
	n = n*2 - d;
	if (n>=0) q \|= 16;
	else n += d;
	n = n*2 - d;
	if (n>=0) q \|= 8;
	else n += d;
	n = n*2 - d;
	if (n>=0) q \|= 4;
	else n += d;
	n = n*2 - d;
	if (n>=0) q \|= 2;
	else n += d;
	n = n*2 - d;
	if (n>=0) q \|= 1;
	else n += d;

	if (--i == 0)
	goto finish;

	n = n*2 - d;
	} while(true);
	do {
	q <<= 1;
	n = n*2 - d;
	if (n>=0) q \|= 1;
	else n += d;
	finish: ;
	} while (j--);
	return (ds<0) ? -q : q;
	}

	n -= d;
	if (n>=0) q \|= 1;
	else n += d;
	j--;
	goto finish;
	}

	// ------------------------------------------------------------------------

	// assumes that the int32_t values of a, b, and c are all positive
	// use when both a and b are larger than c

	template <typename T>
	static inline void swap(T& a, T& b) {
	T t(a);
	a = b;
	b = t;
	}

	static __attribute__((noinline))
	int32_t slow_muldiv(uint32_t a, uint32_t b, uint32_t c)
	{
	// first we compute a*b as a 64-bit integer
	// (GCC generates umull with the code below)
	uint64_t ab = uint64_t(a)*b;
	uint32_t hi = ab>>32;
	uint32_t lo = ab;
	uint32_t result;

	// now perform the division
	if (hi >= c) {
	overflow:
	result = 0x7fffffff; // basic overflow
	} else if (hi == 0) {
	result = lo/c; // note: c can't be 0
	if ((result >> 31) != 0) // result must fit in 31 bits
	goto overflow;
	} else {
	uint32_t r = hi;
	int bits = 31;
	result = 0;
	do {
	r = (r << 1) \| (lo >> 31);
	lo <<= 1;
	result <<= 1;
	if (r >= c) {
	r -= c;
	result \|= 1;
	}
	} while (bits--);
	}
	return int32_t(result);
	}

	// assumes a >= 0 and c >= b >= 0
	static inline
	int32_t quick_muldiv(int32_t a, int32_t b, int32_t c)
	{
	int32_t r = 0, q = 0, i;
	int leading = gglClz(a);
	i = 32 - leading;
	a <<= leading;
	do {
	r <<= 1;
	if (a < 0)
	r += b;
	a <<= 1;
	q <<= 1;
	if (r >= c) {
	r -= c;
	q++;
	}
	asm(""::); // gcc generates better code this way
	if (r >= c) {
	r -= c;
	q++;
	}
	}
	while (--i);
	return q;
	}

	// this function computes a*b/c with 64-bit intermediate accuracy
	// overflows (e.g. division by 0) are handled and return INT_MAX

	int32_t gglMulDivi(int32_t a, int32_t b, int32_t c)
	{
	int32_t result;
	int32_t sign = a^b^c;

	if (a < 0) a = -a;
	if (b < 0) b = -b;
	if (c < 0) c = -c;

	if (a < b) {
	swap(a, b);
	}

	if (b <= c) result = quick_muldiv(a, b, c);
	else result = slow_muldiv((uint32_t)a, (uint32_t)b, (uint32_t)c);

	if (sign < 0)
	result = -result;

	return result;
	}