| ; PowerPC optimized drawing methods for Goom |
| ; © 2003 Guillaume Borios |
| ; This library is free software; you can redistribute it and/or |
| ; modify it under the terms of the GNU Library General Public |
| ; License as published by the Free Software Foundation; either |
| ; version 2 of the License, or (at your option) any later version. |
| ; |
| ; This library is distributed in the hope that it will be useful, |
| ; but WITHOUT ANY WARRANTY; without even the implied warranty of |
| ; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
| ; Library General Public License for more details. |
| ; |
| ; You should have received a copy of the GNU Library General Public |
| ; License along with this library; if not, write to the |
| ; Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, |
| ; Boston, MA 02110-1301, USA. |
| |
| ; Change log : |
| ; 30 May 2003 : File creation |
| |
| ; Section definition : We use a read only code section for the whole file |
| .section __TEXT,__text,regular,pure_instructions |
| |
| |
| ; -------------------------------------------------------------------------------------- |
| ; Single 32b pixel drawing macros |
| ; Usage : |
| ; DRAWMETHOD_XXXX_MACRO *pixelIN, *pixelOUT, COLOR, WR1, WR2, WR3, WR4 |
| ; Only the work registers (WR) can be touched by the macros |
| ; |
| ; Available methods : |
| ; DRAWMETHOD_DFLT_MACRO : Default drawing method (Actually OVRW) |
| ; DRAWMETHOD_PLUS_MACRO : RVB Saturated per channel addition (SLOWEST) |
| ; DRAWMETHOD_HALF_MACRO : 50% Transparency color drawing |
| ; DRAWMETHOD_OVRW_MACRO : Direct COLOR drawing (FASTEST) |
| ; DRAWMETHOD_B_OR_MACRO : Bitwise OR |
| ; DRAWMETHOD_BAND_MACRO : Bitwise AND |
| ; DRAWMETHOD_BXOR_MACRO : Bitwise XOR |
| ; DRAWMETHOD_BNOT_MACRO : Bitwise NOT |
| ; -------------------------------------------------------------------------------------- |
| |
| .macro DRAWMETHOD_OVRW_MACRO |
| stw $2,0($1) ;; *$1 <- $2 |
| .endmacro |
| |
| .macro DRAWMETHOD_B_OR_MACRO |
| lwz $3,0($0) ;; $3 <- *$0 |
| or $3,$3,$2 ;; $3 <- $3 | $2 |
| stw $3,0($1) ;; *$1 <- $3 |
| .endmacro |
| |
| .macro DRAWMETHOD_BAND_MACRO |
| lwz $3,0($0) ;; $3 <- *$0 |
| and $3,$3,$2 ;; $3 <- $3 & $2 |
| stw $3,0($1) ;; *$1 <- $3 |
| .endmacro |
| |
| .macro DRAWMETHOD_BXOR_MACRO |
| lwz $3,0($0) ;; $3 <- *$0 |
| xor $3,$3,$2 ;; $3 <- $3 ^ $2 |
| stw $3,0($1) ;; *$1 <- $3 |
| .endmacro |
| |
| .macro DRAWMETHOD_BNOT_MACRO |
| lwz $3,0($0) ;; $3 <- *$0 |
| nand $3,$3,$3 ;; $3 <- ~$3 |
| stw $3,0($1) ;; *$1 <- $3 |
| .endmacro |
| |
| .macro DRAWMETHOD_PLUS_MACRO |
| lwz $4,0($0) ;; $4 <- *$0 |
| andi. $3,$4,0xFF00 ;; $3 <- $4 & 0x0000FF00 |
| andi. $5,$2,0xFF00 ;; $5 <- $2 & 0x0000FF00 |
| add $3,$3,$5 ;; $3 <- $3 + $5 |
| rlwinm $5,$3,15,0,0 ;; $5 <- 0 | ($3[15] << 15) |
| srawi $5,$5,23 ;; $5 <- $5 >> 23 (algebraic for sign extension) |
| or $3,$3,$5 ;; $3 <- $3 | $5 |
| lis $5,0xFF ;; $5 <- 0x00FF00FF |
| addi $5,$5,0xFF |
| and $4,$4,$5 ;; $4 <- $4 & $5 |
| and $6,$2,$5 ;; $6 <- $2 & $5 |
| add $4,$4,$6 ;; $4 <- $4 + $6 |
| rlwinm $6,$4,7,0,0 ;; $6 <- 0 | ($4[7] << 7) |
| srawi $6,$6,15 ;; $6 <- $6 >> 15 (algebraic for sign extension) |
| rlwinm $5,$4,23,0,0 ;; $5 <- 0 | ($4[23] << 23) |
| srawi $5,$5,31 ;; $5 <- $5 >> 31 (algebraic for sign extension) |
| rlwimi $6,$5,0,24,31 ;; $6[24..31] <- $5[24..31] |
| or $4,$4,$6 ;; $4 <- $4 | $6 |
| rlwimi $4,$3,0,16,23 ;; $4[16..23] <- $3[16..23] |
| stw $4,0($1) ;; *$1 <- $4 |
| .endmacro |
| |
| .macro DRAWMETHOD_HALF_MACRO |
| lwz $4,0($0) ;; $4 <- *$0 |
| andi. $3,$4,0xFF00 ;; $3 <- $4 & 0x0000FF00 |
| andi. $5,$2,0xFF00 ;; $5 <- $2 & 0x0000FF00 |
| add $3,$3,$5 ;; $3 <- $3 + $5 |
| lis $5,0xFF ;; $5 <- 0x00FF00FF |
| addi $5,$5,0xFF |
| and $4,$4,$5 ;; $4 <- $4 & $5 |
| and $5,$2,$5 ;; $5 <- $2 & $5 |
| add $4,$4,$5 ;; $4 <- $4 + $5 |
| srwi $4,$4,1 ;; $4 <- $4 >> 1 |
| rlwimi $4,$3,31,16,23 ;; $4[16..23] <- $3[15..22] |
| stw $4,0($1) ;; *$1 <- $4 |
| .endmacro |
| |
| .macro DRAWMETHOD_DFLT_MACRO |
| DRAWMETHOD_PLUS_MACRO |
| .endmacro |
| |
| ; -------------------------------------------------------------------------------------- |
| |
| |
| |
| ; ************************************************************************************** |
| ; void DRAWMETHOD_PLUS_PPC(unsigned int * buf, unsigned int _col); |
| ; void DRAWMETHOD_PLUS_2_PPC(unsigned * in, unsigned int * out, unsigned int _col); |
| ; ************************************************************************************** |
| .globl _DRAWMETHOD_PLUS_2_PPC |
| .align 3 |
| _DRAWMETHOD_PLUS_2_PPC: |
| DRAWMETHOD_PLUS_MACRO r3,r4,r5,r6,r7,r8,r9 |
| blr ;; return |
| |
| .globl _DRAWMETHOD_PLUS_PPC |
| .align 3 |
| _DRAWMETHOD_PLUS_PPC: |
| DRAWMETHOD_PLUS_MACRO r3,r3,r4,r5,r6,r7,r9 |
| blr ;; return |
| |
| |
| ; ************************************************************************************** |
| ; void DRAWMETHOD_HALF_PPC(unsigned int * buf, unsigned int _col); |
| ; void DRAWMETHOD_HALF_2_PPC(unsigned * in, unsigned int * out, unsigned int _col); |
| ; ************************************************************************************** |
| .globl _DRAWMETHOD_HALF_2_PPC |
| .align 3 |
| _DRAWMETHOD_HALF_2_PPC: |
| DRAWMETHOD_HALF_MACRO r3,r4,r5,r6,r7,r8 |
| blr ;; return |
| |
| .globl _DRAWMETHOD_HALF_PPC |
| .align 3 |
| _DRAWMETHOD_HALF_PPC: |
| DRAWMETHOD_HALF_MACRO r3,r3,r4,r5,r6,r7 |
| blr ;; return |
| |
| |
| ; ************************************************************************************** |
| ; void DRAW_LINE_PPC(unsigned int *data, int x1, int y1, int x2, int y2, unsigned int col, |
| ; unsigned int screenx, unsigned int screeny) |
| ; ************************************************************************************** |
| .globl _DRAW_LINE_PPC |
| .align 3 |
| _DRAW_LINE_PPC: |
| ;; NOT IMPLEMENTED YET |
| blr ;; return |
| |
| |
| ; ************************************************************************************** |
| ; void _ppc_brightness(Pixel * src, Pixel * dest, unsigned int size, unsigned int coeff) |
| ; ************************************************************************************** |
| |
| |
| .const |
| .align 4 |
| vectorZERO: |
| .long 0,0,0,0 |
| .long 0x10101000, 0x10101001, 0x10101002, 0x10101003 |
| .long 0x10101004, 0x10101005, 0x10101006, 0x10101007 |
| .long 0x10101008, 0x10101009, 0x1010100A, 0x1010100B |
| .long 0x1010100C, 0x1010100D, 0x1010100E, 0x1010100F |
| |
| |
| .section __TEXT,__text,regular,pure_instructions |
| |
| .globl _ppc_brightness_G4 |
| .align 3 |
| _ppc_brightness_G4: |
| |
| |
| ;; PowerPC Altivec code |
| srwi r5,r5,2 |
| mtctr r5 |
| |
| ;;vrsave |
| mfspr r11,256 |
| lis r12,0xCFFC |
| mtspr 256,r12 |
| |
| mflr r0 |
| bcl 20,31,"L00000000001$pb" |
| "L00000000001$pb": |
| mflr r10 |
| mtlr r0 |
| |
| addis r9,r10,ha16(vectorZERO-"L00000000001$pb") |
| addi r9,r9,lo16(vectorZERO-"L00000000001$pb") |
| |
| vxor v0,v0,v0 ;; V0 = NULL vector |
| |
| addi r9,r9,16 |
| lvx v10,0,r9 |
| addi r9,r9,16 |
| lvx v11,0,r9 |
| addi r9,r9,16 |
| lvx v12,0,r9 |
| addi r9,r9,16 |
| lvx v13,0,r9 |
| |
| addis r9,r10,ha16(vectortmpwork-"L00000000001$pb") |
| addi r9,r9,lo16(vectortmpwork-"L00000000001$pb") |
| stw r6,0(r9) |
| li r6,8 |
| stw r6,4(r9) |
| lvx v9,0,r9 |
| li r9,128 |
| vspltw v8,v9,0 |
| vspltw v9,v9,1 |
| |
| ;; elt counter |
| li r9,0 |
| lis r7,0x0F01 |
| b L7 |
| .align 4 |
| L7: |
| lvx v1,r9,r3 |
| |
| vperm v4,v1,v0,v10 |
| ;********************* |
| add r10,r9,r3 |
| ;********************* |
| vperm v5,v1,v0,v11 |
| vperm v6,v1,v0,v12 |
| vperm v7,v1,v0,v13 |
| |
| vmulouh v4,v4,v8 |
| ;********************* |
| dst r10,r7,3 |
| ;********************* |
| vmulouh v5,v5,v8 |
| vmulouh v6,v6,v8 |
| vmulouh v7,v7,v8 |
| vsrw v4,v4,v9 |
| vsrw v5,v5,v9 |
| vsrw v6,v6,v9 |
| vsrw v7,v7,v9 |
| |
| vpkuwus v4,v4,v5 |
| vpkuwus v6,v6,v7 |
| vpkuhus v1,v4,v6 |
| |
| stvx v1,r9,r4 |
| addi r9,r9,16 |
| |
| bdnz L7 |
| |
| mtspr 256,r11 |
| blr |
| |
| |
| .globl _ppc_brightness_G5 |
| .align 3 |
| _ppc_brightness_G5: |
| |
| ;; PowerPC Altivec G5 code |
| srwi r5,r5,2 |
| mtctr r5 |
| |
| ;;vrsave |
| mfspr r11,256 |
| lis r12,0xCFFC |
| mtspr 256,r12 |
| |
| mflr r0 |
| bcl 20,31,"L00000000002$pb" |
| "L00000000002$pb": |
| mflr r10 |
| mtlr r0 |
| |
| addis r9,r10,ha16(vectorZERO-"L00000000002$pb") |
| addi r9,r9,lo16(vectorZERO-"L00000000002$pb") |
| |
| vxor v0,v0,v0 ;; V0 = NULL vector |
| |
| addi r9,r9,16 |
| lvx v10,0,r9 |
| addi r9,r9,16 |
| lvx v11,0,r9 |
| addi r9,r9,16 |
| lvx v12,0,r9 |
| addi r9,r9,16 |
| lvx v13,0,r9 |
| |
| addis r9,r10,ha16(vectortmpwork-"L00000000002$pb") |
| addi r9,r9,lo16(vectortmpwork-"L00000000002$pb") |
| stw r6,0(r9) |
| li r6,8 |
| stw r6,4(r9) |
| lvx v9,0,r9 |
| li r9,128 |
| vspltw v8,v9,0 |
| vspltw v9,v9,1 |
| |
| ;; elt counter |
| li r9,0 |
| lis r7,0x0F01 |
| b L6 |
| .align 4 |
| L6: |
| lvx v1,r9,r3 |
| |
| vperm v4,v1,v0,v10 |
| ;********************* |
| add r10,r9,r3 |
| ;********************* |
| vperm v5,v1,v0,v11 |
| vperm v6,v1,v0,v12 |
| vperm v7,v1,v0,v13 |
| |
| vmulouh v4,v4,v8 |
| vmulouh v5,v5,v8 |
| vmulouh v6,v6,v8 |
| vmulouh v7,v7,v8 |
| vsrw v4,v4,v9 |
| vsrw v5,v5,v9 |
| vsrw v6,v6,v9 |
| vsrw v7,v7,v9 |
| |
| vpkuwus v4,v4,v5 |
| vpkuwus v6,v6,v7 |
| vpkuhus v1,v4,v6 |
| |
| stvx v1,r9,r4 |
| addi r9,r9,16 |
| |
| bdnz L6 |
| |
| mtspr 256,r11 |
| blr |
| |
| |
| .globl _ppc_brightness_generic |
| .align 3 |
| _ppc_brightness_generic: |
| lis r12,0x00FF |
| ori r12,r12,0x00FF |
| subi r3,r3,4 |
| subi r4,r4,4 |
| mtctr r5 |
| b L1 |
| .align 4 |
| L1: |
| lwzu r7,4(r3) |
| |
| rlwinm r8,r7,16,24,31 |
| rlwinm r9,r7,24,24,31 |
| mullw r8,r8,r6 |
| rlwinm r10,r7,0,24,31 |
| mullw r9,r9,r6 |
| srwi r8,r8,8 |
| mullw r10,r10,r6 |
| srwi r9,r9,8 |
| |
| rlwinm. r11,r8,0,0,23 |
| beq L2 |
| li r8,0xFF |
| L2: |
| srwi r10,r10,8 |
| rlwinm. r11,r9,0,0,23 |
| beq L3 |
| li r9,0xFF |
| L3: |
| rlwinm r7,r8,16,8,15 |
| rlwinm. r11,r10,0,0,23 |
| beq L4 |
| li r10,0xFF |
| L4: |
| rlwimi r7,r9,8,16,23 |
| rlwimi r7,r10,0,24,31 |
| |
| stwu r7,4(r4) |
| bdnz L1 |
| |
| blr |
| |
| |
| |
| .static_data |
| .align 4 |
| vectortmpwork: |
| .long 0,0,0,0 |
| |