| /* | 
 |  * Copyright (C) 2013 The Android Open Source Project | 
 |  * All rights reserved. | 
 |  * | 
 |  * Redistribution and use in source and binary forms, with or without | 
 |  * modification, are permitted provided that the following conditions | 
 |  * are met: | 
 |  *  * Redistributions of source code must retain the above copyright | 
 |  *    notice, this list of conditions and the following disclaimer. | 
 |  *  * Redistributions in binary form must reproduce the above copyright | 
 |  *    notice, this list of conditions and the following disclaimer in | 
 |  *    the documentation and/or other materials provided with the | 
 |  *    distribution. | 
 |  * | 
 |  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | 
 |  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | 
 |  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS | 
 |  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE | 
 |  * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, | 
 |  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, | 
 |  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS | 
 |  * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED | 
 |  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | 
 |  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT | 
 |  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | 
 |  * SUCH DAMAGE. | 
 |  */ | 
 |     .text | 
 |     // A64 instructions are 4 bytes wide and must sit on a 4-byte boundary. | 
 |     // The previous ".align 0" only asked for 2^0 = 1-byte alignment, i.e. | 
 |     // nothing; request 2^2 explicitly instead of relying on the section's | 
 |     // default alignment. | 
 |     .p2align 2 | 
 |  | 
 |     .global scanline_t32cb16blend_arm64 | 
 |  | 
 | /* | 
 |  * .macro pixel | 
 |  * | 
 |  *  Alpha-blends one RGB565 destination pixel, taken from either the low | 
 |  *  or the high 16 bits of DREG, with the 32-bit source pixel SRC and | 
 |  *  places the result in the matching half of FB. | 
 |  * | 
 |  *  For each channel c (5-bit red/blue, 6-bit green): | 
 |  *      f   = 0x100 - (sA + (sA >> 7))          (0..0x100) | 
 |  *      out = min(s_c + ((d_c * f) >> 8), max_c) | 
 |  *  where s_c is the source channel truncated to the destination depth | 
 |  *  and max_c is 0x1F or 0x3F. | 
 |  * | 
 |  * \DREG is a 32-bit register containing *two* original destination RGB565 | 
 |  *       pixels, with the even one in the low 16 bits, and the odd one in | 
 |  *       the high 16 bits. It is only read. | 
 |  * | 
 |  * \SRC is a 32-bit 0xAABBGGRR pixel value, with pre-multiplied colors. | 
 |  *      It is only read. | 
 |  * | 
 |  * \FB is the target register. | 
 |  *     ODD = 0: FB is overwritten; the blended pixel lands in bits 15:0 | 
 |  *              and bits 31:16 are left zero. | 
 |  *     ODD = 1: the blended pixel is ORed into bits 31:16, so those bits | 
 |  *              must be zero on entry (as an ODD = 0 expansion leaves | 
 |  *              them); bits 15:0 are preserved. | 
 |  * | 
 |  * \ODD is either 0 or 1 and selects the lower (0) or upper (1) 16-bit | 
 |  *      pixel of DREG / FB. | 
 |  * | 
 |  * DREG, SRC and FB must not be any of the clobbered registers, and FB | 
 |  * must differ from DREG and SRC because FB is written before they are | 
 |  * read for the last time. | 
 |  * | 
 |  * clobbered: w6, w7, w16, w17, condition flags | 
 |  * | 
 |  * w18 is deliberately no longer used as a temporary: x18 is the AAPCS64 | 
 |  * platform register and writing it corrupts platform state on systems | 
 |  * that reserve it (e.g. the shadow call stack pointer on Android). | 
 |  * | 
 |  */ | 
 |  | 
 | .macro pixel,   DREG, SRC, FB, ODD | 
 |  | 
 |     // SRC = 0xAABBGGRR | 
 |     // w7 = f = 0x100 - (sA + (sA >> 7)): sA = 0xFF -> 0, sA = 0 -> 0x100 | 
 |     lsr     w7, \SRC, #24               // sA | 
 |     add     w7, w7, w7, lsr #7          // sA + (sA >> 7) | 
 |     mov     w6, #0x100 | 
 |     sub     w7, w6, w7                  // f | 
 |  | 
 | .if \ODD // odd pixel: bits 31:16 of DREG -> bits 31:16 of FB | 
 |  | 
 |     // red: DREG[31:27], SRC[7:3] | 
 |     lsr     w16, \DREG, #(16 + 11)      // dR (topmost field, no mask needed) | 
 |     mul     w16, w7, w16 | 
 |     lsr     w6, \SRC, #3 | 
 |     and     w6, w6, #0x1F               // sR | 
 |     add     w16, w6, w16, lsr #8        // sR + ((dR * f) >> 8) | 
 |     mov     w17, #0x1F | 
 |     cmp     w16, w17 | 
 |     csel    w16, w17, w16, hi           // saturate to 5 bits | 
 |     orr     \FB, \FB, w16, lsl #(16 + 11) | 
 |  | 
 |     // green: DREG[26:21], SRC[15:10] | 
 |     and     w6, \DREG, #(0x3F<<(16 + 5)) | 
 |     lsr     w17, w6, #(16 + 5)          // dG | 
 |     mul     w6, w7, w17 | 
 |     lsr     w16, \SRC, #(8+2) | 
 |     and     w16, w16, #0x3F             // sG | 
 |     add     w6, w16, w6, lsr #8         // sG + ((dG * f) >> 8) | 
 |     mov     w17, #0x3F | 
 |     cmp     w6, w17 | 
 |     csel    w6, w17, w6, hi             // saturate to 6 bits | 
 |     orr     \FB, \FB, w6, lsl #(16 + 5) | 
 |  | 
 |     // blue: DREG[20:16], SRC[23:19] | 
 |     and     w16, \DREG, #(0x1F << 16) | 
 |     lsr     w17, w16, #16               // dB | 
 |     mul     w16, w7, w17 | 
 |     lsr     w6, \SRC, #(8+8+3) | 
 |     and     w6, w6, #0x1F               // sB | 
 |     add     w16, w6, w16, lsr #8        // sB + ((dB * f) >> 8) | 
 |     mov     w17, #0x1F | 
 |     cmp     w16, w17 | 
 |     csel    w16, w17, w16, hi           // saturate to 5 bits | 
 |     orr     \FB, \FB, w16, lsl #16 | 
 |  | 
 | .else // even pixel: bits 15:0 of DREG -> bits 15:0 of FB | 
 |  | 
 |     // red: DREG[15:11], SRC[7:3]. This step initialises FB. | 
 |     lsr     w16, \DREG, #11 | 
 |     and     w16, w16, #0x1F             // dR (drops the odd pixel's bits) | 
 |     mul     w16, w7, w16 | 
 |     lsr     w6, \SRC, #3 | 
 |     and     w6, w6, #0x1F               // sR | 
 |     add     w16, w6, w16, lsr #8        // sR + ((dR * f) >> 8) | 
 |     mov     w17, #0x1F | 
 |     cmp     w16, w17 | 
 |     csel    w16, w17, w16, hi           // saturate to 5 bits | 
 |     lsl     \FB, w16, #11 | 
 |  | 
 |     // green: DREG[10:5], SRC[15:10] | 
 |     and     w6, \DREG, #(0x3F<<5)       // dG << 5, kept in place | 
 |     mul     w6, w7, w6 | 
 |     lsr     w16, \SRC, #(8+2) | 
 |     and     w16, w16, #0x3F             // sG | 
 |     add     w6, w16, w6, lsr #(5+8)     // the extra 5 undoes the field offset | 
 |     mov     w17, #0x3F | 
 |     cmp     w6, w17 | 
 |     csel    w6, w17, w6, hi             // saturate to 6 bits | 
 |     orr     \FB, \FB, w6, lsl #5 | 
 |  | 
 |     // blue: DREG[4:0], SRC[23:19] | 
 |     and     w16, \DREG, #0x1F           // dB | 
 |     mul     w16, w7, w16 | 
 |     lsr     w6, \SRC, #(8+8+3) | 
 |     and     w6, w6, #0x1F               // sB | 
 |     add     w16, w6, w16, lsr #8        // sB + ((dB * f) >> 8) | 
 |     mov     w17, #0x1F | 
 |     cmp     w16, w17 | 
 |     csel    w16, w17, w16, hi           // saturate to 5 bits | 
 |     orr     \FB, \FB, w16 | 
 |  | 
 | .endif // End of blending even pixel | 
 |  | 
 | .endm // End of pixel macro | 
 |  | 
 |  | 
 | // scanline_t32cb16blend_arm64 | 
 | // | 
 | // Blends `count` 32-bit source pixels over `count` 16-bit destination | 
 | // pixels in place, using the `pixel` macro for each of them. | 
 | // | 
 | // In: | 
 | //   x0:  dst ptr  (16-bit pixels, advanced as pixels are written) | 
 | //   x1:  src ptr  (32-bit pixels, advanced as pixels are read) | 
 | //   w2:  count    (pixels; treated as an unsigned 32-bit value) | 
 | // | 
 | // Working registers: | 
 | //   w3:  d   destination pixel(s) loaded from dst | 
 | //   w4:  s0  source pixel for the even / single destination pixel | 
 | //   w5:  s1  source pixel for the odd destination pixel | 
 | //   w12: blended result handed back by the pixel macro | 
 | //   w6, w7, w16, w17: temporaries of the pixel macro (see its | 
 | //        "clobbered" list for the authoritative set) | 
 | // | 
 | // Leaf routine: no stack usage, no calls; condition flags are clobbered. | 
 | // Labels are assembler-local (.L) so they do not appear as symbols. | 
 |  | 
 |     .type   scanline_t32cb16blend_arm64, %function | 
 | scanline_t32cb16blend_arm64: | 
 |  | 
 |     // align DST to 32 bits: if dst is not 4-byte aligned, blend one | 
 |     // pixel on its own first so the main loop can use 32-bit accesses | 
 |     tst     x0, #0x3 | 
 |     b.eq    .Lt32cb16_aligned | 
 |     subs    w2, w2, #1 | 
 |     b.lo    .Lt32cb16_done              // count was 0 | 
 |  | 
 | .Lt32cb16_single: | 
 |     // one pixel: leading unaligned pixel or trailing odd pixel | 
 |     ldr     w4, [x1], #4 | 
 |     ldrh    w3, [x0] | 
 |     pixel   w3, w4, w12, 0 | 
 |     strh    w12, [x0], #2 | 
 |  | 
 | .Lt32cb16_aligned: | 
 |     // w2 is kept biased by -2 while in the loop: "lo" after the subs | 
 |     // means fewer than two pixels remain | 
 |     subs    w2, w2, #2 | 
 |     b.lo    .Lt32cb16_tail | 
 |  | 
 |     // The main loop is unrolled twice and processes up to 4 pixels | 
 |     // per pass, two at a time | 
 | .Lt32cb16_pair_loop: | 
 |     ldp     w4, w5, [x1], #8 | 
 |     add     x0, x0, #4 | 
 |     // both source pixels are all zero: destination stays as it is | 
 |     orr     w3, w4, w5 | 
 |     cbz     w3, .Lt32cb16_next_pair | 
 |  | 
 |     // load both destination pixels, blend, store them back together | 
 |     ldr     w3, [x0, #-4] | 
 |     pixel   w3, w4, w12, 0 | 
 |     pixel   w3, w5, w12, 1 | 
 |     str     w12, [x0, #-4] | 
 |  | 
 |     // 2nd copy of the loop body. The exit test is unsigned ("lo") like | 
 |     // every other test on w2; the former signed "lt" left the loop | 
 |     // early once the remaining count reached 2^31. | 
 |     subs    w2, w2, #2 | 
 |     b.lo    .Lt32cb16_tail | 
 |     ldp     w4, w5, [x1], #8 | 
 |     add     x0, x0, #4 | 
 |     orr     w3, w4, w5 | 
 |     cbz     w3, .Lt32cb16_next_pair | 
 |     ldr     w3, [x0, #-4] | 
 |     pixel   w3, w4, w12, 0 | 
 |     pixel   w3, w5, w12, 1 | 
 |     str     w12, [x0, #-4] | 
 |  | 
 | .Lt32cb16_next_pair: | 
 |     subs    w2, w2, #2 | 
 |     b.hs    .Lt32cb16_pair_loop | 
 |  | 
 | .Lt32cb16_tail: | 
 |     // w2 is -1 (one pixel left) or -2 (none). Adding 1 carries out only | 
 |     // for -1. The single-pixel path reloads w4 from src itself, so no | 
 |     // register has to be prepared for it here. | 
 |     adds    w2, w2, #1 | 
 |     b.hs    .Lt32cb16_single | 
 |  | 
 | .Lt32cb16_done: | 
 |     ret | 
 |     .size   scanline_t32cb16blend_arm64, .-scanline_t32cb16blend_arm64 |