merge intel x86 patches into gingerbread branch
diff --git a/debuggerd/Android.mk b/debuggerd/Android.mk
index 3c1cf02..752c953 100644
--- a/debuggerd/Android.mk
+++ b/debuggerd/Android.mk
@@ -1,11 +1,15 @@
 # Copyright 2005 The Android Open Source Project
 
-ifeq ($(TARGET_ARCH),arm)
+ifneq ($(filter arm x86,$(TARGET_ARCH)),)
 
 LOCAL_PATH:= $(call my-dir)
 include $(CLEAR_VARS)
 
-LOCAL_SRC_FILES:= debuggerd.c getevent.c unwind-arm.c pr-support.c utility.c
+LOCAL_SRC_FILES:= debuggerd.c utility.c getevent.c $(TARGET_ARCH)/machine.c $(TARGET_ARCH)/unwind.c symbol_table.c
+ifeq ($(TARGET_ARCH),arm)
+LOCAL_SRC_FILES += $(TARGET_ARCH)/pr-support.c
+endif
+
 LOCAL_CFLAGS := -Wall
 LOCAL_MODULE := debuggerd
 
@@ -22,7 +26,7 @@
 
 include $(CLEAR_VARS)
 LOCAL_SRC_FILES := crasher.c
-LOCAL_SRC_FILES += crashglue.S
+LOCAL_SRC_FILES += $(TARGET_ARCH)/crashglue.S
 LOCAL_MODULE := crasher
 LOCAL_MODULE_PATH := $(TARGET_OUT_OPTIONAL_EXECUTABLES)
 LOCAL_MODULE_TAGS := eng
@@ -46,4 +50,4 @@
 include $(BUILD_EXECUTABLE)
 endif # ARCH_ARM_HAVE_VFP == true
 
-endif # TARGET_ARCH == arm
+endif # arm or x86 in TARGET_ARCH
diff --git a/debuggerd/crashglue.S b/debuggerd/arm/crashglue.S
similarity index 99%
rename from debuggerd/crashglue.S
rename to debuggerd/arm/crashglue.S
index 0c1fd9b..eb9d0e3 100644
--- a/debuggerd/crashglue.S
+++ b/debuggerd/arm/crashglue.S
@@ -2,7 +2,7 @@
 .type crash1, %function
 .globl crashnostack
 .type crashnostack, %function
-		
+
 crash1:
 	ldr r0, =0xa5a50000
 	ldr r1, =0xa5a50001
diff --git a/debuggerd/arm/machine.c b/debuggerd/arm/machine.c
new file mode 100644
index 0000000..ccd0baf
--- /dev/null
+++ b/debuggerd/arm/machine.c
@@ -0,0 +1,229 @@
+/* system/debuggerd/arm/machine.c
+**
+** Copyright 2006, The Android Open Source Project
+**
+** Licensed under the Apache License, Version 2.0 (the "License");
+** you may not use this file except in compliance with the License.
+** You may obtain a copy of the License at
+**
+**     http://www.apache.org/licenses/LICENSE-2.0
+**
+** Unless required by applicable law or agreed to in writing, software
+** distributed under the License is distributed on an "AS IS" BASIS,
+** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+** See the License for the specific language governing permissions and
+** limitations under the License.
+*/
+
+#include <stdio.h>
+#include <errno.h>
+#include <signal.h>
+#include <pthread.h>
+#include <fcntl.h>
+#include <sys/types.h>
+#include <dirent.h>
+
+#include <sys/ptrace.h>
+#include <sys/wait.h>
+#include <sys/exec_elf.h>
+#include <sys/stat.h>
+
+#include <cutils/sockets.h>
+#include <cutils/properties.h>
+
+#include <linux/input.h>
+
+#include "utility.h"
+
+#ifdef WITH_VFP
+#ifdef WITH_VFP_D32
+#define NUM_VFP_REGS 32
+#else
+#define NUM_VFP_REGS 16
+#endif
+#endif
+
+/* Main entry point to get the backtrace from the crashing process */
+extern int unwind_backtrace_with_ptrace(int tfd, pid_t pid, mapinfo *map,
+                                        unsigned int sp_list[],
+                                        int *frame0_pc_sane,
+                                        bool at_fault);
+
+/* Dump the code words around pc (and around lr when it differs from pc),
+ * then the stack contents of the traced thread.
+ *  tfd          - tombstone fd handed to _LOG
+ *  pid          - thread inspected through ptrace
+ *  map          - mapping list used to name values found on the stack
+ *  unwind_depth - frame count from the unwinder; <= 0 selects a fixed window
+ *  sp_list      - SP per unwound frame, at most STACK_CONTENT_DEPTH used
+ *  at_fault     - when false, output goes to the tombstone only
+ */
+void dump_stack_and_code(int tfd, int pid, mapinfo *map,
+                         int unwind_depth, unsigned int sp_list[],
+                         bool at_fault)
+{
+    unsigned int sp, pc, p, end, data;
+    struct pt_regs r;
+    int sp_depth, frames;
+    bool only_in_tombstone = !at_fault;
+    char code_buffer[80];
+
+    if(ptrace(PTRACE_GETREGS, pid, 0, &r)) return;
+    sp = r.ARM_sp;
+    pc = r.ARM_pc;
+
+    _LOG(tfd, only_in_tombstone, "\ncode around pc:\n");
+
+    end = p = pc & ~3;
+    p -= 32;
+    end += 32;
+
+    /* Dump the code around PC as:
+     *  addr       contents
+     *  00008d34   fffffcd0 4c0eb530 b0934a0e 1c05447c
+     *  00008d44   f7ff18a0 490ced94 68035860 d0012b00
+     */
+    while (p <= end) {
+        int i;
+
+        sprintf(code_buffer, "%08x ", p);
+        for (i = 0; i < 4; i++) {
+            data = ptrace(PTRACE_PEEKTEXT, pid, (void*)p, NULL);
+            sprintf(code_buffer + strlen(code_buffer), "%08x ", data);
+            p += 4;
+        }
+        _LOG(tfd, only_in_tombstone, "%s\n", code_buffer);
+    }
+
+    if ((unsigned) r.ARM_lr != pc) {
+        _LOG(tfd, only_in_tombstone, "\ncode around lr:\n");
+
+        end = p = r.ARM_lr & ~3;
+        p -= 32;
+        end += 32;
+
+        /* Same row layout as the dump around PC above */
+        while (p <= end) {
+            int i;
+
+            sprintf(code_buffer, "%08x ", p);
+            for (i = 0; i < 4; i++) {
+                data = ptrace(PTRACE_PEEKTEXT, pid, (void*)p, NULL);
+                sprintf(code_buffer + strlen(code_buffer), "%08x ", data);
+                p += 4;
+            }
+            _LOG(tfd, only_in_tombstone, "%s\n", code_buffer);
+        }
+    }
+
+    p = sp - 64;
+    p &= ~3;
+    /* sp_list entries at or beyond 'frames' may be unset; never read them */
+    frames = unwind_depth;
+    if (frames > STACK_CONTENT_DEPTH) {
+        frames = STACK_CONTENT_DEPTH;
+    }
+    if (frames > 0) {
+        end = sp_list[frames-1];
+    }
+    else {
+        end = sp | 0x000000ff;
+        end += 0xff;
+    }
+
+    _LOG(tfd, only_in_tombstone, "\nstack:\n");
+
+    /* If the crash is due to PC == 0, there will be two frames that
+     * have identical SP value.
+     */
+    sp_depth = (frames > 1 && sp_list[0] == sp_list[1]) ? 1 : 0;
+
+    while (p <= end) {
+         char *prompt;
+         char level[16];
+         data = ptrace(PTRACE_PEEKTEXT, pid, (void*)p, NULL);
+         if (sp_depth < frames && p == sp_list[sp_depth]) {
+             sprintf(level, "#%02d", sp_depth++);
+             prompt = level;
+         }
+         else {
+             prompt = "   ";
+         }
+
+         /* Print the stack content in the log for the first 3 frames. For the
+          * rest only print them in the tombstone file.
+          */
+         _LOG(tfd, (sp_depth > 2) || only_in_tombstone,
+              "%s %08x  %08x  %s\n", prompt, p, data,
+              map_to_name(map, data, ""));
+         p += 4;
+    }
+    /* print another 64-byte of stack data after the last frame */
+
+    end = p+64;
+    while (p <= end) {
+         data = ptrace(PTRACE_PEEKTEXT, pid, (void*)p, NULL);
+         _LOG(tfd, (sp_depth > 2) || only_in_tombstone,
+              "    %08x  %08x  %s\n", p, data,
+              map_to_name(map, data, ""));
+         p += 4;
+    }
+}
+
+void dump_pc_and_lr(int tfd, int pid, mapinfo *map, int unwound_level,
+                    bool at_fault)
+{
+    struct pt_regs r;
+    /* Report raw pc/lr from the thread's registers, labelled #00 and #01 */
+    if(ptrace(PTRACE_GETREGS, pid, 0, &r)) {
+        _LOG(tfd, !at_fault, "tid %d not responding!\n", pid);
+        return;
+    }
+    /* pc is only printed when no frame was unwound (unwound_level == 0) */
+    if (unwound_level == 0) {
+        _LOG(tfd, !at_fault, "         #%02d  pc %08x  %s\n", 0, r.ARM_pc,
+             map_to_name(map, r.ARM_pc, "<unknown>"));
+    }
+    _LOG(tfd, !at_fault, "         #%02d  lr %08x  %s\n", 1, r.ARM_lr,
+            map_to_name(map, r.ARM_lr, "<unknown>"));
+}
+
+void dump_registers(int tfd, int pid, bool at_fault)
+{
+    struct pt_regs r;
+    bool only_in_tombstone = !at_fault;
+    /* Log r0-r10, fp, ip, sp, lr, pc and cpsr of thread pid via _LOG */
+    if(ptrace(PTRACE_GETREGS, pid, 0, &r)) {
+        _LOG(tfd, only_in_tombstone,
+             "cannot get registers: %s\n", strerror(errno));
+        return;
+    }
+
+    _LOG(tfd, only_in_tombstone, " r0 %08x  r1 %08x  r2 %08x  r3 %08x\n",
+         r.ARM_r0, r.ARM_r1, r.ARM_r2, r.ARM_r3);
+    _LOG(tfd, only_in_tombstone, " r4 %08x  r5 %08x  r6 %08x  r7 %08x\n",
+         r.ARM_r4, r.ARM_r5, r.ARM_r6, r.ARM_r7);
+    _LOG(tfd, only_in_tombstone, " r8 %08x  r9 %08x  10 %08x  fp %08x\n",
+         r.ARM_r8, r.ARM_r9, r.ARM_r10, r.ARM_fp);
+    _LOG(tfd, only_in_tombstone,
+         " ip %08x  sp %08x  lr %08x  pc %08x  cpsr %08x\n",
+         r.ARM_ip, r.ARM_sp, r.ARM_lr, r.ARM_pc, r.ARM_cpsr);
+    /* Optional VFP dump, compiled in only when WITH_VFP is defined */
+#ifdef WITH_VFP
+    struct user_vfp vfp_regs;
+    int i;
+
+    if(ptrace(PTRACE_GETVFPREGS, pid, 0, &vfp_regs)) {
+        _LOG(tfd, only_in_tombstone,
+             "cannot get registers: %s\n", strerror(errno));
+        return;
+    }
+    /* Two d registers per output line, then fpscr on its own line */
+    for (i = 0; i < NUM_VFP_REGS; i += 2) {
+        _LOG(tfd, only_in_tombstone,
+             " d%-2d %016llx  d%-2d %016llx\n",
+              i, vfp_regs.fpregs[i], i+1, vfp_regs.fpregs[i+1]);
+    }
+    _LOG(tfd, only_in_tombstone, " scr %08lx\n\n", vfp_regs.fpscr);
+#endif
+}
diff --git a/debuggerd/pr-support.c b/debuggerd/arm/pr-support.c
similarity index 100%
rename from debuggerd/pr-support.c
rename to debuggerd/arm/pr-support.c
diff --git a/debuggerd/unwind-arm.c b/debuggerd/arm/unwind.c
similarity index 96%
rename from debuggerd/unwind-arm.c
rename to debuggerd/arm/unwind.c
index 9642d2e..d9600b7 100644
--- a/debuggerd/unwind-arm.c
+++ b/debuggerd/arm/unwind.c
@@ -37,6 +37,8 @@
 #include <unwind.h>
 #include "utility.h"
 
+#include "symbol_table.h"
+
 typedef struct _ZSt9type_info type_info; /* This names C++ type_info type */
 
 void __attribute__((weak)) __cxa_call_unexpected(_Unwind_Control_Block *ucbp);
@@ -393,6 +395,7 @@
     phase2_vrs *vrs = (phase2_vrs*) context;
     const mapinfo *mi;
     bool only_in_tombstone = !at_fault;
+    const struct symbol* sym = 0;
 
     if (stack_level < STACK_CONTENT_DEPTH) {
         sp_list[stack_level] = vrs->core.r[R_SP];
@@ -427,7 +430,7 @@
         if (pc & 1) {
             _uw prev_word;
             pc = (pc & ~1);
-            prev_word = get_remote_word(pid, (void *) pc-4);
+            prev_word = get_remote_word(pid, (char *) pc-4);
             // Long offset 
             if ((prev_word & 0xf0000000) == 0xf0000000 && 
                 (prev_word & 0x0000e000) == 0x0000e000) {
@@ -448,12 +451,22 @@
      * 1MB boundaries, and the library may be larger than 1MB. So for .so 
      * addresses we print the relative offset in back trace.
      */
-    rel_pc = pc;
     mi = pc_to_mapinfo(map, pc, &rel_pc);
 
-    _LOG(tfd, only_in_tombstone, 
-         "         #%02d  pc %08x  %s\n", stack_level, rel_pc, 
-         mi ? mi->name : "");
+    /* See if we can determine what symbol this stack frame resides in */
+    if (mi != 0 && mi->symbols != 0) {
+        sym = symbol_table_lookup(mi->symbols, rel_pc);
+    }
+
+    if (sym) {
+        _LOG(tfd, only_in_tombstone,
+            "         #%02d  pc %08x  %s (%s)\n", stack_level, rel_pc,
+            mi ? mi->name : "", sym->name);
+    } else {
+        _LOG(tfd, only_in_tombstone,
+            "         #%02d  pc %08x  %s\n", stack_level, rel_pc,
+            mi ? mi->name : "");
+    }
 
     return _URC_NO_REASON;
 }
diff --git a/debuggerd/crasher.c b/debuggerd/crasher.c
index f4a5a62..00652e9 100644
--- a/debuggerd/crasher.c
+++ b/debuggerd/crasher.c
@@ -19,6 +19,7 @@
 
 void crash1(void);
 void crashnostack(void);
+void maybeabort(void);
 
 static void debuggerd_connect()
 {
diff --git a/debuggerd/debuggerd.c b/debuggerd/debuggerd.c
index b557cea..ecfe01f 100644
--- a/debuggerd/debuggerd.c
+++ b/debuggerd/debuggerd.c
@@ -16,8 +16,6 @@
 */
 
 #include <stdio.h>
-#include <stdlib.h>
-#include <unistd.h>
 #include <errno.h>
 #include <signal.h>
 #include <pthread.h>
@@ -33,37 +31,21 @@
 
 #include <cutils/sockets.h>
 #include <cutils/logd.h>
-#include <cutils/sockets.h>
 #include <cutils/properties.h>
 
 #include <linux/input.h>
 
 #include <private/android_filesystem_config.h>
 
+#include "debuggerd.h"
 #include "utility.h"
 
-#ifdef WITH_VFP
-#ifdef WITH_VFP_D32
-#define NUM_VFP_REGS 32
-#else
-#define NUM_VFP_REGS 16
-#endif
-#endif
-
-/* Main entry point to get the backtrace from the crashing process */
-extern int unwind_backtrace_with_ptrace(int tfd, pid_t pid, mapinfo *map,
-                                        unsigned int sp_list[],
-                                        int *frame0_pc_sane,
-                                        bool at_fault);
-
-static int logsocket = -1;
-
 #define ANDROID_LOG_INFO 4
 
 /* Log information onto the tombstone */
 void _LOG(int tfd, bool in_tombstone_only, const char *fmt, ...)
 {
-    char buf[128];
+    char buf[512];
 
     va_list ap;
     va_start(ap, fmt);
@@ -79,13 +61,6 @@
         __android_log_vprint(ANDROID_LOG_INFO, "DEBUG", fmt, ap);
 }
 
-#define LOG(fmt...) _LOG(-1, 0, fmt)
-#if 0
-#define XLOG(fmt...) _LOG(-1, 0, fmt)
-#else
-#define XLOG(fmt...) do {} while(0)
-#endif
-
 // 6f000000-6f01e000 rwxp 00000000 00:0c 16389419   /system/lib/libcomposer.so
 // 012345678901234567890123456789012345678901234567890123456789
 // 0         1         2         3         4         5
@@ -106,10 +81,11 @@
 
     mi->start = strtoul(line, 0, 16);
     mi->end = strtoul(line + 9, 0, 16);
-    /* To be filled in parse_exidx_info if the mapped section starts with
+    /* To be filled in parse_elf_info if the mapped section starts with
      * elf_header
      */
     mi->exidx_start = mi->exidx_end = 0;
+    mi->symbols = 0;
     mi->next = 0;
     strcpy(mi->name, line + 49);
 
@@ -125,186 +101,6 @@
     _LOG(tfd, false, "Build fingerprint: '%s'\n", fingerprint);
 }
 
-
-void dump_stack_and_code(int tfd, int pid, mapinfo *map,
-                         int unwind_depth, unsigned int sp_list[],
-                         bool at_fault)
-{
-    unsigned int sp, pc, p, end, data;
-    struct pt_regs r;
-    int sp_depth;
-    bool only_in_tombstone = !at_fault;
-    char code_buffer[80];
-
-    if(ptrace(PTRACE_GETREGS, pid, 0, &r)) return;
-    sp = r.ARM_sp;
-    pc = r.ARM_pc;
-
-    _LOG(tfd, only_in_tombstone, "\ncode around pc:\n");
-
-    end = p = pc & ~3;
-    p -= 32;
-    end += 32;
-
-    /* Dump the code around PC as:
-     *  addr       contents
-     *  00008d34   fffffcd0 4c0eb530 b0934a0e 1c05447c
-     *  00008d44   f7ff18a0 490ced94 68035860 d0012b00
-     */
-    while (p <= end) {
-        int i;
-
-        sprintf(code_buffer, "%08x ", p);
-        for (i = 0; i < 4; i++) {
-            data = ptrace(PTRACE_PEEKTEXT, pid, (void*)p, NULL);
-            sprintf(code_buffer + strlen(code_buffer), "%08x ", data);
-            p += 4;
-        }
-        _LOG(tfd, only_in_tombstone, "%s\n", code_buffer);
-    }
-
-    if ((unsigned) r.ARM_lr != pc) {
-        _LOG(tfd, only_in_tombstone, "\ncode around lr:\n");
-
-        end = p = r.ARM_lr & ~3;
-        p -= 32;
-        end += 32;
-
-        /* Dump the code around LR as:
-         *  addr       contents
-         *  00008d34   fffffcd0 4c0eb530 b0934a0e 1c05447c
-         *  00008d44   f7ff18a0 490ced94 68035860 d0012b00
-         */
-        while (p <= end) {
-            int i;
-
-            sprintf(code_buffer, "%08x ", p);
-            for (i = 0; i < 4; i++) {
-                data = ptrace(PTRACE_PEEKTEXT, pid, (void*)p, NULL);
-                sprintf(code_buffer + strlen(code_buffer), "%08x ", data);
-                p += 4;
-            }
-            _LOG(tfd, only_in_tombstone, "%s\n", code_buffer);
-        }
-    }
-
-    p = sp - 64;
-    p &= ~3;
-    if (unwind_depth != 0) {
-        if (unwind_depth < STACK_CONTENT_DEPTH) {
-            end = sp_list[unwind_depth-1];
-        }
-        else {
-            end = sp_list[STACK_CONTENT_DEPTH-1];
-        }
-    }
-    else {
-        end = sp | 0x000000ff;
-        end += 0xff;
-    }
-
-    _LOG(tfd, only_in_tombstone, "\nstack:\n");
-
-    /* If the crash is due to PC == 0, there will be two frames that
-     * have identical SP value.
-     */
-    if (sp_list[0] == sp_list[1]) {
-        sp_depth = 1;
-    }
-    else {
-        sp_depth = 0;
-    }
-
-    while (p <= end) {
-         char *prompt;
-         char level[16];
-         data = ptrace(PTRACE_PEEKTEXT, pid, (void*)p, NULL);
-         if (p == sp_list[sp_depth]) {
-             sprintf(level, "#%02d", sp_depth++);
-             prompt = level;
-         }
-         else {
-             prompt = "   ";
-         }
-
-         /* Print the stack content in the log for the first 3 frames. For the
-          * rest only print them in the tombstone file.
-          */
-         _LOG(tfd, (sp_depth > 2) || only_in_tombstone,
-              "%s %08x  %08x  %s\n", prompt, p, data,
-              map_to_name(map, data, ""));
-         p += 4;
-    }
-    /* print another 64-byte of stack data after the last frame */
-
-    end = p+64;
-    while (p <= end) {
-         data = ptrace(PTRACE_PEEKTEXT, pid, (void*)p, NULL);
-         _LOG(tfd, (sp_depth > 2) || only_in_tombstone,
-              "    %08x  %08x  %s\n", p, data,
-              map_to_name(map, data, ""));
-         p += 4;
-    }
-}
-
-void dump_pc_and_lr(int tfd, int pid, mapinfo *map, int unwound_level,
-                    bool at_fault)
-{
-    struct pt_regs r;
-
-    if(ptrace(PTRACE_GETREGS, pid, 0, &r)) {
-        _LOG(tfd, !at_fault, "tid %d not responding!\n", pid);
-        return;
-    }
-
-    if (unwound_level == 0) {
-        _LOG(tfd, !at_fault, "         #%02d  pc %08x  %s\n", 0, r.ARM_pc,
-             map_to_name(map, r.ARM_pc, "<unknown>"));
-    }
-    _LOG(tfd, !at_fault, "         #%02d  lr %08x  %s\n", 1, r.ARM_lr,
-            map_to_name(map, r.ARM_lr, "<unknown>"));
-}
-
-void dump_registers(int tfd, int pid, bool at_fault)
-{
-    struct pt_regs r;
-    bool only_in_tombstone = !at_fault;
-
-    if(ptrace(PTRACE_GETREGS, pid, 0, &r)) {
-        _LOG(tfd, only_in_tombstone,
-             "cannot get registers: %s\n", strerror(errno));
-        return;
-    }
-
-    _LOG(tfd, only_in_tombstone, " r0 %08x  r1 %08x  r2 %08x  r3 %08x\n",
-         r.ARM_r0, r.ARM_r1, r.ARM_r2, r.ARM_r3);
-    _LOG(tfd, only_in_tombstone, " r4 %08x  r5 %08x  r6 %08x  r7 %08x\n",
-         r.ARM_r4, r.ARM_r5, r.ARM_r6, r.ARM_r7);
-    _LOG(tfd, only_in_tombstone, " r8 %08x  r9 %08x  10 %08x  fp %08x\n",
-         r.ARM_r8, r.ARM_r9, r.ARM_r10, r.ARM_fp);
-    _LOG(tfd, only_in_tombstone,
-         " ip %08x  sp %08x  lr %08x  pc %08x  cpsr %08x\n",
-         r.ARM_ip, r.ARM_sp, r.ARM_lr, r.ARM_pc, r.ARM_cpsr);
-
-#ifdef WITH_VFP
-    struct user_vfp vfp_regs;
-    int i;
-
-    if(ptrace(PTRACE_GETVFPREGS, pid, 0, &vfp_regs)) {
-        _LOG(tfd, only_in_tombstone,
-             "cannot get registers: %s\n", strerror(errno));
-        return;
-    }
-
-    for (i = 0; i < NUM_VFP_REGS; i += 2) {
-        _LOG(tfd, only_in_tombstone,
-             " d%-2d %016llx  d%-2d %016llx\n",
-              i, vfp_regs.fpregs[i], i+1, vfp_regs.fpregs[i+1]);
-    }
-    _LOG(tfd, only_in_tombstone, " scr %08lx\n\n", vfp_regs.fpscr);
-#endif
-}
-
 const char *get_signame(int sig)
 {
     switch(sig) {
@@ -399,7 +195,7 @@
     if(sig) dump_fault_addr(tfd, tid, sig);
 }
 
-static void parse_exidx_info(mapinfo *milist, pid_t pid)
+static void parse_elf_info(mapinfo *milist, pid_t pid)
 {
     mapinfo *mi;
     for (mi = milist; mi != NULL; mi = mi->next) {
@@ -422,13 +218,18 @@
                 /* Parse the program header */
                 get_remote_struct(pid, (char *) (ptr+i), &phdr,
                                   sizeof(Elf32_Phdr));
+#ifdef __arm__
                 /* Found a EXIDX segment? */
                 if (phdr.p_type == PT_ARM_EXIDX) {
                     mi->exidx_start = mi->start + phdr.p_offset;
                     mi->exidx_end = mi->exidx_start + phdr.p_filesz;
                     break;
                 }
+#endif
             }
+
+            /* Try to load symbols from this file */
+            mi->symbols = symbol_table_create(mi->name);
         }
     }
 }
@@ -440,7 +241,9 @@
     mapinfo *milist = 0;
     unsigned int sp_list[STACK_CONTENT_DEPTH];
     int stack_depth;
+#ifdef __arm__
     int frame0_pc_sane = 1;
+#endif
 
     if (!at_fault) {
         _LOG(tfd, true,
@@ -466,8 +269,9 @@
         fclose(fp);
     }
 
-    parse_exidx_info(milist, tid);
+    parse_elf_info(milist, tid);
 
+#if __arm__
     /* If stack unwinder fails, use the default solution to dump the stack
      * content.
      */
@@ -482,9 +286,18 @@
     }
 
     dump_stack_and_code(tfd, tid, milist, stack_depth, sp_list, at_fault);
+#elif __i386__
+    /* x86: produce the backtrace with the ptrace-based x86 unwinder. Unlike
+     * the ARM branch above, no separate stack/code dump follows it here.
+     */
+    stack_depth = unwind_backtrace_with_ptrace_x86(tfd, tid, milist,at_fault);
+#else
+#error "Unsupported architecture"
+#endif
 
     while(milist) {
         mapinfo *next = milist->next;
+        symbol_table_free(milist->symbols);
         free(milist);
         milist = next;
     }
@@ -863,10 +676,12 @@
     if(fd != -1) close(fd);
 }
 
+
 int main()
 {
     int s;
     struct sigaction act;
+    int logsocket = -1;
 
     logsocket = socket_local_client("logd",
             ANDROID_SOCKET_NAMESPACE_ABSTRACT, SOCK_DGRAM);
diff --git a/debuggerd/debuggerd.c.orig b/debuggerd/debuggerd.c.orig
new file mode 100644
index 0000000..0b3d9ba
--- /dev/null
+++ b/debuggerd/debuggerd.c.orig
@@ -0,0 +1,911 @@
+/* system/debuggerd/debuggerd.c
+**
+** Copyright 2006, The Android Open Source Project
+**
+** Licensed under the Apache License, Version 2.0 (the "License");
+** you may not use this file except in compliance with the License.
+** You may obtain a copy of the License at
+**
+**     http://www.apache.org/licenses/LICENSE-2.0
+**
+** Unless required by applicable law or agreed to in writing, software
+** distributed under the License is distributed on an "AS IS" BASIS,
+** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+** See the License for the specific language governing permissions and
+** limitations under the License.
+*/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <errno.h>
+#include <signal.h>
+#include <pthread.h>
+#include <stdarg.h>
+#include <fcntl.h>
+#include <sys/types.h>
+#include <dirent.h>
+
+#include <sys/ptrace.h>
+#include <sys/wait.h>
+#include <sys/exec_elf.h>
+#include <sys/stat.h>
+
+#include <cutils/sockets.h>
+#include <cutils/logd.h>
+#include <cutils/sockets.h>
+#include <cutils/properties.h>
+
+#include <linux/input.h>
+
+#include <private/android_filesystem_config.h>
+
+#include "utility.h"
+
+#ifdef WITH_VFP
+#ifdef WITH_VFP_D32
+#define NUM_VFP_REGS 32
+#else
+#define NUM_VFP_REGS 16
+#endif
+#endif
+
+/* Main entry point to get the backtrace from the crashing process */
+extern int unwind_backtrace_with_ptrace(int tfd, pid_t pid, mapinfo *map,
+                                        unsigned int sp_list[],
+                                        int *frame0_pc_sane,
+                                        bool at_fault);
+
+static int logsocket = -1;
+
+#define ANDROID_LOG_INFO 4
+
+/* Log information onto the tombstone */
+void _LOG(int tfd, bool in_tombstone_only, const char *fmt, ...)
+{
+    char buf[512];
+
+    va_list ap;
+    va_start(ap, fmt);
+
+    if (tfd >= 0) {
+        int len;
+        vsnprintf(buf, sizeof(buf), fmt, ap);
+        len = strlen(buf);
+        if(tfd >= 0) write(tfd, buf, len);
+    }
+
+    if (!in_tombstone_only)
+        __android_log_vprint(ANDROID_LOG_INFO, "DEBUG", fmt, ap);
+}
+
+#define LOG(fmt...) _LOG(-1, 0, fmt)
+#if 0
+#define XLOG(fmt...) _LOG(-1, 0, fmt)
+#else
+#define XLOG(fmt...) do {} while(0)
+#endif
+
+// 6f000000-6f01e000 rwxp 00000000 00:0c 16389419   /system/lib/libcomposer.so
+// 012345678901234567890123456789012345678901234567890123456789
+// 0         1         2         3         4         5
+
+mapinfo *parse_maps_line(char *line)
+{
+    mapinfo *mi;
+    int len = strlen(line);
+
+    if(len < 1) return 0;
+    line[--len] = 0;
+
+    if(len < 50) return 0;
+    if(line[20] != 'x') return 0;
+
+    mi = malloc(sizeof(mapinfo) + (len - 47));
+    if(mi == 0) return 0;
+
+    mi->start = strtoul(line, 0, 16);
+    mi->end = strtoul(line + 9, 0, 16);
+    /* To be filled in parse_elf_info if the mapped section starts with
+     * elf_header
+     */
+    mi->exidx_start = mi->exidx_end = 0;
+    mi->symbols = 0;
+    mi->next = 0;
+    strcpy(mi->name, line + 49);
+
+    return mi;
+}
+
+void dump_build_info(int tfd)
+{
+    char fingerprint[PROPERTY_VALUE_MAX];
+
+    property_get("ro.build.fingerprint", fingerprint, "unknown");
+
+    _LOG(tfd, false, "Build fingerprint: '%s'\n", fingerprint);
+}
+
+
+void dump_stack_and_code(int tfd, int pid, mapinfo *map,
+                         int unwind_depth, unsigned int sp_list[],
+                         bool at_fault)
+{
+    unsigned int sp, pc, p, end, data;
+    struct pt_regs r;
+    int sp_depth;
+    bool only_in_tombstone = !at_fault;
+    char code_buffer[80];
+
+    if(ptrace(PTRACE_GETREGS, pid, 0, &r)) return;
+    sp = r.ARM_sp;
+    pc = r.ARM_pc;
+
+    _LOG(tfd, only_in_tombstone, "\ncode around pc:\n");
+
+    end = p = pc & ~3;
+    p -= 32;
+    end += 32;
+
+    /* Dump the code around PC as:
+     *  addr       contents
+     *  00008d34   fffffcd0 4c0eb530 b0934a0e 1c05447c
+     *  00008d44   f7ff18a0 490ced94 68035860 d0012b00
+     */
+    while (p <= end) {
+        int i;
+
+        sprintf(code_buffer, "%08x ", p);
+        for (i = 0; i < 4; i++) {
+            data = ptrace(PTRACE_PEEKTEXT, pid, (void*)p, NULL);
+            sprintf(code_buffer + strlen(code_buffer), "%08x ", data);
+            p += 4;
+        }
+        _LOG(tfd, only_in_tombstone, "%s\n", code_buffer);
+    }
+
+    if ((unsigned) r.ARM_lr != pc) {
+        _LOG(tfd, only_in_tombstone, "\ncode around lr:\n");
+
+        end = p = r.ARM_lr & ~3;
+        p -= 32;
+        end += 32;
+
+        /* Dump the code around LR as:
+         *  addr       contents
+         *  00008d34   fffffcd0 4c0eb530 b0934a0e 1c05447c
+         *  00008d44   f7ff18a0 490ced94 68035860 d0012b00
+         */
+        while (p <= end) {
+            int i;
+
+            sprintf(code_buffer, "%08x ", p);
+            for (i = 0; i < 4; i++) {
+                data = ptrace(PTRACE_PEEKTEXT, pid, (void*)p, NULL);
+                sprintf(code_buffer + strlen(code_buffer), "%08x ", data);
+                p += 4;
+            }
+            _LOG(tfd, only_in_tombstone, "%s\n", code_buffer);
+        }
+    }
+
+    p = sp - 64;
+    p &= ~3;
+    if (unwind_depth != 0) {
+        if (unwind_depth < STACK_CONTENT_DEPTH) {
+            end = sp_list[unwind_depth-1];
+        }
+        else {
+            end = sp_list[STACK_CONTENT_DEPTH-1];
+        }
+    }
+    else {
+        end = sp | 0x000000ff;
+        end += 0xff;
+    }
+
+    _LOG(tfd, only_in_tombstone, "\nstack:\n");
+
+    /* If the crash is due to PC == 0, there will be two frames that
+     * have identical SP value.
+     */
+    if (sp_list[0] == sp_list[1]) {
+        sp_depth = 1;
+    }
+    else {
+        sp_depth = 0;
+    }
+
+    while (p <= end) {
+         char *prompt;
+         char level[16];
+         data = ptrace(PTRACE_PEEKTEXT, pid, (void*)p, NULL);
+         if (p == sp_list[sp_depth]) {
+             sprintf(level, "#%02d", sp_depth++);
+             prompt = level;
+         }
+         else {
+             prompt = "   ";
+         }
+
+         /* Print the stack content in the log for the first 3 frames. For the
+          * rest only print them in the tombstone file.
+          */
+         _LOG(tfd, (sp_depth > 2) || only_in_tombstone,
+              "%s %08x  %08x  %s\n", prompt, p, data,
+              map_to_name(map, data, ""));
+         p += 4;
+    }
+    /* print another 64-byte of stack data after the last frame */
+
+    end = p+64;
+    while (p <= end) {
+         data = ptrace(PTRACE_PEEKTEXT, pid, (void*)p, NULL);
+         _LOG(tfd, (sp_depth > 2) || only_in_tombstone,
+              "    %08x  %08x  %s\n", p, data,
+              map_to_name(map, data, ""));
+         p += 4;
+    }
+}
+
+void dump_pc_and_lr(int tfd, int pid, mapinfo *map, int unwound_level,
+                    bool at_fault)
+{
+    struct pt_regs r;
+
+    if(ptrace(PTRACE_GETREGS, pid, 0, &r)) {
+        _LOG(tfd, !at_fault, "tid %d not responding!\n", pid);
+        return;
+    }
+
+    if (unwound_level == 0) {
+        _LOG(tfd, !at_fault, "         #%02d  pc %08x  %s\n", 0, r.ARM_pc,
+             map_to_name(map, r.ARM_pc, "<unknown>"));
+    }
+    _LOG(tfd, !at_fault, "         #%02d  lr %08x  %s\n", 1, r.ARM_lr,
+            map_to_name(map, r.ARM_lr, "<unknown>"));
+}
+
+void dump_registers(int tfd, int pid, bool at_fault)
+{
+    struct pt_regs r;
+    bool only_in_tombstone = !at_fault;
+
+    if(ptrace(PTRACE_GETREGS, pid, 0, &r)) {
+        _LOG(tfd, only_in_tombstone,
+             "cannot get registers: %s\n", strerror(errno));
+        return;
+    }
+
+    _LOG(tfd, only_in_tombstone, " r0 %08x  r1 %08x  r2 %08x  r3 %08x\n",
+         r.ARM_r0, r.ARM_r1, r.ARM_r2, r.ARM_r3);
+    _LOG(tfd, only_in_tombstone, " r4 %08x  r5 %08x  r6 %08x  r7 %08x\n",
+         r.ARM_r4, r.ARM_r5, r.ARM_r6, r.ARM_r7);
+    _LOG(tfd, only_in_tombstone, " r8 %08x  r9 %08x  10 %08x  fp %08x\n",
+         r.ARM_r8, r.ARM_r9, r.ARM_r10, r.ARM_fp);
+    _LOG(tfd, only_in_tombstone,
+         " ip %08x  sp %08x  lr %08x  pc %08x  cpsr %08x\n",
+         r.ARM_ip, r.ARM_sp, r.ARM_lr, r.ARM_pc, r.ARM_cpsr);
+
+#ifdef WITH_VFP
+    struct user_vfp vfp_regs;
+    int i;
+
+    if(ptrace(PTRACE_GETVFPREGS, pid, 0, &vfp_regs)) {
+        _LOG(tfd, only_in_tombstone,
+             "cannot get registers: %s\n", strerror(errno));
+        return;
+    }
+
+    for (i = 0; i < NUM_VFP_REGS; i += 2) {
+        _LOG(tfd, only_in_tombstone,
+             " d%-2d %016llx  d%-2d %016llx\n",
+              i, vfp_regs.fpregs[i], i+1, vfp_regs.fpregs[i+1]);
+    }
+    _LOG(tfd, only_in_tombstone, " scr %08lx\n\n", vfp_regs.fpscr);
+#endif
+}
+
+const char *get_signame(int sig)
+{
+    switch(sig) {
+    case SIGILL:     return "SIGILL";
+    case SIGABRT:    return "SIGABRT";
+    case SIGBUS:     return "SIGBUS";
+    case SIGFPE:     return "SIGFPE";
+    case SIGSEGV:    return "SIGSEGV";
+    case SIGSTKFLT:  return "SIGSTKFLT";
+    default:         return "?";
+    }
+}
+
+const char *get_sigcode(int signo, int code)
+{
+    switch (signo) {
+    case SIGILL:
+        switch (code) {
+        case ILL_ILLOPC: return "ILL_ILLOPC";
+        case ILL_ILLOPN: return "ILL_ILLOPN";
+        case ILL_ILLADR: return "ILL_ILLADR";
+        case ILL_ILLTRP: return "ILL_ILLTRP";
+        case ILL_PRVOPC: return "ILL_PRVOPC";
+        case ILL_PRVREG: return "ILL_PRVREG";
+        case ILL_COPROC: return "ILL_COPROC";
+        case ILL_BADSTK: return "ILL_BADSTK";
+        }
+        break;
+    case SIGBUS:
+        switch (code) {
+        case BUS_ADRALN: return "BUS_ADRALN";
+        case BUS_ADRERR: return "BUS_ADRERR";
+        case BUS_OBJERR: return "BUS_OBJERR";
+        }
+        break;
+    case SIGFPE:
+        switch (code) {
+        case FPE_INTDIV: return "FPE_INTDIV";
+        case FPE_INTOVF: return "FPE_INTOVF";
+        case FPE_FLTDIV: return "FPE_FLTDIV";
+        case FPE_FLTOVF: return "FPE_FLTOVF";
+        case FPE_FLTUND: return "FPE_FLTUND";
+        case FPE_FLTRES: return "FPE_FLTRES";
+        case FPE_FLTINV: return "FPE_FLTINV";
+        case FPE_FLTSUB: return "FPE_FLTSUB";
+        }
+        break;
+    case SIGSEGV:
+        switch (code) {
+        case SEGV_MAPERR: return "SEGV_MAPERR";
+        case SEGV_ACCERR: return "SEGV_ACCERR";
+        }
+        break;
+    }
+    return "?";
+}
+
+void dump_fault_addr(int tfd, int pid, int sig)
+{
+    siginfo_t si;
+
+    memset(&si, 0, sizeof(si));
+    if(ptrace(PTRACE_GETSIGINFO, pid, 0, &si)){
+        _LOG(tfd, false, "cannot get siginfo: %s\n", strerror(errno));
+    } else {
+        _LOG(tfd, false, "signal %d (%s), code %d (%s), fault addr %08x\n",
+             sig, get_signame(sig),
+             si.si_code, get_sigcode(sig, si.si_code),
+             si.si_addr);
+    }
+}
+
+void dump_crash_banner(int tfd, unsigned pid, unsigned tid, int sig)
+{
+    /* Emit the report header: separator, build info and pid/tid with the
+     * first line of /proc/<pid>/cmdline ("UNKNOWN" when it cannot be read). */
+    char buf[1024];
+    char *cmdline = 0;
+    FILE *cmdline_fp;
+
+    sprintf(buf, "/proc/%d/cmdline", pid);
+    cmdline_fp = fopen(buf, "r");
+    if (cmdline_fp != NULL) {
+        cmdline = fgets(buf, 1024, cmdline_fp);
+        fclose(cmdline_fp);
+    }
+    _LOG(tfd, false,
+         "*** *** *** *** *** *** *** *** *** *** *** *** *** *** *** ***\n");
+    dump_build_info(tfd);
+    _LOG(tfd, false, "pid: %d, tid: %d  >>> %s <<<\n",
+         pid, tid, cmdline ? cmdline : "UNKNOWN");
+    if (sig != 0) dump_fault_addr(tfd, tid, sig);   /* fault details only with a signal */
+}
+
+static void parse_elf_info(mapinfo *milist, pid_t pid)
+{
+    /* For each mapping whose first bytes in the traced process carry the ELF
+     * magic, record its ARM EXIDX segment range (when one exists) and try to
+     * load a symbol table from the file the mapping is named after. */
+    mapinfo *map;
+
+    for (map = milist; map != NULL; map = map->next) {
+        Elf32_Ehdr elf_hdr;
+        Elf32_Phdr prog_hdr;
+        Elf32_Phdr *remote_phdrs;
+        int idx;
+
+        /* Fetch sizeof(Elf32_Ehdr) bytes from the start of the mapping */
+        memset(&elf_hdr, 0, sizeof(Elf32_Ehdr));
+        get_remote_struct(pid, (void *) (map->start), &elf_hdr,
+                          sizeof(Elf32_Ehdr));
+        /* Mappings without the ELF magic are left untouched */
+        if (!IS_ELF(elf_hdr))
+            continue;
+        remote_phdrs = (Elf32_Phdr *) (map->start + elf_hdr.e_phoff);
+        for (idx = 0; idx < elf_hdr.e_phnum; idx++) {
+            /* Copy program header idx out of the traced process */
+            get_remote_struct(pid, (char *) (remote_phdrs+idx), &prog_hdr,
+                              sizeof(Elf32_Phdr));
+            if (prog_hdr.p_type != PT_ARM_EXIDX)
+                continue;
+            /* First EXIDX segment found: remember its range and stop */
+            map->exidx_start = map->start + prog_hdr.p_offset;
+            map->exidx_end = map->exidx_start + prog_hdr.p_filesz;
+            break;
+        }
+        /* Try to load symbols from this file */
+        map->symbols = symbol_table_create(map->name);
+    }
+}
+
+void dump_crash_report(int tfd, unsigned pid, unsigned tid, bool at_fault)
+{   /* Dump registers, backtrace and stack/code of thread tid of process pid */
+    char data[1024];            /* holds the maps path, then each maps line */
+    FILE *fp;
+    mapinfo *milist = 0;        /* list built from /proc/<pid>/maps */
+    unsigned int sp_list[STACK_CONTENT_DEPTH];
+    int stack_depth;
+    int frame0_pc_sane = 1;
+
+    if (!at_fault) {            /* separator and pid/tid line when not the faulting thread */
+        _LOG(tfd, true,
+         "--- --- --- --- --- --- --- --- --- --- --- --- --- --- --- ---\n");
+        _LOG(tfd, true, "pid: %d, tid: %d\n", pid, tid);
+    }
+
+    dump_registers(tfd, tid, at_fault);
+
+    /* Clear stack pointer records */
+    memset(sp_list, 0, sizeof(sp_list));
+
+    sprintf(data, "/proc/%d/maps", pid);
+    fp = fopen(data, "r");
+    if(fp) {
+        while(fgets(data, 1024, fp)) {
+            mapinfo *mi = parse_maps_line(data);
+            if(mi) {
+                mi->next = milist;  /* prepend: list ends up in reverse file order */
+                milist = mi;
+            }
+        }
+        fclose(fp);
+    }
+
+    parse_elf_info(milist, tid);
+
+    /* If stack unwinder fails, use the default solution to dump the stack
+     * content.
+     */
+    stack_depth = unwind_backtrace_with_ptrace(tfd, tid, milist, sp_list,
+                                               &frame0_pc_sane, at_fault);
+
+    /* The stack unwinder should unwind at least two levels of stack. If fewer
+     * levels are seen we make sure at least pc and lr are dumped.
+     */
+    if (stack_depth < 2) {
+        dump_pc_and_lr(tfd, tid, milist, stack_depth, at_fault);
+    }
+
+    dump_stack_and_code(tfd, tid, milist, stack_depth, sp_list, at_fault);
+
+    while(milist) {                 /* free every map entry and its symbol table */
+        mapinfo *next = milist->next;
+        symbol_table_free(milist->symbols);
+        free(milist);
+        milist = next;
+    }
+}
+
+#define MAX_TOMBSTONES	10
+
+#define typecheck(x,y) {    \
+    typeof(x) __dummy1;     \
+    typeof(y) __dummy2;     \
+    (void)(&__dummy1 == &__dummy2); }
+
+#define TOMBSTONE_DIR	"/data/tombstones"
+
+/*
+ * find_and_open_tombstone - open the first unused slot tombstone_XX (XX from
+ * 00 to MAX_TOMBSTONES-1); when every slot is taken, truncate and reuse the
+ * least-recently-modified one. Returns the fd, or -1 if that open() fails.
+ */
+static int find_and_open_tombstone(void)
+{
+    unsigned long mtime = ULONG_MAX;
+    struct stat sb;
+    char path[128];
+    int fd, i, oldest = 0;
+
+    /*
+     * XXX: Our stat.st_mtime isn't time_t. If it changes, as it probably ought
+     * to, our logic breaks. This check will generate a warning if that happens.
+     */
+    typecheck(mtime, sb.st_mtime);
+
+    /*
+     * In a single pass, find an available slot and, in case none exist, find
+     * and record the least-recently-modified file (slot 0 if nothing could be
+     * stat'ed at all).
+     */
+    for (i = 0; i < MAX_TOMBSTONES; i++) {
+        snprintf(path, sizeof(path), TOMBSTONE_DIR"/tombstone_%02d", i);
+
+        if (!stat(path, &sb)) {
+            if (sb.st_mtime < mtime) {
+                oldest = i;
+                mtime = sb.st_mtime;
+            }
+            continue;
+        }
+        if (errno != ENOENT)
+            continue;
+
+        fd = open(path, O_CREAT | O_EXCL | O_WRONLY, 0600);
+        if (fd < 0)
+            continue;   /* raced ? */
+
+        fchown(fd, AID_SYSTEM, AID_SYSTEM);
+        return fd;
+    }
+
+    /* we didn't find an available file, so we clobber the oldest one */
+    snprintf(path, sizeof(path), TOMBSTONE_DIR"/tombstone_%02d", oldest);
+    fd = open(path, O_CREAT | O_TRUNC | O_WRONLY, 0600);
+    if (fd >= 0)    /* only change ownership of a descriptor that was opened */
+        fchown(fd, AID_SYSTEM, AID_SYSTEM);
+    return fd;
+}
+
+/* Dump every other thread of pid; true if some thread is not detached cleanly */
+static bool dump_sibling_thread_report(int tfd, unsigned pid, unsigned tid)
+{
+    char proc_task_dir[1024];
+    DIR *task_dir;
+    struct dirent *entry;
+    int detach_failed = 0;
+
+    sprintf(proc_task_dir, "/proc/%d/task", pid);
+    task_dir = opendir(proc_task_dir);
+    /* Nothing to report when the task directory cannot be opened */
+    if (task_dir == NULL) {
+        XLOG("Cannot open /proc/%d/task\n", pid);
+        return false;
+    }
+
+    for (entry = readdir(task_dir); entry != NULL; entry = readdir(task_dir)) {
+        unsigned sibling;
+
+        /* "." and ".." are not thread entries */
+        if (strcmp(entry->d_name, ".") == 0 || strcmp(entry->d_name, "..") == 0)
+            continue;
+        sibling = atoi(entry->d_name);
+        /* The thread at fault has been handled individually, and a thread
+         * that cannot be ptrace-attached is skipped.
+         */
+        if (sibling == tid || ptrace(PTRACE_ATTACH, sibling, 0, 0) < 0)
+            continue;
+
+        dump_crash_report(tfd, pid, sibling, false);
+        /* Any non-zero detach result is remembered for the return value */
+        detach_failed |= ptrace(PTRACE_DETACH, sibling, 0, 0);
+    }
+    closedir(task_dir);
+    return detach_failed != 0;
+}
+
+/* Write the tombstone; return true if some thread is not detached cleanly */
+static bool engrave_tombstone(unsigned pid, unsigned tid, int debug_uid,
+                              int signal)
+{
+    int tombstone_fd;
+    bool not_detached = false;
+
+    /* Create the tombstone directory if needed and hand it to system */
+    mkdir(TOMBSTONE_DIR, 0755);
+    chown(TOMBSTONE_DIR, AID_SYSTEM, AID_SYSTEM);
+
+    tombstone_fd = find_and_open_tombstone();
+    if (tombstone_fd < 0)
+        return false;
+
+    dump_crash_banner(tombstone_fd, pid, tid, signal);
+    dump_crash_report(tombstone_fd, pid, tid, true);
+    /*
+     * If the user has requested to attach gdb, don't collect the per-thread
+     * information as it increases the chance to lose track of the process.
+     */
+    if ((signed)pid > debug_uid)
+        not_detached = dump_sibling_thread_report(tombstone_fd, pid, tid);
+
+    close(tombstone_fd);
+    return not_detached;
+}
+
+static int
+write_string(const char* file, const char* string)
+{
+    /* Write string to file; returns 0 on success, -errno on failure. */
+    int err = 0;
+    int fd = open(file, O_RDWR);
+    if (fd < 0)
+        return -errno;
+    /* Record the write error now: close() below may overwrite errno. */
+    if (write(fd, string, strlen(string)) < 0)
+        err = -errno;
+    close(fd);
+    return err;
+}
+
+static
+void init_debug_led(void)   /* write "0" / "0,0" to every LED control used here */
+{
+    // trout leds: red, green and blue brightness plus the red blink control
+    write_string("/sys/class/leds/red/brightness", "0");
+    write_string("/sys/class/leds/green/brightness", "0");
+    write_string("/sys/class/leds/blue/brightness", "0");
+    write_string("/sys/class/leds/red/device/blink", "0");
+    // sardine leds: cadence control of the "left" LED
+    write_string("/sys/class/leds/left/cadence", "0,0");
+}
+
+static
+void enable_debug_led(void)   /* LED "on" step of the blink loop; write errors are ignored */
+{
+    // trout leds: red brightness set to 255
+    write_string("/sys/class/leds/red/brightness", "255");
+    // sardine leds: left cadence set to "1,0"
+    write_string("/sys/class/leds/left/cadence", "1,0");
+}
+
+static
+void disable_debug_led(void)   /* LED "off" step of the blink loop; write errors are ignored */
+{
+    // trout leds: red brightness set to 0
+    write_string("/sys/class/leds/red/brightness", "0");
+    // sardine leds: left cadence set to "0,0"
+    write_string("/sys/class/leds/left/cadence", "0,0");
+}
+
+extern int init_getevent();
+extern void uninit_getevent();
+extern int get_event(struct input_event* event, int timeout);
+
+static void wait_for_user_action(unsigned tid, struct ucred* cr)
+{   /* Log gdbserver instructions, then blink the LED until a KEY_HOME event with value 0 */
+    (void)tid;  /* unused */
+    /* First log a helpful message */
+    LOG(    "********************************************************\n"
+            "* Process %d has been suspended while crashing.  To\n"
+            "* attach gdbserver for a gdb connection on port 5039:\n"
+            "*\n"
+            "*     adb shell gdbserver :5039 --attach %d &\n"
+            "*\n"
+            "* Press HOME key to let the process continue crashing.\n"
+            "********************************************************\n",
+            cr->pid, cr->pid);
+
+    /* wait for HOME key (TODO: something useful for devices w/o HOME key) */
+    if (init_getevent() == 0) {
+        int ms = 1200 / 10;     /* one table unit, scaled into the get_event timeout */
+        int dit = 1;            /* short "on" step */
+        int dah = 3*dit;        /* long "on" step */
+        int _       = -dit;     /* negative entries are "off" steps of that length */
+        int ___     = 3*_;
+        int _______ = 7*_;
+        const signed char codes[] = {   /* three short, three long, three short, long pause */
+           dit,_,dit,_,dit,___,dah,_,dah,_,dah,___,dit,_,dit,_,dit,_______
+        };
+        size_t s = 0;           /* index of the current step in codes[] */
+        struct input_event e;
+        int home = 0;
+        init_debug_led();
+        enable_debug_led();
+        do {
+            int timeout = abs((int)(codes[s])) * ms;
+            int res = get_event(&e, timeout);
+            if (res == 0) {
+                if (e.type==EV_KEY && e.code==KEY_HOME && e.value==0)
+                    home = 1;
+            } else if (res == 1) {  /* presumably a timeout: next step - confirm get_event's contract */
+                if (++s >= sizeof(codes)/sizeof(*codes))
+                    s = 0;
+                if (codes[s] > 0) {
+                    enable_debug_led();
+                } else {
+                    disable_debug_led();
+                }
+            }
+        } while (!home);
+        uninit_getevent();
+    }
+
+    /* don't forget to turn debug led off */
+    disable_debug_led();
+
+    /* log that the process is about to be resumed (nothing is closed here) */
+    LOG("debuggerd resuming process %d", cr->pid);
+ }
+
+static void handle_crashing_process(int fd)
+{
+    /* Serve one request on socket fd: read the crashing tid, attach to it,
+     * engrave a tombstone on a fatal signal, then detach and resume it. */
+    char buf[64];
+    struct stat s;
+    unsigned tid = 0;
+    struct ucred cr;
+    socklen_t len;      /* getsockopt() takes a socklen_t *, not an int * */
+    int n, status;
+    int tid_attach_status = -1;
+    unsigned retry = 30;
+    bool need_cleanup = false;
+
+    char value[PROPERTY_VALUE_MAX];
+    property_get("debug.db.uid", value, "-1");
+    int debug_uid = atoi(value);
+
+    XLOG("handle_crashing_process(%d)\n", fd);
+
+    len = sizeof(cr);
+    n = getsockopt(fd, SOL_SOCKET, SO_PEERCRED, &cr, &len);
+    if(n != 0) {
+        /* cr was never filled in, so skip "done", which signals cr.pid */
+        LOG("cannot get credentials\n");
+        close(fd);
+        return;
+    }
+
+    XLOG("reading tid\n");
+    fcntl(fd, F_SETFL, O_NONBLOCK);
+    while((n = read(fd, &tid, sizeof(unsigned))) != sizeof(unsigned)) {
+        /* errno is only meaningful when read() itself reported an error */
+        if(n < 0 && errno == EINTR) continue;
+        if(n < 0 && errno == EWOULDBLOCK) {
+            if(retry-- > 0) {
+                usleep(100 * 1000);
+                continue;
+            }
+            LOG("timed out reading tid\n");
+            goto done;
+        }
+        LOG("read failure? %s\n", n < 0 ? strerror(errno) : "short read");
+        goto done;
+    }
+
+    sprintf(buf,"/proc/%d/task/%d", cr.pid, tid);
+    if(stat(buf, &s)) {
+        LOG("tid %d does not exist in pid %d. ignoring debug request\n",
+            tid, cr.pid);
+        close(fd);
+        return;
+    }
+
+    XLOG("BOOM: pid=%d uid=%d gid=%d tid=%d\n", cr.pid, cr.uid, cr.gid, tid);
+    tid_attach_status = ptrace(PTRACE_ATTACH, tid, 0, 0);
+    if(tid_attach_status < 0) {
+        LOG("ptrace attach failed: %s\n", strerror(errno));
+        goto done;
+    }
+
+    close(fd);
+    fd = -1;
+
+    for(;;) {
+        n = waitpid(tid, &status, __WALL);
+        if(n < 0) {
+            if(errno == EAGAIN) continue;
+            LOG("waitpid failed: %s\n", strerror(errno));
+            goto done;
+        }
+        XLOG("waitpid: n=%d status=%08x\n", n, status);
+        if(WIFSTOPPED(status)){
+            n = WSTOPSIG(status);
+            switch(n) {
+            case SIGSTOP:
+                XLOG("stopped -- continuing\n");
+                n = ptrace(PTRACE_CONT, tid, 0, 0);
+                if(n) {
+                    LOG("ptrace failed: %s\n", strerror(errno));
+                    goto done;
+                }
+                continue;
+
+            case SIGILL:
+            case SIGABRT:
+            case SIGBUS:
+            case SIGFPE:
+            case SIGSEGV:
+            case SIGSTKFLT: {
+                XLOG("stopped -- fatal signal\n");
+                need_cleanup = engrave_tombstone(cr.pid, tid, debug_uid, n);
+                kill(tid, SIGSTOP);
+                goto done;
+            }
+
+            default:
+                XLOG("stopped -- unexpected signal\n");
+                goto done;
+            }
+        } else {
+            XLOG("unexpected waitpid response\n");
+            goto done;
+        }
+    }
+
+done:
+    XLOG("detaching\n");
+    /* stop the process so we can debug */
+    kill(cr.pid, SIGSTOP);
+
+    /*
+     * If a thread has been attached by ptrace, make sure it is detached
+     * successfully otherwise we will get a zombie.
+     */
+    if (tid_attach_status == 0) {
+        int detach_status;
+        /* detach so we can attach gdbserver */
+        detach_status = ptrace(PTRACE_DETACH, tid, 0, 0);
+        need_cleanup |= (detach_status != 0);
+    }
+
+    /*
+     * if debug.db.uid is set, its value indicates if we should wait
+     * for user action for the crashing process.
+     * in this case, we log a message and turn the debug LED on
+     * waiting for a gdb connection (for instance)
+     */
+    if ((signed)cr.uid <= debug_uid) {
+        wait_for_user_action(tid, &cr);
+    }
+
+    /* resume stopped process (so it can crash in peace) */
+    kill(cr.pid, SIGCONT);
+
+    if (need_cleanup) {
+        LOG("debuggerd committing suicide to free the zombie!\n");
+        kill(getpid(), SIGKILL);
+    }
+
+    if(fd != -1) close(fd);
+}
+
+int main()
+{   /* Accept connections on the debuggerd socket forever, one at a time */
+    int s;
+    struct sigaction act;
+
+    logsocket = socket_local_client("logd",
+            ANDROID_SOCKET_NAMESPACE_ABSTRACT, SOCK_DGRAM);
+    if(logsocket < 0) {
+        logsocket = -1;     /* normalise any failure value to -1 */
+    } else {
+        fcntl(logsocket, F_SETFD, FD_CLOEXEC);
+    }
+
+    act.sa_handler = SIG_DFL;
+    sigemptyset(&act.sa_mask);
+    sigaddset(&act.sa_mask,SIGCHLD);
+    act.sa_flags = SA_NOCLDWAIT;    /* exited children are not kept as zombies */
+    sigaction(SIGCHLD, &act, 0);
+
+    s = socket_local_server("android:debuggerd",
+            ANDROID_SOCKET_NAMESPACE_ABSTRACT, SOCK_STREAM);
+    if(s < 0) return -1;    /* cannot serve requests without the listening socket */
+    fcntl(s, F_SETFD, FD_CLOEXEC);
+
+    LOG("debuggerd: " __DATE__ " " __TIME__ "\n");
+
+    for(;;) {
+        struct sockaddr addr;
+        socklen_t alen;
+        int fd;
+
+        alen = sizeof(addr);
+        fd = accept(s, &addr, &alen);
+        if(fd < 0) continue;    /* failed accept: just try again */
+
+        fcntl(fd, F_SETFD, FD_CLOEXEC);
+
+        handle_crashing_process(fd);    /* closes fd itself on every path */
+    }
+    return 0;
+}
diff --git a/debuggerd/debuggerd.h b/debuggerd/debuggerd.h
new file mode 100644
index 0000000..e3cdc7c
--- /dev/null
+++ b/debuggerd/debuggerd.h
@@ -0,0 +1,39 @@
+/* system/debuggerd/debuggerd.h
+**
+** Copyright 2006, The Android Open Source Project
+**
+** Licensed under the Apache License, Version 2.0 (the "License");
+** you may not use this file except in compliance with the License.
+** You may obtain a copy of the License at
+**
+**     http://www.apache.org/licenses/LICENSE-2.0
+**
+** Unless required by applicable law or agreed to in writing, software
+** distributed under the License is distributed on an "AS IS" BASIS,
+** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+** See the License for the specific language governing permissions and
+** limitations under the License.
+*/
+
+#include <cutils/logd.h>
+#include <sys/ptrace.h>
+#include <unwind.h>
+#include "utility.h"
+#include "symbol_table.h"
+
+
+/* Main entry point to get the backtrace from the crashing process */
+extern int unwind_backtrace_with_ptrace(int tfd, pid_t pid, mapinfo *map,
+                                        unsigned int sp_list[],
+                                        int *frame0_pc_sane,
+                                        bool at_fault);
+
+extern void dump_registers(int tfd, int pid, bool at_fault); /* x86 version: x86/machine.c */
+
+extern int unwind_backtrace_with_ptrace_x86(int tfd, pid_t pid, mapinfo *map, bool at_fault); /* x86/unwind.c */
+
+void dump_pc_and_lr(int tfd, int pid, mapinfo *map, int unwound_level, bool at_fault); /* called when fewer than 2 frames unwound */
+
+void dump_stack_and_code(int tfd, int pid, mapinfo *map,
+                         int unwind_depth, unsigned int sp_list[],
+                         bool at_fault);
diff --git a/debuggerd/symbol_table.c b/debuggerd/symbol_table.c
new file mode 100644
index 0000000..e76df33
--- /dev/null
+++ b/debuggerd/symbol_table.c
@@ -0,0 +1,231 @@
+#include <stdlib.h>
+#include <fcntl.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <sys/mman.h>
+
+#include "symbol_table.h"
+#include "utility.h"
+
+#include <linux/elf.h>
+
+// Compare func for qsort: ascending addr (a subtraction could wrap and flip the sign)
+static int qcompar(const void *a, const void *b)
+{
+    return (((struct symbol*)a)->addr > ((struct symbol*)b)->addr) - (((struct symbol*)a)->addr < ((struct symbol*)b)->addr);
+}
+
+// Compare func for bsearch: the key is the looked-up address itself, carried
+// in the pointer argument, and matches a symbol whose [addr, addr+size) range
+// contains it
+static int bcompar(const void *addr, const void *element)
+{
+    const struct symbol *candidate = (struct symbol*)element;
+    unsigned int key = (unsigned int)addr;
+
+    // Key lies below this symbol: tell bsearch to look at lower entries
+    if(key < candidate->addr)
+        return -1;
+
+    // At or beyond addr+size means higher entries; anything else is a hit
+    return (key - candidate->addr >= candidate->size) ? 1 : 0;
+}
+
+/*
+ *  Create a symbol table from a given file
+ *
+ *  Parameters:
+ *      filename - ELF file to read .symtab and/or .dynsym from
+ *
+ *  Returns:
+ *      A newly-allocated symbol_table sorted by address, or NULL if error.
+ *      Free symbol table with symbol_table_free()
+ */
+struct symbol_table *symbol_table_create(const char *filename)
+{
+    struct symbol_table *table = NULL;
+
+    // Open the file, and map it into memory
+    struct stat sb;
+    int length;
+    char *base;
+
+    XLOG("Creating symbol table for %s\n", filename);
+    int fd = open(filename, O_RDONLY);
+
+    if(fd < 0) {
+        goto out;
+    }
+
+    // A file that cannot even hold an ELF header has nothing to parse
+    if(fstat(fd, &sb) < 0 || sb.st_size < (off_t)sizeof(Elf32_Ehdr)) {
+        goto out_close;
+    }
+    length = sb.st_size;
+
+    base = mmap(NULL, length, PROT_READ, MAP_PRIVATE, fd, 0);
+    // mmap() reports failure as MAP_FAILED, never as NULL
+    if(base == MAP_FAILED) {
+        goto out_close;
+    }
+
+    // Parse the file header
+    Elf32_Ehdr *hdr = (Elf32_Ehdr*)base;
+    Elf32_Shdr *shdr = (Elf32_Shdr*)(base + hdr->e_shoff);
+
+    // Search for the static and dynamic symbol sections
+    int sym_idx = -1;
+    int dynsym_idx = -1;
+    int i;
+
+    for(i = 0; i < hdr->e_shnum; i++) {
+        if(shdr[i].sh_type == SHT_SYMTAB ) {
+            sym_idx = i;
+        }
+        if(shdr[i].sh_type == SHT_DYNSYM ) {
+            dynsym_idx = i;
+        }
+    }
+    if ((dynsym_idx == -1) && (sym_idx == -1)) {
+        goto out_unmap;
+    }
+
+    table = malloc(sizeof(struct symbol_table));
+    if(!table) {
+        goto out_unmap;
+    }
+    table->name = strdup(filename);
+    table->num_symbols = 0;
+
+    // A section is only dereferenced if it was found and has a usable entry size
+    Elf32_Sym *dynsyms = NULL;
+    Elf32_Sym *syms = NULL;
+    int dynnumsyms = 0;
+    int numsyms = 0;
+    char *dynstr = NULL;
+    char *str = NULL;
+    int symbol_count = 0;
+    int dynsymbol_count = 0;
+
+    if (dynsym_idx != -1 && shdr[dynsym_idx].sh_entsize != 0) {
+        dynsyms = (Elf32_Sym*)(base + shdr[dynsym_idx].sh_offset);
+        dynnumsyms = shdr[dynsym_idx].sh_size / shdr[dynsym_idx].sh_entsize;
+        dynstr = base + shdr[shdr[dynsym_idx].sh_link].sh_offset;
+        // Iterate through the dynamic symbol table, and count how many symbols
+        // are actually defined
+        for(i = 0; i < dynnumsyms; i++) {
+            if(dynsyms[i].st_shndx != SHN_UNDEF) {
+                dynsymbol_count++;
+            }
+        }
+        XLOG("Dynamic Symbol count: %d\n", dynsymbol_count);
+    }
+
+    if (sym_idx != -1 && shdr[sym_idx].sh_entsize != 0) {
+        syms = (Elf32_Sym*)(base + shdr[sym_idx].sh_offset);
+        numsyms = shdr[sym_idx].sh_size / shdr[sym_idx].sh_entsize;
+        str = base + shdr[shdr[sym_idx].sh_link].sh_offset;
+        // Iterate through the symbol table, and count how many symbols
+        // are actually defined
+        for(i = 0; i < numsyms; i++) {
+            if((syms[i].st_shndx != SHN_UNDEF) &&
+                (strlen(str+syms[i].st_name)) &&
+                (syms[i].st_value != 0) && (syms[i].st_size != 0)) {
+                symbol_count++;
+            }
+        }
+        XLOG("Symbol count: %d\n", symbol_count);
+    }
+
+    // Now, create an entry in our symbol table structure for each symbol...
+    table->num_symbols += symbol_count + dynsymbol_count;
+    table->symbols = malloc(table->num_symbols * sizeof(struct symbol));
+    if(!table->symbols) {
+        free(table->name);
+        free(table);
+        table = NULL;
+        goto out_unmap;
+    }
+
+    int j = 0;
+    // ...and populate them, dynamic symbols first (none if dynnumsyms is 0)
+    for(i = 0; i < dynnumsyms; i++) {
+        if(dynsyms[i].st_shndx != SHN_UNDEF) {
+            table->symbols[j].name = strdup(dynstr + dynsyms[i].st_name);
+            table->symbols[j].addr = dynsyms[i].st_value;
+            table->symbols[j].size = dynsyms[i].st_size;
+            XLOG("name: %s, addr: %x, size: %x\n",
+                table->symbols[j].name, table->symbols[j].addr, table->symbols[j].size);
+            j++;
+        }
+    }
+
+    // ...followed by the static symbols, filtered exactly as when counting
+    for(i = 0; i < numsyms; i++) {
+        if((syms[i].st_shndx != SHN_UNDEF) &&
+            (strlen(str+syms[i].st_name)) &&
+            (syms[i].st_value != 0) && (syms[i].st_size != 0)) {
+            table->symbols[j].name = strdup(str + syms[i].st_name);
+            table->symbols[j].addr = syms[i].st_value;
+            table->symbols[j].size = syms[i].st_size;
+            XLOG("name: %s, addr: %x, size: %x\n",
+                table->symbols[j].name, table->symbols[j].addr, table->symbols[j].size);
+            j++;
+        }
+    }
+
+    // Sort the symbol table entries, so they can be bsearched later
+    qsort(table->symbols, table->num_symbols, sizeof(struct symbol), qcompar);
+
+out_unmap:
+    munmap(base, length);
+
+out_close:
+    close(fd);
+
+out:
+    return table;
+}
+
+/*
+ * Free a symbol table, including every symbol name and the table's own name
+ *
+ * Parameters:
+ *     table - Table to free (NULL is ignored)
+ */
+void symbol_table_free(struct symbol_table *table)
+{
+    int i;
+
+    if(!table) {
+        return;
+    }
+
+    for(i=0; i<table->num_symbols; i++) {
+        free(table->symbols[i].name);
+    }
+    free(table->symbols);
+    free(table->name);   // strdup'd by symbol_table_create, previously leaked
+    free(table);
+}
+
+/*
+ * Find the symbol covering an address
+ *
+ * Parameters:
+ *      table - Table to search in; NULL is accepted and matches nothing
+ *      addr - Address to look up
+ *
+ * Returns:
+ *      The symbol entry whose range contains addr, or NULL when the
+ *      table is NULL or no entry contains the address.
+ */
+const struct symbol *symbol_table_lookup(struct symbol_table *table, unsigned int addr)
+{
+    // bsearch wants a pointer key, so the address itself travels as one
+    const void *key = (void*)addr;
+
+    if(table == NULL)
+        return NULL;
+    return bsearch(key, table->symbols, table->num_symbols, sizeof(struct symbol), bcompar);
+}
diff --git a/debuggerd/symbol_table.h b/debuggerd/symbol_table.h
new file mode 100644
index 0000000..7f41f91
--- /dev/null
+++ b/debuggerd/symbol_table.h
@@ -0,0 +1,20 @@
+#ifndef SYMBOL_TABLE_H
+#define SYMBOL_TABLE_H
+
+struct symbol {
+    unsigned int addr;  /* st_value copied from the ELF symbol entry */
+    unsigned int size;  /* st_size copied from the ELF symbol entry */
+    char *name;         /* strdup'd copy of the symbol's name */
+};
+
+struct symbol_table {
+    struct symbol *symbols; /* malloc'd array, sorted by addr after creation */
+    int num_symbols;        /* number of entries in symbols */
+    char *name;             /* strdup'd copy of the file name the table came from */
+};
+
+struct symbol_table *symbol_table_create(const char *filename); /* NULL on error */
+void symbol_table_free(struct symbol_table *table);             /* NULL is ignored */
+const struct symbol *symbol_table_lookup(struct symbol_table *table, unsigned int addr); /* NULL if no match */
+
+#endif
diff --git a/debuggerd/utility.c b/debuggerd/utility.c
index 8f3931c..2afdb46 100644
--- a/debuggerd/utility.c
+++ b/debuggerd/utility.c
@@ -38,14 +38,14 @@
     unsigned int i;
 
     for (i = 0; i+4 <= size; i+=4) {
-        *(int *)(dst+i) = ptrace(PTRACE_PEEKTEXT, pid, src+i, NULL);
+        *(int *)((char *)dst+i) = ptrace(PTRACE_PEEKTEXT, pid, (char *)src+i, NULL);
     }
 
     if (i < size) {
         int val;
 
         assert((size - i) < 4);
-        val = ptrace(PTRACE_PEEKTEXT, pid, src+i, NULL);
+        val = ptrace(PTRACE_PEEKTEXT, pid, (char *)src+i, NULL);
         while (i < size) {
             ((unsigned char *)dst)[i] = val & 0xff;
             i++;
@@ -69,11 +69,12 @@
 /* Find the containing map info for the pc */
 const mapinfo *pc_to_mapinfo(mapinfo *mi, unsigned pc, unsigned *rel_pc)
 {
+    *rel_pc = pc;
     while(mi) {
         if((pc >= mi->start) && (pc < mi->end)){
             // Only calculate the relative offset for shared libraries
             if (strstr(mi->name, ".so")) {
-                *rel_pc = pc - mi->start;
+                *rel_pc -= mi->start;
             }
             return mi;
         }
diff --git a/debuggerd/utility.h b/debuggerd/utility.h
index 49f5951..0682b85 100644
--- a/debuggerd/utility.h
+++ b/debuggerd/utility.h
@@ -21,6 +21,8 @@
 #include <stddef.h>
 #include <stdbool.h>
 
+#include "symbol_table.h"
+
 #ifndef PT_ARM_EXIDX
 #define PT_ARM_EXIDX    0x70000001      /* .ARM.exidx segment */
 #endif
@@ -33,6 +35,7 @@
     unsigned end;
     unsigned exidx_start;
     unsigned exidx_end;
+    struct symbol_table *symbols;
     char name[];
 } mapinfo;
 
@@ -53,4 +56,11 @@
 /* Log information onto the tombstone */
 extern void _LOG(int tfd, bool in_tombstone_only, const char *fmt, ...);
 
+#define LOG(fmt...) _LOG(-1, 0, fmt)
+#if 0
+#define XLOG(fmt...) _LOG(-1, 0, fmt)
+#else
+#define XLOG(fmt...) do {} while(0)
+#endif
+
 #endif
diff --git a/debuggerd/x86/crashglue.S b/debuggerd/x86/crashglue.S
new file mode 100644
index 0000000..59df432
--- /dev/null
+++ b/debuggerd/x86/crashglue.S
@@ -0,0 +1,15 @@
+.globl crash1
+.globl crashnostack
+
+crash1:
+	movl $0xa5a50000, %eax	/* distinct constants in eax, ebx and ecx */
+	movl $0xa5a50001, %ebx
+	movl $0xa5a50002, %ecx
+
+	movl $0, %edx
+	jmp *%edx		/* jump to address 0 */
+
+
+crashnostack:
+	movl $0, %ebp		/* zero the frame pointer first ... */
+	jmp *%ebp		/* ... then jump to address 0 through it */
diff --git a/debuggerd/x86/machine.c b/debuggerd/x86/machine.c
new file mode 100644
index 0000000..9d418cf
--- /dev/null
+++ b/debuggerd/x86/machine.c
@@ -0,0 +1,61 @@
+/* system/debuggerd/debuggerd.c
+**
+** Copyright 2006, The Android Open Source Project
+**
+** Licensed under the Apache License, Version 2.0 (the "License");
+** you may not use this file except in compliance with the License.
+** You may obtain a copy of the License at
+**
+**     http://www.apache.org/licenses/LICENSE-2.0
+**
+** Unless required by applicable law or agreed to in writing, software
+** distributed under the License is distributed on an "AS IS" BASIS,
+** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+** See the License for the specific language governing permissions and
+** limitations under the License.
+*/
+
+#include <stdio.h>
+#include <errno.h>
+#include <signal.h>
+#include <pthread.h>
+#include <fcntl.h>
+#include <sys/types.h>
+#include <dirent.h>
+
+#include <sys/ptrace.h>
+#include <sys/wait.h>
+#include <sys/exec_elf.h>
+#include <sys/stat.h>
+
+#include <cutils/sockets.h>
+#include <cutils/properties.h>
+
+#include <linux/input.h>
+
+#include "../utility.h"
+#include "x86_utility.h"
+
+void dump_registers(int tfd, int pid, bool at_fault)
+{   /* Log the x86 general, segment and control registers of thread pid */
+    struct pt_regs_x86 r;
+    bool only_in_tombstone = !at_fault;
+
+    if(ptrace(PTRACE_GETREGS, pid, 0, &r)) {
+        _LOG(tfd, only_in_tombstone,
+             "cannot get registers: %s\n", strerror(errno));
+        return;
+    }
+// A zero ebp is treated as "no stack": nothing is printed, just like on arm
+    if(!r.ebp)
+        return;
+    _LOG(tfd, only_in_tombstone, " eax %08x  ebx %08x  ecx %08x  edx %08x\n",
+         r.eax, r.ebx, r.ecx, r.edx);
+    _LOG(tfd, only_in_tombstone, " esi %08x  edi %08x\n",
+         r.esi, r.edi);
+    _LOG(tfd, only_in_tombstone, " xcs %08x  xds %08x  xes %08x  xfs %08x xss %08x\n",
+         r.xcs, r.xds, r.xes, r.xfs, r.xss);    /* xgs and orig_eax are not logged */
+    _LOG(tfd, only_in_tombstone,
+         " eip %08x  ebp %08x  esp %08x  flags %08x\n",
+         r.eip, r.ebp, r.esp, r.eflags);
+}
diff --git a/debuggerd/x86/unwind.c b/debuggerd/x86/unwind.c
new file mode 100644
index 0000000..8f84e01
--- /dev/null
+++ b/debuggerd/x86/unwind.c
@@ -0,0 +1,85 @@
+#include <cutils/logd.h>
+#include <sys/ptrace.h>
+#include "../utility.h"
+#include "x86_utility.h"
+
+
+int unwind_backtrace_with_ptrace_x86(int tfd, pid_t pid, mapinfo *map,
+                                 bool at_fault)
+{
+    /* Log an ebp-chain backtrace of pid plus each frame's stack words; returns frames walked */
+    struct pt_regs_x86 r;
+    unsigned int stack_level = 0;
+    unsigned int stack_depth = 0;
+    unsigned int rel_pc;
+    unsigned int stack_ptr;
+    unsigned int stack_content;
+
+    if(ptrace(PTRACE_GETREGS, pid, 0, &r)) return 0;
+    unsigned int eip = (unsigned int)r.eip;
+    unsigned int ebp = (unsigned int)r.ebp;
+    unsigned int cur_sp = (unsigned int)r.esp;
+    const mapinfo *mi;
+    const struct symbol* sym = 0;
+
+//ebp==0, it indicates that the stack is poped to the bottom or there is no stack at all.
+    while (ebp) {
+        _LOG(tfd, !at_fault, "#0%d ",stack_level);
+        mi = pc_to_mapinfo(map, eip, &rel_pc);
+        /* Find this frame's symbol; reset first so a miss never reuses the last hit */
+        sym = 0;
+        if (mi != 0 && mi->symbols != 0) {
+            sym = symbol_table_lookup(mi->symbols, rel_pc);
+        }
+        if (sym) {
+            _LOG(tfd, !at_fault, "    eip: %08x  %s (%s)\n", eip, mi ? mi->name : "", sym->name);
+        } else {
+            _LOG(tfd, !at_fault, "    eip: %08x  %s\n", eip, mi ? mi->name : "");
+        }
+
+        stack_level++;
+        if (stack_level >= STACK_DEPTH || eip == 0)
+            break;
+        eip = ptrace(PTRACE_PEEKTEXT, pid, (void*)(ebp + 4), NULL);
+        ebp = ptrace(PTRACE_PEEKTEXT, pid, (void*)ebp, NULL);
+    }
+    ebp = (unsigned int)r.ebp;
+    stack_depth = stack_level;
+    stack_level = 0;
+    if (ebp)
+        _LOG(tfd, !at_fault, "stack: \n");
+    while (ebp) {
+        _LOG(tfd, !at_fault, "#0%d \n",stack_level);
+        stack_ptr = cur_sp;
+        while((int)(ebp - stack_ptr) >= 0) {
+            stack_content = ptrace(PTRACE_PEEKTEXT, pid, (void*)stack_ptr, NULL);
+            mi = pc_to_mapinfo(map, stack_content, &rel_pc);
+            /* Symbol for this stack word, if any; reset so stale hits are not reprinted */
+            sym = 0;
+            if (mi != 0 && mi->symbols != 0) {
+                sym = symbol_table_lookup(mi->symbols, rel_pc);
+            }
+            if (sym) {
+                _LOG(tfd, !at_fault, "    %08x  %08x  %s (%s)\n",
+                    stack_ptr, stack_content, mi ? mi->name : "", sym->name);
+            } else {
+                _LOG(tfd, !at_fault, "    %08x  %08x  %s\n", stack_ptr, stack_content, mi ? mi->name : "");
+            }
+
+            stack_ptr = stack_ptr + 4;
+            //the stack frame may be very deep.
+            if((int)(stack_ptr - cur_sp) >= STACK_FRAME_DEPTH) {
+                _LOG(tfd, !at_fault, "    ......  ......  \n");
+                break;
+            }
+        }
+        cur_sp = ebp + 4;
+        stack_level++;
+        if (stack_level >= STACK_DEPTH || stack_level >= stack_depth)
+            break;
+        ebp = ptrace(PTRACE_PEEKTEXT, pid, (void*)ebp, NULL);
+    }
+
+    return stack_depth;
+}
+
diff --git a/debuggerd/x86/x86_utility.h b/debuggerd/x86/x86_utility.h
new file mode 100644
index 0000000..ac6a885
--- /dev/null
+++ b/debuggerd/x86/x86_utility.h
@@ -0,0 +1,40 @@
+/*
+**
+** Copyright 2006, The Android Open Source Project
+**
+** Licensed under the Apache License, Version 2.0 (the "License");
+** you may not use this file except in compliance with the License.
+** You may obtain a copy of the License at
+**
+**     http://www.apache.org/licenses/LICENSE-2.0
+**
+** Unless required by applicable law or agreed to in writing, software
+** distributed under the License is distributed on an "AS IS" BASIS,
+** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+** See the License for the specific language governing permissions and
+** limitations under the License.
+*/
+
+#define STACK_DEPTH 8          /* upper bound on the number of stack frames the unwinder walks and dumps */
+#define STACK_FRAME_DEPTH 64   /* upper bound, in bytes, on the raw stack contents printed per frame */
+
+typedef struct pt_regs_x86 {   /* x86 register set; NOTE(review): presumably filled via ptrace for the traced thread, with field order matching the kernel's 32-bit register layout -- confirm against callers */
+    long ebx;
+    long ecx;
+    long edx;
+    long esi;
+    long edi;
+    long ebp;
+    long eax;
+    int  xds;
+    int  xes;
+    int  xfs;
+    int  xgs;
+    long orig_eax;
+    long eip;
+    int  xcs;
+    long eflags;
+    long esp;
+    int  xss;
+}pt_regs_x86;
+
diff --git a/include/arch/target_linux-x86/AndroidConfig.h b/include/arch/target_linux-x86/AndroidConfig.h
index 617e7fa..d6ce3f2 100644
--- a/include/arch/target_linux-x86/AndroidConfig.h
+++ b/include/arch/target_linux-x86/AndroidConfig.h
@@ -227,7 +227,7 @@
 /*
  * Define if we have Linux's dbus 
  */
-#define HAVE_DBUS 1
+/* #define HAVE_DBUS 1 */
 
 /*
  * Define if tm struct has tm_gmtoff field
diff --git a/include/private/android_filesystem_config.h b/include/private/android_filesystem_config.h
index 848b100..844ad02 100644
--- a/include/private/android_filesystem_config.h
+++ b/include/private/android_filesystem_config.h
@@ -173,6 +173,7 @@
     { 00640, AID_SYSTEM,    AID_SYSTEM,    "system/etc/bluetooth/auto_pairing.conf" },
     { 00444, AID_RADIO,     AID_AUDIO,     "system/etc/AudioPara4.csv" },
     { 00555, AID_ROOT,      AID_ROOT,      "system/etc/ppp/*" },
+    { 00555, AID_ROOT,      AID_ROOT,      "system/etc/rc.*" },
     { 00644, AID_SYSTEM,    AID_SYSTEM,    "data/app/*" },
     { 00644, AID_SYSTEM,    AID_SYSTEM,    "data/app-private/*" },
     { 00644, AID_APP,       AID_APP,       "data/data/*" },
diff --git a/libcutils/Android.mk b/libcutils/Android.mk
index e8c7775..3dc3d69 100644
--- a/libcutils/Android.mk
+++ b/libcutils/Android.mk
@@ -112,12 +112,17 @@
 LOCAL_SRC_FILES := $(commonSources) ashmem-dev.c mq.c
 
 ifeq ($(TARGET_ARCH),arm)
-LOCAL_SRC_FILES += memset32.S
+LOCAL_SRC_FILES += arch-arm/memset32.S
 else  # !arm
 ifeq ($(TARGET_ARCH),sh)
 LOCAL_SRC_FILES += memory.c atomic-android-sh.c
 else  # !sh
+ifeq ($(TARGET_ARCH_VARIANT),x86-atom)  # Atom: also build the assembly android_memset16/32; NOTE(review): HAVE_MEMSET16/32 presumably make memory.c omit its C versions -- confirm
+LOCAL_CFLAGS += -DHAVE_MEMSET16 -DHAVE_MEMSET32
+LOCAL_SRC_FILES += arch-x86/android_memset16.S arch-x86/android_memset32.S memory.c
+else # !x86-atom
 LOCAL_SRC_FILES += memory.c
+endif # !x86-atom
 endif # !sh
 endif # !arm
 
diff --git a/libcutils/memset32.S b/libcutils/arch-arm/memset32.S
similarity index 100%
rename from libcutils/memset32.S
rename to libcutils/arch-arm/memset32.S
diff --git a/libcutils/arch-x86/android_memset16.S b/libcutils/arch-x86/android_memset16.S
new file mode 100644
index 0000000..b1f09cb
--- /dev/null
+++ b/libcutils/arch-x86/android_memset16.S
@@ -0,0 +1,32 @@
+/*
+ * Copyright (C) 2010 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * Contributed by: Intel Corporation
+ */
+
+#if defined(USE_SSE2)  /* SSE2 build: wrap the Atom-tuned implementation */
+
+# include "cache_wrapper.S"  /* provides SHARED_CACHE_SIZE and DATA_CACHE_SIZE */
+# undef __i686  /* keep any predefined __i686 macro from rewriting the __i686.get_pc_thunk.bx symbol used by the included file */
+# define USE_AS_ANDROID  /* makes the included code halve the length argument on entry */
+# define sse2_memset16_atom android_memset16  /* emit the routine under the name android_memset16 */
+# include "sse2-memset16-atom.S"
+
+#else  /* !USE_SSE2 */
+
+# include "memset16.S"  /* NOTE(review): non-SSE2 fallback; this file is not visible in this part of the patch -- confirm it exists */
+
+#endif  /* USE_SSE2 */
diff --git a/libcutils/arch-x86/android_memset32.S b/libcutils/arch-x86/android_memset32.S
new file mode 100644
index 0000000..1fb2ffe
--- /dev/null
+++ b/libcutils/arch-x86/android_memset32.S
@@ -0,0 +1,33 @@
+/*
+ * Copyright (C) 2010 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * Contributed by: Intel Corporation
+ */
+
+#if defined(USE_SSE2)  /* SSE2 build: wrap the Atom-tuned implementation */
+
+# include "cache_wrapper.S"  /* provides SHARED_CACHE_SIZE and DATA_CACHE_SIZE */
+# undef __i686  /* keep any predefined __i686 macro from rewriting the __i686.get_pc_thunk.bx symbol used by the included file */
+# define USE_AS_ANDROID  /* makes the included code divide the length argument by four on entry */
+# define sse2_memset32_atom android_memset32  /* emit the routine under the name android_memset32 */
+# include "sse2-memset32-atom.S"
+
+#else  /* !USE_SSE2 */
+
+# include "memset32.S"  /* NOTE(review): non-SSE2 fallback; this file is not visible in this part of the patch -- confirm it exists */
+
+#endif  /* USE_SSE2 */
+
diff --git a/libcutils/arch-x86/cache_wrapper.S b/libcutils/arch-x86/cache_wrapper.S
new file mode 100644
index 0000000..508fdd3
--- /dev/null
+++ b/libcutils/arch-x86/cache_wrapper.S
@@ -0,0 +1,24 @@
+/*
+ * Copyright (C) 2010 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * Contributed by: Intel Corporation
+ */
+
+/* Values are optimized for Atom; the sse2-memset*-atom.S routines compare the fill length against SHARED_CACHE_SIZE and DATA_CACHE_SIZE to pick a store loop. */
+#define SHARED_CACHE_SIZE       (512*1024)            /* Atom L2 Cache */
+#define DATA_CACHE_SIZE         (24*1024)             /* Atom L1 Data Cache */
+#define SHARED_CACHE_SIZE_HALF  (SHARED_CACHE_SIZE / 2)  /* 256 KiB */
+#define DATA_CACHE_SIZE_HALF    (DATA_CACHE_SIZE / 2)    /* 12 KiB */
diff --git a/libcutils/arch-x86/sse2-memset16-atom.S b/libcutils/arch-x86/sse2-memset16-atom.S
new file mode 100644
index 0000000..cafec82
--- /dev/null
+++ b/libcutils/arch-x86/sse2-memset16-atom.S
@@ -0,0 +1,722 @@
+/*
+ * Copyright (C) 2010 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * Contributed by: Intel Corporation
+ */
+
+#ifndef L
+# define L(label)	.L##label
+#endif
+
+#ifndef ALIGN
+# define ALIGN(n)	.p2align n
+#endif
+
+#ifndef cfi_startproc
+# define cfi_startproc			.cfi_startproc
+#endif
+
+#ifndef cfi_endproc
+# define cfi_endproc			.cfi_endproc
+#endif
+
+#ifndef cfi_rel_offset
+# define cfi_rel_offset(reg, off)	.cfi_rel_offset reg, off
+#endif
+
+#ifndef cfi_restore
+# define cfi_restore(reg)		.cfi_restore reg
+#endif
+
+#ifndef cfi_adjust_cfa_offset
+# define cfi_adjust_cfa_offset(off)	.cfi_adjust_cfa_offset off
+#endif
+
+#ifndef ENTRY
+# define ENTRY(name)			\
+	.type name,  @function; 	\
+	.globl name;			\
+	.p2align 4;			\
+name:					\
+	cfi_startproc
+#endif
+
+#ifndef END
+# define END(name)			\
+	cfi_endproc;			\
+	.size name, .-name
+#endif
+
+#define CFI_PUSH(REG)						\
+  cfi_adjust_cfa_offset (4);					\
+  cfi_rel_offset (REG, 0)
+
+#define CFI_POP(REG)						\
+  cfi_adjust_cfa_offset (-4);					\
+  cfi_restore (REG)
+
+#define PUSH(REG)	pushl REG; CFI_PUSH (REG)
+#define POP(REG)	popl REG; CFI_POP (REG)
+
+#ifdef USE_AS_BZERO16	/* bzero-style entry: arguments are (dest, len) */
+# define DEST		PARMS	/* %esp-relative offset of the destination pointer */
+# define LEN		DEST+4	/* %esp-relative offset of the length */
+#else			/* memset-style entry: arguments are (dest, value, len) */
+# define DEST		PARMS	/* %esp-relative offset of the destination pointer */
+# define CHR		DEST+4	/* %esp-relative offset of the 16-bit fill value */
+# define LEN		CHR+4	/* %esp-relative offset of the length */
+#endif
+
+#if 1			/* return-value setup is disabled: SETRTNVAL expands to nothing */
+# define SETRTNVAL
+#else
+# define SETRTNVAL	movl DEST(%esp), %eax
+#endif
+
+#ifdef SHARED
+# define ENTRANCE	PUSH (%ebx);
+# define RETURN_END	POP (%ebx); ret
+# define RETURN		RETURN_END; CFI_PUSH (%ebx)
+# define PARMS		8		/* Preserve EBX.  */
+# define JMPTBL(I, B)	I - B
+
+/* Load an entry in a jump table into EBX and branch to it.  TABLE is a
+   jump table with relative offsets, indexed by ECX (4-byte entries).  */
+# define BRANCH_TO_JMPTBL_ENTRY(TABLE)				\
+    /* We first load PC into EBX.  */				\
+    call	__i686.get_pc_thunk.bx;				\
+    /* Get the address of the jump table.  */			\
+    add		$(TABLE - .), %ebx;				\
+    /* Get the entry and convert the relative offset to the	\
+       absolute address.  */					\
+    add		(%ebx,%ecx,4), %ebx;				\
+    /* We loaded the jump table entry; the caller already adjusted EDX. Go.  */	\
+    jmp		*%ebx
+
+	.section	.gnu.linkonce.t.__i686.get_pc_thunk.bx,"ax",@progbits
+	.globl	__i686.get_pc_thunk.bx
+	.hidden	__i686.get_pc_thunk.bx
+	ALIGN (4)
+	.type	__i686.get_pc_thunk.bx,@function
+__i686.get_pc_thunk.bx:
+	movl	(%esp), %ebx
+	ret
+#else
+# define ENTRANCE
+# define RETURN_END	ret
+# define RETURN		RETURN_END
+# define PARMS		4
+# define JMPTBL(I, B)	I
+
+/* Branch to an entry in a jump table.  TABLE is a jump table with
+   absolute offsets.  */
+# define BRANCH_TO_JMPTBL_ENTRY(TABLE)				\
+    jmp		*TABLE(,%ecx,4)
+#endif
+
+	.section .text.sse2,"ax",@progbits
+	ALIGN (4)
+ENTRY (sse2_memset16_atom)
+	ENTRANCE
+
+	movl	LEN(%esp), %ecx		/* ECX = length argument */
+#ifdef USE_AS_ANDROID
+	shr	$1, %ecx		/* halve the length (presumably byte count -> 16-bit word count) */
+#endif
+#ifdef USE_AS_BZERO16
+	xor	%eax, %eax		/* fill pattern is zero */
+#else
+	movzwl	CHR(%esp), %eax		/* zero-extend the 16-bit fill value */
+	mov	%eax, %edx
+	shl	$16, %eax
+	or	%edx, %eax		/* EAX = fill value replicated into both 16-bit halves */
+#endif
+	movl	DEST(%esp), %edx	/* EDX = destination pointer */
+	cmp	$32, %ecx
+	jae	L(32wordsormore)	/* 32 or more words: take the SSE2 path */
+
+L(write_less32words):
+	lea	(%edx, %ecx, 2), %edx	/* EDX = one past the end; table targets store at negative offsets from it */
+	BRANCH_TO_JMPTBL_ENTRY (L(table_less32words))
+
+
+	.pushsection .rodata.sse2,"a",@progbits
+	ALIGN (2)
+L(table_less32words):
+	.int	JMPTBL (L(write_0words), L(table_less32words))
+	.int	JMPTBL (L(write_1words), L(table_less32words))
+	.int	JMPTBL (L(write_2words), L(table_less32words))
+	.int	JMPTBL (L(write_3words), L(table_less32words))
+	.int	JMPTBL (L(write_4words), L(table_less32words))
+	.int	JMPTBL (L(write_5words), L(table_less32words))
+	.int	JMPTBL (L(write_6words), L(table_less32words))
+	.int	JMPTBL (L(write_7words), L(table_less32words))
+	.int	JMPTBL (L(write_8words), L(table_less32words))
+	.int	JMPTBL (L(write_9words), L(table_less32words))
+	.int	JMPTBL (L(write_10words), L(table_less32words))
+	.int	JMPTBL (L(write_11words), L(table_less32words))
+	.int	JMPTBL (L(write_12words), L(table_less32words))
+	.int	JMPTBL (L(write_13words), L(table_less32words))
+	.int	JMPTBL (L(write_14words), L(table_less32words))
+	.int	JMPTBL (L(write_15words), L(table_less32words))
+	.int	JMPTBL (L(write_16words), L(table_less32words))
+	.int	JMPTBL (L(write_17words), L(table_less32words))
+	.int	JMPTBL (L(write_18words), L(table_less32words))
+	.int	JMPTBL (L(write_19words), L(table_less32words))
+	.int	JMPTBL (L(write_20words), L(table_less32words))
+	.int	JMPTBL (L(write_21words), L(table_less32words))
+	.int	JMPTBL (L(write_22words), L(table_less32words))
+	.int	JMPTBL (L(write_23words), L(table_less32words))
+	.int	JMPTBL (L(write_24words), L(table_less32words))
+	.int	JMPTBL (L(write_25words), L(table_less32words))
+	.int	JMPTBL (L(write_26words), L(table_less32words))
+	.int	JMPTBL (L(write_27words), L(table_less32words))
+	.int	JMPTBL (L(write_28words), L(table_less32words))
+	.int	JMPTBL (L(write_29words), L(table_less32words))
+	.int	JMPTBL (L(write_30words), L(table_less32words))
+	.int	JMPTBL (L(write_31words), L(table_less32words))
+	.popsection
+
+	ALIGN (4)
+L(write_28words):
+	movl	%eax, -56(%edx)
+	movl	%eax, -52(%edx)
+L(write_24words):
+	movl	%eax, -48(%edx)
+	movl	%eax, -44(%edx)
+L(write_20words):
+	movl	%eax, -40(%edx)
+	movl	%eax, -36(%edx)
+L(write_16words):
+	movl	%eax, -32(%edx)
+	movl	%eax, -28(%edx)
+L(write_12words):
+	movl	%eax, -24(%edx)
+	movl	%eax, -20(%edx)
+L(write_8words):
+	movl	%eax, -16(%edx)
+	movl	%eax, -12(%edx)
+L(write_4words):
+	movl	%eax, -8(%edx)
+	movl	%eax, -4(%edx)
+L(write_0words):
+	SETRTNVAL
+	RETURN
+
+	ALIGN (4)
+L(write_29words):
+	movl	%eax, -58(%edx)
+	movl	%eax, -54(%edx)
+L(write_25words):
+	movl	%eax, -50(%edx)
+	movl	%eax, -46(%edx)
+L(write_21words):
+	movl	%eax, -42(%edx)
+	movl	%eax, -38(%edx)
+L(write_17words):
+	movl	%eax, -34(%edx)
+	movl	%eax, -30(%edx)
+L(write_13words):
+	movl	%eax, -26(%edx)
+	movl	%eax, -22(%edx)
+L(write_9words):
+	movl	%eax, -18(%edx)
+	movl	%eax, -14(%edx)
+L(write_5words):
+	movl	%eax, -10(%edx)
+	movl	%eax, -6(%edx)
+L(write_1words):
+	mov	%ax, -2(%edx)
+	SETRTNVAL
+	RETURN
+
+	ALIGN (4)
+L(write_30words):
+	movl	%eax, -60(%edx)
+	movl	%eax, -56(%edx)
+L(write_26words):
+	movl	%eax, -52(%edx)
+	movl	%eax, -48(%edx)
+L(write_22words):
+	movl	%eax, -44(%edx)
+	movl	%eax, -40(%edx)
+L(write_18words):
+	movl	%eax, -36(%edx)
+	movl	%eax, -32(%edx)
+L(write_14words):
+	movl	%eax, -28(%edx)
+	movl	%eax, -24(%edx)
+L(write_10words):
+	movl	%eax, -20(%edx)
+	movl	%eax, -16(%edx)
+L(write_6words):
+	movl	%eax, -12(%edx)
+	movl	%eax, -8(%edx)
+L(write_2words):
+	movl	%eax, -4(%edx)
+	SETRTNVAL
+	RETURN
+
+	ALIGN (4)
+L(write_31words):
+	movl	%eax, -62(%edx)
+	movl	%eax, -58(%edx)
+L(write_27words):
+	movl	%eax, -54(%edx)
+	movl	%eax, -50(%edx)
+L(write_23words):
+	movl	%eax, -46(%edx)
+	movl	%eax, -42(%edx)
+L(write_19words):
+	movl	%eax, -38(%edx)
+	movl	%eax, -34(%edx)
+L(write_15words):
+	movl	%eax, -30(%edx)
+	movl	%eax, -26(%edx)
+L(write_11words):
+	movl	%eax, -22(%edx)
+	movl	%eax, -18(%edx)
+L(write_7words):
+	movl	%eax, -14(%edx)
+	movl	%eax, -10(%edx)
+L(write_3words):
+	movl	%eax, -6(%edx)
+	movw	%ax, -2(%edx)
+	SETRTNVAL
+	RETURN
+
+	ALIGN (4)
+
+L(32wordsormore):
+	shl	$1, %ecx		/* ECX: word count -> byte count */
+	test	$0x01, %edx
+	jz	L(aligned2bytes)
+	mov	%eax, (%edx)		/* odd destination: store the first 4 bytes... */
+	mov	%eax, -4(%edx, %ecx)	/* ...and the last 4 bytes up front */
+	sub	$2, %ecx
+	add	$1, %edx		/* step to an even address */
+	rol	$8, %eax		/* rotate the pattern to match the one-byte shift */
+L(aligned2bytes):
+#ifdef USE_AS_BZERO16
+	pxor	%xmm0, %xmm0
+#else
+	movd	%eax, %xmm0
+	pshufd	$0, %xmm0, %xmm0	/* broadcast the 32-bit pattern to all four dwords of XMM0 */
+#endif
+	testl	$0xf, %edx
+	jz	L(aligned_16)
+/* ECX > 32 and EDX is not 16 byte aligned.  */
+L(not_aligned_16):
+	movdqu	%xmm0, (%edx)		/* unaligned 16-byte store covers the head */
+	movl	%edx, %eax
+	and	$-16, %edx
+	add	$16, %edx		/* EDX = next 16-byte boundary */
+	sub	%edx, %eax		/* EAX = -(bytes skipped to reach the boundary) */
+	add	%eax, %ecx		/* shrink the remaining length accordingly */
+	movd	%xmm0, %eax		/* reload the fill pattern into EAX */
+
+	ALIGN (4)
+L(aligned_16):
+	cmp	$128, %ecx
+	jae	L(128bytesormore)
+
+L(aligned_16_less128bytes):
+	add	%ecx, %edx		/* EDX = end of the buffer */
+	shr	$1, %ecx		/* ECX = remaining 16-bit words, used as the jump-table index */
+	BRANCH_TO_JMPTBL_ENTRY (L(table_16_128bytes))
+
+	ALIGN (4)
+L(128bytesormore):
+#ifdef SHARED_CACHE_SIZE
+	PUSH (%ebx)
+	mov	$SHARED_CACHE_SIZE, %ebx
+#else
+# ifdef SHARED
+	call	__i686.get_pc_thunk.bx
+	add	$_GLOBAL_OFFSET_TABLE_, %ebx
+	mov	__x86_shared_cache_size@GOTOFF(%ebx), %ebx
+# else
+	PUSH (%ebx)
+	mov	__x86_shared_cache_size, %ebx
+# endif
+#endif
+	cmp	%ebx, %ecx
+	jae	L(128bytesormore_nt_start)
+
+	
+#ifdef DATA_CACHE_SIZE
+	POP (%ebx)
+# define RESTORE_EBX_STATE CFI_PUSH (%ebx)
+	cmp	$DATA_CACHE_SIZE, %ecx
+#else
+# ifdef SHARED
+#  define RESTORE_EBX_STATE
+	call	__i686.get_pc_thunk.bx
+	add	$_GLOBAL_OFFSET_TABLE_, %ebx
+	cmp	__x86_data_cache_size@GOTOFF(%ebx), %ecx
+# else
+	POP (%ebx)
+#  define RESTORE_EBX_STATE CFI_PUSH (%ebx)
+	cmp	__x86_data_cache_size, %ecx
+# endif
+#endif
+
+	jae	L(128bytes_L2_normal)
+	subl	$128, %ecx
+L(128bytesormore_normal):
+	sub	$128, %ecx
+	movdqa	%xmm0, (%edx)
+	movdqa	%xmm0, 0x10(%edx)
+	movdqa	%xmm0, 0x20(%edx)
+	movdqa	%xmm0, 0x30(%edx)
+	movdqa	%xmm0, 0x40(%edx)
+	movdqa	%xmm0, 0x50(%edx)
+	movdqa	%xmm0, 0x60(%edx)
+	movdqa	%xmm0, 0x70(%edx)
+	lea	128(%edx), %edx
+	jb	L(128bytesless_normal)
+
+
+	sub	$128, %ecx
+	movdqa	%xmm0, (%edx)
+	movdqa	%xmm0, 0x10(%edx)
+	movdqa	%xmm0, 0x20(%edx)
+	movdqa	%xmm0, 0x30(%edx)
+	movdqa	%xmm0, 0x40(%edx)
+	movdqa	%xmm0, 0x50(%edx)
+	movdqa	%xmm0, 0x60(%edx)
+	movdqa	%xmm0, 0x70(%edx)
+	lea	128(%edx), %edx
+	jae	L(128bytesormore_normal)
+
+L(128bytesless_normal):
+	lea	128(%ecx), %ecx
+	add	%ecx, %edx
+	shr	$1, %ecx
+	BRANCH_TO_JMPTBL_ENTRY (L(table_16_128bytes))
+
+	ALIGN (4)
+L(128bytes_L2_normal):
+	prefetcht0	0x380(%edx)
+	prefetcht0	0x3c0(%edx)
+	sub	$128, %ecx
+	movdqa	%xmm0, (%edx)
+	movaps	%xmm0, 0x10(%edx)
+	movaps	%xmm0, 0x20(%edx)
+	movaps	%xmm0, 0x30(%edx)
+	movaps	%xmm0, 0x40(%edx)
+	movaps	%xmm0, 0x50(%edx)
+	movaps	%xmm0, 0x60(%edx)
+	movaps	%xmm0, 0x70(%edx)
+	add	$128, %edx
+	cmp	$128, %ecx 	
+	jae	L(128bytes_L2_normal)
+
+L(128bytesless_L2_normal):
+	add	%ecx, %edx
+	shr	$1, %ecx
+	BRANCH_TO_JMPTBL_ENTRY (L(table_16_128bytes))
+
+	RESTORE_EBX_STATE
+L(128bytesormore_nt_start):
+	sub	%ebx, %ecx
+	mov	%ebx, %eax
+	and	$0x7f, %eax
+	add	%eax, %ecx
+	movd	%xmm0, %eax
+	ALIGN (4)
+L(128bytesormore_shared_cache_loop):
+	prefetcht0	0x3c0(%edx)
+	prefetcht0	0x380(%edx)
+	sub	$0x80, %ebx
+	movdqa	%xmm0, (%edx)
+	movdqa	%xmm0, 0x10(%edx)
+	movdqa	%xmm0, 0x20(%edx)
+	movdqa	%xmm0, 0x30(%edx)
+	movdqa	%xmm0, 0x40(%edx)
+	movdqa	%xmm0, 0x50(%edx)
+	movdqa	%xmm0, 0x60(%edx)
+	movdqa	%xmm0, 0x70(%edx)
+	add	$0x80, %edx
+	cmp	$0x80, %ebx
+	jae	L(128bytesormore_shared_cache_loop)
+	cmp	$0x80, %ecx
+	jb	L(shared_cache_loop_end)
+	ALIGN (4)
+L(128bytesormore_nt):
+	sub	$0x80, %ecx
+	movntdq	%xmm0, (%edx)
+	movntdq	%xmm0, 0x10(%edx)
+	movntdq	%xmm0, 0x20(%edx)
+	movntdq	%xmm0, 0x30(%edx)
+	movntdq	%xmm0, 0x40(%edx)
+	movntdq	%xmm0, 0x50(%edx)
+	movntdq	%xmm0, 0x60(%edx)
+	movntdq	%xmm0, 0x70(%edx)
+	add	$0x80, %edx
+	cmp	$0x80, %ecx
+	jae	L(128bytesormore_nt)
+	sfence
+L(shared_cache_loop_end):
+#if defined DATA_CACHE_SIZE || !defined SHARED
+	POP (%ebx)
+#endif
+	add	%ecx, %edx
+	shr	$1, %ecx
+	BRANCH_TO_JMPTBL_ENTRY (L(table_16_128bytes))
+
+
+	.pushsection .rodata.sse2,"a",@progbits
+	ALIGN (2)
+L(table_16_128bytes):
+	.int	JMPTBL (L(aligned_16_0bytes), L(table_16_128bytes))
+	.int	JMPTBL (L(aligned_16_2bytes), L(table_16_128bytes))
+	.int	JMPTBL (L(aligned_16_4bytes), L(table_16_128bytes))
+	.int	JMPTBL (L(aligned_16_6bytes), L(table_16_128bytes))
+	.int	JMPTBL (L(aligned_16_8bytes), L(table_16_128bytes))
+	.int	JMPTBL (L(aligned_16_10bytes), L(table_16_128bytes))
+	.int	JMPTBL (L(aligned_16_12bytes), L(table_16_128bytes))
+	.int	JMPTBL (L(aligned_16_14bytes), L(table_16_128bytes))
+	.int	JMPTBL (L(aligned_16_16bytes), L(table_16_128bytes))
+	.int	JMPTBL (L(aligned_16_18bytes), L(table_16_128bytes))
+	.int	JMPTBL (L(aligned_16_20bytes), L(table_16_128bytes))
+	.int	JMPTBL (L(aligned_16_22bytes), L(table_16_128bytes))
+	.int	JMPTBL (L(aligned_16_24bytes), L(table_16_128bytes))
+	.int	JMPTBL (L(aligned_16_26bytes), L(table_16_128bytes))
+	.int	JMPTBL (L(aligned_16_28bytes), L(table_16_128bytes))
+	.int	JMPTBL (L(aligned_16_30bytes), L(table_16_128bytes))
+	.int	JMPTBL (L(aligned_16_32bytes), L(table_16_128bytes))
+	.int	JMPTBL (L(aligned_16_34bytes), L(table_16_128bytes))
+	.int	JMPTBL (L(aligned_16_36bytes), L(table_16_128bytes))
+	.int	JMPTBL (L(aligned_16_38bytes), L(table_16_128bytes))
+	.int	JMPTBL (L(aligned_16_40bytes), L(table_16_128bytes))
+	.int	JMPTBL (L(aligned_16_42bytes), L(table_16_128bytes))
+	.int	JMPTBL (L(aligned_16_44bytes), L(table_16_128bytes))
+	.int	JMPTBL (L(aligned_16_46bytes), L(table_16_128bytes))
+	.int	JMPTBL (L(aligned_16_48bytes), L(table_16_128bytes))
+	.int	JMPTBL (L(aligned_16_50bytes), L(table_16_128bytes))
+	.int	JMPTBL (L(aligned_16_52bytes), L(table_16_128bytes))
+	.int	JMPTBL (L(aligned_16_54bytes), L(table_16_128bytes))
+	.int	JMPTBL (L(aligned_16_56bytes), L(table_16_128bytes))
+	.int	JMPTBL (L(aligned_16_58bytes), L(table_16_128bytes))
+	.int	JMPTBL (L(aligned_16_60bytes), L(table_16_128bytes))
+	.int	JMPTBL (L(aligned_16_62bytes), L(table_16_128bytes))
+	.int	JMPTBL (L(aligned_16_64bytes), L(table_16_128bytes))
+	.int	JMPTBL (L(aligned_16_66bytes), L(table_16_128bytes))
+	.int	JMPTBL (L(aligned_16_68bytes), L(table_16_128bytes))
+	.int	JMPTBL (L(aligned_16_70bytes), L(table_16_128bytes))
+	.int	JMPTBL (L(aligned_16_72bytes), L(table_16_128bytes))
+	.int	JMPTBL (L(aligned_16_74bytes), L(table_16_128bytes))
+	.int	JMPTBL (L(aligned_16_76bytes), L(table_16_128bytes))
+	.int	JMPTBL (L(aligned_16_78bytes), L(table_16_128bytes))
+	.int	JMPTBL (L(aligned_16_80bytes), L(table_16_128bytes))
+	.int	JMPTBL (L(aligned_16_82bytes), L(table_16_128bytes))
+	.int	JMPTBL (L(aligned_16_84bytes), L(table_16_128bytes))
+	.int	JMPTBL (L(aligned_16_86bytes), L(table_16_128bytes))
+	.int	JMPTBL (L(aligned_16_88bytes), L(table_16_128bytes))
+	.int	JMPTBL (L(aligned_16_90bytes), L(table_16_128bytes))
+	.int	JMPTBL (L(aligned_16_92bytes), L(table_16_128bytes))
+	.int	JMPTBL (L(aligned_16_94bytes), L(table_16_128bytes))
+	.int	JMPTBL (L(aligned_16_96bytes), L(table_16_128bytes))
+	.int	JMPTBL (L(aligned_16_98bytes), L(table_16_128bytes))
+	.int	JMPTBL (L(aligned_16_100bytes), L(table_16_128bytes))
+	.int	JMPTBL (L(aligned_16_102bytes), L(table_16_128bytes))
+	.int	JMPTBL (L(aligned_16_104bytes), L(table_16_128bytes))
+	.int	JMPTBL (L(aligned_16_106bytes), L(table_16_128bytes))
+	.int	JMPTBL (L(aligned_16_108bytes), L(table_16_128bytes))
+	.int	JMPTBL (L(aligned_16_110bytes), L(table_16_128bytes))
+	.int	JMPTBL (L(aligned_16_112bytes), L(table_16_128bytes))
+	.int	JMPTBL (L(aligned_16_114bytes), L(table_16_128bytes))
+	.int	JMPTBL (L(aligned_16_116bytes), L(table_16_128bytes))
+	.int	JMPTBL (L(aligned_16_118bytes), L(table_16_128bytes))
+	.int	JMPTBL (L(aligned_16_120bytes), L(table_16_128bytes))
+	.int	JMPTBL (L(aligned_16_122bytes), L(table_16_128bytes))
+	.int	JMPTBL (L(aligned_16_124bytes), L(table_16_128bytes))
+	.int	JMPTBL (L(aligned_16_126bytes), L(table_16_128bytes))
+	.popsection
+
+
+	ALIGN (4)
+L(aligned_16_112bytes):
+	movdqa	%xmm0, -112(%edx)
+L(aligned_16_96bytes):
+	movdqa	%xmm0, -96(%edx)
+L(aligned_16_80bytes):
+	movdqa	%xmm0, -80(%edx)
+L(aligned_16_64bytes):
+	movdqa	%xmm0, -64(%edx)
+L(aligned_16_48bytes):
+	movdqa	%xmm0, -48(%edx)
+L(aligned_16_32bytes):
+	movdqa	%xmm0, -32(%edx)
+L(aligned_16_16bytes):
+	movdqa	%xmm0, -16(%edx)
+L(aligned_16_0bytes):
+	SETRTNVAL
+	RETURN
+
+
+	ALIGN (4)
+L(aligned_16_114bytes):
+	movdqa	%xmm0, -114(%edx)
+L(aligned_16_98bytes):
+	movdqa	%xmm0, -98(%edx)
+L(aligned_16_82bytes):
+	movdqa	%xmm0, -82(%edx)
+L(aligned_16_66bytes):
+	movdqa	%xmm0, -66(%edx)
+L(aligned_16_50bytes):
+	movdqa	%xmm0, -50(%edx)
+L(aligned_16_34bytes):
+	movdqa	%xmm0, -34(%edx)
+L(aligned_16_18bytes):
+	movdqa	%xmm0, -18(%edx)
+L(aligned_16_2bytes):
+	movw	%ax, -2(%edx)
+	SETRTNVAL
+	RETURN
+
+	ALIGN (4)
+L(aligned_16_116bytes):
+	movdqa	%xmm0, -116(%edx)
+L(aligned_16_100bytes):
+	movdqa	%xmm0, -100(%edx)
+L(aligned_16_84bytes):
+	movdqa	%xmm0, -84(%edx)
+L(aligned_16_68bytes):
+	movdqa	%xmm0, -68(%edx)
+L(aligned_16_52bytes):
+	movdqa	%xmm0, -52(%edx)
+L(aligned_16_36bytes):
+	movdqa	%xmm0, -36(%edx)
+L(aligned_16_20bytes):
+	movdqa	%xmm0, -20(%edx)
+L(aligned_16_4bytes):
+	movl	%eax, -4(%edx)
+	SETRTNVAL
+	RETURN
+
+
+	ALIGN (4)
+L(aligned_16_118bytes):
+	movdqa	%xmm0, -118(%edx)
+L(aligned_16_102bytes):
+	movdqa	%xmm0, -102(%edx)
+L(aligned_16_86bytes):
+	movdqa	%xmm0, -86(%edx)
+L(aligned_16_70bytes):
+	movdqa	%xmm0, -70(%edx)
+L(aligned_16_54bytes):
+	movdqa	%xmm0, -54(%edx)
+L(aligned_16_38bytes):
+	movdqa	%xmm0, -38(%edx)
+L(aligned_16_22bytes):
+	movdqa	%xmm0, -22(%edx)
+L(aligned_16_6bytes):
+	movl	%eax, -6(%edx)
+	movw	%ax, -2(%edx)
+	SETRTNVAL
+	RETURN
+
+
+	ALIGN (4)
+L(aligned_16_120bytes):
+	movdqa	%xmm0, -120(%edx)
+L(aligned_16_104bytes):
+	movdqa	%xmm0, -104(%edx)
+L(aligned_16_88bytes):
+	movdqa	%xmm0, -88(%edx)
+L(aligned_16_72bytes):
+	movdqa	%xmm0, -72(%edx)
+L(aligned_16_56bytes):
+	movdqa	%xmm0, -56(%edx)
+L(aligned_16_40bytes):
+	movdqa	%xmm0, -40(%edx)
+L(aligned_16_24bytes):
+	movdqa	%xmm0, -24(%edx)
+L(aligned_16_8bytes):
+	movq	%xmm0, -8(%edx)
+	SETRTNVAL
+	RETURN
+
+
+	ALIGN (4)
+L(aligned_16_122bytes):
+	movdqa	%xmm0, -122(%edx)
+L(aligned_16_106bytes):
+	movdqa	%xmm0, -106(%edx)
+L(aligned_16_90bytes):
+	movdqa	%xmm0, -90(%edx)
+L(aligned_16_74bytes):
+	movdqa	%xmm0, -74(%edx)
+L(aligned_16_58bytes):
+	movdqa	%xmm0, -58(%edx)
+L(aligned_16_42bytes):
+	movdqa	%xmm0, -42(%edx)
+L(aligned_16_26bytes):
+	movdqa	%xmm0, -26(%edx)
+L(aligned_16_10bytes):
+	movq	%xmm0, -10(%edx)
+	movw	%ax, -2(%edx)
+	SETRTNVAL
+	RETURN
+
+
+	ALIGN (4)
+L(aligned_16_124bytes):
+	movdqa	%xmm0, -124(%edx)
+L(aligned_16_108bytes):
+	movdqa	%xmm0, -108(%edx)
+L(aligned_16_92bytes):
+	movdqa	%xmm0, -92(%edx)
+L(aligned_16_76bytes):
+	movdqa	%xmm0, -76(%edx)
+L(aligned_16_60bytes):
+	movdqa	%xmm0, -60(%edx)
+L(aligned_16_44bytes):
+	movdqa	%xmm0, -44(%edx)
+L(aligned_16_28bytes):
+	movdqa	%xmm0, -28(%edx)
+L(aligned_16_12bytes):
+	movq	%xmm0, -12(%edx)
+	movl	%eax, -4(%edx)
+	SETRTNVAL
+	RETURN
+
+
+	ALIGN (4)
+L(aligned_16_126bytes):
+	movdqa	%xmm0, -126(%edx)
+L(aligned_16_110bytes):
+	movdqa	%xmm0, -110(%edx)
+L(aligned_16_94bytes):
+	movdqa	%xmm0, -94(%edx)
+L(aligned_16_78bytes):
+	movdqa	%xmm0, -78(%edx)
+L(aligned_16_62bytes):
+	movdqa	%xmm0, -62(%edx)
+L(aligned_16_46bytes):
+	movdqa	%xmm0, -46(%edx)
+L(aligned_16_30bytes):
+	movdqa	%xmm0, -30(%edx)
+L(aligned_16_14bytes):
+	movq	%xmm0, -14(%edx)
+	movl	%eax, -6(%edx)
+	movw	%ax, -2(%edx)
+	SETRTNVAL
+	RETURN
+
+END (sse2_memset16_atom)
diff --git a/libcutils/arch-x86/sse2-memset32-atom.S b/libcutils/arch-x86/sse2-memset32-atom.S
new file mode 100644
index 0000000..4a52484
--- /dev/null
+++ b/libcutils/arch-x86/sse2-memset32-atom.S
@@ -0,0 +1,513 @@
+/*
+ * Copyright (C) 2010 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * Contributed by: Intel Corporation
+ */
+
+#ifndef L
+# define L(label)	.L##label
+#endif
+
+#ifndef ALIGN
+# define ALIGN(n)	.p2align n
+#endif
+
+#ifndef cfi_startproc
+# define cfi_startproc			.cfi_startproc
+#endif
+
+#ifndef cfi_endproc
+# define cfi_endproc			.cfi_endproc
+#endif
+
+#ifndef cfi_rel_offset
+# define cfi_rel_offset(reg, off)	.cfi_rel_offset reg, off
+#endif
+
+#ifndef cfi_restore
+# define cfi_restore(reg)		.cfi_restore reg
+#endif
+
+#ifndef cfi_adjust_cfa_offset
+# define cfi_adjust_cfa_offset(off)	.cfi_adjust_cfa_offset off
+#endif
+
+#ifndef ENTRY
+# define ENTRY(name)			\
+	.type name,  @function; 	\
+	.globl name;			\
+	.p2align 4;			\
+name:					\
+	cfi_startproc
+#endif
+
+#ifndef END
+# define END(name)			\
+	cfi_endproc;			\
+	.size name, .-name
+#endif
+
+#define CFI_PUSH(REG)						\
+  cfi_adjust_cfa_offset (4);					\
+  cfi_rel_offset (REG, 0)
+
+#define CFI_POP(REG)						\
+  cfi_adjust_cfa_offset (-4);					\
+  cfi_restore (REG)
+
+#define PUSH(REG)	pushl REG; CFI_PUSH (REG)
+#define POP(REG)	popl REG; CFI_POP (REG)
+
+#ifdef USE_AS_BZERO32	/* bzero-style entry: arguments are (dest, len) */
+# define DEST		PARMS	/* %esp-relative offset of the destination pointer */
+# define LEN		DEST+4	/* %esp-relative offset of the length */
+#else			/* memset-style entry: arguments are (dest, value, len) */
+# define DEST		PARMS	/* %esp-relative offset of the destination pointer */
+# define DWDS		DEST+4	/* %esp-relative offset of the 32-bit fill value */
+# define LEN		DWDS+4	/* %esp-relative offset of the length */
+#endif
+
+#ifdef USE_AS_WMEMSET32	/* only this variant loads the destination pointer into EAX before returning */
+# define SETRTNVAL	movl DEST(%esp), %eax
+#else			/* otherwise SETRTNVAL expands to nothing */
+# define SETRTNVAL
+#endif
+
+#ifdef SHARED
+# define ENTRANCE	PUSH (%ebx);
+# define RETURN_END	POP (%ebx); ret
+# define RETURN		RETURN_END; CFI_PUSH (%ebx)
+# define PARMS		8		/* Preserve EBX.  */
+# define JMPTBL(I, B)	I - B
+
+/* Load an entry in a jump table into EBX and branch to it.  TABLE is a
+   jump table with relative offsets, indexed by ECX (4-byte entries).  */
+# define BRANCH_TO_JMPTBL_ENTRY(TABLE)				\
+    /* We first load PC into EBX.  */				\
+    call	__i686.get_pc_thunk.bx;				\
+    /* Get the address of the jump table.  */			\
+    add		$(TABLE - .), %ebx;				\
+    /* Get the entry and convert the relative offset to the	\
+       absolute address.  */					\
+    add		(%ebx,%ecx,4), %ebx;				\
+    /* We loaded the jump table entry; the caller already adjusted EDX. Go.  */	\
+    jmp		*%ebx
+
+	.section	.gnu.linkonce.t.__i686.get_pc_thunk.bx,"ax",@progbits
+	.globl	__i686.get_pc_thunk.bx
+	.hidden	__i686.get_pc_thunk.bx
+	ALIGN (4)
+	.type	__i686.get_pc_thunk.bx,@function
+__i686.get_pc_thunk.bx:
+	movl	(%esp), %ebx
+	ret
+#else
+# define ENTRANCE
+# define RETURN_END	ret
+# define RETURN		RETURN_END
+# define PARMS		4
+# define JMPTBL(I, B)	I
+
+/* Branch to an entry in a jump table.  TABLE is a jump table with
+   absolute offsets.  */
+# define BRANCH_TO_JMPTBL_ENTRY(TABLE)				\
+    jmp		*TABLE(,%ecx,4)
+#endif
+
+	.section .text.sse2,"ax",@progbits
+	ALIGN (4)
+ENTRY (sse2_memset32_atom)
+	ENTRANCE
+
+	movl	LEN(%esp), %ecx
+#ifdef USE_AS_ANDROID
+	shr     $2, %ecx
+#endif
+#ifdef USE_AS_BZERO32
+	xor	%eax, %eax
+#else
+	mov	DWDS(%esp), %eax
+	mov	%eax, %edx
+#endif
+	movl	DEST(%esp), %edx
+	cmp	$16, %ecx
+	jae	L(16dbwordsormore)
+
+L(write_less16dbwords):
+	lea	(%edx, %ecx, 4), %edx
+	BRANCH_TO_JMPTBL_ENTRY (L(table_less16dbwords))
+
+	.pushsection .rodata.sse2,"a",@progbits
+	ALIGN (2)
+L(table_less16dbwords):
+	.int	JMPTBL (L(write_0dbwords), L(table_less16dbwords))
+	.int	JMPTBL (L(write_1dbwords), L(table_less16dbwords))
+	.int	JMPTBL (L(write_2dbwords), L(table_less16dbwords))
+	.int	JMPTBL (L(write_3dbwords), L(table_less16dbwords))
+	.int	JMPTBL (L(write_4dbwords), L(table_less16dbwords))
+	.int	JMPTBL (L(write_5dbwords), L(table_less16dbwords))
+	.int	JMPTBL (L(write_6dbwords), L(table_less16dbwords))
+	.int	JMPTBL (L(write_7dbwords), L(table_less16dbwords))
+	.int	JMPTBL (L(write_8dbwords), L(table_less16dbwords))
+	.int	JMPTBL (L(write_9dbwords), L(table_less16dbwords))
+	.int	JMPTBL (L(write_10dbwords), L(table_less16dbwords))
+	.int	JMPTBL (L(write_11dbwords), L(table_less16dbwords))
+	.int	JMPTBL (L(write_12dbwords), L(table_less16dbwords))
+	.int	JMPTBL (L(write_13dbwords), L(table_less16dbwords))
+	.int	JMPTBL (L(write_14dbwords), L(table_less16dbwords))
+	.int	JMPTBL (L(write_15dbwords), L(table_less16dbwords))
+	.popsection
+
+	ALIGN (4)
+L(write_15dbwords):
+	movl	%eax, -60(%edx)
+L(write_14dbwords):
+	movl	%eax, -56(%edx)
+L(write_13dbwords):
+	movl	%eax, -52(%edx)
+L(write_12dbwords):
+	movl	%eax, -48(%edx)
+L(write_11dbwords):
+	movl	%eax, -44(%edx)
+L(write_10dbwords):
+	movl	%eax, -40(%edx)
+L(write_9dbwords):
+	movl	%eax, -36(%edx)
+L(write_8dbwords):
+	movl	%eax, -32(%edx)
+L(write_7dbwords):
+	movl	%eax, -28(%edx)
+L(write_6dbwords):
+	movl	%eax, -24(%edx)
+L(write_5dbwords):
+	movl	%eax, -20(%edx)
+L(write_4dbwords):
+	movl	%eax, -16(%edx)
+L(write_3dbwords):
+	movl	%eax, -12(%edx)
+L(write_2dbwords):
+	movl	%eax, -8(%edx)
+L(write_1dbwords):
+	movl	%eax, -4(%edx)
+L(write_0dbwords):
+	SETRTNVAL
+	RETURN
+
+	ALIGN (4)
+L(16dbwordsormore):
+	test	$3, %edx
+	jz	L(aligned4bytes)
+	mov	%eax, (%edx)
+	mov	%eax, -4(%edx, %ecx, 4)
+	sub	$1, %ecx
+	rol	$24, %eax
+	add	$1, %edx
+	test	$3, %edx
+	jz	L(aligned4bytes)
+	ror	$8, %eax
+	add	$1, %edx
+	test	$3, %edx
+	jz	L(aligned4bytes)
+	ror	$8, %eax
+	add	$1, %edx
+L(aligned4bytes):
+	shl	$2, %ecx
+
+#ifdef USE_AS_BZERO32
+	pxor	%xmm0, %xmm0
+#else
+	movd	%eax, %xmm0
+	pshufd	$0, %xmm0, %xmm0
+#endif
+	testl	$0xf, %edx
+	jz	L(aligned_16)
+/* ECX > 32 and EDX is not 16 byte aligned.  */
+L(not_aligned_16):
+	movdqu	%xmm0, (%edx)
+	movl	%edx, %eax
+	and	$-16, %edx
+	add	$16, %edx
+	sub	%edx, %eax
+	add	%eax, %ecx
+	movd	%xmm0, %eax
+	ALIGN (4)
+L(aligned_16):
+	cmp	$128, %ecx
+	jae	L(128bytesormore)
+
+L(aligned_16_less128bytes):
+	add	%ecx, %edx
+	shr	$2, %ecx
+	BRANCH_TO_JMPTBL_ENTRY (L(table_16_128bytes))
+
+	ALIGN (4)
+L(128bytesormore):
+#ifdef SHARED_CACHE_SIZE
+	PUSH (%ebx)
+	mov	$SHARED_CACHE_SIZE, %ebx
+#else
+# ifdef SHARED
+	call	__i686.get_pc_thunk.bx
+	add	$_GLOBAL_OFFSET_TABLE_, %ebx
+	mov	__x86_shared_cache_size@GOTOFF(%ebx), %ebx
+# else
+	PUSH (%ebx)
+	mov	__x86_shared_cache_size, %ebx
+# endif
+#endif
+	cmp	%ebx, %ecx
+	jae	L(128bytesormore_nt_start)
+	
+#ifdef DATA_CACHE_SIZE
+	POP (%ebx)
+# define RESTORE_EBX_STATE CFI_PUSH (%ebx)
+	cmp	$DATA_CACHE_SIZE, %ecx
+#else
+# ifdef SHARED
+#  define RESTORE_EBX_STATE
+	call	__i686.get_pc_thunk.bx
+	add	$_GLOBAL_OFFSET_TABLE_, %ebx
+	cmp	__x86_data_cache_size@GOTOFF(%ebx), %ecx
+# else
+	POP (%ebx)
+#  define RESTORE_EBX_STATE CFI_PUSH (%ebx)
+	cmp	__x86_data_cache_size, %ecx
+# endif
+#endif
+
+	jae	L(128bytes_L2_normal)
+	subl	$128, %ecx
+L(128bytesormore_normal):
+	sub	$128, %ecx
+	movdqa	%xmm0, (%edx)
+	movdqa	%xmm0, 0x10(%edx)
+	movdqa	%xmm0, 0x20(%edx)
+	movdqa	%xmm0, 0x30(%edx)
+	movdqa	%xmm0, 0x40(%edx)
+	movdqa	%xmm0, 0x50(%edx)
+	movdqa	%xmm0, 0x60(%edx)
+	movdqa	%xmm0, 0x70(%edx)
+	lea	128(%edx), %edx
+	jb	L(128bytesless_normal)
+
+
+	sub	$128, %ecx
+	movdqa	%xmm0, (%edx)
+	movdqa	%xmm0, 0x10(%edx)
+	movdqa	%xmm0, 0x20(%edx)
+	movdqa	%xmm0, 0x30(%edx)
+	movdqa	%xmm0, 0x40(%edx)
+	movdqa	%xmm0, 0x50(%edx)
+	movdqa	%xmm0, 0x60(%edx)
+	movdqa	%xmm0, 0x70(%edx)
+	lea	128(%edx), %edx
+	jae	L(128bytesormore_normal)
+
+L(128bytesless_normal):
+	lea	128(%ecx), %ecx
+	add	%ecx, %edx
+	shr	$2, %ecx
+	BRANCH_TO_JMPTBL_ENTRY (L(table_16_128bytes))
+
+	ALIGN (4)
+L(128bytes_L2_normal):
+	prefetcht0	0x380(%edx)
+	prefetcht0	0x3c0(%edx)
+	sub	$128, %ecx
+	movdqa	%xmm0, (%edx)
+	movaps	%xmm0, 0x10(%edx)
+	movaps	%xmm0, 0x20(%edx)
+	movaps	%xmm0, 0x30(%edx)
+	movaps	%xmm0, 0x40(%edx)
+	movaps	%xmm0, 0x50(%edx)
+	movaps	%xmm0, 0x60(%edx)
+	movaps	%xmm0, 0x70(%edx)
+	add	$128, %edx
+	cmp	$128, %ecx 	
+	jae	L(128bytes_L2_normal)
+
+L(128bytesless_L2_normal):
+	add	%ecx, %edx
+	shr	$2, %ecx
+	BRANCH_TO_JMPTBL_ENTRY (L(table_16_128bytes))
+
+	RESTORE_EBX_STATE
+L(128bytesormore_nt_start):	/* reached when ECX >= EBX (EBX was loaded with the shared cache size above) */
+	sub	%ebx, %ecx	/* ECX = bytes beyond the first EBX bytes */
+	mov	%ebx, %eax	/* EAX is scratch from here until the movd below */
+	and	$0x7f, %eax	/* EAX = EBX mod 128 */
+	add	%eax, %ecx	/* the loop below consumes EBX in whole 128-byte steps; give the remainder back to ECX */
+	movd	%xmm0, %eax	/* reload EAX from the low dword of XMM0 */
+	ALIGN (4)
+L(128bytesormore_shared_cache_loop):
+	prefetcht0	0x3c0(%edx)
+	prefetcht0	0x380(%edx)
+	sub	$0x80, %ebx
+	movdqa	%xmm0, (%edx)
+	movdqa	%xmm0, 0x10(%edx)
+	movdqa	%xmm0, 0x20(%edx)
+	movdqa	%xmm0, 0x30(%edx)
+	movdqa	%xmm0, 0x40(%edx)
+	movdqa	%xmm0, 0x50(%edx)
+	movdqa	%xmm0, 0x60(%edx)
+	movdqa	%xmm0, 0x70(%edx)
+	add	$0x80, %edx
+	cmp	$0x80, %ebx
+	jae	L(128bytesormore_shared_cache_loop)
+	cmp	$0x80, %ecx
+	jb	L(shared_cache_loop_end)
+
+	ALIGN (4)
+L(128bytesormore_nt):
+	sub	$0x80, %ecx
+	movntdq	%xmm0, (%edx)
+	movntdq	%xmm0, 0x10(%edx)
+	movntdq	%xmm0, 0x20(%edx)
+	movntdq	%xmm0, 0x30(%edx)
+	movntdq	%xmm0, 0x40(%edx)
+	movntdq	%xmm0, 0x50(%edx)
+	movntdq	%xmm0, 0x60(%edx)
+	movntdq	%xmm0, 0x70(%edx)
+	add	$0x80, %edx
+	cmp	$0x80, %ecx
+	jae	L(128bytesormore_nt)
+	sfence
+L(shared_cache_loop_end):
+#if defined DATA_CACHE_SIZE || !defined SHARED
+	POP (%ebx)
+#endif
+	add	%ecx, %edx
+	shr	$2, %ecx
+	BRANCH_TO_JMPTBL_ENTRY (L(table_16_128bytes))
+
+	.pushsection .rodata.sse2,"a",@progbits
+	ALIGN (2)
+L(table_16_128bytes):
+	.int	JMPTBL (L(aligned_16_0bytes), L(table_16_128bytes))
+	.int	JMPTBL (L(aligned_16_4bytes), L(table_16_128bytes))
+	.int	JMPTBL (L(aligned_16_8bytes), L(table_16_128bytes))
+	.int	JMPTBL (L(aligned_16_12bytes), L(table_16_128bytes))
+	.int	JMPTBL (L(aligned_16_16bytes), L(table_16_128bytes))
+	.int	JMPTBL (L(aligned_16_20bytes), L(table_16_128bytes))
+	.int	JMPTBL (L(aligned_16_24bytes), L(table_16_128bytes))
+	.int	JMPTBL (L(aligned_16_28bytes), L(table_16_128bytes))
+	.int	JMPTBL (L(aligned_16_32bytes), L(table_16_128bytes))
+	.int	JMPTBL (L(aligned_16_36bytes), L(table_16_128bytes))
+	.int	JMPTBL (L(aligned_16_40bytes), L(table_16_128bytes))
+	.int	JMPTBL (L(aligned_16_44bytes), L(table_16_128bytes))
+	.int	JMPTBL (L(aligned_16_48bytes), L(table_16_128bytes))
+	.int	JMPTBL (L(aligned_16_52bytes), L(table_16_128bytes))
+	.int	JMPTBL (L(aligned_16_56bytes), L(table_16_128bytes))
+	.int	JMPTBL (L(aligned_16_60bytes), L(table_16_128bytes))
+	.int	JMPTBL (L(aligned_16_64bytes), L(table_16_128bytes))
+	.int	JMPTBL (L(aligned_16_68bytes), L(table_16_128bytes))
+	.int	JMPTBL (L(aligned_16_72bytes), L(table_16_128bytes))
+	.int	JMPTBL (L(aligned_16_76bytes), L(table_16_128bytes))
+	.int	JMPTBL (L(aligned_16_80bytes), L(table_16_128bytes))
+	.int	JMPTBL (L(aligned_16_84bytes), L(table_16_128bytes))
+	.int	JMPTBL (L(aligned_16_88bytes), L(table_16_128bytes))
+	.int	JMPTBL (L(aligned_16_92bytes), L(table_16_128bytes))
+	.int	JMPTBL (L(aligned_16_96bytes), L(table_16_128bytes))
+	.int	JMPTBL (L(aligned_16_100bytes), L(table_16_128bytes))
+	.int	JMPTBL (L(aligned_16_104bytes), L(table_16_128bytes))
+	.int	JMPTBL (L(aligned_16_108bytes), L(table_16_128bytes))
+	.int	JMPTBL (L(aligned_16_112bytes), L(table_16_128bytes))
+	.int	JMPTBL (L(aligned_16_116bytes), L(table_16_128bytes))
+	.int	JMPTBL (L(aligned_16_120bytes), L(table_16_128bytes))
+	.int	JMPTBL (L(aligned_16_124bytes), L(table_16_128bytes))
+	.popsection
+
+	ALIGN (4)
+L(aligned_16_112bytes):
+	movdqa	%xmm0, -112(%edx)
+L(aligned_16_96bytes):
+	movdqa	%xmm0, -96(%edx)
+L(aligned_16_80bytes):
+	movdqa	%xmm0, -80(%edx)
+L(aligned_16_64bytes):
+	movdqa	%xmm0, -64(%edx)
+L(aligned_16_48bytes):
+	movdqa	%xmm0, -48(%edx)
+L(aligned_16_32bytes):
+	movdqa	%xmm0, -32(%edx)
+L(aligned_16_16bytes):
+	movdqa	%xmm0, -16(%edx)
+L(aligned_16_0bytes):
+	SETRTNVAL
+	RETURN
+
+	ALIGN (4)
+L(aligned_16_116bytes):
+	movdqa	%xmm0, -116(%edx)
+L(aligned_16_100bytes):
+	movdqa	%xmm0, -100(%edx)
+L(aligned_16_84bytes):
+	movdqa	%xmm0, -84(%edx)
+L(aligned_16_68bytes):
+	movdqa	%xmm0, -68(%edx)
+L(aligned_16_52bytes):
+	movdqa	%xmm0, -52(%edx)
+L(aligned_16_36bytes):
+	movdqa	%xmm0, -36(%edx)
+L(aligned_16_20bytes):
+	movdqa	%xmm0, -20(%edx)
+L(aligned_16_4bytes):
+	movl	%eax, -4(%edx)
+	SETRTNVAL
+	RETURN
+
+	ALIGN (4)
+L(aligned_16_120bytes):
+	movdqa	%xmm0, -120(%edx)
+L(aligned_16_104bytes):
+	movdqa	%xmm0, -104(%edx)
+L(aligned_16_88bytes):
+	movdqa	%xmm0, -88(%edx)
+L(aligned_16_72bytes):
+	movdqa	%xmm0, -72(%edx)
+L(aligned_16_56bytes):
+	movdqa	%xmm0, -56(%edx)
+L(aligned_16_40bytes):
+	movdqa	%xmm0, -40(%edx)
+L(aligned_16_24bytes):
+	movdqa	%xmm0, -24(%edx)
+L(aligned_16_8bytes):
+	movq	%xmm0, -8(%edx)
+	SETRTNVAL
+	RETURN
+
+	ALIGN (4)
+L(aligned_16_124bytes):
+	movdqa	%xmm0, -124(%edx)
+L(aligned_16_108bytes):
+	movdqa	%xmm0, -108(%edx)
+L(aligned_16_92bytes):
+	movdqa	%xmm0, -92(%edx)
+L(aligned_16_76bytes):
+	movdqa	%xmm0, -76(%edx)
+L(aligned_16_60bytes):
+	movdqa	%xmm0, -60(%edx)
+L(aligned_16_44bytes):
+	movdqa	%xmm0, -44(%edx)
+L(aligned_16_28bytes):
+	movdqa	%xmm0, -28(%edx)
+L(aligned_16_12bytes):
+	movq	%xmm0, -12(%edx)
+	movl	%eax, -4(%edx)
+	SETRTNVAL
+	RETURN
+
+END (sse2_memset32_atom)
diff --git a/libcutils/memory.c b/libcutils/memory.c
index ef6c7e6..6486b45 100644
--- a/libcutils/memory.c
+++ b/libcutils/memory.c
@@ -16,6 +16,7 @@
 
 #include <cutils/memory.h>
 
+#if !HAVE_MEMSET16
 void android_memset16(uint16_t* dst, uint16_t value, size_t size)
 {
     size >>= 1;
@@ -23,7 +24,9 @@
         *dst++ = value;
     }
 }
+#endif
 
+#if !HAVE_MEMSET32
 void android_memset32(uint32_t* dst, uint32_t value, size_t size)
 {
     size >>= 2;
@@ -31,6 +34,7 @@
         *dst++ = value;
     }
 }
+#endif
 
 #if !HAVE_STRLCPY
 /*
diff --git a/libpixelflinger/Android.mk b/libpixelflinger/Android.mk
index 6491d24..ed2ab5e 100644
--- a/libpixelflinger/Android.mk
+++ b/libpixelflinger/Android.mk
@@ -2,17 +2,6 @@
 include $(CLEAR_VARS)
 
 #
-# ARMv6 specific objects
-#
-
-ifeq ($(TARGET_ARCH),arm)
-LOCAL_ASFLAGS := -march=armv6
-LOCAL_SRC_FILES := rotate90CW_4x4_16v6.S
-LOCAL_MODULE := libpixelflinger_armv6
-include $(BUILD_STATIC_LIBRARY)
-endif
-
-#
 # C/C++ and ARMv5 objects
 #
 
@@ -77,10 +66,6 @@
 LOCAL_SHARED_LIBRARIES += libhardware_legacy
 LOCAL_CFLAGS += -DWITH_LIB_HARDWARE
 endif
-
-ifeq ($(TARGET_ARCH),arm)
-LOCAL_WHOLE_STATIC_LIBRARIES := libpixelflinger_armv6
-endif
 include $(BUILD_SHARED_LIBRARY)
 
 #
@@ -91,9 +76,6 @@
 LOCAL_MODULE:= libpixelflinger_static
 LOCAL_SRC_FILES := $(PIXELFLINGER_SRC_FILES)
 LOCAL_CFLAGS := $(PIXELFLINGER_CFLAGS) 
-ifeq ($(TARGET_ARCH),arm)
-LOCAL_WHOLE_STATIC_LIBRARIES := libpixelflinger_armv6
-endif
 include $(BUILD_STATIC_LIBRARY)
 
 
diff --git a/libpixelflinger/codeflinger/ARMAssembler.cpp b/libpixelflinger/codeflinger/ARMAssembler.cpp
index d3720c3..4726a08 100644
--- a/libpixelflinger/codeflinger/ARMAssembler.cpp
+++ b/libpixelflinger/codeflinger/ARMAssembler.cpp
@@ -334,7 +334,7 @@
 
 void ARMAssembler::STM(int cc, int dir,
         int Rn, int W, uint32_t reg_list)
-{   //                    FA EA FD ED      IB IA DB DA
+{   //                    ED FD EA FA      IB IA DB DA
     const uint8_t P[8] = { 0, 1, 0, 1,      1, 0, 1, 0 };
     const uint8_t U[8] = { 0, 0, 1, 1,      1, 1, 0, 0 };
     *mPC++ = (cc<<28) | (4<<25) | (uint32_t(P[dir])<<24) |
@@ -433,6 +433,16 @@
 {
     *mPC++ = (cc<<28) | 0x6CF0070 | (Rd<<12) | ((rotate >> 3) << 10) | Rm;
 }
+#if 0
+#pragma mark -
+#pragma mark Bit manipulation (ARMv7+ only)...
+#endif
+
+// Bit manipulation (ARMv7+ only)...
+void ARMAssembler::UBFX(int cc, int Rd, int Rn, int lsb, int width)
+{   // cc[31:28] | 0111111[27:21] | width-1[20:16] | Rd[15:12] | lsb[11:7] | 101[6:4] | Rn[3:0]
+    *mPC++ = (cc<<28) | 0x7E00000 | ((width-1)<<16) | (Rd<<12) | (lsb<<7) | 0x50 | Rn;
+}
 
 }; // namespace android
 
diff --git a/libpixelflinger/codeflinger/ARMAssembler.h b/libpixelflinger/codeflinger/ARMAssembler.h
index a667cb5..e7f038a 100644
--- a/libpixelflinger/codeflinger/ARMAssembler.h
+++ b/libpixelflinger/codeflinger/ARMAssembler.h
@@ -124,6 +124,7 @@
     virtual void SMLAW(int cc, int y,
                 int Rd, int Rm, int Rs, int Rn);
     virtual void UXTB16(int cc, int Rd, int Rm, int rotate);
+    virtual void UBFX(int cc, int Rd, int Rn, int lsb, int width);
 
 private:
                 ARMAssembler(const ARMAssembler& rhs);
diff --git a/libpixelflinger/codeflinger/ARMAssemblerInterface.h b/libpixelflinger/codeflinger/ARMAssemblerInterface.h
index ff6af2a..796342a 100644
--- a/libpixelflinger/codeflinger/ARMAssemblerInterface.h
+++ b/libpixelflinger/codeflinger/ARMAssemblerInterface.h
@@ -206,6 +206,9 @@
     // byte/half word extract...
     virtual void UXTB16(int cc, int Rd, int Rm, int rotate) = 0;
 
+    // bit manipulation...
+    virtual void UBFX(int cc, int Rd, int Rn, int lsb, int width) = 0;
+
     // -----------------------------------------------------------------------
     // convenience...
     // -----------------------------------------------------------------------
diff --git a/libpixelflinger/codeflinger/ARMAssemblerProxy.cpp b/libpixelflinger/codeflinger/ARMAssemblerProxy.cpp
index 7c422db..c57d7da 100644
--- a/libpixelflinger/codeflinger/ARMAssemblerProxy.cpp
+++ b/libpixelflinger/codeflinger/ARMAssemblerProxy.cpp
@@ -199,5 +199,9 @@
     mTarget->UXTB16(cc, Rd, Rm, rotate);
 }
 
+void ARMAssemblerProxy::UBFX(int cc, int Rd, int Rn, int lsb, int width) {
+    mTarget->UBFX(cc, Rd, Rn, lsb, width);  // forwarded unchanged to the wrapped assembler
+}
+
 }; // namespace android
 
diff --git a/libpixelflinger/codeflinger/ARMAssemblerProxy.h b/libpixelflinger/codeflinger/ARMAssemblerProxy.h
index 9134cce..8c7f270 100644
--- a/libpixelflinger/codeflinger/ARMAssemblerProxy.h
+++ b/libpixelflinger/codeflinger/ARMAssemblerProxy.h
@@ -115,6 +115,7 @@
                 int Rd, int Rm, int Rs, int Rn);
 
     virtual void UXTB16(int cc, int Rd, int Rm, int rotate);
+    virtual void UBFX(int cc, int Rd, int Rn, int lsb, int width);
 
 private:
     ARMAssemblerInterface*  mTarget;
diff --git a/libpixelflinger/codeflinger/disassem.c b/libpixelflinger/codeflinger/disassem.c
index c17f3ec..aeb8034 100644
--- a/libpixelflinger/codeflinger/disassem.c
+++ b/libpixelflinger/codeflinger/disassem.c
@@ -81,6 +81,8 @@
  * g - 2nd fp operand (register) (bits 16-18)
  * h - 3rd fp operand (register/immediate) (bits 0-4)
  * j - xtb rotate literal (bits 10-11)
+ * i - bfx lsb literal (bits 7-11)
+ * w - bfx width literal (bits 16-20)
  * b - branch address
  * t - thumb branch address (bits 24, 0-23)
  * k - breakpoint comment (bits 0-3, 8-19)
@@ -124,6 +126,7 @@
     { 0x0fe000f0, 0x00a00090, "umlal",	"Sdnms" },
     { 0x0fe000f0, 0x00e00090, "smlal",	"Sdnms" },
     { 0x0fff03f0, 0x06cf0070, "uxtb16", "dmj" },
+    { 0x0fe00070, 0x07e00050, "ubfx",   "dmiw" },
     { 0x0d700000, 0x04200000, "strt",	"daW" },
     { 0x0d700000, 0x04300000, "ldrt",	"daW" },
     { 0x0d700000, 0x04600000, "strbt",	"daW" },
@@ -412,6 +415,14 @@
 		case 'j':
 			di->di_printf("ror #%d", ((insn >> 10) & 3) << 3);
 			break;
+        /* i - bfx lsb literal (bits 7-11) */
+        case 'i':
+            di->di_printf("#%d", (insn >> 7) & 31);
+            break;
+        /* w - bfx width literal (bits 16-20) */
+        case 'w':
+            di->di_printf("#%d", 1 + ((insn >> 16) & 31));
+            break;
 		/* b - branch address */
 		case 'b':
 			branch = ((insn << 2) & 0x03ffffff);
diff --git a/libpixelflinger/codeflinger/load_store.cpp b/libpixelflinger/codeflinger/load_store.cpp
index 93c5825..ed20a00 100644
--- a/libpixelflinger/codeflinger/load_store.cpp
+++ b/libpixelflinger/codeflinger/load_store.cpp
@@ -18,9 +18,12 @@
 #include <assert.h>
 #include <stdio.h>
 #include <cutils/log.h>
-
 #include "codeflinger/GGLAssembler.h"
 
+#ifdef __ARM_ARCH__
+#include <machine/cpu-features.h>
+#endif
+
 namespace android {
 
 // ----------------------------------------------------------------------------
@@ -110,6 +113,20 @@
     assert(maskLen<=8);
     assert(h);
     
+#if __ARM_ARCH__ >= 7
+    const int mask = (1<<maskLen)-1;
+    if ((h == bits) && !l && (s != d.reg)) {
+        MOV(AL, 0, d.reg, s);                   // component = packed;
+    } else if ((h == bits) && l) {
+        MOV(AL, 0, d.reg, reg_imm(s, LSR, l));  // component = packed >> l;
+    } else if (!l && isValidImmediate(mask)) {
+        AND(AL, 0, d.reg, s, imm(mask));        // component = packed & mask;
+    } else if (!l && isValidImmediate(~mask)) {
+        BIC(AL, 0, d.reg, s, imm(~mask));       // component = packed & mask;
+    } else {
+        UBFX(AL, d.reg, s, l, maskLen);         // component = (packed & mask) >> l;
+    }
+#else
     if (h != bits) {
         const int mask = ((1<<maskLen)-1) << l;
         if (isValidImmediate(mask)) {
@@ -132,6 +149,7 @@
     if (s != d.reg) {
         MOV(AL, 0, d.reg, s);
     }
+#endif
 
     d.s = maskLen;
 }
diff --git a/libpixelflinger/col32cb16blend.S b/libpixelflinger/col32cb16blend.S
index 1450bde..1831255 100644
--- a/libpixelflinger/col32cb16blend.S
+++ b/libpixelflinger/col32cb16blend.S
@@ -1,20 +1,19 @@
 /* libs/pixelflinger/col32cb16blend.S
-**
-** (C) COPYRIGHT 2009 ARM Limited.
-**
-** Licensed under the Apache License, Version 2.0 (the "License"); 
-** you may not use this file except in compliance with the License. 
-** You may obtain a copy of the License at 
-**
-**     http://www.apache.org/licenses/LICENSE-2.0 
-**
-** Unless required by applicable law or agreed to in writing, software 
-** distributed under the License is distributed on an "AS IS" BASIS, 
-** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-** See the License for the specific language governing permissions and 
-** limitations under the License.
-**
-*/
+ *
+ * Copyright (C) 2009 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
 
     .text
     .align
diff --git a/libpixelflinger/col32cb16blend_neon.S b/libpixelflinger/col32cb16blend_neon.S
index 17b0d01..cbd54d1 100644
--- a/libpixelflinger/col32cb16blend_neon.S
+++ b/libpixelflinger/col32cb16blend_neon.S
@@ -1,20 +1,20 @@
 /* libs/pixelflinger/col32cb16blend_neon.S
-**
-** (C) COPYRIGHT 2009 ARM Limited.
-**
-** Licensed under the Apache License, Version 2.0 (the "License"); 
-** you may not use this file except in compliance with the License. 
-** You may obtain a copy of the License at 
-**
-**     http://www.apache.org/licenses/LICENSE-2.0 
-**
-** Unless required by applicable law or agreed to in writing, software 
-** distributed under the License is distributed on an "AS IS" BASIS, 
-** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-** See the License for the specific language governing permissions and 
-** limitations under the License.
-**
-*/
+ *
+ * Copyright (C) 2009 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
 
     .text
     .align
diff --git a/libpixelflinger/raster.cpp b/libpixelflinger/raster.cpp
index d751202..32b2a97 100644
--- a/libpixelflinger/raster.cpp
+++ b/libpixelflinger/raster.cpp
@@ -143,7 +143,7 @@
 
 using namespace android;
 
-GGLint gglBitBlti(GGLContext* con, int tmu, GGLint crop[4], GGLint where[4])
+GGLint gglBitBlit(GGLContext* con, int tmu, GGLint crop[4], GGLint where[4])
 {
     GGL_CONTEXT(c, (void*)con);
 
diff --git a/libpixelflinger/scanline.cpp b/libpixelflinger/scanline.cpp
index a2f43eb..8fba147 100644
--- a/libpixelflinger/scanline.cpp
+++ b/libpixelflinger/scanline.cpp
@@ -1,6 +1,6 @@
 /* libs/pixelflinger/scanline.cpp
 **
-** Copyright 2006, The Android Open Source Project
+** Copyright 2006-2011, The Android Open Source Project
 **
 ** Licensed under the Apache License, Version 2.0 (the "License"); 
 ** you may not use this file except in compliance with the License. 
@@ -57,6 +57,11 @@
 
 #define DEBUG__CODEGEN_ONLY     0
 
+/* Set to 1 to dump to the log the states that need a new
+ * code-generated scanline callback, i.e. those that don't
+ * have a corresponding shortcut function.
+ */
+#define DEBUG_NEEDS  0
 
 #define ASSEMBLY_SCRATCH_SIZE   2048
 
@@ -79,8 +84,21 @@
 static void scanline_perspective(context_t* c);
 static void scanline_perspective_single(context_t* c);
 static void scanline_t32cb16blend(context_t* c);
+static void scanline_t32cb16blend_dither(context_t* c);
+static void scanline_t32cb16blend_srca(context_t* c);
+static void scanline_t32cb16blend_clamp(context_t* c);
+static void scanline_t32cb16blend_clamp_dither(context_t* c);
+static void scanline_t32cb16blend_clamp_mod(context_t* c);
+static void scanline_x32cb16blend_clamp_mod(context_t* c);
+static void scanline_t32cb16blend_clamp_mod_dither(context_t* c);
+static void scanline_x32cb16blend_clamp_mod_dither(context_t* c);
 static void scanline_t32cb16(context_t* c);
+static void scanline_t32cb16_dither(context_t* c);
+static void scanline_t32cb16_clamp(context_t* c);
+static void scanline_t32cb16_clamp_dither(context_t* c);
 static void scanline_col32cb16blend(context_t* c);
+static void scanline_t16cb16_clamp(context_t* c);
+static void scanline_t16cb16blend_clamp_mod(context_t* c);
 static void scanline_memcpy(context_t* c);
 static void scanline_memset8(context_t* c);
 static void scanline_memset16(context_t* c);
@@ -99,6 +117,13 @@
 
 // ----------------------------------------------------------------------------
 
+static inline uint16_t  convertAbgr8888ToRgb565(uint32_t  pix)
+{   // reads pix[7:3], pix[15:10] and pix[23:19]; pix[31:24] is ignored
+    const uint32_t hi5  = (pix << 8) & 0xf800;   // pix[7:3]   -> result bits 15..11
+    const uint32_t mid6 = (pix >> 5) & 0x07e0;   // pix[15:10] -> result bits 10..5
+    return uint16_t( hi5 | mid6 | ((pix >> 19) & 0x001f) );  // pix[23:19] -> bits 4..0
+}
+
 struct shortcut_t {
     needs_filter_t  filter;
     const char*     desc;
@@ -107,13 +132,95 @@
 };
 
 // Keep in sync with needs
+
+/* To understand the values here, have a look at:
+ *     system/core/include/private/pixelflinger/ggl_context.h
+ *
+ * Especially the lines defining and using GGL_RESERVE_NEEDS
+ *
+ * Quick reminders:
+ *   - the last nibble of the first value is the destination buffer format.
+ *   - the last nibble of the third value is the source texture format
+ *   - formats: 4=rgb565 1=abgr8888 2=xbgr8888
+ *
+ * In the descriptions below:
+ *
+ *   SRC      means we copy the source pixels to the destination
+ *
+ *   SRC_OVER means we blend the source pixels to the destination
+ *            with dstFactor = 1-srcA, srcFactor=1  (premultiplied source).
+ *            This mode is otherwise called 'blend'.
+ *
+ *   SRCA_OVER means we blend the source pixels to the destination
+ *             with dstFactor=1-srcA srcFactor=srcA (non-premul source).
+ *             This mode is otherwise called 'blend_srca'
+ *
+ *   clamp    means we fetch source pixels from a texture with u/v clamping
+ *
+ *   mod      means the source pixels are modulated (multiplied) by the
+ *            a/r/g/b of the current context's color. Typically used for
+ *            fade-in / fade-out.
+ *
+ *   dither   means we dither 32 bit values to 16 bits
+ */
 static shortcut_t shortcuts[] = {
     { { { 0x03515104, 0x00000077, { 0x00000A01, 0x00000000 } },
         { 0xFFFFFFFF, 0xFFFFFFFF, { 0xFFFFFFFF, 0x0000003F } } },
-        "565 fb, 8888 tx, blend", scanline_t32cb16blend, init_y_noop },
+        "565 fb, 8888 tx, blend SRC_OVER", scanline_t32cb16blend, init_y_noop },
     { { { 0x03010104, 0x00000077, { 0x00000A01, 0x00000000 } },
         { 0xFFFFFFFF, 0xFFFFFFFF, { 0xFFFFFFFF, 0x0000003F } } },
-        "565 fb, 8888 tx", scanline_t32cb16, init_y_noop  },  
+        "565 fb, 8888 tx, SRC", scanline_t32cb16, init_y_noop  },
+    /* same as first entry, but with dithering */
+    { { { 0x03515104, 0x00000177, { 0x00000A01, 0x00000000 } },
+        { 0xFFFFFFFF, 0xFFFFFFFF, { 0xFFFFFFFF, 0x0000003F } } },
+        "565 fb, 8888 tx, blend SRC_OVER dither", scanline_t32cb16blend_dither, init_y_noop },
+    /* same as second entry, but with dithering */
+    { { { 0x03010104, 0x00000177, { 0x00000A01, 0x00000000 } },
+        { 0xFFFFFFFF, 0xFFFFFFFF, { 0xFFFFFFFF, 0x0000003F } } },
+        "565 fb, 8888 tx, SRC dither", scanline_t32cb16_dither, init_y_noop  },
+    /* this is used during the boot animation - CHEAT: ignore dithering */
+    { { { 0x03545404, 0x00000077, { 0x00000A01, 0x00000000 } },
+        { 0xFFFFFFFF, 0xFFFFFEFF, { 0xFFFFFFFF, 0x0000003F } } },
+        "565 fb, 8888 tx, blend dst:ONE_MINUS_SRCA src:SRCA", scanline_t32cb16blend_srca, init_y_noop },
+    /* special case for arbitrary texture coordinates (think scaling) */
+    { { { 0x03515104, 0x00000077, { 0x00000001, 0x00000000 } },
+        { 0xFFFFFFFF, 0xFFFFFFFF, { 0xFFFFFFFF, 0x0000003F } } },
+        "565 fb, 8888 tx, SRC_OVER clamp", scanline_t32cb16blend_clamp, init_y },
+    { { { 0x03515104, 0x00000177, { 0x00000001, 0x00000000 } },
+        { 0xFFFFFFFF, 0xFFFFFFFF, { 0xFFFFFFFF, 0x0000003F } } },
+        "565 fb, 8888 tx, SRC_OVER clamp dither", scanline_t32cb16blend_clamp_dither, init_y },
+    /* another case used during emulation */
+    { { { 0x03515104, 0x00000077, { 0x00001001, 0x00000000 } },
+        { 0xFFFFFFFF, 0xFFFFFFFF, { 0xFFFFFFFF, 0x0000003F } } },
+        "565 fb, 8888 tx, SRC_OVER clamp modulate", scanline_t32cb16blend_clamp_mod, init_y },
+    /* and this */
+    { { { 0x03515104, 0x00000077, { 0x00001002, 0x00000000 } },
+        { 0xFFFFFFFF, 0xFFFFFFFF, { 0xFFFFFFFF, 0x0000003F } } },
+        "565 fb, x888 tx, SRC_OVER clamp modulate", scanline_x32cb16blend_clamp_mod, init_y },
+    { { { 0x03515104, 0x00000177, { 0x00001001, 0x00000000 } },
+        { 0xFFFFFFFF, 0xFFFFFFFF, { 0xFFFFFFFF, 0x0000003F } } },
+        "565 fb, 8888 tx, SRC_OVER clamp modulate dither", scanline_t32cb16blend_clamp_mod_dither, init_y },
+    { { { 0x03515104, 0x00000177, { 0x00001002, 0x00000000 } },
+        { 0xFFFFFFFF, 0xFFFFFFFF, { 0xFFFFFFFF, 0x0000003F } } },
+        "565 fb, x888 tx, SRC_OVER clamp modulate dither", scanline_x32cb16blend_clamp_mod_dither, init_y },
+    { { { 0x03010104, 0x00000077, { 0x00000001, 0x00000000 } },
+        { 0xFFFFFFFF, 0xFFFFFFFF, { 0xFFFFFFFF, 0x0000003F } } },
+        "565 fb, 8888 tx, SRC clamp", scanline_t32cb16_clamp, init_y  },
+    { { { 0x03010104, 0x00000077, { 0x00000002, 0x00000000 } },
+        { 0xFFFFFFFF, 0xFFFFFFFF, { 0xFFFFFFFF, 0x0000003F } } },
+        "565 fb, x888 tx, SRC clamp", scanline_t32cb16_clamp, init_y  },
+    { { { 0x03010104, 0x00000177, { 0x00000001, 0x00000000 } },
+        { 0xFFFFFFFF, 0xFFFFFFFF, { 0xFFFFFFFF, 0x0000003F } } },
+        "565 fb, 8888 tx, SRC clamp dither", scanline_t32cb16_clamp_dither, init_y  },
+    { { { 0x03010104, 0x00000177, { 0x00000002, 0x00000000 } },
+        { 0xFFFFFFFF, 0xFFFFFFFF, { 0xFFFFFFFF, 0x0000003F } } },
+        "565 fb, x888 tx, SRC clamp dither", scanline_t32cb16_clamp_dither, init_y  },
+    { { { 0x03010104, 0x00000077, { 0x00000004, 0x00000000 } },
+        { 0xFFFFFFFF, 0xFFFFFFFF, { 0xFFFFFFFF, 0x0000003F } } },
+        "565 fb, 565 tx, SRC clamp", scanline_t16cb16_clamp, init_y  },
+    { { { 0x03515104, 0x00000077, { 0x00001004, 0x00000000 } },
+        { 0xFFFFFFFF, 0xFFFFFFFF, { 0xFFFFFFFF, 0x0000003F } } },
+        "565 fb, 565 tx, SRC_OVER clamp", scanline_t16cb16blend_clamp_mod, init_y  },
     { { { 0x03515104, 0x00000077, { 0x00000000, 0x00000000 } },
         { 0xFFFFFFFF, 0xFFFFFFFF, { 0xFFFFFFFF, 0xFFFFFFFF } } },
         "565 fb, 8888 fixed color", scanline_col32cb16blend, init_y_packed  },  
@@ -243,6 +350,12 @@
         }
     }
 
+#if DEBUG_NEEDS  /* test the value: DEBUG_NEEDS is always defined (0 by default), so #ifdef never disables this */
+    LOGI("Needs: n=0x%08x p=0x%08x t0=0x%08x t1=0x%08x",
+         c->state.needs.n, c->state.needs.p,
+         c->state.needs.t[0], c->state.needs.t[1]);
+#endif
+
 #endif // DEBUG__CODEGEN_ONLY
 
     c->init_y = init_y;
@@ -797,6 +910,678 @@
 #pragma mark Scanline
 #endif
 
+/* Reads a 32-bit source texture linearly, one pixel per call. Usage is:
+ *
+ * horz_iterator32  hi(context);
+ * while (...) {
+ *    uint32_t  src_pixel = hi.get_pixel32();
+ *    ...
+ * }
+ *
+ * Use only for one-to-one texture mapping: there is no scaling or clamping.
+ */
+struct horz_iterator32 {
+    horz_iterator32(context_t* c) {
+        const int x = c->iterators.xl;
+        const int y = c->iterators.y;
+        texture_t& tx = c->state.texture[0];
+        const int32_t u = (tx.shade.is0>>16) + x;  // texel column of the first pixel
+        const int32_t v = (tx.shade.it0>>16) + y;  // texel row of this scanline
+        m_src = reinterpret_cast<uint32_t*>(tx.surface.data)+(u+(tx.surface.stride*v));  // stride is applied in 32-bit texels
+    }
+    uint32_t  get_pixel32() {
+        return *m_src++;  // return the current texel, then move to the next one
+    }
+protected:
+    uint32_t* m_src;  // next texel to read
+};
+
+/* A variant of horz_iterator32 for 16-bit source textures (no scaling or clamping). */
+struct horz_iterator16 {
+    horz_iterator16(context_t* c) {
+        const int x = c->iterators.xl;
+        const int y = c->iterators.y;
+        texture_t& tx = c->state.texture[0];
+        const int32_t u = (tx.shade.is0>>16) + x;  // texel column of the first pixel
+        const int32_t v = (tx.shade.it0>>16) + y;  // texel row of this scanline
+        m_src = reinterpret_cast<uint16_t*>(tx.surface.data)+(u+(tx.surface.stride*v));  // stride is applied in 16-bit texels
+    }
+    uint16_t  get_pixel16() {
+        return *m_src++;  // return the current texel, then move to the next one
+    }
+protected:
+    uint16_t* m_src;  // next texel to read
+};
+
+/* A clamp iterator is used to iterate inside a texture with GGL_CLAMP.
+ * After initialization, call get_pixel16() or get_pixel32() to get the
+ * current texture pixel value; each call also advances to the next pixel.
+ */
+struct clamp_iterator {
+    clamp_iterator(context_t* c) {
+        const int xs = c->iterators.xl;
+        texture_t& tx = c->state.texture[0];
+        texture_iterators_t& ti = tx.iterators;
+        m_s = (xs * ti.dsdx) + ti.ydsdy;  // s coordinate at the first pixel
+        m_t = (xs * ti.dtdx) + ti.ydtdy;  // t coordinate at the first pixel
+        m_ds = ti.dsdx;                   // step in s for each pixel
+        m_dt = ti.dtdx;                   // step in t for each pixel
+        m_width_m1 = tx.surface.width - 1;
+        m_height_m1 = tx.surface.height - 1;
+        m_data = tx.surface.data;
+        m_stride = tx.surface.stride;
+    }
+    uint16_t get_pixel16() {
+        int  u, v;
+        get_uv(u, v);  // clamped texel coordinates; also advances the iterator
+        uint16_t* src = reinterpret_cast<uint16_t*>(m_data) + (u + (m_stride*v));
+        return src[0];
+    }
+    uint32_t get_pixel32() {
+        int  u, v;
+        get_uv(u, v);  // clamped texel coordinates; also advances the iterator
+        uint32_t* src = reinterpret_cast<uint32_t*>(m_data) + (u + (m_stride*v));
+        return src[0];
+    }
+private:
+    void   get_uv(int& u, int& v) {  // outputs clamped (u, v), then steps s and t
+        int  uu = m_s >> 16;  // drop the low 16 bits (presumably the 16.16 fraction)
+        int  vv = m_t >> 16;
+        if (uu < 0)
+            uu = 0;
+        if (uu > m_width_m1)
+            uu = m_width_m1;
+        if (vv < 0)
+            vv = 0;
+        if (vv > m_height_m1)
+            vv = m_height_m1;
+        u = uu;
+        v = vv;
+        m_s += m_ds;
+        m_t += m_dt;
+    }
+
+    GGLfixed  m_s, m_t;                   // current texture coordinates
+    GGLfixed  m_ds, m_dt;                 // per-pixel increments
+    int       m_width_m1, m_height_m1;    // largest valid u and v
+    uint8_t*  m_data;                     // texture surface data
+    int       m_stride;                   // row pitch, applied in texels by the getters
+};
+
+/*
+ * The 'horizontal clamp iterator' variant corresponds to the case where
+ * the 'v' coordinate doesn't change. This is useful to avoid one mult and
+ * extra adds / checks per pixel, if the blending/processing operation after
+ * this is very fast.
+ */
+static int is_context_horizontal(const context_t* c) {
+    return (c->state.texture[0].iterators.dtdx == 0);  // non-zero when t does not change along x
+}
+
+struct horz_clamp_iterator {  // clamped texel reader for a single, fixed texture row
+    uint16_t  get_pixel16() {
+        int  u = m_s >> 16;  // texel column before clamping
+        m_s += m_ds;         // advance for the next call
+        if (u < 0)
+            u = 0;
+        if (u > m_width_m1)
+            u = m_width_m1;
+        const uint16_t* src = reinterpret_cast<const uint16_t*>(m_data);
+        return src[u];
+    }
+    uint32_t  get_pixel32() {
+        int  u = m_s >> 16;  // texel column before clamping
+        m_s += m_ds;         // advance for the next call
+        if (u < 0)
+            u = 0;
+        if (u > m_width_m1)
+            u = m_width_m1;
+        const uint32_t* src = reinterpret_cast<const uint32_t*>(m_data);
+        return src[u];
+    }
+protected:
+    void init(const context_t* c, int shift);  // shift = log2(bytes per texel)
+    GGLfixed       m_s;         // current s coordinate
+    GGLfixed       m_ds;        // per-pixel increment of s
+    int            m_width_m1;  // largest valid u
+    const uint8_t* m_data;      // start of the selected texture row
+};
+
+void horz_clamp_iterator::init(const context_t* c, int shift)  // shift: 1 for 16-bit texels, 2 for 32-bit
+{
+    const int xs = c->iterators.xl;
+    const texture_t& tx = c->state.texture[0];
+    const texture_iterators_t& ti = tx.iterators;
+    m_s = (xs * ti.dsdx) + ti.ydsdy;
+    m_ds = ti.dsdx;
+    m_width_m1 = tx.surface.width-1;
+    m_data = tx.surface.data;
+    // The row is computed once here; get_pixel16/32 never change it.
+    GGLfixed t = (xs * ti.dtdx) + ti.ydtdy;
+    int      v = t >> 16;
+    if (v < 0)
+        v = 0;
+    else if (v >= (int)tx.surface.height)
+        v = (int)tx.surface.height-1;
+    // m_data is a byte pointer, so the texel offset is scaled to bytes by the shift.
+    m_data += (tx.surface.stride*v) << shift;
+}
+
+struct horz_clamp_iterator16 : horz_clamp_iterator {  // variant for 16-bit texels
+    horz_clamp_iterator16(const context_t* c) {
+        init(c,1);  // shift 1: row offset is scaled by 2 bytes per texel
+    };
+};
+
+struct horz_clamp_iterator32 : horz_clamp_iterator {  // variant for 32-bit texels
+    horz_clamp_iterator32(const context_t* c) {  // const, matching init() and horz_clamp_iterator16
+        init(c,2);  // shift 2: row offset is scaled by 4 bytes per texel
+    }
+};
+
+/* This is used to perform dithering operations: one threshold value is consumed per pixel.
+ */
+struct ditherer {
+    ditherer(const context_t* c) {
+        const int x = c->iterators.xl;
+        const int y = c->iterators.y;
+        m_line = &c->ditherMatrix[ ((y & GGL_DITHER_MASK)<<GGL_DITHER_ORDER_SHIFT) ];  // matrix row selected by y
+        m_index = x & GGL_DITHER_MASK;  // starting column selected by x
+    }
+    void step(void) {  // skip one pixel without reading its threshold
+        m_index++;
+    }
+    int  get_value(void) {  // threshold for the current pixel; advances to the next one
+        int ret = m_line[m_index & GGL_DITHER_MASK];
+        m_index++;
+        return ret;
+    }
+    uint16_t abgr8888ToRgb565(uint32_t s) {  // expects R in the low byte, then G, then B
+        uint32_t r = s & 0xff;
+        uint32_t g = (s >> 8) & 0xff;
+        uint32_t b = (s >> 16) & 0xff;
+        return rgb888ToRgb565(r,g,b);
+    }
+    /* The following assumes that r/g/b are in the 0..255 range each; note that it modifies them */
+    uint16_t rgb888ToRgb565(uint32_t& r, uint32_t& g, uint32_t &b) {
+        int threshold = get_value();
+        /* threshold is on GGL_DITHER_BITS bits, and each of r, g, b is on 8 bits */
+        r += (threshold >> (GGL_DITHER_BITS-8 +5));
+        g += (threshold >> (GGL_DITHER_BITS-8 +6));
+        b += (threshold >> (GGL_DITHER_BITS-8 +5));
+        if (r > 0xff)
+            r = 0xff;
+        if (g > 0xff)
+            g = 0xff;
+        if (b > 0xff)
+            b = 0xff;
+        return uint16_t(((r & 0xf8) << 8) | ((g & 0xfc) << 3) | (b >> 3));  // pack as 5-6-5
+    }
+protected:
+    const uint8_t* m_line;   // current row of the dither matrix
+    int            m_index;  // column counter; masked with GGL_DITHER_MASK on read
+};
+
+/* This structure is used to blend (SRC_OVER) 32-bit source pixels
+ * onto 16-bit destination ones. Usage is simply:
+ *
+ *   blender.write(<32-bit-src-pixel-value>,<ptr-to-16-bit-dest-pixel>)
+ */
+struct blender_32to16 {
+    blender_32to16(context_t* c) { }
+    void write(uint32_t s, uint16_t* dst) {
+        if (s == 0)  // all-zero source pixel: destination is left untouched
+            return;
+        s = GGL_RGBA_TO_HOST(s);
+        int sA = (s>>24);
+        if (sA == 0xff) {  // opaque source: plain conversion, no blending
+            *dst = convertAbgr8888ToRgb565(s);
+        } else {
+            int f = 0x100 - (sA + (sA>>7));  // destination weight; sA + (sA>>7) maps 255 to 256
+            int sR = (s >> (   3))&0x1F;
+            int sG = (s >> ( 8+2))&0x3F;
+            int sB = (s >> (16+3))&0x1F;
+            uint16_t d = *dst;
+            int dR = (d>>11)&0x1f;
+            int dG = (d>>5)&0x3f;
+            int dB = (d)&0x1f;
+            sR += (f*dR)>>8;  // source is added unscaled (presumably premultiplied); no clamp here
+            sG += (f*dG)>>8;
+            sB += (f*dB)>>8;
+            *dst = uint16_t((sR<<11)|(sG<<5)|sB);
+        }
+    }
+    void write(uint32_t s, uint16_t* dst, ditherer& di) {  // same blend, with dithering
+        if (s == 0) {
+            di.step();  // keep the ditherer in step with the pixel position
+            return;
+        }
+        s = GGL_RGBA_TO_HOST(s);
+        int sA = (s>>24);
+        if (sA == 0xff) {
+            *dst = di.abgr8888ToRgb565(s);
+        } else {
+            int threshold = di.get_value() << (8 - GGL_DITHER_BITS);  // scale threshold to 8 bits
+            int f = 0x100 - (sA + (sA>>7));
+            int sR = (s >> (   3))&0x1F;
+            int sG = (s >> ( 8+2))&0x3F;
+            int sB = (s >> (16+3))&0x1F;
+            uint16_t d = *dst;
+            int dR = (d>>11)&0x1f;
+            int dG = (d>>5)&0x3f;
+            int dB = (d)&0x1f;
+            sR = ((sR << 8) + f*dR + threshold)>>8;
+            sG = ((sG << 8) + f*dG + threshold)>>8;
+            sB = ((sB << 8) + f*dB + threshold)>>8;
+            if (sR > 0x1f) sR = 0x1f;
+            if (sG > 0x3f) sG = 0x3f;
+            if (sB > 0x1f) sB = 0x1f;
+            *dst = uint16_t((sR<<11)|(sG<<5)|sB);
+        }
+    }
+};
+
+/* This blender does the same for the 'blend_srca' operation,
+ * where srcFactor=srcA and dstFactor=(1-srcA)
+ */
+struct blender_32to16_srcA {
+    blender_32to16_srcA(const context_t* c) { }
+    void write(uint32_t s, uint16_t* dst) {
+        if (!s) {  // all-zero source pixel: destination is left untouched
+            return;
+        }
+        uint16_t d = *dst;
+        s = GGL_RGBA_TO_HOST(s);
+        int sR = (s >> (   3))&0x1F;
+        int sG = (s >> ( 8+2))&0x3F;
+        int sB = (s >> (16+3))&0x1F;
+        int sA = (s>>24);
+        int f1 = (sA + (sA>>7));  // source weight; maps alpha 255 to 256
+        int f2 = 0x100-f1;        // destination weight
+        int dR = (d>>11)&0x1f;
+        int dG = (d>>5)&0x3f;
+        int dB = (d)&0x1f;
+        sR = (f1*sR + f2*dR)>>8;
+        sG = (f1*sG + f2*dG)>>8;
+        sB = (f1*sB + f2*dB)>>8;
+        *dst = uint16_t((sR<<11)|(sG<<5)|sB);
+    }
+};
+
+/* Common init code for the modulating blenders */
+struct blender_modulate {
+    void init(const context_t* c) {
+        const int r = c->iterators.ydrdy >> (GGL_COLOR_BITS-8);  // reduce each color iterator to 8 bits
+        const int g = c->iterators.ydgdy >> (GGL_COLOR_BITS-8);
+        const int b = c->iterators.ydbdy >> (GGL_COLOR_BITS-8);
+        const int a = c->iterators.ydady >> (GGL_COLOR_BITS-8);
+        m_r = r + (r >> 7);  // x + (x>>7) maps 255 to 256 so that ">> 8" later acts as a full 1.0
+        m_g = g + (g >> 7);
+        m_b = b + (b >> 7);
+        m_a = a + (a >> 7);
+    }
+protected:
+    int m_r, m_g, m_b, m_a;  // modulation factors used by the derived blenders
+};
+
+/* This blender does a normal blend after modulating the source by m_r/m_g/m_b/m_a.
+ */
+struct blender_32to16_modulate : blender_modulate {
+    blender_32to16_modulate(const context_t* c) {
+        init(c);
+    }
+    void write(uint32_t s, uint16_t* dst) {
+        // blend source and destination
+        if (!s) {
+            return;
+        }
+        s = GGL_RGBA_TO_HOST(s);
+
+        /* We need to modulate s */
+        uint32_t  sA = (s >> 24);
+        uint32_t  sB = (s >> 16) & 0xff;
+        uint32_t  sG = (s >> 8) & 0xff;
+        uint32_t  sR = s & 0xff;
+
+        sA = (sA*m_a) >> 8;
+        /* Keep R/G/B scaled to 5.8 or 6.8 fixed-point format */
+        sR = (sR*m_r) >> (8 - 5);
+        sG = (sG*m_g) >> (8 - 6);
+        sB = (sB*m_b) >> (8 - 5);
+
+        /* Now do a normal blend */
+        int f = 0x100 - (sA + (sA>>7));
+        uint16_t d = *dst;
+        int dR = (d>>11)&0x1f;
+        int dG = (d>>5)&0x3f;
+        int dB = (d)&0x1f;
+        sR = (sR + f*dR)>>8;  // no clamp in the non-dithered path
+        sG = (sG + f*dG)>>8;
+        sB = (sB + f*dB)>>8;
+        *dst = uint16_t((sR<<11)|(sG<<5)|sB);
+    }
+    void write(uint32_t s, uint16_t* dst, ditherer& di) {
+        // blend source and destination
+        if (!s) {
+            di.step();  // keep the ditherer in step with the pixel position
+            return;
+        }
+        s = GGL_RGBA_TO_HOST(s);
+
+        /* We need to modulate s */
+        uint32_t  sA = (s >> 24);
+        uint32_t  sB = (s >> 16) & 0xff;
+        uint32_t  sG = (s >> 8) & 0xff;
+        uint32_t  sR = s & 0xff;
+
+        sA = (sA*m_a) >> 8;
+        /* keep R/G/B scaled to 5.8 or 6.8 fixed-point format */
+        sR = (sR*m_r) >> (8 - 5);
+        sG = (sG*m_g) >> (8 - 6);
+        sB = (sB*m_b) >> (8 - 5);
+
+        /* Scale threshold to 0.8 fixed-point format */
+        int threshold = di.get_value() << (8 - GGL_DITHER_BITS);
+        int f = 0x100 - (sA + (sA>>7));
+        uint16_t d = *dst;
+        int dR = (d>>11)&0x1f;
+        int dG = (d>>5)&0x3f;
+        int dB = (d)&0x1f;
+        sR = (sR + f*dR + threshold)>>8;
+        sG = (sG + f*dG + threshold)>>8;
+        sB = (sB + f*dB + threshold)>>8;
+        if (sR > 0x1f) sR = 0x1f;
+        if (sG > 0x3f) sG = 0x3f;
+        if (sB > 0x1f) sB = 0x1f;
+        *dst = uint16_t((sR<<11)|(sG<<5)|sB);
+    }
+};
+
+/* Same as 32to16_modulate, except that the source alpha byte is ignored (xRGB instead of ARGB input) */
+struct blender_x32to16_modulate : blender_modulate {
+    blender_x32to16_modulate(const context_t* c) {
+        init(c);
+    }
+    void write(uint32_t s, uint16_t* dst) {
+        s = GGL_RGBA_TO_HOST(s);
+
+        uint32_t  sB = (s >> 16) & 0xff;
+        uint32_t  sG = (s >> 8) & 0xff;
+        uint32_t  sR = s & 0xff;
+
+        /* Keep R/G/B in 5.8 or 6.8 format */
+        sR = (sR*m_r) >> (8 - 5);
+        sG = (sG*m_g) >> (8 - 6);
+        sB = (sB*m_b) >> (8 - 5);
+
+        int f = 0x100 - m_a;  // destination weight comes from the modulation alpha only
+        uint16_t d = *dst;
+        int dR = (d>>11)&0x1f;
+        int dG = (d>>5)&0x3f;
+        int dB = (d)&0x1f;
+        sR = (sR + f*dR)>>8;  // no clamp in the non-dithered path
+        sG = (sG + f*dG)>>8;
+        sB = (sB + f*dB)>>8;
+        *dst = uint16_t((sR<<11)|(sG<<5)|sB);
+    }
+    void write(uint32_t s, uint16_t* dst, ditherer& di) {
+        s = GGL_RGBA_TO_HOST(s);
+
+        uint32_t  sB = (s >> 16) & 0xff;
+        uint32_t  sG = (s >> 8) & 0xff;
+        uint32_t  sR = s & 0xff;
+
+        sR = (sR*m_r) >> (8 - 5);
+        sG = (sG*m_g) >> (8 - 6);
+        sB = (sB*m_b) >> (8 - 5);
+
+        /* Now do a normal blend, adding the dither threshold scaled to 8 bits */
+        int threshold = di.get_value() << (8 - GGL_DITHER_BITS);
+        int f = 0x100 - m_a;
+        uint16_t d = *dst;
+        int dR = (d>>11)&0x1f;
+        int dG = (d>>5)&0x3f;
+        int dB = (d)&0x1f;
+        sR = (sR + f*dR + threshold)>>8;
+        sG = (sG + f*dG + threshold)>>8;
+        sB = (sB + f*dB + threshold)>>8;
+        if (sR > 0x1f) sR = 0x1f;
+        if (sG > 0x3f) sG = 0x3f;
+        if (sB > 0x1f) sB = 0x1f;
+        *dst = uint16_t((sR<<11)|(sG<<5)|sB);
+    }
+};
+
+/* Same as blender_x32to16_modulate, but the source is 16-bit rgb565 */
+struct blender_16to16_modulate : blender_modulate {
+    blender_16to16_modulate(const context_t* c) {
+        init(c);
+    }
+    void write(uint16_t s16, uint16_t* dst) {
+        uint32_t  s = s16;
+
+        uint32_t  sR = s >> 11;
+        uint32_t  sG = (s >> 5) & 0x3f;
+        uint32_t  sB = s & 0x1f;
+
+        sR = (sR*m_r);  // 5 or 6 bit channel times the factor: already scaled by 256
+        sG = (sG*m_g);
+        sB = (sB*m_b);
+
+        int f = 0x100 - m_a;  // destination weight comes from the modulation alpha only
+        uint16_t d = *dst;
+        int dR = (d>>11)&0x1f;
+        int dG = (d>>5)&0x3f;
+        int dB = (d)&0x1f;
+        sR = (sR + f*dR)>>8;
+        sG = (sG + f*dG)>>8;
+        sB = (sB + f*dB)>>8;
+        *dst = uint16_t((sR<<11)|(sG<<5)|sB);
+    }
+};
+
+/* This is used to iterate over a 16-bit destination color buffer.
+ * Usage is:
+ *
+ *   dst_iterator16  di(context);
+ *   while (di.count--) {
+ *       <do stuff with dest pixel at di.dst>
+ *       di.dst++;
+ *   }
+ */
+struct dst_iterator16 {
+    dst_iterator16(const context_t* c) {
+        const int x = c->iterators.xl;
+        const int width = c->iterators.xr - x;  // number of pixels from xl up to xr
+        const int32_t y = c->iterators.y;
+        const surface_t* cb = &(c->state.buffers.color);
+        count = width;
+        dst = reinterpret_cast<uint16_t*>(cb->data) + (x+(cb->stride*y));  // stride is applied in 16-bit pixels
+    }
+    int        count;  // pixels left to write; callers decrement it
+    uint16_t*  dst;    // current destination pixel; callers advance it
+};
+
+
+static void scanline_t32cb16_clamp(context_t* c)  // 32-bit texture to 16-bit buffer, SRC, clamped
+{
+    dst_iterator16  di(c);
+    /* Texels go through GGL_RGBA_TO_HOST before packing, as in scanline_t32cb16 and the blenders. */
+    if (is_context_horizontal(c)) {
+        /* Special case for simple horizontal scaling */
+        horz_clamp_iterator32 ci(c);
+        while (di.count--) {
+            uint32_t s = ci.get_pixel32();
+            *di.dst++ = convertAbgr8888ToRgb565(GGL_RGBA_TO_HOST(s));
+        }
+    } else {
+        /* General case */
+        clamp_iterator ci(c);
+        while (di.count--) {
+            uint32_t s = ci.get_pixel32();
+            *di.dst++ = convertAbgr8888ToRgb565(GGL_RGBA_TO_HOST(s));
+        }
+    }
+}
+
+static void scanline_t32cb16_dither(context_t* c)  // 32-bit texture to 16-bit buffer, SRC, dithered
+{
+    horz_iterator32 si(c);
+    dst_iterator16  di(c);
+    ditherer        dither(c);
+    /* Texels go through GGL_RGBA_TO_HOST before packing, as in scanline_t32cb16 and the blenders. */
+    while (di.count--) {
+        uint32_t s = si.get_pixel32();
+        *di.dst++ = dither.abgr8888ToRgb565(GGL_RGBA_TO_HOST(s));
+    }
+}
+
+static void scanline_t32cb16_clamp_dither(context_t* c)  // 32-bit texture to 16-bit buffer, SRC, clamped, dithered
+{
+    dst_iterator16  di(c);
+    ditherer        dither(c);
+    /* Texels go through GGL_RGBA_TO_HOST before packing, as in scanline_t32cb16 and the blenders. */
+    if (is_context_horizontal(c)) {
+        /* Special case for simple horizontal scaling */
+        horz_clamp_iterator32 ci(c);
+        while (di.count--) {
+            uint32_t s = ci.get_pixel32();
+            *di.dst++ = dither.abgr8888ToRgb565(GGL_RGBA_TO_HOST(s));
+        }
+    } else {
+        /* General case */
+        clamp_iterator ci(c);
+        while (di.count--) {
+            uint32_t s = ci.get_pixel32();
+            *di.dst++ = dither.abgr8888ToRgb565(GGL_RGBA_TO_HOST(s));
+        }
+    }
+}
+
+static void scanline_t32cb16blend_dither(context_t* c)  // 32-bit texture blended onto 16-bit buffer, dithered
+{
+    dst_iterator16 di(c);
+    ditherer       dither(c);
+    blender_32to16 bl(c);
+    horz_iterator32  hi(c);  // linear read: no scaling or clamping
+    while (di.count--) {
+        uint32_t s = hi.get_pixel32();
+        bl.write(s, di.dst, dither);  // blends in place and consumes one dither value
+        di.dst++;
+    }
+}
+
+static void scanline_t32cb16blend_clamp(context_t* c)  // 32-bit texture blended onto 16-bit buffer, clamped
+{
+    dst_iterator16  di(c);
+    blender_32to16  bl(c);
+    /* Same loop twice; only the texel iterator differs. */
+    if (is_context_horizontal(c)) {
+        horz_clamp_iterator32 ci(c);  // texture row is fixed for the whole scanline
+        while (di.count--) {
+            uint32_t s = ci.get_pixel32();
+            bl.write(s, di.dst);
+            di.dst++;
+        }
+    } else {
+        clamp_iterator ci(c);  // general case: both coordinates step per pixel
+        while (di.count--) {
+            uint32_t s = ci.get_pixel32();
+            bl.write(s, di.dst);
+            di.dst++;
+        }
+    }
+}
+
+static void scanline_t32cb16blend_clamp_dither(context_t* c)  // clamped 32-bit texture blended onto 16-bit buffer, dithered
+{
+    dst_iterator16 di(c);
+    ditherer       dither(c);
+    blender_32to16 bl(c);
+    /* Always uses the general iterator; there is no horizontal-only fast path here. */
+    clamp_iterator ci(c);
+    while (di.count--) {
+        uint32_t s = ci.get_pixel32();
+        bl.write(s, di.dst, dither);
+        di.dst++;
+    }
+}
+
+void scanline_t32cb16blend_clamp_mod(context_t* c)  // clamped 32-bit texture, modulated, blended onto 16-bit buffer
+{
+    dst_iterator16 di(c);
+    blender_32to16_modulate bl(c);  // reads the modulation factors from the context
+    /* Always uses the general iterator; there is no horizontal-only fast path here. */
+    clamp_iterator ci(c);
+    while (di.count--) {
+        uint32_t s = ci.get_pixel32();
+        bl.write(s, di.dst);
+        di.dst++;
+    }
+}
+
+void scanline_t32cb16blend_clamp_mod_dither(context_t* c)  // dithered variant of scanline_t32cb16blend_clamp_mod
+{
+    dst_iterator16 di(c);
+    blender_32to16_modulate bl(c);  // reads the modulation factors from the context
+    ditherer dither(c);
+    /* Always uses the general iterator; there is no horizontal-only fast path here. */
+    clamp_iterator ci(c);
+    while (di.count--) {
+        uint32_t s = ci.get_pixel32();
+        bl.write(s, di.dst, dither);
+        di.dst++;
+    }
+}
+
+/* Variant of scanline_t32cb16blend_clamp_mod with a xRGB texture (texel alpha is not used) */
+void scanline_x32cb16blend_clamp_mod(context_t* c)
+{
+    dst_iterator16 di(c);
+    blender_x32to16_modulate  bl(c);  // reads the modulation factors from the context
+    /* Always uses the general iterator; there is no horizontal-only fast path here. */
+    clamp_iterator ci(c);
+    while (di.count--) {
+        uint32_t s = ci.get_pixel32();
+        bl.write(s, di.dst);
+        di.dst++;
+    }
+}
+
+void scanline_x32cb16blend_clamp_mod_dither(context_t* c)  // dithered variant of scanline_x32cb16blend_clamp_mod
+{
+    dst_iterator16 di(c);
+    blender_x32to16_modulate  bl(c);  // reads the modulation factors from the context
+    ditherer dither(c);
+    /* Always uses the general iterator; there is no horizontal-only fast path here. */
+    clamp_iterator ci(c);
+    while (di.count--) {
+        uint32_t s = ci.get_pixel32();
+        bl.write(s, di.dst, dither);
+        di.dst++;
+    }
+}
+
+void scanline_t16cb16_clamp(context_t* c)  // 16-bit texture copied to 16-bit buffer, clamped
+{
+    dst_iterator16  di(c);
+
+    /* Special case for simple horizontal scaling */
+    if (is_context_horizontal(c)) {
+        horz_clamp_iterator16 ci(c);
+        while (di.count--) {
+            *di.dst++ = ci.get_pixel16();  // texel is stored unchanged
+        }
+    } else {
+        clamp_iterator ci(c);  // general case: both coordinates step per pixel
+        while (di.count--) {
+            *di.dst++ = ci.get_pixel16();  // texel is stored unchanged
+        }
+    }
+}
+
+
+
 template <typename T, typename U>
 static inline __attribute__((const))
 T interpolate(int y, T v0, U dvdx, U dvdy) {
@@ -1322,30 +2107,24 @@
     if (ct==1 || uint32_t(dst)&2) {
 last_one:
         s = GGL_RGBA_TO_HOST( *src++ );
-        sR = (s >> (   3))&0x1F;
-        sG = (s >> ( 8+2))&0x3F;
-        sB = (s >> (16+3))&0x1F;
-        *dst++ = uint16_t((sR<<11)|(sG<<5)|sB);
+        *dst++ = convertAbgr8888ToRgb565(s);
         ct--;
     }
 
     while (ct >= 2) {
-        s = GGL_RGBA_TO_HOST( *src++ );
-        sR = (s >> (   3))&0x1F;
-        sG = (s >> ( 8+2))&0x3F;
-        sB = (s >> (16+3))&0x1F;
-        d = (sR<<11)|(sG<<5)|sB;
-        
-        s = GGL_RGBA_TO_HOST( *src++ );
-        sR = (s >> (   3))&0x1F;
-        sG = (s >> ( 8+2))&0x3F;
-        sB = (s >> (16+3))&0x1F;        
-        d |= ((sR<<11)|(sG<<5)|sB)<<16;
-
 #if BYTE_ORDER == BIG_ENDIAN
-        d = (d>>16) | (d<<16);
-#endif
+        s = GGL_RGBA_TO_HOST( *src++ );
+        d = convertAbgr8888ToRgb565_hi16(s);
 
+        s = GGL_RGBA_TO_HOST( *src++ );
+        d |= convertAbgr8888ToRgb565(s);
+#else
+        s = GGL_RGBA_TO_HOST( *src++ );
+        d = convertAbgr8888ToRgb565(s);
+
+        s = GGL_RGBA_TO_HOST( *src++ );
+        d |= convertAbgr8888ToRgb565(s) << 16;
+#endif
         *dst32++ = d;
         ct -= 2;
     }
@@ -1357,6 +2136,7 @@
 
 void scanline_t32cb16blend(context_t* c)
 {
+#if ((ANDROID_CODEGEN >= ANDROID_CODEGEN_ASM) && defined(__arm__))
     int32_t x = c->iterators.xl;
     size_t ct = c->iterators.xr - x;
     int32_t y = c->iterators.y;
@@ -1368,33 +2148,55 @@
     const int32_t v = (c->state.texture[0].shade.it0>>16) + y;
     uint32_t *src = reinterpret_cast<uint32_t*>(tex->data)+(u+(tex->stride*v));
 
-#if ((ANDROID_CODEGEN >= ANDROID_CODEGEN_ASM) && defined(__arm__))
     scanline_t32cb16blend_arm(dst, src, ct);
 #else
-    while (ct--) {
-        uint32_t s = *src++;
-        if (!s) {
-            dst++;
-            continue;
-        }
-        uint16_t d = *dst;
-        s = GGL_RGBA_TO_HOST(s);
-        int sR = (s >> (   3))&0x1F;
-        int sG = (s >> ( 8+2))&0x3F;
-        int sB = (s >> (16+3))&0x1F;
-        int sA = (s>>24);
-        int f = 0x100 - (sA + (sA>>7));
-        int dR = (d>>11)&0x1f;
-        int dG = (d>>5)&0x3f;
-        int dB = (d)&0x1f;
-        sR += (f*dR)>>8;
-        sG += (f*dG)>>8;
-        sB += (f*dB)>>8;
-        *dst++ = uint16_t((sR<<11)|(sG<<5)|sB);
+    dst_iterator16  di(c);
+    horz_iterator32  hi(c);
+    blender_32to16  bl(c);
+    while (di.count--) {
+        uint32_t s = hi.get_pixel32();
+        bl.write(s, di.dst);
+        di.dst++;
     }
 #endif
 }
 
+void scanline_t32cb16blend_srca(context_t* c)  // 32-bit texture blended onto 16-bit buffer with the srcA blender
+{
+    dst_iterator16  di(c);
+    horz_iterator32  hi(c);  // linear read: no scaling or clamping
+    blender_32to16_srcA  blender(c);
+    /* One source texel is read and blended in place for every destination pixel. */
+    while (di.count--) {
+        uint32_t s = hi.get_pixel32();
+        blender.write(s,di.dst);
+        di.dst++;
+    }
+}
+
+void scanline_t16cb16blend_clamp_mod(context_t* c)  // clamped 16-bit texture, modulated, blended onto 16-bit buffer
+{
+    const int a = c->iterators.ydady >> (GGL_COLOR_BITS-8);  // modulation alpha reduced to 8 bits
+    if (a == 0) {
+        return;  // nothing is written when the modulation alpha is zero
+    }
+    // NOTE(review): this shortcut also skips the r/g/b modulation; confirm a == 255 implies a white color.
+    if (a == 255) {
+        scanline_t16cb16_clamp(c);
+        return;
+    }
+    // General case: modulate each texel and blend it with the destination.
+    dst_iterator16  di(c);
+    blender_16to16_modulate  blender(c);
+    clamp_iterator  ci(c);
+
+    while (di.count--) {
+        uint16_t s = ci.get_pixel16();
+        blender.write(s, di.dst);
+        di.dst++;
+    }
+}
+
 void scanline_memcpy(context_t* c)
 {
     int32_t x = c->iterators.xl;
@@ -1518,26 +2320,3 @@
 // ----------------------------------------------------------------------------
 }; // namespace android
 
-using namespace android;
-extern "C" void ggl_test_codegen(uint32_t n, uint32_t p, uint32_t t0, uint32_t t1)
-{
-#if ANDROID_ARM_CODEGEN
-    GGLContext* c;
-    gglInit(&c);
-    needs_t needs;
-    needs.n = n;
-    needs.p = p;
-    needs.t[0] = t0;
-    needs.t[1] = t1;
-    sp<ScanlineAssembly> a(new ScanlineAssembly(needs, ASSEMBLY_SCRATCH_SIZE));
-    GGLAssembler assembler( new ARMAssembler(a) );
-    int err = assembler.scanline(needs, (context_t*)c);
-    if (err != 0) {
-        printf("error %08x (%s)\n", err, strerror(-err));
-    }
-    gglUninit(c);
-#else
-    printf("This test runs only on ARM\n");
-#endif
-}
-
diff --git a/libpixelflinger/tests/codegen/Android.mk b/libpixelflinger/tests/codegen/Android.mk
index 1bc4214..aa320fc 100644
--- a/libpixelflinger/tests/codegen/Android.mk
+++ b/libpixelflinger/tests/codegen/Android.mk
@@ -2,12 +2,15 @@
 include $(CLEAR_VARS)
 
 LOCAL_SRC_FILES:= \
-	codegen.cpp
+	codegen.cpp.arm
 
 LOCAL_SHARED_LIBRARIES := \
 	libcutils \
     libpixelflinger
 
+LOCAL_C_INCLUDES := \
+	system/core/libpixelflinger
+
 LOCAL_MODULE:= test-opengl-codegen
 
 LOCAL_MODULE_TAGS := tests
diff --git a/libpixelflinger/tests/codegen/codegen.cpp b/libpixelflinger/tests/codegen/codegen.cpp
index 1865888..94e2481 100644
--- a/libpixelflinger/tests/codegen/codegen.cpp
+++ b/libpixelflinger/tests/codegen/codegen.cpp
@@ -1,9 +1,54 @@
 #include <stdio.h>
 #include <stdint.h>
 
-extern "C" void ggl_test_codegen(
-        uint32_t n, uint32_t p, uint32_t t0, uint32_t t1);
+#include "private/pixelflinger/ggl_context.h"
 
+#include "buffer.h"
+#include "scanline.h"
+
+#include "codeflinger/CodeCache.h"
+#include "codeflinger/GGLAssembler.h"
+#include "codeflinger/ARMAssembler.h"
+
+#if defined(__arm__)
+#   define ANDROID_ARM_CODEGEN  1
+#else
+#   define ANDROID_ARM_CODEGEN  0
+#endif
+
+#define ASSEMBLY_SCRATCH_SIZE   2048
+
+using namespace android;
+
+class ScanlineAssembly : public Assembly {  // an Assembly tagged with the needs_t it was built for
+    AssemblyKey<needs_t> mKey;  // key built from the needs passed to the constructor
+public:
+    ScanlineAssembly(needs_t needs, size_t size)  // size is forwarded to the Assembly base
+        : Assembly(size), mKey(needs) { }
+    const AssemblyKey<needs_t>& key() const { return mKey; }
+};
+
+static void ggl_test_codegen(uint32_t n, uint32_t p, uint32_t t0, uint32_t t1)  // runs the scanline assembler for one needs tuple
+{
+#if ANDROID_ARM_CODEGEN
+    GGLContext* c;
+    gglInit(&c);  // NOTE(review): the result is not checked here
+    needs_t needs;
+    needs.n = n;
+    needs.p = p;
+    needs.t[0] = t0;
+    needs.t[1] = t1;
+    sp<ScanlineAssembly> a(new ScanlineAssembly(needs, ASSEMBLY_SCRATCH_SIZE));
+    GGLAssembler assembler( new ARMAssembler(a) );
+    int err = assembler.scanline(needs, (context_t*)c);
+    if (err != 0) {
+        printf("error %08x (%s)\n", err, strerror(-err));  // err is negated before being passed to strerror
+    }
+    gglUninit(c);
+#else
+    printf("This test runs only on ARM\n");  // no code generation is attempted on other targets
+#endif
+}
 
 int main(int argc, char** argv)
 {
diff --git a/rootdir/Android.mk b/rootdir/Android.mk
index 329be7f..6d6012e 100644
--- a/rootdir/Android.mk
+++ b/rootdir/Android.mk
@@ -11,6 +11,10 @@
 copy_from += etc/vold.fstab
 endif
 
+ifeq ($(TARGET_PRODUCT),full_x86)
+copy_from += etc/vold.fstab
+endif # TARGET_PRODUCT == full_x86
+
 # the /system/etc/init.goldfish.sh is needed to enable emulator support
 # in the system image. In theory, we don't need these for -user builds
 # which are device-specific. However, these builds require at the moment