blob: 95204a47b9b7cf9d14d0a19cf36c91925d1bc85c [file] [log] [blame]
#!/system/bin/sh
# Copyright (C) 2010 The Android Open Source Project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Abort on the first unhandled command failure.
set -e
# Default product ID in crash report (used when the crash metadata does not
# provide a product_id).
BRILLO_PRODUCT=Brillo
# Base directory that contains any crash reporter state files.
CRASH_STATE_DIR="/data/misc/crash_reporter"
# File containing crash_reporter's anonymized guid.
GUID_FILE="${CRASH_STATE_DIR}/guid"
# Crash sender lock in case the sender is already running.
CRASH_SENDER_LOCK="${CRASH_STATE_DIR}/lock/crash_sender"
# Path to file that indicates a crash test is currently running.
CRASH_TEST_IN_PROGRESS_FILE="${CRASH_STATE_DIR}/tmp/crash-test-in-progress"
# Set this to 1 in the environment to allow uploading crash reports
# for unofficial versions.
FORCE_OFFICIAL=${FORCE_OFFICIAL:-0}
# Path to hardware class description.
HWCLASS_PATH="/sys/devices/platform/chromeos_acpi/HWID"
# Path to file that indicates this is a developer image.
LEAVE_CORE_FILE="${CRASH_STATE_DIR}/.leave_core"
# Path to list_proxies.
LIST_PROXIES="list_proxies"
# Maximum crashes to send per day.
MAX_CRASH_RATE=${MAX_CRASH_RATE:-32}
# File whose existence mocks crash sending. If empty we pretend the
# crash sending was successful, otherwise unsuccessful.
MOCK_CRASH_SENDING="${CRASH_STATE_DIR}/tmp/mock-crash-sending"
# Set this to 1 in the environment to pretend to have booted in developer
# mode. This is used by autotests.
MOCK_DEVELOPER_MODE=${MOCK_DEVELOPER_MODE:-0}
# Ignore PAUSE_CRASH_SENDING file if set.
OVERRIDE_PAUSE_SENDING=${OVERRIDE_PAUSE_SENDING:-0}
# File whose existence causes crash sending to be delayed (for testing).
# Must be stateful to enable testing kernel crashes.
PAUSE_CRASH_SENDING="${CRASH_STATE_DIR}/lock/crash_sender_paused"
# Path to a directory of restricted certificates which includes
# a certificate for the crash server.
RESTRICTED_CERTIFICATES_PATH="/system/etc/security/cacerts"
# File whose existence implies we're running and not to start again.
RUN_FILE="${CRASH_STATE_DIR}/run/crash_sender.pid"
# Maximum time to sleep between sends.
SECONDS_SEND_SPREAD=${SECONDS_SEND_SPREAD:-600}
# File whose existence allows uploading of device coredumps.
DEVCOREDUMP_UPLOAD_FLAG_FILE="${CRASH_STATE_DIR}/device_coredump_upload_allowed"
# The weave configuration file.
WEAVE_CONF_FILE="/etc/weaved/weaved.conf"
# The os-release.d folder.
OSRELEASED_FOLDER="/etc/os-release.d"
# The syslog tag for all logging we emit: "<script name>[<pid>]".
# $0 is quoted so that a script path containing whitespace is not split.
TAG="$(basename "$0")[$$]"
# Directory to store timestamp files indicating the uploads in the past 24
# hours.
TIMESTAMPS_DIR="${CRASH_STATE_DIR}/crash_sender"
# Temp directory for this process (created in main, removed in cleanup).
TMP_DIR=""
# Crash report log file.
CRASH_LOG="${CRASH_STATE_DIR}/log/uploads.log"
# Logs every argument through the `log` command, tagged with ${TAG}.
lecho() {
  log -t "$TAG" "$@"
}
# Same as lecho, but prepends the "-psyslog.warn" priority option to the
# arguments handed to the logger.
lwarn() {
  lecho "-psyslog.warn" "$@"
}
# Returns 0 when the mock-sending marker file (MOCK_CRASH_SENDING) exists as a
# regular file, 1 otherwise.
is_mock() {
  if [ -f "${MOCK_CRASH_SENDING}" ]; then
    return 0
  fi
  return 1
}
# Returns 0 when the mock marker file has no content (a mocked send should
# succeed) and 1 when it contains anything (a mocked send should fail).
is_mock_successful() {
  if [ -z "$(cat "${MOCK_CRASH_SENDING}")" ]; then
    # Empty file means success.
    return 0
  fi
  return 1
}
# Exit handler: removes the per-process temp directory, the pid file and the
# lock directory, then emits the end-of-run marker via crash_done.
cleanup() {
  # Empty paths are skipped so that rm -rf is never run on "".
  [ -z "${TMP_DIR}" ] || rm -rf "${TMP_DIR}"
  rm -f "${RUN_FILE}"
  [ -z "${CRASH_SENDER_LOCK}" ] || rm -rf "${CRASH_SENDER_LOCK}"
  crash_done
}
# Emits an end-of-run marker to the log, but only in mock mode: tests use it
# to know they have received every message from this run.
crash_done() {
  is_mock || return 0
  lecho "crash_sender done."
}
# Returns 0 if crash reports may be uploaded as coming from an official image:
# either FORCE_OFFICIAL is non-zero, or the ro.secure property is "1".
# Returns 1 otherwise.
is_official_image() {
  # Quoted and defaulted: an empty FORCE_OFFICIAL (e.g. "-e FORCE_OFFICIAL=")
  # is treated as 0 instead of making the test builtin error out.
  if [ "${FORCE_OFFICIAL:-0}" -ne 0 ]; then
    return 0
  fi
  if [ "$(getprop ro.secure)" = "1" ]; then
    return 0
  fi
  return 1
}
# Returns 0 if a crash test is currently running, i.e. the marker file
# CRASH_TEST_IN_PROGRESS_FILE exists; returns 1 otherwise. NOTE: Mirrors
# crash_collector.cc:CrashCollector::IsCrashTestInProgress().
is_crash_test_in_progress() {
  if [ -f "${CRASH_TEST_IN_PROGRESS_FILE}" ]; then
    return 0
  fi
  return 1
}
# Returns 0 if we should consider ourselves to be running on a developer
# image (the LEAVE_CORE_FILE marker exists), 1 otherwise.
# NOTE: Mirrors crash_collector.cc:CrashCollector::IsDeveloperImage().
is_developer_image() {
  # While crash reporter itself is under test, developer images get no
  # special treatment.
  if is_crash_test_in_progress; then
    return 1
  fi
  if [ -f "${LEAVE_CORE_FILE}" ]; then
    return 0
  fi
  return 1
}
# Returns 0 if we should consider ourselves to be running on a test image,
# i.e. the channel name starts with "test"; returns 1 otherwise.
is_test_image() {
  # While crash reporter itself is under test, test images get no special
  # treatment.
  if is_crash_test_in_progress; then
    return 1
  fi
  case "$(get_channel)" in
    test*) return 0 ;;
    *) return 1 ;;
  esac
}
# Returns 0 if the machine booted up in developer mode: MOCK_DEVELOPER_MODE is
# non-zero, or (outside of crash tests) the ro.debuggable property is "1".
# Returns 1 otherwise.
is_developer_mode() {
  # Quoted and defaulted: an empty MOCK_DEVELOPER_MODE is treated as 0 instead
  # of making the test builtin error out.
  if [ "${MOCK_DEVELOPER_MODE:-0}" -ne 0 ]; then
    return 0
  fi
  # If we're testing crash reporter itself, we don't want to special-case
  # for developer mode.
  if is_crash_test_in_progress; then
    return 1
  fi
  if [ "$(getprop ro.debuggable)" = "1" ]; then
    return 0
  fi
  return 1
}
# Returns 0 if uploading device coredumps is allowed, which is signalled by
# the existence of DEVCOREDUMP_UPLOAD_FLAG_FILE; returns 1 otherwise.
is_device_coredump_upload_allowed() {
  if [ -f "${DEVCOREDUMP_UPLOAD_FLAG_FILE}" ]; then
    return 0
  fi
  return 1
}
# Generate a uniform random number in 0..max-1 and print it on stdout.
# Arguments: $1 - exclusive upper bound (max). A bound of 0 or less prints 0.
#
# POSIX arithmetic expansion requires support of at least signed long integers.
# On 32-bit systems, that may mean 32-bit signed integers, in which case the
# 32-bit random number read from /dev/urandom may be interpreted as negative
# when used inside an arithmetic expansion (since the high bit might be set).
# mksh at least is known to behave this way.
# For this case, simply take the absolute value, which will still give a
# roughly uniform random distribution for the modulo (as we are merely ignoring
# the high/sign bit).
# See corresponding Arithmetic Expansion and Arithmetic Expression sections:
# POSIX: http://pubs.opengroup.org/onlinepubs/009695399/utilities/xcu_chap02.html#tag_02_06_04
# mksh: http://linux.die.net/man/1/mksh
generate_uniform_random() {
  local max=$1
  # A bound of zero (e.g. SECONDS_SEND_SPREAD=0) would make the modulo below
  # divide by zero; the only value in an empty/degenerate range is 0.
  if [ "${max:-0}" -le 0 ]; then
    echo 0
    return 0
  fi
  local random="$(od -An -N4 -tu /dev/urandom)"
  echo $(((random < 0 ? -random : random) % max))
}
# Check if sending a crash now does not exceed the maximum 24hr rate and
# commit to doing so, if not.
# Returns 0 (after recording a new timestamp file in TIMESTAMPS_DIR) when one
# more send is allowed, 1 when MAX_CRASH_RATE has already been reached.
check_rate() {
  mkdir -p "${TIMESTAMPS_DIR}"
  # Only consider sends recorded in the past 24 hours by removing all older
  # timestamp files.
  find "${TIMESTAMPS_DIR}" -mindepth 1 -mtime +1 \
    -exec rm -- '{}' ';'
  # Count the remaining timestamp files. The existence check matters: when the
  # directory is empty the glob stays unexpanded and must not be counted as a
  # send.
  local sends_in_24hrs=0 stamp
  for stamp in "${TIMESTAMPS_DIR}"/*; do
    [ -e "${stamp}" ] || continue
    sends_in_24hrs=$((sends_in_24hrs + 1))
  done
  lecho "Current send rate: ${sends_in_24hrs}sends/24hrs"
  if [ "${sends_in_24hrs}" -ge "${MAX_CRASH_RATE}" ]; then
    lecho "Cannot send more crashes:"
    lecho " current ${sends_in_24hrs}send/24hrs >= " \
      "max ${MAX_CRASH_RATE}send/24hrs"
    return 1
  fi
  # Commit to this send by recording a fresh timestamp file.
  mktemp "${TIMESTAMPS_DIR}"/XXXXXX > /dev/null
  return 0
}
# Prints the base part of a crash report file name: the first four
# dot-separated fields, e.g. name.01234.5678.9012 from
# name.01234.5678.9012.meta or name.01234.5678.9012.log.tar.xz. We make sure
# "name" is sanitized in CrashCollector::Sanitize to not include any periods.
get_base() {
  echo "$1" | cut -d '.' -f 1-4
}
# Prints the extension of the given file name (the text after the last dot).
# For gzipped files the trailing .gz is ignored and the extension in front of
# it is printed instead.
get_extension() {
  local path="$1"
  case "${path##*.}" in
    gz)
      # Drop the .gz suffix so the real extension becomes the last field.
      path="${path%.*}"
      ;;
  esac
  echo "${path##*.}"
}
# Prints which kind of report the given metadata file relates to. The kind is
# the extension of the payload named in the metadata, with "dmp" reported as
# "minidump". Prints "undefined" (and logs) when the payload is not readable.
get_kind() {
  local payload="$(get_key_value "$1" "payload")"
  if [ ! -r "${payload}" ]; then
    lecho "Missing payload: ${payload}"
    echo "undefined"
    return
  fi
  local kind="$(get_extension "${payload}")"
  case "${kind}" in
    dmp) echo "minidump" ;;
    *) echo "${kind}" ;;
  esac
}
# Prints the value stored for a key, or "undefined" when it cannot be found
# (or is empty).
# Arguments:
#   $1 - either a folder in which each key is its own file, or a file made of
#        key=value lines
#   $2 - the key to look up
get_key_value() {
  local file="$1" key="$2" value key_re
  if [ -f "${file}/${key}" ]; then
    # Get the value from a folder where each key is its own file. The key
    # file's entire contents is the value.
    value=$(cat "${file}/${key}")
  elif [ -f "${file}" ]; then
    # Get the value from a file that has multiple key=value combinations.
    # Return the first entry. There shouldn't be more than one anyways.
    # The key is matched literally: regex metacharacters (and the "/" address
    # delimiter) are backslash-escaped before being placed in the sed pattern.
    key_re=$(printf '%s' "${key}" | sed 's:[\/.*^$[]:\\&:g')
    # The substitution removes everything up to and including the first "=",
    # which preserves any "=" characters embedded in the value.
    value=$(sed -n "/^${key_re}[[:space:]]*=/{s:^[^=]*=::p;q}" "${file}")
  fi
  echo "${value:-undefined}"
}
# Prints, one per line, the keys of a key=value file (the text before the
# first "=" on each line) that match the given grep pattern.
# Arguments: $1 - key=value file, $2 - pattern the keys are filtered with.
get_keys() {
  local meta="$1" pattern="$2"
  cut -d '=' -f 1 "${meta}" | grep --color=never "${pattern}"
}
# Prints the value of the ro.product.channel property with any trailing
# "-channel" suffix removed.
get_channel() {
  getprop ro.product.channel | sed -e 's:-channel$::'
}
# Prints the contents of HWCLASS_PATH, or "undefined" when that file is not
# readable.
get_hardware_class() {
  if [ ! -r "${HWCLASS_PATH}" ]; then
    echo "undefined"
    return
  fi
  cat "${HWCLASS_PATH}"
}
# Prints the given string with every character removed that is not on the
# JSON-safe white-list: alphanumerics and the characters _ . - : ;
filter_log_string() {
  echo "$1" | tr -d -c '[:alnum:]_.\-:;'
}
# Uploads the crash report described by one metadata file to the crash server
# with curl, and records the returned receipt ID in CRASH_LOG.
# Arguments: $1 - path to the report's metadata file.
# Returns:   1 if no crash server is configured; in mock mode, 0 or 1 as
#            dictated by the mock file (nothing is uploaded); otherwise curl's
#            exit status.
send_crash() {
  local meta_path="$1"
  local report_payload="$(get_key_value "${meta_path}" "payload")"
  local kind="$(get_kind "${meta_path}")"
  local exec_name="$(get_key_value "${meta_path}" "exec_name")"
  local url="$(get_key_value "${OSRELEASED_FOLDER}" "crash_server")"
  local bdk_version="$(get_key_value "${meta_path}" "bdk_version")"
  local hwclass="$(get_hardware_class)"
  local write_payload_size="$(get_key_value "${meta_path}" "payload_size")"
  local log="$(get_key_value "${meta_path}" "log")"
  local sig="$(get_key_value "${meta_path}" "sig")"
  local send_payload_size="$(stat -c "%s" "${report_payload}" 2>/dev/null)"
  local product="$(get_key_value "${meta_path}" "product_id")"
  local version="$(get_key_value "${meta_path}" "product_version")"
  local upload_prefix="$(get_key_value "${meta_path}" "upload_prefix")"
  local guid
  local model_manifest_id="$(get_key_value "${WEAVE_CONF_FILE}" "model_id")"
  # Declared here so they neither leak into nor get picked up from the
  # caller's scope.
  local k v curl_result
  local proxy="" ret

  # If crash_reporter.server is not set return with an error. A missing key is
  # reported by get_key_value as "undefined", so that value is rejected too.
  if [ -z "${url}" ] || [ "${url}" = "undefined" ]; then
    lecho "Configuration error: crash_reporter.server not set."
    return 1
  fi

  # The positional parameters are reused as the list of extra curl arguments.
  set -- \
    -F "write_payload_size=${write_payload_size}" \
    -F "send_payload_size=${send_payload_size}"
  if [ "${sig}" != "undefined" ]; then
    set -- "$@" \
      -F "sig=${sig}" \
      -F "sig2=${sig}"
  fi
  if [ -r "${report_payload}" ]; then
    set -- "$@" \
      -F "upload_file_${kind}=@${report_payload}"
  fi
  if [ "${log}" != "undefined" ] && [ -r "${log}" ]; then
    set -- "$@" \
      -F "log=@${log}"
  fi

  if [ "${upload_prefix}" = "undefined" ]; then
    upload_prefix=""
  fi

  # Grab any variable that begins with upload_.
  for k in $(get_keys "${meta_path}" "^upload_"); do
    v="$(get_key_value "${meta_path}" "${k}")"
    case ${k} in
      # Product & version are handled separately.
      upload_var_prod) ;;
      upload_var_ver) ;;
      upload_var_*)
        set -- "$@" -F "${upload_prefix}${k#upload_var_}=${v}"
        ;;
      upload_file_*)
        if [ -r "${v}" ]; then
          set -- "$@" -F "${upload_prefix}${k#upload_file_}=@${v}"
        fi
        ;;
    esac
  done

  # If product_id or product_version is undefined in the metadata, we use the
  # default product name and the metadata's bdk_version instead.
  if [ "${product}" = "undefined" ]; then
    product="${BRILLO_PRODUCT}"
  fi
  if [ "${version}" = "undefined" ]; then
    version="${bdk_version}"
  fi

  local image_type
  if is_test_image; then
    image_type="test"
  elif is_developer_image; then
    image_type="dev"
  elif [ "${FORCE_OFFICIAL:-0}" -ne 0 ]; then
    image_type="force-official"
  elif is_mock && ! is_mock_successful; then
    image_type="mock-fail"
  fi

  local boot_mode
  if is_developer_mode; then
    boot_mode="dev"
  fi

  # Need to strip dashes ourselves as Chrome preserves it in the file
  # nowadays. This is also what the Chrome breakpad client does.
  guid=$(tr -d '-' < "${GUID_FILE}")

  local error_type="$(get_key_value "${meta_path}" "error_type")"
  if [ "${error_type}" = "undefined" ]; then
    error_type=
  fi

  lecho "Sending crash:"
  if [ "${product}" != "${BRILLO_PRODUCT}" ]; then
    lecho " Sending crash report on behalf of ${product}"
  fi
  lecho " Metadata: ${meta_path} (${kind})"
  lecho " Payload: ${report_payload}"
  lecho " Version: ${version}"
  lecho " Bdk Version: ${bdk_version}"
  if [ -n "${image_type}" ]; then
    lecho " Image type: ${image_type}"
  fi
  if [ -n "${boot_mode}" ]; then
    lecho " Boot mode: ${boot_mode}"
  fi
  # In mock mode the remaining parameters are logged as well.
  if is_mock; then
    lecho " Product: ${product}"
    lecho " URL: ${url}"
    lecho " HWClass: ${hwclass}"
    lecho " write_payload_size: ${write_payload_size}"
    lecho " send_payload_size: ${send_payload_size}"
    if [ "${log}" != "undefined" ]; then
      lecho " log: @${log}"
    fi
    if [ "${sig}" != "undefined" ]; then
      lecho " sig: ${sig}"
    fi
  fi
  lecho " Exec name: ${exec_name}"
  if [ -n "${error_type}" ]; then
    lecho " Error type: ${error_type}"
  fi

  # In mock mode nothing is uploaded; the mock file decides the outcome.
  if is_mock; then
    if ! is_mock_successful; then
      lecho "Mocking unsuccessful send"
      return 1
    fi
    lecho "Mocking successful send"
    return 0
  fi

  # Read in the first proxy, if any, for a given URL.
  if [ -f "${LIST_PROXIES}" ]; then
    proxy=$("${LIST_PROXIES}" --quiet "${url}")
    ret=$?
    if [ ${ret} -ne 0 ]; then
      proxy=''
      lwarn "Listing proxies failed with exit code ${ret}"
    else
      proxy=$(echo "${proxy}" | head -1)
    fi
  fi
  # if a direct connection should be used, unset the proxy variable.
  if [ "${proxy}" = "direct://" ]; then
    proxy=
  fi

  local report_id="${TMP_DIR}/report_id"
  local curl_stderr="${TMP_DIR}/curl_stderr"

  # errexit is suspended around curl so that its exit status can be inspected
  # and reported instead of aborting the script.
  set +e
  curl "${url}" -f -v ${proxy:+--proxy "$proxy"} \
    --capath "${RESTRICTED_CERTIFICATES_PATH}" --ciphers HIGH \
    -F "prod=${product}" \
    -F "ver=${version}" \
    -F "bdk_version=${bdk_version}" \
    -F "hwclass=${hwclass}" \
    -F "exec_name=${exec_name}" \
    -F "model_manifest_id=${model_manifest_id}" \
    ${image_type:+-F "image_type=${image_type}"} \
    ${boot_mode:+-F "boot_mode=${boot_mode}"} \
    ${error_type:+-F "error_type=${error_type}"} \
    -F "guid=${guid}" \
    -o "${report_id}" \
    "$@" \
    2>"${curl_stderr}"
  curl_result=$?
  set -e

  if [ ${curl_result} -eq 0 ]; then
    # The response body written by curl is the receipt ID of the report.
    local id="$(cat "${report_id}")"
    local timestamp="$(date +%s)"
    local filter_prod="$(filter_log_string "${product}")"
    local filter_exec="$(filter_log_string "${exec_name}")"
    if [ "${filter_prod}" != "${product}" ]; then
      lwarn "Product name filtered to: ${filter_prod}."
    fi
    if [ "${filter_exec}" != "${exec_name}" ]; then
      lwarn "Exec name filtered to: ${filter_exec}."
    fi
    printf "{'time':%s,'id':'%s','product':'%s','exec_name':'%s'}\n" \
      "${timestamp}" "${id}" "${filter_prod}" "${filter_exec}" >> "${CRASH_LOG}"
    lecho "Crash report receipt ID ${id}"
  else
    lecho "Crash sending failed with exit code ${curl_result}: " \
      "$(cat "${curl_stderr}")"
  fi

  rm -f "${report_id}"
  return ${curl_result}
}
# *.meta files always end with done=1 so we can tell if they are complete.
# Returns 0 when the given file contains the text "done=1".
is_complete_metadata() {
  grep -q -e "done=1" "$1"
}
# Removes a report: every file that shares the given path's name up to its
# last extension, i.e. "<path without last extension>.*".
remove_report() {
  rm -f -- "${1%.*}".*
}
# Send all crashes from the given directory. This applies even when we're on a
# 3G connection (see crosbug.com/3304 for discussion).
# Arguments: $1 - directory holding the crash reports.
# Returns:   1 if sleeping before a send fails; otherwise falls through after
#            every metadata file has been considered. A missing directory is
#            not an error.
send_crashes() {
  local dir="$1"
  local old_file meta_path
  lecho "Sending crashes for ${dir}"
  if [ ! -d "${dir}" ]; then
    return
  fi

  # Consider any old files which still have no corresponding meta file
  # as orphaned, and remove them.
  for old_file in $(find "${dir}" -mindepth 1 \
                    -mtime +1 -type f); do
    if [ ! -e "$(get_base "${old_file}").meta" ]; then
      lecho "Removing old orphaned file: ${old_file}."
      rm -f -- "${old_file}"
    fi
  done

  # Look through all metadata (*.meta) files, oldest first. That way, the rate
  # limit does not stall old crashes if there's a high amount of new crashes
  # coming in.
  # For each crash report, first evaluate conditions that might lead to its
  # removal to honor user choice and to free disk space as soon as possible,
  # then decide whether it should be sent right now or kept for later sending.
  for meta_path in $(ls -1tr "${dir}"/*.meta 2>/dev/null); do
    lecho "Considering metadata ${meta_path}."
    local kind=$(get_kind "${meta_path}")
    if [ "${kind}" != "minidump" ] && \
       [ "${kind}" != "kcrash" ] && \
       [ "${kind}" != "log" ] && \
       [ "${kind}" != "devcore" ]; then
      lecho "Unknown report kind ${kind}. Removing report."
      remove_report "${meta_path}"
      continue
    fi

    if ! is_complete_metadata "${meta_path}"; then
      # This report is incomplete, so if it's old, just remove it.
      # The file itself is handed to find (quoted) rather than its basename as
      # a -name pattern, so glob characters in the name cannot make the age
      # check miss the file.
      local old_meta=$(find "${meta_path}" -mtime +1 -type f)
      if [ -n "${old_meta}" ]; then
        lecho "Removing old incomplete metadata."
        remove_report "${meta_path}"
      else
        lecho "Ignoring recent incomplete metadata."
      fi
      continue
    fi

    # Ignore device coredump if device coredump uploading is not allowed.
    if [ "${kind}" = "devcore" ] && ! is_device_coredump_upload_allowed; then
      lecho "Ignoring device coredump. Device coredump upload not allowed."
      continue
    fi

    if ! is_mock && ! is_official_image; then
      lecho "Not an official OS version. Removing crash."
      remove_report "${meta_path}"
      continue
    fi

    # Remove existing crashes in case user consent has not (yet) been given or
    # has been revoked.
    # NOTE(review): an earlier comment required this to follow a guest mode
    # check; no such check exists in this function.
    if ! metrics_client -c; then
      lecho "Crash reporting is disabled. Removing crash."
      remove_report "${meta_path}"
      continue
    fi

    # Skip report if the upload rate is exceeded. (Don't exit right now because
    # subsequent reports may be candidates for deletion.)
    if ! check_rate; then
      lecho "Sending ${meta_path} would exceed rate. Leaving for later."
      continue
    fi

    # The .meta file should be written *after* all to-be-uploaded files that it
    # references. Nevertheless, as a safeguard, a hold-off time of thirty
    # seconds after writing the .meta file is ensured. Also, sending of crash
    # reports is spread out randomly by up to SECONDS_SEND_SPREAD. Thus, for
    # the sleep call the greater of the two delays is used.
    local now=$(date +%s)
    local holdoff_time=$(($(stat -c "%Y" "${meta_path}") + 30 - ${now}))
    local spread_time=$(generate_uniform_random "${SECONDS_SEND_SPREAD}")
    local sleep_time
    if [ ${spread_time} -gt ${holdoff_time} ]; then
      sleep_time="${spread_time}"
    else
      sleep_time="${holdoff_time}"
    fi
    lecho "Scheduled to send in ${sleep_time}s."
    # Mock runs skip the actual wait.
    if ! is_mock; then
      if ! sleep "${sleep_time}"; then
        lecho "Sleep failed"
        return 1
      fi
    fi

    # Try to upload.
    if ! send_crash "${meta_path}"; then
      lecho "Problem sending ${meta_path}, not removing."
      continue
    fi

    # Send was successful, now remove.
    lecho "Successfully sent crash ${meta_path} and removing."
    remove_report "${meta_path}"
  done
}
# Prints the command line help on stdout and exits.
# Arguments: $1 - exit status to use (defaults to 1).
usage() {
  printf '%s\n' \
    "Usage: crash_sender [options]" \
    "Options:" \
    "-e <var>=<val> Set env |var| to |val| (only some vars)"
  exit "${1:-1}"
}
# Parses the command line arguments.
#   -e VAR=VAL  exports VAR=VAL; only the variables listed below are accepted.
#   -h          shows the usage text via usage 0.
# Anything else is logged and makes the script exit with status 1.
parseargs() {
  while [ $# -gt 0 ]; do
    case "$1" in
      -h)
        usage 0
        ;;
      -e)
        # The assignment is the argument that follows -e.
        shift
        case "$1" in
          FORCE_OFFICIAL=* | \
          MAX_CRASH_RATE=* | \
          MOCK_DEVELOPER_MODE=* | \
          OVERRIDE_PAUSE_SENDING=* | \
          SECONDS_SEND_SPREAD=*)
            export "$1"
            ;;
          *)
            lecho "Unknown var passed to -e: $1"
            exit 1
            ;;
        esac
        ;;
      *)
        lecho "Unknown options: $*"
        exit 1
        ;;
    esac
    shift
  done
}
# Entry point: parses the arguments, bails out early when sending is paused or
# this is a test image, records our pid, checks dependencies, creates the
# per-process temp directory and sends the system-wide crashes.
main() {
  parseargs "$@"

  # Quoted and defaulted: an empty OVERRIDE_PAUSE_SENDING counts as 0, so it
  # cannot bypass the pause file by making the test builtin fail.
  if [ -e "${PAUSE_CRASH_SENDING}" ] && \
     [ "${OVERRIDE_PAUSE_SENDING:-0}" -eq 0 ]; then
    lecho "Exiting early due to ${PAUSE_CRASH_SENDING}."
    exit 1
  fi

  if is_test_image; then
    lecho "Exiting early due to test image."
    exit 1
  fi

  # We don't perform checks on this because we have a master lock with the
  # CRASH_SENDER_LOCK file. This pid file is for the system to keep track
  # (like with autotests) that we're still running.
  echo $$ > "${RUN_FILE}"

  local dependency
  for dependency in "${RESTRICTED_CERTIFICATES_PATH}"; do
    if [ ! -x "${dependency}" ]; then
      lecho "Fatal: Crash sending disabled: ${dependency} not found."
      exit 1
    fi
  done

  TMP_DIR="$(mktemp -d "${CRASH_STATE_DIR}/tmp/crash_sender.XXXXXX")"

  # Send system-wide crashes
  send_crashes "${CRASH_STATE_DIR}/crash"
}
#TODO(http://b/23937249): Change the locking logic back to using flock.
# Creating the lock directory doubles as the "already running" check.
if ! mkdir "${CRASH_SENDER_LOCK}" 2>/dev/null; then
  lecho "Already running; quitting."
  crash_done
  exit 1
fi
# The cleanup handler is installed only once the lock is ours: cleanup removes
# the lock directory and the pid file, so a run that failed to get the lock
# must not trigger it and delete the running instance's state.
trap cleanup EXIT INT TERM
main "$@"