#!/bin/sh
#
# (c) Copyright 2010 Hewlett-Packard Development Company, L.P.
#
# See "man chkconfig" for information on next two lines (Red Hat only)
# chkconfig: 2345 91 2
# description: HP Advanced Server Recovery Daemon
#
#
# Following lines are in conformance with LSB 1.2 spec
### BEGIN INIT INFO
# Provides:            hp-asrd
# Required-Start:      hp-health
# Required-Stop:
# Default-Start:       2 3 4 5
# Default-Stop:        0 1 6
# Description:         HP Advanced Server Recovery Daemon
### END INIT INFO

##
## By default, this script will use the ASR settings from the system ROM.
## You can configure these values using either the hpasmcli command, or by
## entering the ROM-Based Setup Utility (RBSU) during boot. You can override
## the ROM settings using the FORCE variables below.
##

# FORCE_ASR_STATUS 
#  Override the ASR enabled/disabled setting from the ROM.
#  Valid settings are:
#   "on":  Enable the ASR watchdog
#   "off": Disable the ASR watchdog
#FORCE_ASR_STATUS="off"

# FORCE_ASR_TIMEOUT
#  Override the ASR timeout value set in the ROM.
#  Valid setting is an integer between 300 and 3600 (inclusive).
#  This integer is the number of seconds the watchdog will
#  wait for a ping before resetting the system.
#  Note that the ROM setting is stored in minutes, but this
#  variable is in seconds to permit a higher granularity.
#FORCE_ASR_TIMEOUT="600"

# WATCHDOG_PING_RATE
#  The number of seconds between attempts to reset the watchdog.
#  Passed to the daemon as its -p argument.
WATCHDOG_PING_RATE=1

#### YOU SHOULD NOT NEED TO MODIFY ANYTHING BELOW THIS LINE ####

# Fixed search path for the external commands this script runs.
PATH="/sbin:/usr/sbin:/bin:/usr/bin"

# Load the LSB helper functions when the distribution provides them;
# otherwise define minimal stand-ins for the ones this script calls.
if [ -f /lib/lsb/init-functions ]; then
    . /lib/lsb/init-functions || exit 1
else
    # Implement our own versions of the LSB-defined functions.
    # This is not an LSB-compliant implementation, it just provides
    # the pieces we use.
    if [ -f /etc/rc.d/init.d/functions ]; then
	. /etc/rc.d/init.d/functions
    fi

    # pidofproc PROGRAM
    #  Print the PID(s) of PROGRAM as reported by pidof.
    pidofproc() {
	pidof "$1"
    }

    # start_daemon PROGRAM [ARGS...]
    #  Run PROGRAM with ARGS unless it is already running.
    #  Returns 0 if PROGRAM has a PID afterwards, 1 otherwise.
    start_daemon() {
	local daemon="$1"
	shift
	if [ -n "$(pidofproc "$daemon")" ]; then
	    return 0
	fi
	# "$@" keeps every argument as its own word.
	"$daemon" "$@"
	if [ -n "$(pidofproc "$daemon")" ]; then
	    return 0
	fi
	return 1
    }

    # killproc PROGRAM [-SIGNAL]
    #  Send SIGNAL (kill's default when omitted) to every PID of PROGRAM.
    #  SIGNAL may be written as -TERM, -SIGTERM or -15.
    #  Always returns 0.
    killproc() {
	local daemon="$1"
	local signal=""
	local pid
	pid="$(pidofproc "$daemon")"
	if [ "$#" -eq 2 ]; then
	    # Strip the leading "-" and an optional "SIG" prefix.
	    signal="${2#-}"
	    signal="${signal#SIG}"
	fi
	if [ -n "$pid" ]; then
	    # $pid is deliberately unquoted: pidof may report several
	    # PIDs separated by spaces and each must be its own argument.
	    if [ -n "$signal" ]; then
		kill -s "$signal" $pid
	    else
		kill $pid
	    fi
	fi
	return 0
    }

    # _log_msg TEXT...
    #  Print TEXT followed by a newline. TEXT is passed as data, never
    #  as the printf format, so a "%" in a message is printed literally.
    _log_msg() {
	printf '%s\n' "$*"
    }

    # log_success_msg TEXT...
    #  Report success; uses echo_success when the Red Hat function
    #  library is present.
    log_success_msg() {
	if [ -f /etc/rc.d/init.d/functions ]; then
	    echo_success
	fi
	_log_msg "$*"
    }

    # log_failure_msg TEXT...
    #  Report failure; uses echo_failure when the Red Hat function
    #  library is present.
    log_failure_msg() {
	if [ -f /etc/rc.d/init.d/functions ]; then
	    echo_failure
	fi
	_log_msg "$*"
    }
fi

# Human-readable service name used in log messages.
DESCRIPTION="HP Advanced Server Recovery Daemon"

# Locations of the daemon binary, the hpasmcli tool and the PID file.
HPHEALTH_BIN="/opt/hp/hp-health/bin"
DAEMON="${HPHEALTH_BIN}/hp-asrd"
HPASMCLI="/sbin/hpasmcli"
PIDFILE="/var/run/hp-asrd.pid"

## Decide whether the ASR watchdog should be active.
## FORCE_ASR_STATUS takes precedence when it is "on" or "off"; any other
## value (or none) falls back to what "$HPASMCLI -s 'show asr'" reports.
## Returns 0 when ASR is enabled, 1 otherwise (including when hpasmcli
## is not executable).
asr_enabled() {
    local status

    # Honour both directions of the override, not just "on".
    case "$FORCE_ASR_STATUS" in
	on)
	    return 0
	    ;;
	off)
	    return 1
	    ;;
    esac

    if [ ! -x "$HPASMCLI" ]; then
	return 1
    fi

    # Reduce the "ASR is currently <state>." line to just <state>.
    status=$("$HPASMCLI" -s "show asr" | \
	grep '^ASR is currently' | \
	sed -r 's/^ASR is currently ([a-z]*)\.$/\1/')

    if [ "$status" = "enabled" ]; then
	return 0
    fi

    return 1
}

## Check an ASR timeout expressed in minutes.
## $1 - candidate value
## Returns 0 when $1 is an integer from 5 to 60 inclusive, 1 otherwise.
is_valid_asr_timeout() {
    local timeout="$1"

    # The alternation is grouped so that ^ and $ anchor every branch;
    # otherwise values such as "600" or "5x" would slip through.
    if printf '%s\n' "$timeout" | grep -Eq '^([5-9]|[1-5][0-9]|60)$'; then
	return 0
    fi

    return 1
}

## Convert a number of minutes to seconds.
## $1 - minutes
## Prints the equivalent number of seconds on stdout.
min_to_sec() {
    printf '%d\n' "$(( $1 * 60 ))"
}

## Check an ASR timeout expressed in seconds (the FORCE_ASR_TIMEOUT unit).
## $1 - candidate value
## Returns 0 when $1 is an integer from 300 to 3600 inclusive,
## non-zero otherwise.
is_valid_timeout() {
    # Reject empty input, non-digits, leading zeros and anything with
    # five or more digits before handing the value to the numeric test.
    case "$1" in
	''|*[!0-9]*|0*|?????*)
	    return 1
	    ;;
    esac
    [ "$1" -ge 300 ] && [ "$1" -le 3600 ]
}

## Print the ASR timeout, in seconds, on stdout.
## FORCE_ASR_TIMEOUT (seconds) is used when set; otherwise the value is
## read from "$HPASMCLI -s 'show asr'" (minutes) and converted.
## Returns 0 on success, 1 when FORCE_ASR_TIMEOUT is invalid and 2 when
## no valid value could be read from the ROM. Errors go to stderr.
asr_timeout() {
    local timeout

    if [ -n "$FORCE_ASR_TIMEOUT" ]; then
	# The override is in seconds, so it needs the seconds validator
	# rather than the minutes one used for the ROM value.
	if is_valid_timeout "$FORCE_ASR_TIMEOUT"; then
	    echo "$FORCE_ASR_TIMEOUT"
	    return 0
	fi
	echo "FORCE_ASR_TIMEOUT value is invalid: $FORCE_ASR_TIMEOUT" 1>&2
	return 1
    fi

    # Reduce the "ASR timeout is <n> minutes." line to just <n>.
    timeout=$("$HPASMCLI" -s "show asr" | \
	grep '^ASR timeout is' | \
	sed -r 's/^ASR timeout is ([0-9]+) minutes\.$/\1/')

    if is_valid_asr_timeout "$timeout"; then
	min_to_sec "$timeout"
	return 0
    fi

    echo "Failed to read valid ASR timeout value from ROM" 1>&2
    return 2
}

## Run whichever udev "settle" command is installed: /sbin/udevadm
## settle is preferred, /sbin/udevsettle is the fallback. Does nothing
## when neither is executable.
do_udev_settle() {
    if [ -x /sbin/udevadm ]; then
	/sbin/udevadm settle
	return
    fi

    if [ -x /sbin/udevsettle ]; then
	/sbin/udevsettle
	return
    fi

    return 0
}

## Report whether a watchdog device is present.
## Returns 0 when both /sys/class/misc/watchdog and /dev/watchdog exist,
## 1 otherwise.
have_watchdog() {
    # Settle udev first; presumably so a freshly loaded driver's device
    # node has had time to appear.
    do_udev_settle

    [ -d /sys/class/misc/watchdog ] || return 1
    [ -e /dev/watchdog ] || return 1
    return 0
}

## Make sure a watchdog device exists, loading a driver if necessary.
## Tries hpwdt first, then ipmi_watchdog; a module that does not yield a
## watchdog device is unloaded again before moving on.
## Returns 0 when a watchdog is available, 1 when none could be set up.
load_modules() {
    local module

    # A watchdog that is already loaded is used as-is, in the hope that
    # it is one we support.
    have_watchdog && return 0

    # hpwdt fails on systems without iLO2/iLO3 (or without the module);
    # if ipmi_watchdog fails too the platform is probably unsupported.
    for module in hpwdt ipmi_watchdog; do
	modprobe "$module" > /dev/null 2>&1
	if have_watchdog; then
	    return 0
	fi
	modprobe -r "$module" > /dev/null 2>&1
    done

    return 1
}

# PCI vendor IDs
PCI_VENDOR_ID_COMPAQ="0e11"
PCI_VENDOR_ID_HP="103c"

# PCI device IDs
PCI_DEVICE_ID_ILO="b203" # iLO and iLO2
PCI_DEVICE_ID_ILO3="3307"

# vendor:device pairs in the form accepted by "lspci -d"
PCIID_ILO="${PCI_VENDOR_ID_COMPAQ}:${PCI_DEVICE_ID_ILO}"
PCIID_ILO3="${PCI_VENDOR_ID_HP}:${PCI_DEVICE_ID_ILO3}"

# return codes for probe_ilo()
ILO_NO_ILO=0
ILO_NO_EMBEDDED_HEALTH=1
ILO_WITH_EMBEDDED_HEALTH=2
ILO_WITH_EMBEDDED_HEALTH_G3=3

## Scan the system for an iLO device. Return one of the following:
## $ILO_NO_ILO - no iLO device found
## $ILO_NO_EMBEDDED_HEALTH - iLO found, does not support embedded health
## $ILO_WITH_EMBEDDED_HEALTH - iLO supporting embedded health found
## $ILO_WITH_EMBEDDED_HEALTH_G3 - iLO3 supporting embedded health found
##
## The iLO/iLO2 ID is probed before the iLO3 ID; the first ID for which
## lspci lists a device decides the result.
probe_ilo() {
    local dev=""
    local id    # kept local so the caller's variables are not clobbered

    for id in $PCIID_ILO $PCIID_ILO3; do
	# First column of the lspci listing is the device's bus address.
	dev="$(lspci -d "$id" | cut -d' ' -f1)"
	if [ -z "$dev" ]; then
	    continue
	fi

	if [ "$id" != "$PCIID_ILO" ]; then
	    return "$ILO_WITH_EMBEDDED_HEALTH_G3"
	fi

	# An iLO/iLO2 counts as supporting embedded health only when this
	# byte sequence shows up in its verbose hex dump.
	if lspci -xv -s "$dev" | grep -q "3c 10 05 33"; then
	    return "$ILO_WITH_EMBEDDED_HEALTH"
	fi
	return "$ILO_NO_EMBEDDED_HEALTH"
    done

    return "$ILO_NO_ILO"
}

## Report whether probe_ilo found any iLO device.
## Returns 0 when probe_ilo returned anything other than $ILO_NO_ILO,
## 1 when it returned $ILO_NO_ILO.
have_ilo() {
    local probe_result

    probe_ilo
    probe_result=$?

    [ "$probe_result" != "$ILO_NO_ILO" ]
}

## Start the ASR daemon.
## Does nothing (and returns 0) when no iLO is found or ASR is disabled.
## Exits the script with status 1 when no valid timeout can be
## determined. Otherwise launches $DAEMON, records its PID in $PIDFILE
## and returns 0, or returns 1 when the daemon is not running afterwards.
start() {
	local timeout pid

	if ! have_ilo; then
	    return 0
	fi
	if ! asr_enabled; then
	    echo "ASR disabled, not starting daemon."
	    return 0
	fi

	timeout=$(asr_timeout) || exit 1

	load_modules # Ignore return code; iloraw mode may still work

	start_daemon "$DAEMON" -p "$WATCHDOG_PING_RATE" -t "$timeout"

	pid=$(pidof "$DAEMON")
	if [ -z "$pid" ]; then
	    log_failure_msg "Starting $DESCRIPTION"
	    # Report the failure to the caller instead of the status of
	    # the logging helper.
	    return 1
	fi

	echo "$pid" > "$PIDFILE"
	log_success_msg "Starting $DESCRIPTION"
}

## Stop the ASR daemon.
## Does nothing (and returns 0) when no iLO is found or the daemon is
## not running. Otherwise sends SIGTERM via killproc, waits until every
## PID reported by pidof has disappeared from /proc, and removes
## $PIDFILE.
stop() {
	local pids p

	if ! have_ilo; then
	    return 0
	fi
	pids=$(pidof "$DAEMON")
	if [ -z "$pids" ]; then
	    log_success_msg "hp-asrd not running"
	    return 0
	fi
	killproc "$DAEMON" -SIGTERM
	printf 'Waiting for hp-asrd[%s] to terminate' "$pids"
	# pidof may list several PIDs; wait for each one separately.
	for p in $pids; do
	    while [ -d "/proc/$p/" ]; do
		printf "."
		sleep 1
	    done
	done
	printf "\n"
	rm -f "$PIDFILE"
	log_success_msg "$DESCRIPTION Terminated"
}

# Dispatch on the requested action. Every branch ends the script with an
# explicit exit status.
case "$1" in
    start)
	start
	exit $?
	;;
    stop)
	stop
	exit $?
	;;
    restart|force-restart)
	stop
	start
	exit $?
	;;
    status)
	# pidof also prints the PID(s) of the running daemon.
	if pidof "$DAEMON"; then
	    log_success_msg "Running"
	    exit 0
	fi
	log_failure_msg "Not Running"
	# LSB status convention: 3 means "program is not running".
	exit 3
	;;
    *)
	echo "Usage: $0 {start|stop|restart|status}" 1>&2
	exit 1
	;;
esac
