[Pacemaker] Pacemaker on system with disk failure
Carsten Otto
carsten.otto at andrena.de
Thu Sep 25 14:39:06 UTC 2014
Dear John,
On Thu, Sep 25, 2014 at 10:03:27AM -0400, John Lauro wrote:
> One of the reasons I like ksh is that true, echo, and sleep (among
> many others) are all builtin, so you don't need those commands on the
> filesystem, so the script is less likely to fail if the filesystem
> fails... that said you probably don't have ksh installed by default.
Thanks for the hint! I just wrote a simple watchdog resource agent and
the corresponding shell script which successfully reboots a server when
the disk fails.
I provided my solution in the attachment.
Put crude-watchdog.sh in /root/, and put crude-watchdog in
/usr/lib/ocf/resource.d/heartbeat/.
In my two-node cluster I used these commands to run this watchdog on
both machines:
pcs resource create WATCHDOG ocf:heartbeat:crude-watchdog
pcs resource clone WATCHDOG
Best regards,
Carsten
--
andrena objects ag
Büro Frankfurt
Clemensstr. 8
60487 Frankfurt
Tel: +49 (0) 69 977 860 38
Fax: +49 (0) 69 977 860 39
http://www.andrena.de
Vorstand: Hagen Buchwald, Matthias Grund, Dr. Dieter Kuhn
Aufsichtsratsvorsitzender: Rolf Hetzelberger
Sitz der Gesellschaft: Karlsruhe
Amtsgericht Mannheim, HRB 109694
USt-IdNr. DE174314824
Bitte beachten Sie auch unsere anstehenden Veranstaltungen:
http://www.andrena.de/events
-------------- next part --------------
A non-text attachment was scrubbed...
Name: crude-watchdog.sh
Type: application/x-sh
Size: 189 bytes
Desc: not available
URL: <https://lists.clusterlabs.org/pipermail/pacemaker/attachments/20140925/b9ce7e6f/attachment-0004.sh>
-------------- next part --------------
#!/bin/sh
#
# crude-watchdog: OCF resource agent that keeps the helper script named in
# SCRIPT running on the node.

# Default OCF_FUNCTIONS_DIR from OCF_ROOT only when it is not already set.
# The expansion is quoted so a path containing whitespace or glob characters
# is not split or expanded.
: "${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}"

# Pull in the shared OCF shell helpers (ocf_log and the OCF_* return codes
# used below are expected to come from here).
. "${OCF_FUNCTIONS_DIR}/ocf-shellfuncs"

# Absolute path of the watchdog helper that this agent starts and stops.
SCRIPT=/root/crude-watchdog.sh
# Print the agent's OCF metadata (an XML document) on stdout.
# The delimiter is quoted because the document contains nothing that should
# be expanded by the shell; the text is emitted verbatim.
meta_data() {
    cat <<'XML_METADATA'
<?xml version="1.0"?>
<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
<resource-agent name="crude-watchdog" version="1.0">
<version>1.0</version>
<longdesc lang="en">
This agent reboots the system if the root file system stops working.
</longdesc>
<shortdesc lang="en">
This agent reboots the system if the root file system stops working.
</shortdesc>
<parameters>
</parameters>
<actions>
<action name="start" timeout="20" />
<action name="stop" timeout="20" />
<action name="monitor" timeout="20" interval="10" depth="0" />
<action name="reload" timeout="20" />
<action name="migrate_to" timeout="20" />
<action name="migrate_from" timeout="20" />
<action name="meta-data" timeout="5" />
<action name="validate-all" timeout="20" />
</actions>
</resource-agent>
XML_METADATA
}
#######################################################################
# Print a two-line usage summary on stdout: the accepted actions (prefixed
# with the name this script was invoked as) and a note about the required
# OCF environment.
watchdog_usage() {
    printf '%s\n' \
        "usage: $0 {start|stop|monitor|migrate_to|migrate_from|validate-all|meta-data}" \
        "Expects to have a fully populated OCF RA-compliant environment set."
}
# Start the watchdog helper ($SCRIPT) in the background.
#
# Globals:  SCRIPT (read), OCF_SUCCESS, OCF_ERR_INSTALLED, OCF_ERR_GENERIC
# Returns:  OCF_SUCCESS        if the helper is already running, or is seen
#                              running after being launched;
#           OCF_ERR_INSTALLED  if $SCRIPT is missing or not executable;
#           OCF_ERR_GENERIC    if the helper was launched but never showed up.
watchdog_start() {
    local tries

    # Start must be idempotent: nothing to do if the helper is already up.
    watchdog_monitor
    if [ $? -eq "$OCF_SUCCESS" ]; then
        return "$OCF_SUCCESS"
    fi

    # A backgrounded command always yields status 0, so a missing helper
    # has to be detected before launching it.
    if [ ! -x "$SCRIPT" ]; then
        ocf_log err "$SCRIPT is missing or not executable"
        return "$OCF_ERR_INSTALLED"
    fi

    # Detach the helper's output from the agent's stdout/stderr so the
    # caller is not kept waiting on descriptors inherited by the helper,
    # and so nohup does not create a nohup.out file.
    nohup "$SCRIPT" >/dev/null 2>&1 &

    # Only report success once the monitor actually sees the helper.
    # 5 polls, one second apart, stays well inside the 20s start timeout.
    tries=0
    while [ "$tries" -lt 5 ]; do
        watchdog_monitor
        if [ $? -eq "$OCF_SUCCESS" ]; then
            return "$OCF_SUCCESS"
        fi
        sleep 1
        tries=$((tries + 1))
    done

    ocf_log err "$SCRIPT did not come up after being started"
    return "$OCF_ERR_GENERIC"
}
# Stop the watchdog helper.
#
# The helper is first asked to terminate (SIGTERM); if it is still seen by
# the monitor after a few seconds it is killed (SIGKILL). Checking the
# monitor only once, immediately after signalling, would report a stop
# failure whenever the process has not finished exiting yet.
#
# Globals:  OCF_SUCCESS, OCF_ERR_GENERIC
# Returns:  OCF_SUCCESS      if the helper is not running (already, or after
#                            being signalled);
#           OCF_ERR_GENERIC  if it is still running after SIGKILL.
watchdog_stop() {
    local sig tries

    # Stop must be idempotent: an already-stopped helper is a success.
    watchdog_monitor
    if [ $? -ne "$OCF_SUCCESS" ]; then
        return "$OCF_SUCCESS"
    fi

    # Up to 5 polls per signal, one second apart (at most ~10s in total,
    # inside the 20s stop timeout advertised in the metadata).
    for sig in TERM KILL; do
        killall -s "$sig" crude-watchdog.sh

        tries=0
        while [ "$tries" -lt 5 ]; do
            watchdog_monitor
            if [ $? -ne "$OCF_SUCCESS" ]; then
                return "$OCF_SUCCESS"
            fi
            sleep 1
            tries=$((tries + 1))
        done
    done

    ocf_log err "crude-watchdog.sh is still running after SIGKILL"
    return "$OCF_ERR_GENERIC"
}
# Report whether the watchdog helper is running.
#
# Looks for the literal string "crude-watchdog.sh" in the `ps aux` listing,
# ignoring any line that mentions grep (which drops the grep processes of
# this very pipeline). The match is a fixed string (-F) so the "." in the
# name is not treated as a regex wildcard.
#
# Globals:  OCF_SUCCESS, OCF_NOT_RUNNING
# Returns:  OCF_SUCCESS      if a matching process line is found;
#           OCF_NOT_RUNNING  otherwise.
watchdog_monitor() {
    if ps aux | grep -F -- "crude-watchdog.sh" | grep -v -q grep; then
        return "$OCF_SUCCESS"
    fi
    return "$OCF_NOT_RUNNING"
}
# Check that the helper named by SCRIPT can be executed.
#
# Globals:  SCRIPT (read), OCF_SUCCESS, OCF_ERR_ARGS
# Returns:  OCF_ERR_ARGS if $SCRIPT is not an executable file,
#           OCF_SUCCESS otherwise.
watchdog_validate() {
    if [ ! -x "$SCRIPT" ]; then
        return $OCF_ERR_ARGS
    fi

    return $OCF_SUCCESS
}
# Dispatch on the requested action. meta-data, usage/help and unknown
# actions exit directly from their branch; every other branch falls through
# to the common epilogue below, which exits with the status of the last
# command the branch ran.
case "$__OCF_ACTION" in
    meta-data)
        meta_data
        exit $OCF_SUCCESS
        ;;
    start)
        watchdog_start
        ;;
    stop)
        watchdog_stop
        ;;
    monitor)
        watchdog_monitor
        ;;
    migrate_to)
        # Migrating away is handled as a plain stop.
        ocf_log info "Migrating ${OCF_RESOURCE_INSTANCE} to ${OCF_RESKEY_CRM_meta_migrate_target}."
        watchdog_stop
        ;;
    migrate_from)
        # Migrating in is handled as a plain start.
        ocf_log info "Migrating ${OCF_RESOURCE_INSTANCE} from ${OCF_RESKEY_CRM_meta_migrate_source}."
        watchdog_start
        ;;
    reload)
        # Nothing is reloaded; the action is only logged.
        ocf_log info "Reloading ${OCF_RESOURCE_INSTANCE} ..."
        ;;
    validate-all)
        watchdog_validate
        ;;
    usage|help)
        watchdog_usage
        exit $OCF_SUCCESS
        ;;
    *)
        watchdog_usage
        exit $OCF_ERR_UNIMPLEMENTED
        ;;
esac

# Common epilogue: log the outcome at debug level and exit with it.
status=$?
ocf_log debug "${OCF_RESOURCE_INSTANCE} $__OCF_ACTION : $status"
exit $status
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 198 bytes
Desc: Digital signature
URL: <https://lists.clusterlabs.org/pipermail/pacemaker/attachments/20140925/b9ce7e6f/attachment-0004.sig>
More information about the Pacemaker
mailing list