[Pacemaker] Why Did Pacemaker Restart this VirtualDomain Resource?
Andrew Martin
amartin at xes-inc.com
Tue Jun 19 14:38:50 UTC 2012
Hello,
I have a 3-node Pacemaker+Heartbeat cluster (two real nodes and one "standby" quorum node) running Ubuntu 10.04 LTS, using the Pacemaker and Heartbeat packages from the Ubuntu HA Team PPA ( https://launchpad.net/~ubuntu-ha-maintainers/+archive/ppa ). I have configured three DRBD resources, a filesystem mount, and a KVM-based virtual machine (using the VirtualDomain resource agent). Constraints are in place so that the DRBD devices must be promoted to primary and the filesystem must be mounted before the VM can start:
node $id="1ab0690c-5aa0-4d9c-ae4e-b662e0ca54e5" vmhost1
node $id="219e9bf6-ea99-41f4-895f-4c2c5c78484a" quorumnode \
attributes standby="on"
node $id="645e09b4-aee5-4cec-a241-8bd4e03a78c3" vmhost2
primitive p_drbd_mount2 ocf:linbit:drbd \
params drbd_resource="mount2" \
op start interval="0" timeout="240" \
op stop interval="0" timeout="100" \
op monitor interval="10" role="Master" timeout="30" \
op monitor interval="20" role="Slave" timeout="30"
primitive p_drbd_mount1 ocf:linbit:drbd \
params drbd_resource="mount1" \
op start interval="0" timeout="240" \
op stop interval="0" timeout="100" \
op monitor interval="10" role="Master" timeout="30" \
op monitor interval="20" role="Slave" timeout="30"
primitive p_drbd_vmstore ocf:linbit:drbd \
params drbd_resource="vmstore" \
op start interval="0" timeout="240" \
op stop interval="0" timeout="100" \
op monitor interval="10" role="Master" timeout="30" \
op monitor interval="20" role="Slave" timeout="30"
primitive p_fs_vmstore ocf:heartbeat:Filesystem \
params device="/dev/drbd0" directory="/mnt/storage/vmstore" fstype="ext4" \
op start interval="0" timeout="60" \
op stop interval="0" timeout="60" \
op monitor interval="20" timeout="40"
primitive p_ping ocf:pacemaker:ping \
params name="p_ping" host_list="192.168.1.25 192.168.1.26" multiplier="1000" \
op start interval="0" timeout="60" \
op monitor interval="20s" timeout="60"
primitive p_sysadmin_notify ocf:heartbeat:MailTo \
params email="alert at example.com" subject="Pacemaker Change" \
op start interval="0" timeout="10" \
op stop interval="0" timeout="10" \
op monitor interval="10" timeout="10"
primitive p_vm_myvm ocf:heartbeat:VirtualDomain \
params config="/mnt/storage/vmstore/config/myvm.xml" \
meta allow-migrate="false" target-role="Started" is-managed="true" \
op start interval="0" timeout="180" \
op stop interval="0" timeout="180" \
op monitor interval="10" timeout="30"
primitive stonithquorumnode stonith:external/webpowerswitch \
params wps_ipaddr="192.168.3.100" wps_port="x" wps_username="xxx" wps_password="xxx" hostname_to_stonith="quorumnode"
primitive stonithvmhost1 stonith:external/webpowerswitch \
params wps_ipaddr="192.168.3.100" wps_port="x" wps_username="xxx" wps_password="xxx" hostname_to_stonith="vmhost1"
primitive stonithvmhost2 stonith:external/webpowerswitch \
params wps_ipaddr="192.168.3.100" wps_port="x" wps_username="xxx" wps_password="xxx" hostname_to_stonith="vmhost2"
group g_vm p_fs_vmstore p_vm_myvm
ms ms_drbd_mount2 p_drbd_mount2 \
meta master-max="1" master-node-max="1" clone-max="2" clone-node-max="1" notify="true"
ms ms_drbd_mount1 p_drbd_mount1 \
meta master-max="1" master-node-max="1" clone-max="2" clone-node-max="1" notify="true"
ms ms_drbd_vmstore p_drbd_vmstore \
meta master-max="1" master-node-max="1" clone-max="2" clone-node-max="1" notify="true"
clone cl_ping p_ping \
meta interleave="true"
clone cl_sysadmin_notify p_sysadmin_notify
location loc_run_on_most_connected g_vm \
rule $id="loc_run_on_most_connected-rule" p_ping: defined p_ping
location loc_st_nodescan stonithquorumnode -inf: quorumnode
location loc_st_vmhost1 stonithvmhost1 -inf: vmhost1
location loc_st_vmhost2 stonithvmhost2 -inf: vmhost2
colocation c_drbd_libvirt_vm inf: g_vm ms_drbd_vmstore:Master ms_drbd_mount1:Master ms_drbd_mount2:Master
order o_drbd-fs-vm inf: ms_drbd_vmstore:promote ms_drbd_mount1:promote ms_drbd_mount2:promote g_vm:start
property $id="cib-bootstrap-options" \
dc-version="1.1.6-9971ebba4494012a93c03b40a2c58ec0eb60f50c" \
cluster-infrastructure="Heartbeat" \
stonith-enabled="true" \
no-quorum-policy="freeze" \
last-lrm-refresh="1337746179"
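For completeness, here is roughly how I sanity-check what the policy engine intends to do with this configuration (just a sketch; ptest ships with this Pacemaker 1.1.6 build, and crm_simulate is its newer equivalent):

    ptest -sL          # show allocation scores and pending actions against the live CIB
    crm_simulate -sL   # same on newer Pacemaker builds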
This has been working well; however, last week Pacemaker suddenly stopped the p_vm_myvm resource and then started it again. I have attached the relevant section of /var/log/daemon.log, but I cannot determine from it what caused Pacemaker to restart this resource. Based on the log, can you tell me what event triggered the restart?
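For reference, this is roughly how I have been searching the log to narrow things down (a rough sketch; it assumes the daemon.log location mentioned above and the resource name from my configuration):

    grep 'p_vm_myvm' /var/log/daemon.log    # LRM events (monitor/start/stop results) for the VM resource
    grep 'LogActions' /var/log/daemon.log   # policy engine lines recording the actions it scheduled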
Thanks,
Andrew
-------------- next part --------------
A non-text attachment was scrubbed...
Name: unexpected_shutdown.log
Type: text/x-log
Size: 11246 bytes
Desc: not available
URL: <https://lists.clusterlabs.org/pipermail/pacemaker/attachments/20120619/24a25b6c/attachment-0003.bin>