[Pacemaker] dependent resource reach max fail count

Michael Fung mike at 3open.org
Fri Jul 2 10:18:56 UTC 2010


Hi All,

It is a 2-node Active/Passive configuration.

First, everything is normal.

Then, all network cables were disconnected. On node1 and node2, ms_drbd_r0
automatically went to the Slave role. vz_fs stopped as expected, because:

  order ms_drbd_r0-b4-vz_fs inf: ms_drbd_r0:promote group_vz:start
  group group_vz vz_fs vz_svc ve1011

Then, all cables were reconnected. On node1, ms_drbd_r0 automatically went back to the Master role.

However, vz_fs reached its max fail count and Pacemaker refused to start it!
I had to use the cleanup command to fix it **manually**.

Is this normal behavior? Can I prevent vz_fs from reaching its max fail count?


Thanks in advance!
Michael

A simplified version of the configuration is shown below:

primitive drbd_r0 ocf:linbit:drbd \
        params drbd_resource="r0" \
        op start interval="0" timeout="240s" \
        op stop interval="0" timeout="100s" \
        op monitor interval="11s" role="Master" \
        op monitor interval="12s" role="Slave"

ms ms_drbd_r0 drbd_r0 \
        meta master-max="1" master-node-max="1" clone-max="2"
clone-node-max="1" notify="true" globally-unique="false"
target-role="Started"

primitive gw ocf:pacemaker:ping \
        params pidfile="/var/run/ping.pid" host_list="alix" \
        op start interval="0" timeout="60s" \
        op stop interval="0" timeout="20s" \
        op monitor interval="10s" timeout="60s"

primitive vz_fs ocf:heartbeat:Filesystem \
        params device="/dev/drbd0" directory="/vz" fstype="ext3" \
        op start interval="0" timeout="60s" \
        op stop interval="0" timeout="60s" \
        op monitor interval="10s" timeout="40s"

group group_vz vz_fs vz_svc ve1011

clone clone_gw gw \
        meta clone-node-max="1" clone-max="2"

location ms_drbd_r0-master-on-node1 ms_drbd_r0 \
        rule $id="ms_drbd_r0-master-on-node1-rule" $role="Master" 50:
#uname eq node1

location ms_drbd_r0-no-conn ms_drbd_r0 \
        rule $id="ms_drbd_r0-no-conn-rule" $role="Master" -inf:
not_defined pingd or pingd number:lte 0

colocation coloc_vz inf: ms_drbd_r0:Master group_vz

order ms_drbd_r0-b4-vz_fs inf: ms_drbd_r0:promote group_vz:start

property $id="cib-bootstrap-options" \
        dc-version="1.0.8-042548a451fce8400660f6031f4da6f0223dd5dd" \
        cluster-infrastructure="openais" \
        expected-quorum-votes="2" \
        stonith-enabled="false" \
        no-quorum-policy="ignore" \
        last-lrm-refresh="1278064262"
rsc_defaults $id="rsc-options" \
        resource-stickiness="100"




More information about the Pacemaker mailing list