[Pacemaker] Failing back a multi-state resource eg. DRBD

Dominic Malolepszy dmalolepszy at optusnet.com.au
Thu Mar 3 06:18:51 CET 2011


Hi,

I'm trying to simulate various failure scenarios and work out what to do 
to correct each problem. I have a DRBD cluster as defined below; if the 
primary fails (ie drbd01.test is power cycled), the secondary 
(drbd02.test) takes over successfully, so DRBD:Master now runs on 
drbd02.test. When node drbd01.test comes back up, DRBD:Master remains on 
drbd02.test (ie due to resource stickiness), and drbd01.test simply 
becomes DRBD:Slave; this is what I want.

Now what command/s would I need to run to move the master back to 
drbd01.test, and make drbd02.test the new slave? The name of the 
multi-state resource is ms-drbd0, below is the config I am currently 
running.


node drbd01.test \
     attributes standby="off"
node drbd02.test \
     attributes standby="off"
primitive drbd0 ocf:linbit:drbd \
     params drbd_resource="drbd0" \
     op monitor interval="60s" \
     op start interval="0" timeout="240s" \
     op promote interval="0" timeout="90s" start-delay="3s" \
     op demote interval="0" timeout="90s" start-delay="3s" \
     op notify interval="0" timeout="90s" \
     op stop interval="0" timeout="100s" \
     op monitor interval="10s" role="Master" timeout="20s" start-delay="5s" \
     op monitor interval="20s" role="Slave" timeout="20s" start-delay="5s"
primitive fs0 ocf:heartbeat:Filesystem \
     params directory="/var/lib/pgsql/9.0/data" device="/dev/drbd0" fstype="ext3" \
     op start interval="0" timeout="60s" start-delay="1s" \
     op stop interval="0"
primitive ip ocf:heartbeat:IPaddr \
     params ip="192.168.1.50" cidr_netmask="24" \
     op monitor interval="10s"
primitive pgsql0 ocf:heartbeat:pgsql \
     params pgctl="/usr/pgsql-9.0/bin/pg_ctl" \
     params psql="/usr/pgsql-9.0/bin/psql" \
     params pgdata="/var/lib/pgsql/9.0/data" \
     op monitor interval="30s" timeout="30s" \
     op start interval="0" timeout="120s" start_delay="1s" \
     op stop interval="0" timeout="120s"
primitive ping_gateway ocf:pacemaker:ping \
     params host_list="192.168.1.1" multiplier="1000" \
     op monitor interval="10s" timeout="60s" \
     op start interval="0" timeout="60s" \
     op stop interval="0" timeout="20s"
ms ms-drbd0 drbd0 \
     meta master-max="1" master-node-max="1" notify="true" clone-node-max="1" clone-max="2"
clone connectivity_check ping_gateway \
     meta globally-unique="false"
location master-connected-node ms-drbd0 \
     rule $id="master-connected-node-rule" $role="master" -inf: not_defined pingd or pingd lte 0
location primary_location ip 50: drbd01.test
colocation fs0-with-drbd0 inf: fs0 ms-drbd0:Master
colocation ip-with-pgsql0 inf: ip pgsql0
colocation pgsql0-with-fs0 inf: pgsql0 fs0
order fs0-after-drbd0 inf: ms-drbd0:promote fs0:start
order ip-after-pgsql0 inf: pgsql0 ip
order pgsql0-after-fs0 inf: fs0:start pgsql0
property $id="cib-bootstrap-options" \
     cluster-infrastructure="openais" \
     expected-quorum-votes="2" \
     stonith-enabled="false" \
     no-quorum-policy="ignore" \
     dc-version="1.0.10-da7075976b5ff0bee71074385f8fd02f296ec8a3"
rsc_defaults $id="rsc-options" \
     resource-stickiness="100"


Cheers,
Dominic.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://oss.clusterlabs.org/pipermail/pacemaker/attachments/20110303/4ca59c39/attachment-0001.html>


More information about the Pacemaker mailing list