[Pacemaker] Failing back a multi-state resource eg. DRBD
Dominic Malolepszy
dmalolepszy at optusnet.com.au
Thu Mar 3 06:18:51 CET 2011
Hi,
I'm trying to simulate various failure scenarios and work out what to do
to correct each problem. I have a DRBD cluster as defined below; if the primary fails
(ie power cycled drbd01.test), the secondary (drbd02.test) takes over
successfully, so DRBD:master now runs on drbd02.test. When node
drbd01.test comes back up, DRBD:master remains on drbd02.test (ie due to
resource stickiness); and drbd01.test simply becomes DRBD:Slave; this is
what I want.
Now what command/s would I need to run to move the master back to
drbd01.test, and make drbd02.test the new slave? The name of the
multi-state resource is ms-drbd0, below is the config I am currently
running.
node drbd01.test \
attributes standby="off"
node drbd02.test \
attributes standby="off"
primitive drbd0 ocf:linbit:drbd \
params drbd_resource="drbd0" \
op monitor interval="60s" \
op start interval="0" timeout="240s" \
op promote interval="0" timeout="90s" start-delay="3s" \
op demote interval="0" timeout="90s" start-delay="3s" \
op notify interval="0" timeout="90s" \
op stop interval="0" timeout="100s" \
op monitor interval="10s" role="Master" timeout="20s"
start-delay="5s" \
op monitor interval="20s" role="Slave" timeout="20s" start-delay="5s"
primitive fs0 ocf:heartbeat:Filesystem \
params directory="/var/lib/pgsql/9.0/data" device="/dev/drbd0"
fstype="ext3" \
op start interval="0" timeout="60s" start-delay="1s" \
op stop interval="0"
primitive ip ocf:heartbeat:IPaddr \
params ip="192.168.1.50" cidr_netmask="24" \
op monitor interval="10s"
primitive pgsql0 ocf:heartbeat:pgsql \
params pgctl="/usr/pgsql-9.0/bin/pg_ctl" \
params psql="/usr/pgsql-9.0/bin/psql" \
params pgdata="/var/lib/pgsql/9.0/data" \
op monitor interval="30s" timeout="30s" \
op start interval="0" timeout="120s" start_delay="1s" \
op stop interval="0" timeout="120s"
primitive ping_gateway ocf:pacemaker:ping \
params host_list="192.168.1.1" multiplier="1000" \
op monitor interval="10s" timeout="60s" \
op start interval="0" timeout="60s" \
op stop interval="0" timeout="20s"
ms ms-drbd0 drbd0 \
meta master-max="1" master-node-max="1" notify="true"
clone-node-max="1" clone-max="2"
clone connectivity_check ping_gateway \
meta globally-unique="false"
location master-connected-node ms-drbd0 \
rule $id="master-connected-node-rule" $role="master" -inf:
not_defined pingd or pingd lte 0
location primary_location ip 50: drbd01.test
colocation fs0-with-drbd0 inf: fs0 ms-drbd0:Master
colocation ip-with-pgsql0 inf: ip pgsql0
colocation pgsql0-with-fs0 inf: pgsql0 fs0
order fs0-after-drbd0 inf: ms-drbd0:promote fs0:start
order ip-after-pgsql0 inf: pgsql0 ip
order pgsql0-after-fs0 inf: fs0:start pgsql0
property $id="cib-bootstrap-options" \
cluster-infrastructure="openais" \
expected-quorum-votes="2" \
stonith-enabled="false" \
no-quorum-policy="ignore" \
dc-version="1.0.10-da7075976b5ff0bee71074385f8fd02f296ec8a3"
rsc_defaults $id="rsc-options" \
resource-stickiness="100"
Cheers,
Dominic.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://oss.clusterlabs.org/pipermail/pacemaker/attachments/20110303/4ca59c39/attachment-0001.html>
More information about the Pacemaker
mailing list