[Pacemaker] Prevent one node from being a target for resource migration

Mon Jan 12 17:42:10 CET 2015

Hello.

I have a 3-node cluster managed by corosync+pacemaker+crm. Node1 and Node2
are a DRBD master-slave pair; they also have a number of other services
installed (postgresql, nginx, ...). Node3 is just a corosync node (for
quorum): no DRBD/postgresql/... is installed on it, only corosync+pacemaker.

But when I add resources to the cluster, some of them are somehow moved
to node3 and then fail there. Note that I have a "colocation" directive to
place these resources on the DRBD master only and a "location" constraint
with -inf for node3, but this does not help - why? How do I make pacemaker
not run anything on node3?

All the resources are added in a single transaction: "cat config.txt | crm
-w -f- configure", where config.txt contains the directives and a "commit"
statement at the end.

Below are "crm status" (error messages) and "crm configure show" outputs.

*root@node3:~# crm status*
Current DC: node2 (1017525950) - partition with quorum
3 Nodes configured
6 Resources configured
Online: [ node1 node2 node3 ]
Master/Slave Set: ms_drbd [drbd]
     Masters: [ node1 ]
     Slaves: [ node2 ]
Resource Group: server
     fs (ocf::heartbeat:Filesystem): Started node1
     postgresql (lsb:postgresql): Started node3 FAILED
     bind9 (lsb:bind9): Started node3 FAILED
     nginx (lsb:nginx): Started node3 (unmanaged) FAILED
Failed actions:
    drbd_monitor_0 (node=node3, call=744, rc=5, status=complete,
last-rc-change=Mon Jan 12 11:16:43 2015, queued=2ms, exec=0ms): not
installed
    postgresql_monitor_0 (node=node3, call=753, rc=1, status=complete,
last-rc-change=Mon Jan 12 11:16:43 2015, queued=8ms, exec=0ms): unknown
error
    bind9_monitor_0 (node=node3, call=757, rc=1, status=complete,
last-rc-change=Mon Jan 12 11:16:43 2015, queued=11ms, exec=0ms): unknown
error
    nginx_stop_0 (node=node3, call=767, rc=5, status=complete,
last-rc-change=Mon Jan 12 11:16:44 2015, queued=1ms, exec=0ms): not
installed

*root@node3:~# crm configure show | cat*
node $id="1017525950" node2
node $id="13071578" node3
node $id="1760315215" node1
primitive drbd ocf:linbit:drbd \
params drbd_resource="vlv" \
op start interval="0" timeout="240" \
op stop interval="0" timeout="120"
primitive fs ocf:heartbeat:Filesystem \
params device="/dev/drbd0" directory="/var/lib/vlv.drbd/root" options="noatime,nodiratime" fstype="xfs" \
op start interval="0" timeout="300" \
op stop interval="0" timeout="300"
primitive postgresql lsb:postgresql \
op monitor interval="10" timeout="60" \
op start interval="0" timeout="60" \
op stop interval="0" timeout="60"
primitive bind9 lsb:bind9 \
op monitor interval="10" timeout="60" \
op start interval="0" timeout="60" \
op stop interval="0" timeout="60"
primitive nginx lsb:nginx \
op monitor interval="10" timeout="60" \
op start interval="0" timeout="60" \
op stop interval="0" timeout="60"
group server fs postgresql bind9 nginx
ms ms_drbd drbd meta master-max="1" master-node-max="1" clone-max="2" clone-node-max="1" notify="true"
location loc_server server rule $id="loc_server-rule" -inf: #uname eq node3
colocation col_server inf: server ms_drbd:Master
order ord_server inf: ms_drbd:promote server:start
property $id="cib-bootstrap-options" \
stonith-enabled="false" \
last-lrm-refresh="1421079189" \
maintenance-mode="false"
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://oss.clusterlabs.org/pipermail/pacemaker/attachments/20150112/8a13f039/attachment.html>