[Pacemaker] All resources bounce on failback

Tue Mar 15 02:01:02 UTC 2011

The config below is behaving well and doing what I want it to do. However,
there is one situation where it misbehaves: after a failover (using
standby for testing purposes), when resources fail back to their preferred
node, all of the resources in the other main group (DEPOT or ESP_AUDIT) bounce
on the node they are not moving from. For example, if the DEPOT group is
running on server 1, which is its preferred node, and the ESP_AUDIT resource
group moves from node 1 to node 2, the DEPOT group stops and then promptly
starts again without changing location.

Secondary question:
Is the colocation statement required, given that the start order is already
specified and the clone will create two instances (one for each node) by
default? Essentially, I just need to ensure that the OCFS2 modules / DLM are
loaded before the only filesystem that is actually shared.

This is on SLES 11 SP1
openais-1.1.3-0.2.3
pacemaker-1.1.2-0.7.1
corosync-1.2.6-0.2.2

# Config below
# The two cluster nodes.
node company-prod-database-001
node company-prod-database-002
# Service controlled through the LSB init script "derby".
primitive DERBYDB lsb:derby
# OCFS2 mounts managed by ocf:heartbeat:Filesystem.
# Each resource mounts one multipath partition on its own directory and is
# monitored every 40s, with 60s start/stop timeouts.
# Long "params" lines are split with explicit "\" continuations so that the
# configuration can be loaded as-is.

# Database volume for the DEPOT group.
primitive FS_DB_DEPOT ocf:heartbeat:Filesystem \
        params device="/dev/mapper/3600601600350220035325c4ae011_part1" \
                directory="/DB_DEPOT" fstype="ocfs2" options="acl" \
        op monitor interval="40s" \
        op start interval="0" timeout="60" \
        op stop interval="0" timeout="60"
# Database volume for the ESP_AUDIT group.
primitive FS_DB_ESP_AUDIT ocf:heartbeat:Filesystem \
        params device="/dev/mapper/36006016003502200a1575c4ae011_part1" \
                directory="/DB_ESP_AUDIT" fstype="ocfs2" options="acl" \
        op monitor interval="40s" \
        op start interval="0" timeout="60" \
        op stop interval="0" timeout="60"
# Shared volume; member of the cloned OCFS2_SHARED group.
primitive FS_DB_SHARED ocf:heartbeat:Filesystem \
        params device="/dev/mapper/360060160035022007eab5c4ae011_part1" \
                directory="/DB_SHARED" fstype="ocfs2" options="acl" \
        op monitor interval="40s" \
        op start interval="0" timeout="60" \
        op stop interval="0" timeout="60"
# Log volume for the DEPOT group.
primitive FS_LOGS_DEPOT ocf:heartbeat:Filesystem \
        params device="/dev/mapper/36006016003502200c3735c4ae011_part1" \
                directory="/LOGS_DEPOT" fstype="ocfs2" \
                options="data=writeback,noatime,acl" \
        op monitor interval="40s" \
        op start interval="0" timeout="60" \
        op stop interval="0" timeout="60"
# Log volume for the ESP_AUDIT group.
primitive FS_LOGS_ESP_AUDIT ocf:heartbeat:Filesystem \
        params device="/dev/mapper/3600601600350220034585c4ae011_part1" \
                directory="/LOGS_ESP_AUDIT" fstype="ocfs2" \
                options="data=writeback,noatime,acl" \
        op monitor interval="40s" \
        op start interval="0" timeout="60" \
        op stop interval="0" timeout="60"
# Floating service addresses (ocf:heartbeat:IPaddr2), monitored every 30s.
# Each resource name carries the third octet of its subnet. The two addresses
# in 192.168.132.0/24 are named *_132 so that they match the members listed
# in the DEPOT and ESP_AUDIT groups (IP_DEPOT_132, IP_ESP_AUDIT_132).
primitive IP_DEPOT_115 ocf:heartbeat:IPaddr2 \
        params ip="192.168.115.93" cidr_netmask="24" \
        op monitor interval="30s"
primitive IP_DEPOT_132 ocf:heartbeat:IPaddr2 \
        params ip="192.168.132.93" cidr_netmask="24" \
        op monitor interval="30s"
primitive IP_ESP_AUDIT_115 ocf:heartbeat:IPaddr2 \
        params ip="192.168.115.92" cidr_netmask="24" \
        op monitor interval="30s"
primitive IP_ESP_AUDIT_132 ocf:heartbeat:IPaddr2 \
        params ip="192.168.132.92" cidr_netmask="24" \
        op monitor interval="30s"
# Mail notification resource (ocf:heartbeat:MailTo); cloned as CL_MAIL_ALERT.
# The recipient is written as a real address: the list archive had replaced
# "@" with " at ", which is not a valid value for the email parameter.
primitive MAIL_ALERT ocf:heartbeat:MailTo \
        params email="helpdesk@company.com" \
        op monitor interval="60" timeout="10"
# PostgreSQL instances (ocf:heartbeat:pgsql), one per data directory and port.
# Start/stop time out after 120s; monitor runs every 60s with a 30s timeout.
# The "params" lines use explicit "\" continuations so that pgdba stays part
# of the parameter list.

# Audit database on /DB_ESP_AUDIT, port 5432.
primitive PGSQL_AUDIT ocf:heartbeat:pgsql \
        params pgdata="/DB_ESP_AUDIT/audit/dbdata/data/" pgport="5432" \
                pgdba="audit" \
        op start interval="0" timeout="120" \
        op stop interval="0" timeout="120" \
        op monitor interval="60" timeout="30"
# Depot database on /DB_DEPOT, port 5433.
primitive PGSQL_DEPOT ocf:heartbeat:pgsql \
        params pgdata="/DB_DEPOT/depot/dbdata/data/" pgport="5433" \
                pgdba="depot" \
        op start interval="0" timeout="120" \
        op stop interval="0" timeout="120" \
        op monitor interval="60" timeout="30"
# ESP database on /DB_ESP_AUDIT, port 5434.
primitive PGSQL_ESP ocf:heartbeat:pgsql \
        params pgdata="/DB_ESP_AUDIT/esp/dbdata/data/" pgport="5434" \
                pgdba="esp" \
        op start interval="0" timeout="120" \
        op stop interval="0" timeout="120" \
        op monitor interval="60" timeout="30"
# IPMI fencing devices (stonith:external/ipmi), one per node.
# "hostname" is the node that the device fences; "ipaddr" is the IPMI
# interface that is contacted to do so.
# The "params" lines use explicit "\" continuations so that the credentials
# and interface stay part of the parameter list.
primitive STONITH-DB-001 stonith:external/ipmi \
        params hostname="company-prod-database-001" ipaddr="192.168.132.80" \
                userid="thatguy" passwd="password" interface="lan" \
        op monitor interval="60s" timeout="30s"
primitive STONITH-DB-002 stonith:external/ipmi \
        params hostname="company-prod-database-002" ipaddr="192.168.132.81" \
                userid="thatguy" passwd="password" interface="lan" \
        op monitor interval="60s" timeout="30s"
# Members of the OCFS2_SHARED group (with FS_DB_SHARED), which is cloned as
# CL_OCFS2_SHARED. Both are monitored every 60s with a 60s timeout.
# DLM control daemon (ocf:pacemaker:controld).
primitive dlm ocf:pacemaker:controld \
        op monitor interval="60" timeout="60"
# OCFS2 cluster stack resource (ocf:ocfs2:o2cb).
primitive o2cb ocf:ocfs2:o2cb \
        op monitor interval="60" timeout="60"
# Resource groups; members are listed in the order given on each line.
# The member lists use explicit "\" continuations so that the last members
# (PGSQL_DEPOT; IP_ESP_AUDIT_132, PGSQL_AUDIT, PGSQL_ESP) stay in their group.
group DEPOT FS_LOGS_DEPOT FS_DB_DEPOT IP_DEPOT_115 IP_DEPOT_132 DERBYDB \
        PGSQL_DEPOT
group ESP_AUDIT FS_LOGS_ESP_AUDIT FS_DB_ESP_AUDIT IP_ESP_AUDIT_115 \
        IP_ESP_AUDIT_132 PGSQL_AUDIT PGSQL_ESP
# DLM, then O2CB, then the shared OCFS2 mount.
group OCFS2_SHARED dlm o2cb FS_DB_SHARED
# Clones of the mail alert resource and of the OCFS2 stack group.
clone CL_MAIL_ALERT MAIL_ALERT
clone CL_OCFS2_SHARED OCFS2_SHARED \
        meta interleave="true"
# Node preferences (score 25): DEPOT prefers node 001, ESP_AUDIT prefers node 002.
location LOC_DEPOT DEPOT 25: company-prod-database-001
location LOC_ESP_AUDIT ESP_AUDIT 25: company-prod-database-002
# Each fencing device is banned (-inf) from the node named in its own
# "hostname" parameter.
location LOC_STONITH-001 STONITH-DB-001 -inf: company-prod-database-001
location LOC_STONITH-002 STONITH-DB-002 -inf: company-prod-database-002
# Mandatory (inf) colocation of each database group with the cloned OCFS2 stack.
colocation COL_DB_SHARED_DEPOT inf: DEPOT CL_OCFS2_SHARED
colocation COL_DB_SHARED_ESP_AUDIT inf: ESP_AUDIT CL_OCFS2_SHARED
# Mandatory (inf) ordering: CL_OCFS2_SHARED is listed first, then the database group.
order DB_SHARE_FIRST_DEPOT inf: CL_OCFS2_SHARED DEPOT
order DB_SHARE_FIRST_ESP_AUDIT inf: CL_OCFS2_SHARED ESP_AUDIT
# Cluster-wide options: expected-quorum-votes is 2 and no-quorum-policy is
# "ignore"; start-failure-is-fatal is disabled.
property $id="cib-bootstrap-options" \
        dc-version="1.1.2-ecb1e2ea172ba2551f0bd763e557fccde68c849b" \
        cluster-infrastructure="openais" \
        expected-quorum-votes="2" \
        no-quorum-policy="ignore" \
        start-failure-is-fatal="false"
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <https://lists.clusterlabs.org/pipermail/pacemaker/attachments/20110315/471477aa/attachment-0003.html>