[Pacemaker] Mostly STONITH Questions / Seeking Best Practice

David Morton davidmorton78 at gmail.com
Tue Sep 4 00:31:54 EDT 2012


Afternoon all,

We have a two-node failover cluster using IBM IMMs for STONITH via the
external/ipmi plugin. We recently moved our database filesystems from OCFS2 to
ext3 because of a bug we discovered; only one disk (shared scripts, logs, etc.)
still needs to be available to both nodes, and it remains OCFS2.
All SAN disks are presented to both nodes at all times, although each is
obviously only mounted by the appropriate resource group.

I have the following questions around best practice for this type of
configuration:
1) I'm planning to implement sfex resources (a small logical volume in the same
volume group as the data being protected) as an additional safety measure
alongside the existing external/ipmi STONITH control (a rough sketch follows the
questions). Is this best practice for the case where the IBM IMM is unavailable,
or its credentials have changed, and the STONITH action is never carried out?
2) Is there any risk to a healthy node if an unhealthy node that has the shared
OCFS2 volume mounted goes down? The quorum policy is set to ignore. It doesn't
seem to cause any issues, but I want to confirm this is the designed behavior.
3) Does a node need its own STONITH resource in order to self-fence, or is that
covered by internal Pacemaker functionality? We currently use location
constraints to ensure each STONITH resource never runs on the node it is meant
to fence, as per the documentation.
4) What is the best way to disable STONITH non-disruptively for node
maintenance? Is it a case of setting the stonith-enabled property in the CIB to
false, stopping the STONITH resources and then stopping openais? (My current
plan is sketched after the questions.)
5) Does anyone know of an OCF-compliant resource agent for Derby / JavaDB? We
use an old init-style script at the moment, and I'm afraid it will trip us up
and get a node STONITHed on shutdown at some stage. (A possible fallback is
sketched after the questions.)

Are there any other considerations we should address to reach a best-practice
position? We have a change window coming up and I want to create the best
environment possible. Please feel free to critique the configuration below as it
stands.

Many thanks, Dave.

node server-001
node server-002
primitive DERBYDB lsb:derby
primitive FS_DB_NEWS ocf:heartbeat:Filesystem \
        params device="/dev/vg_db_news/lv_db_news" directory="/DB_NEWS"
fstype="ext3" options="acl" \
        op monitor interval="40s" \
        op start interval="0" timeout="60" \
        op stop interval="0" timeout="60" \
        meta migration-threshold="3" failure-timeout="180"
primitive FS_DB_FEEDS ocf:heartbeat:Filesystem \
        params device="/dev/vg_db_feeds/lv_db_feeds" directory="/DB_FEEDS"
fstype="ext3" options="acl" \
        op monitor interval="40s" \
        op start interval="0" timeout="60" \
        op stop interval="0" timeout="60" \
        meta migration-threshold="3" failure-timeout="180"
primitive FS_DB_SHARED ocf:heartbeat:Filesystem \
        params device="/dev/mapper/07ea2ffab5c4ae011_part1"
directory="/DB_SHARED" fstype="ocfs2" options="acl" \
        op monitor interval="40s" \
        op start interval="0" timeout="60" \
        op stop interval="0" timeout="60" \
        meta target-role="Started"
primitive FS_LOGS_NEWS ocf:heartbeat:Filesystem \
        params device="/dev/mapper/0c2ebc3735c4ae011_part1"
directory="/LOGS_NEWS" fstype="ext3" options="data=writeback,noatime,acl" \
        op monitor interval="40s" \
        op start interval="0" timeout="60" \
        op stop interval="0" timeout="60" \
        meta migration-threshold="3" failure-timeout="180"
primitive FS_LOGS_FEEDS ocf:heartbeat:Filesystem \
        params device="/dev/mapper/0345899885c4ae011_part1"
directory="/LOGS_FEEDS" fstype="ext3" options="data=writeback,noatime,acl" \
        op monitor interval="40s" \
        op start interval="0" timeout="60" \
        op stop interval="0" timeout="60" \
        meta migration-threshold="3" failure-timeout="180"
primitive IP_NEWS_15 ocf:heartbeat:IPaddr2 \
        params ip="192.168.15.92" cidr_netmask="24" \
        op monitor interval="30s" \
        meta migration-threshold="3" failure-timeout="180"
primitive IP_NEWS_72 ocf:heartbeat:IPaddr2 \
        params ip="192.168.72.92" cidr_netmask="24" \
        op monitor interval="30s" \
        meta migration-threshold="3" failure-timeout="180"
primitive IP_FEEDS_15 ocf:heartbeat:IPaddr2 \
        params ip="192.168.15.93" cidr_netmask="24" \
        op monitor interval="30s" \
        meta migration-threshold="3" failure-timeout="180"
primitive IP_FEEDS_72 ocf:heartbeat:IPaddr2 \
        params ip="192.168.72.93" cidr_netmask="24" \
        op monitor interval="30s" \
        meta migration-threshold="3" failure-timeout="180"
primitive MAIL_ALERT ocf:heartbeat:MailTo \
        params email="theguy at thatcompany.com" \
        op monitor interval="60" timeout="10"
primitive PGSQL_FEEDS1 ocf:heartbeat:pgsql \
        params pgdata="/DB_FEEDS/feeds1/dbdata/data/" pgport="5432"
pgdba="feeds1" \
        op start interval="0" timeout="120" \
        op stop interval="0" timeout="120" \
        op monitor interval="60" timeout="30" \
        meta migration-threshold="3" failure-timeout="180"
primitive PGSQL_FEEDS2 ocf:heartbeat:pgsql \
        params pgdata="/DB_FEEDS/feeds2/dbdata/data/" pgport="5434"
pgdba="feeds2" \
        op start interval="0" timeout="120" \
        op stop interval="0" timeout="120" \
        op monitor interval="60" timeout="30" \
        meta migration-threshold="3" failure-timeout="180"
primitive PGSQL_NEWS ocf:heartbeat:pgsql \
        params pgdata="/DB_NEWS/news/dbdata/data/" pgport="5433"
pgdba="news" \
        op start interval="0" timeout="120" \
        op stop interval="0" timeout="120" \
        op monitor interval="60" timeout="30" \
        meta migration-threshold="3" failure-timeout="180"
primitive STONITH-DB-001 stonith:external/ipmi \
        params hostname="server-001" ipaddr="192.168.72.80" userid="user"
passwd="password" interface="lan" \
        op monitor interval="60s" timeout="30s" \
        meta target-role="Started"
primitive STONITH-DB-002 stonith:external/ipmi \
        params hostname="server-002" ipaddr="192.168.72.81" userid="user"
passwd="password" interface="lan" \
        op monitor interval="60s" timeout="30s" \
        meta target-role="Started"
primitive VG_DB_NEWS ocf:heartbeat:LVM \
        params volgrpname="vg_db_news" \
        op monitor interval="60" timeout="60"
primitive VG_DB_FEEDS ocf:heartbeat:LVM \
        params volgrpname="vg_db_feeds" \
        op monitor interval="60" timeout="60"
primitive clvm ocf:lvm2:clvmd \
        params daemon_timeout="30" \
        op start interval="0" timeout="90" \
        op stop interval="0" timeout="100"
primitive dlm ocf:pacemaker:controld \
        op monitor interval="60" timeout="60"
primitive o2cb ocf:ocfs2:o2cb \
        op monitor interval="60" timeout="60"
group NEWS VG_DB_NEWS FS_LOGS_NEWS FS_DB_NEWS IP_NEWS_15 IP_NEWS_72 DERBYDB PGSQL_NEWS \
        meta target-role="Started"
group FEEDS VG_DB_FEEDS FS_LOGS_FEEDS FS_DB_FEEDS IP_FEEDS_15 IP_FEEDS_72 PGSQL_FEEDS1 PGSQL_FEEDS2 \
        meta target-role="Started"
group OCFS2_SHARED dlm o2cb clvm FS_DB_SHARED
clone CL_MAIL_ALERT MAIL_ALERT
clone CL_OCFS2_SHARED OCFS2_SHARED \
        meta interleave="true"
location LOC_NEWS NEWS 25: server-001
location LOC_FEEDS FEEDS 25: server-002
location LOC_STONITH-001 STONITH-DB-001 -inf: server-001
location LOC_STONITH-002 STONITH-DB-002 -inf: server-002
colocation COL_DB_SHARED_NEWS inf: NEWS CL_OCFS2_SHARED
colocation COL_DB_SHARED_FEEDS inf: FEEDS CL_OCFS2_SHARED
order DB_SHARE_FIRST_NEWS 0: CL_OCFS2_SHARED NEWS
order DB_SHARE_FIRST_FEEDS 0: CL_OCFS2_SHARED FEEDS
property $id="cib-bootstrap-options" \
        dc-version="1.1.5-5bd2b9154d7d9f86d7f56fe0a74072a5a6590c60" \
        cluster-infrastructure="openais" \
        expected-quorum-votes="2" \
        no-quorum-policy="ignore" \
        start-failure-is-fatal="false" \
        stonith-enabled="true" \
        last-lrm-refresh="1346358565"
rsc_defaults $id="rsc-options" \
        resource-stickiness="100"