[Pacemaker] Master is restarted when other node comes online
Andrei Borzenkov
arvidjaar at gmail.com
Sun Sep 28 07:56:55 UTC 2014
I have a two-node cluster with a single master/slave resource (replicated
database) using pacemaker+openais on SLES11 SP3 (pacemaker
1.1.11-3ca8c3b). I hit a weird situation that I have not seen before, and
I cannot really understand it. Assume the master runs on node A and the
slave runs on node B. If I stop the cluster stack on B (rcopenais stop)
and start it again (rcopenais start), the master is restarted. Of course
this means a service interruption. The same happens if I reboot node B. I
have a crm_report and can provide whatever logs are required, but I wanted
first to quickly make sure this is not expected behavior.
I have not seen it before, but now that I recall what I tested, it was
always a simulation of node failure. I never actually tried the above
scenario.
Assuming this is correct behavior - what is the correct procedure to
shut down a single node then? As it stands, this behavior makes it
impossible to do any maintenance on the slave node.
Configuration below:
node msksaphana1 \
attributes hana_hdb_vhost="msksaphana1" hana_hdb_site="SITE1"
hana_hdb_remoteHost="msksaphana2" hana_hdb_srmode="sync"
lpa_hdb_lpt="1411732740"
node msksaphana2 \
attributes hana_hdb_vhost="msksaphana2" hana_hdb_site="SITE2"
hana_hdb_srmode="sync" hana_hdb_remoteHost="msksaphana1"
lpa_hdb_lpt="30"
primitive rsc_SAPHanaTopology_HDB_HDB00 ocf:suse:SAPHanaTopology \
params SID="HDB" InstanceNumber="00" \
op monitor interval="10" timeout="600" \
op start interval="0" timeout="600" \
op stop interval="0" timeout="300"
primitive rsc_SAPHana_HDB_HDB00 ocf:suse:SAPHana \
params SID="HDB" InstanceNumber="00" PREFER_SITE_TAKEOVER="true"
AUTOMATED_REGISTER="true" DUPLICATE_PRIMARY_TIMEOUT="7200" \
op start timeout="3600" interval="0" \
op stop timeout="3600" interval="0" \
op promote timeout="3600" interval="0" \
op monitor timeout="700" role="Master" interval="60" \
op monitor timeout="700" role="Slave" interval="61"
primitive rsc_ip_HDB_HDB00 ocf:heartbeat:IPaddr2 \
params ip="10.72.10.64" \
op start timeout="20" interval="0" \
op stop timeout="20" interval="0" \
op monitor interval="10" timeout="20"
primitive stonith_IPMI_msksaphana1 stonith:external/ipmi \
params ipmitool="/usr/bin/ipmitool" hostname="msksaphana1"
passwd="P@ssw0rd" userid="hacluster" ipaddr="10.72.5.47" \
op stop timeout="15" interval="0" \
op monitor timeout="20" interval="3600" \
op start timeout="20" interval="0" \
meta target-role="Started"
primitive stonith_IPMI_msksaphana2 stonith:external/ipmi \
params ipmitool="/usr/bin/ipmitool" hostname="msksaphana2"
passwd="P@ssw0rd" userid="hacluster" ipaddr="10.72.5.48" \
op stop timeout="15" interval="0" \
op monitor timeout="20" interval="3600" \
op start timeout="20" interval="0" \
meta target-role="Started"
ms msl_SAPHana_HDB_HDB00 rsc_SAPHana_HDB_HDB00 \
meta clone-max="2" clone-node-max="1" target-role="Started"
clone cln_SAPHanaTopology_HDB_HDB00 rsc_SAPHanaTopology_HDB_HDB00 \
meta is-managed="true" clone-node-max="1" target-role="Started"
location stonoth_IPMI_msksaphana1_on_msksaphana2
stonith_IPMI_msksaphana1 -inf: msksaphana1
location stonoth_IPMI_msksaphana2_on_msksaphana1
stonith_IPMI_msksaphana2 -inf: msksaphana2
colocation col_saphana_ip_HDB_HDB00 2000: rsc_ip_HDB_HDB00:Started
msl_SAPHana_HDB_HDB00:Master
order ord_SAPHana_HDB_HDB00 2000: cln_SAPHanaTopology_HDB_HDB00
msl_SAPHana_HDB_HDB00
property $id="cib-bootstrap-options" \
stonith-enabled="true" \
placement-strategy="balanced" \
dc-version="1.1.11-3ca8c3b" \
cluster-infrastructure="classic openais (with plugin)" \
expected-quorum-votes="2" \
stonith-action="reboot" \
no-quorum-policy="ignore" \
last-lrm-refresh="1411730405"
rsc_defaults $id="rsc-options" \
resource-stickiness="1" \
migration-threshold="3"
op_defaults $id="op-options" \
timeout="600" \
record-pending="true"
Thank you!
-andrei
More information about the Pacemaker
mailing list