[Pacemaker] Issues with HA cluster for mysqld
David Parker
dparker at utica.edu
Wed Aug 22 18:49:32 UTC 2012
Hello,
I'm trying to set up a 2-node, active-passive HA cluster for MySQL using
heartbeat and Pacemaker. The operating system is Debian Linux 6.0.5
64-bit, and I am using the heartbeat packages installed via apt-get.
The servers involved are the SQL nodes of a running MySQL cluster, so
the only service I need HA for is the MySQL daemon (mysqld).
What I would like to do is have a single virtual IP address which
clients use to query MySQL, and have the IP and mysqld fail over to the
passive node in the event of a failure on the active node. I have read
through a lot of the heartbeat and Pacemaker documentation, and here are
the resources I have configured for the cluster:
* A custom LSB script for mysqld (compliant with Pacemaker's
requirements as outlined in the documentation)
* An iLO2-based STONITH device using riloe (both servers are HP ProLiant
DL380 G5)
* A virtual IP address for mysqld using IPaddr2
I believe I have configured everything correctly, but I'm not positive.
Anyway, when I start heartbeat and pacemaker (/etc/init.d/heartbeat
start), everything seems to be ok. However, the virtual IP never comes
up, and the output of "crm_resource -LV" indicates that something is wrong:
root@ha1:~# crm_resource -LV
crm_resource[28988]: 2012/08/22_14:41:23 WARN: unpack_rsc_op: Processing
failed op stonith_start_0 on ha1: unknown error (1)
stonith (stonith:external/riloe) Started
MysqlIP (ocf::heartbeat:IPaddr2) Stopped
mysqld (lsb:mysqld) Started
When I attempt to stop heartbeat and Pacemaker (/etc/init.d/heartbeat
stop) it says "Stopping High-Availability services:" and then hangs for
about 5 minutes before finally stopping the services.
So, I'm left with a couple of questions. Is there something wrong with
my configuration? Is there a reason why the HA services can't shut down
in a timely manner? Is there something else I need to do to get the
virtual IP working? Thanks in advance for any help!
- Dave
P.S. My full config as reported by "cibadmin --query" is as follows
(iLO2 password removed):
<cib validate-with="pacemaker-1.0" crm_feature_set="3.0.1"
have-quorum="1" admin_epoch="0" epoch="26" num_updates="8"
cib-last-written="Wed Aug 22 11:16:59 2012"
dc-uuid="1b48f410-44d1-4e89-8b52-ff23b32db1bc">
<configuration>
<crm_config>
<cluster_property_set id="cib-bootstrap-options">
<nvpair id="cib-bootstrap-options-dc-version" name="dc-version"
value="1.0.9-74392a28b7f31d7ddc86689598bd23114f58978b"/>
<nvpair id="cib-bootstrap-options-cluster-infrastructure"
name="cluster-infrastructure" value="Heartbeat"/>
<nvpair id="cib-bootstrap-options-stonith-enabled"
name="stonith-enabled" value="true"/>
</cluster_property_set>
</crm_config>
<nodes>
<node id="1b48f410-44d1-4e89-8b52-ff23b32db1bc" uname="ha1" type="normal"/>
<node id="9790fe6e-67b2-4817-abf4-966b5aa6948c" uname="ha2" type="normal"/>
</nodes>
<resources>
<primitive class="stonith" id="stonith" type="external/riloe">
<instance_attributes id="stonith-instance_attributes">
<nvpair id="stonith-instance_attributes-hostlist" name="hostlist"
value="ha2"/>
<nvpair id="stonith-instance_attributes-ilo_hostname"
name="ilo_hostname" value="10.0.1.112"/>
<nvpair id="stonith-instance_attributes-ilo_user" name="ilo_user"
value="Administrator"/>
<nvpair id="stonith-instance_attributes-ilo_password"
name="ilo_password" value="XXXXXXXX"/>
<nvpair id="stonith-instance_attributes-ilo_can_reset"
name="ilo_can_reset" value="1"/>
<nvpair id="stonith-instance_attributes-ilo_protocol"
name="ilo_protocol" value="2"/>
<nvpair id="stonith-instance_attributes-ilo_powerdown_method"
name="ilo_powerdown_method" value="button"/>
</instance_attributes>
</primitive>
<primitive class="ocf" id="MysqlIP" provider="heartbeat" type="IPaddr2">
<instance_attributes id="MysqlIP-instance_attributes">
<nvpair id="MysqlIP-instance_attributes-ip" name="ip" value="192.168.25.9"/>
<nvpair id="MysqlIP-instance_attributes-cidr_netmask"
name="cidr_netmask" value="32"/>
</instance_attributes>
<operations>
<op id="MysqlIP-monitor-30s" interval="30s" name="monitor"/>
</operations>
</primitive>
<primitive id="mysqld" class="lsb" type="mysqld">
</primitive>
</resources>
<constraints/>
<rsc_defaults/>
<op_defaults/>
</configuration>
<status>
<node_state id="1b48f410-44d1-4e89-8b52-ff23b32db1bc" uname="ha1"
ha="active" in_ccm="true" crmd="online" join="member" expected="member"
crm-debug-origin="do_update_resource" shutdown="0">
<lrm id="1b48f410-44d1-4e89-8b52-ff23b32db1bc">
<lrm_resources>
<lrm_resource id="stonith" type="external/riloe" class="stonith">
<lrm_rsc_op id="stonith_monitor_0" operation="monitor"
crm-debug-origin="do_update_resource" crm_feature_set="3.0.1"
transition-key="4:0:7:c09f049e-ed06-4d25-bc48-143a70b97e44"
transition-magic="0:7;4:0:7:c09f049e-ed06-4d25-bc48-143a70b97e44"
call-id="2" rc-code="7" op-status="0" interval="0" last-run="1345660607"
last-rc-change="1345660607" exec-time="0" queue-time="0"
op-digest="c9a588fa10b441aa64c0a83229e8f3e1"/>
<lrm_rsc_op id="stonith_start_0" operation="start"
crm-debug-origin="do_update_resource" crm_feature_set="3.0.1"
transition-key="4:2:0:c09f049e-ed06-4d25-bc48-143a70b97e44"
transition-magic="0:1;4:2:0:c09f049e-ed06-4d25-bc48-143a70b97e44"
call-id="5" rc-code="1" op-status="0" interval="0" last-run="1345660607"
last-rc-change="1345660607" exec-time="21050" queue-time="0"
op-digest="c9a588fa10b441aa64c0a83229e8f3e1"/>
</lrm_resource>
<lrm_resource id="mysqld" type="mysqld" class="lsb">
<lrm_rsc_op id="mysqld_monitor_0" operation="monitor"
crm-debug-origin="do_update_resource" crm_feature_set="3.0.1"
transition-key="6:0:7:c09f049e-ed06-4d25-bc48-143a70b97e44"
transition-magic="0:0;6:0:7:c09f049e-ed06-4d25-bc48-143a70b97e44"
call-id="4" rc-code="0" op-status="0" interval="0" last-run="1345660606"
last-rc-change="1345660606" exec-time="10" queue-time="0"
op-digest="f2317cad3d54cec5d7d7aa7d0bf35cf8"/>
</lrm_resource>
<lrm_resource id="MysqlIP" type="IPaddr2" class="ocf" provider="heartbeat">
<lrm_rsc_op id="MysqlIP_monitor_0" operation="monitor"
crm-debug-origin="do_update_resource" crm_feature_set="3.0.1"
transition-key="5:0:7:c09f049e-ed06-4d25-bc48-143a70b97e44"
transition-magic="0:7;5:0:7:c09f049e-ed06-4d25-bc48-143a70b97e44"
call-id="3" rc-code="7" op-status="0" interval="0" last-run="1345660606"
last-rc-change="1345660606" exec-time="20" queue-time="0"
op-digest="9611b7026c2dc135fbd13d3537b42d16"/>
</lrm_resource>
</lrm_resources>
</lrm>
<transient_attributes id="1b48f410-44d1-4e89-8b52-ff23b32db1bc">
<instance_attributes id="status-1b48f410-44d1-4e89-8b52-ff23b32db1bc">
<nvpair id="status-1b48f410-44d1-4e89-8b52-ff23b32db1bc-probe_complete"
name="probe_complete" value="true"/>
<nvpair
id="status-1b48f410-44d1-4e89-8b52-ff23b32db1bc-fail-count-stonith"
name="fail-count-stonith" value="INFINITY"/>
<nvpair
id="status-1b48f410-44d1-4e89-8b52-ff23b32db1bc-last-failure-stonith"
name="last-failure-stonith" value="1345660629"/>
</instance_attributes>
</transient_attributes>
</node_state>
</status>
</cib>
--
Dave Parker
Systems Administrator
Utica College
Integrated Information Technology Services
(315) 792-3229
Registered Linux User #408177
More information about the Pacemaker
mailing list