[Pacemaker] Being fenced node is killed again and again even the connection is recovered!

Javen Wu wu.javen at gmail.com
Fri May 14 05:54:42 EDT 2010


Hi Folks,

I set up a three-node cluster with SBD STONITH configured.
After I manually isolated one node by running "ifconfig eth1 down" on it,
the node was fenced as expected.
But after the reboot, even though the network has recovered, the node is
killed again as soon as I start openais and pacemaker.
In `crm_mon -n` I saw the state of the node change from OFFLINE to ONLINE
before it was killed, and I saw its SBD slot go from reset -> clear -> reset.

I attached the syslog and corosync log.
And my CIB configuration is very simple.

Could you help me figure out what the problem is? As far as I can tell, this
is not the expected behaviour.

===%<====CIB information=====================

<cib validate-with="pacemaker-1.0" crm_feature_set="3.0.1" have-quorum="1"
admin_epoch="0" epoch="349" num_updates="99" cib-last-written="Fri May 14
14:50:21 2010" dc-uuid="vm209">
  <configuration>
    <crm_config>
      <cluster_property_set id="cib-bootstrap-options">
        <nvpair id="cib-bootstrap-options-dc-version" name="dc-version"
value="1.1.1-530add2a3721a0ecccb24660a97dbfdaa3e68f51"/>
        <nvpair id="cib-bootstrap-options-cluster-infrastructure"
name="cluster-infrastructure" value="openais"/>
        <nvpair id="cib-bootstrap-options-expected-quorum-votes"
name="expected-quorum-votes" value="3"/>
      </cluster_property_set>
    </crm_config>
    <nodes>
      <node id="vm208" uname="vm208" type="normal"/>
      <node id="vm209" uname="vm209" type="normal"/>
      <node id="vm210" uname="vm210" type="normal"/>
    </nodes>
    <resources>
      <clone id="Fencing">
        <primitive class="stonith" id="sbd-fencing" type="external/sbd">
          <instance_attributes id="sbd-fencing-instance_attributes">
            <nvpair id="sbd-fencing-instance_attributes-sbd_device"
name="sbd_device" value="/dev/sdc"/>
          </instance_attributes>
          <operations>
            <op id="sbd-fencing-monitor-20s" interval="20s" name="monitor"/>
          </operations>
        </primitive>
      </clone>
    </resources>
    <constraints/>
    <rsc_defaults/>
    <op_defaults/>
  </configuration>
  <status>
    <node_state id="vm209" uname="vm209" ha="active" in_ccm="true"
crmd="online" join="member" expected="member"
crm-debug-origin="post_cache_update" shutdown="0">
      <transient_attributes id="vm209">
        <instance_attributes id="status-vm209">
          <nvpair id="status-vm209-probe_complete" name="probe_complete"
value="true"/>
        </instance_attributes>
      </transient_attributes>
      <lrm id="vm209">
        <lrm_resources>
          <lrm_resource id="sbd-fencing:0" type="external/sbd"
class="stonith">
            <lrm_rsc_op id="sbd-fencing:0_monitor_0" operation="monitor"
crm-debug-origin="build_active_RAs" crm_feature_set="3.0.1"
transition-key="4:1:7:f0adcb5c-10d1-4525-b094-b5ab1f776ee0"
transition-magic="0:7;4:1:7:f0adcb5c-10d1-4525-b094-b5ab1f776ee0"
call-id="2" rc-code="7" op-status="0" interval="0" last-run="1273820137"
last-rc-change="1273820137" exec-time="60" queue-time="0"
op-digest="4c3fd39434577fbb6540606d808ed050"/>
            <lrm_rsc_op id="sbd-fencing:0_start_0" operation="start"
crm-debug-origin="build_active_RAs" crm_feature_set="3.0.1"
transition-key="5:1:0:f0adcb5c-10d1-4525-b094-b5ab1f776ee0"
transition-magic="0:0;5:1:0:f0adcb5c-10d1-4525-b094-b5ab1f776ee0"
call-id="3" rc-code="0" op-status="0" interval="0" last-run="1273820137"
last-rc-change="1273820137" exec-time="10" queue-time="0"
op-digest="4c3fd39434577fbb6540606d808ed050"/>
            <lrm_rsc_op id="sbd-fencing:0_monitor_20000" operation="monitor"
crm-debug-origin="build_active_RAs" crm_feature_set="3.0.1"
transition-key="6:2:0:f0adcb5c-10d1-4525-b094-b5ab1f776ee0"
transition-magic="0:0;6:2:0:f0adcb5c-10d1-4525-b094-b5ab1f776ee0"
call-id="4" rc-code="0" op-status="0" interval="20000" last-run="1273822956"
last-rc-change="1273820137" exec-time="1170" queue-time="0"
op-digest="4029bbaef749649e82d602afb46dd872"/>
          </lrm_resource>
        </lrm_resources>
      </lrm>
    </node_state>
    <node_state id="vm208" uname="vm208" ha="dead" in_ccm="false"
crmd="offline" crm-debug-origin="send_stonith_update" join="down"
expected="down" shutdown="0"/>
    <node_state id="vm210" uname="vm210" ha="active" in_ccm="true"
crmd="online" crm-debug-origin="post_cache_update" join="member"
expected="member" shutdown="0">
      <transient_attributes id="vm210">
        <instance_attributes id="status-vm210">
          <nvpair id="status-vm210-probe_complete" name="probe_complete"
value="true"/>
        </instance_attributes>
      </transient_attributes>
      <lrm id="vm210">
        <lrm_resources>
          <lrm_resource id="sbd-fencing:2" type="external/sbd"
class="stonith">
            <lrm_rsc_op id="sbd-fencing:2_monitor_0" operation="monitor"
crm-debug-origin="build_active_RAs" crm_feature_set="3.0.1"
transition-key="8:5:7:f0adcb5c-10d1-4525-b094-b5ab1f776ee0"
transition-magic="0:7;8:5:7:f0adcb5c-10d1-4525-b094-b5ab1f776ee0"
call-id="2" rc-code="7" op-status="0" interval="0" last-run="1273820388"
last-rc-change="1273820388" exec-time="20" queue-time="0"
op-digest="4c3fd39434577fbb6540606d808ed050"/>
            <lrm_rsc_op id="sbd-fencing:2_start_0" operation="start"
crm-debug-origin="build_active_RAs" crm_feature_set="3.0.1"
transition-key="13:5:0:f0adcb5c-10d1-4525-b094-b5ab1f776ee0"
transition-magic="0:0;13:5:0:f0adcb5c-10d1-4525-b094-b5ab1f776ee0"
call-id="3" rc-code="0" op-status="0" interval="0" last-run="1273820388"
last-rc-change="1273820388" exec-time="10" queue-time="0"
op-digest="4c3fd39434577fbb6540606d808ed050"/>
            <lrm_rsc_op id="sbd-fencing:2_monitor_20000" operation="monitor"
crm-debug-origin="build_active_RAs" crm_feature_set="3.0.1"
transition-key="14:5:0:f0adcb5c-10d1-4525-b094-b5ab1f776ee0"
transition-magic="0:0;14:5:0:f0adcb5c-10d1-4525-b094-b5ab1f776ee0"
call-id="4" rc-code="0" op-status="0" interval="20000" last-run="1273822976"
last-rc-change="1273820389" exec-time="1040" queue-time="0"
op-digest="4029bbaef749649e82d602afb46dd872"/>
          </lrm_resource>
        </lrm_resources>
      </lrm>
    </node_state>
  </status>
</cib>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.clusterlabs.org/pipermail/pacemaker/attachments/20100514/cdc42d18/attachment.html>
-------------- next part --------------
A non-text attachment was scrubbed...
Name: corosync.log.gz
Type: application/x-gzip
Size: 6568 bytes
Desc: not available
URL: <http://lists.clusterlabs.org/pipermail/pacemaker/attachments/20100514/cdc42d18/attachment.bin>
-------------- next part --------------
A non-text attachment was scrubbed...
Name: messages.gz
Type: application/x-gzip
Size: 22956 bytes
Desc: not available
URL: <http://lists.clusterlabs.org/pipermail/pacemaker/attachments/20100514/cdc42d18/attachment-0001.bin>


More information about the Pacemaker mailing list