[Pacemaker] Having a really hard time with clvmd on RHEL 7 beta
Digimer
lists@alteeve.ca
Mon Jan 27 18:15:23 UTC 2014
Hi all,
I'm having one heck of a time trying to get clvmd working with
pacemaker 1.1.10 on RHEL 7 beta... I can configure DRBD dual-primary
just fine, and DLM starts on both nodes without trouble.
However, once I try to add clvmd as an lsb:clvmd resource, the cluster
fails randomly.
Here is the working config, before adding clvmd:
====
<cib admin_epoch="0" cib-last-written="Mon Jan 27 12:59:17 2014"
crm_feature_set="3.0.7" epoch="87" have-quorum="1" num_updates="29"
update-client="cibadmin" update-origin="an-c03n01.alteeve.ca"
validate-with="pacemaker-1.2" dc-uuid="1">
<configuration>
<crm_config>
<cluster_property_set id="cib-bootstrap-options">
<nvpair id="cib-bootstrap-options-dc-version" name="dc-version"
value="1.1.10-19.el7-368c726"/>
<nvpair id="cib-bootstrap-options-cluster-infrastructure"
name="cluster-infrastructure" value="corosync"/>
<nvpair id="cib-bootstrap-options-no-quorum-policy"
name="no-quorum-policy" value="ignore"/>
<nvpair id="cib-bootstrap-options-last-lrm-refresh"
name="last-lrm-refresh" value="1390843785"/>
</cluster_property_set>
</crm_config>
<nodes>
<node id="1" uname="an-c03n01.alteeve.ca">
<instance_attributes id="nodes-1"/>
</node>
<node id="2" uname="an-c03n02.alteeve.ca">
<instance_attributes id="nodes-2"/>
</node>
</nodes>
<resources>
<primitive class="stonith" id="fence_n01_virsh" type="fence_virsh">
<instance_attributes id="fence_n01_virsh-instance_attributes">
<nvpair
id="fence_n01_virsh-instance_attributes-pcmk_host_list"
name="pcmk_host_list" value="an-c03n01.alteeve.ca"/>
<nvpair id="fence_n01_virsh-instance_attributes-ipaddr"
name="ipaddr" value="192.168.122.1"/>
<nvpair id="fence_n01_virsh-instance_attributes-action"
name="action" value="reboot"/>
<nvpair id="fence_n01_virsh-instance_attributes-login"
name="login" value="root"/>
<nvpair
id="fence_n01_virsh-instance_attributes-passwd_script"
name="passwd_script" value="/root/lemass.pw"/>
<nvpair id="fence_n01_virsh-instance_attributes-port"
name="port" value="an-c03n01"/>
<nvpair id="fence_n01_virsh-instance_attributes-delay"
name="delay" value="15"/>
</instance_attributes>
<operations>
<op id="fence_n01_virsh-monitor-interval-60s" interval="60s"
name="monitor"/>
</operations>
</primitive>
<primitive class="stonith" id="fence_n02_virsh" type="fence_virsh">
<instance_attributes id="fence_n02_virsh-instance_attributes">
<nvpair
id="fence_n02_virsh-instance_attributes-pcmk_host_list"
name="pcmk_host_list" value="an-c03n02.alteeve.ca"/>
<nvpair id="fence_n02_virsh-instance_attributes-ipaddr"
name="ipaddr" value="192.168.122.1"/>
<nvpair id="fence_n02_virsh-instance_attributes-action"
name="action" value="reboot"/>
<nvpair id="fence_n02_virsh-instance_attributes-login"
name="login" value="root"/>
<nvpair
id="fence_n02_virsh-instance_attributes-passwd_script"
name="passwd_script" value="/root/lemass.pw"/>
<nvpair id="fence_n02_virsh-instance_attributes-port"
name="port" value="an-c03n02"/>
</instance_attributes>
<operations>
<op id="fence_n02_virsh-monitor-interval-60s" interval="60s"
name="monitor"/>
</operations>
</primitive>
<master id="drbd_r0_Clone">
<primitive class="ocf" id="drbd_r0" provider="linbit" type="drbd">
<instance_attributes id="drbd_r0-instance_attributes">
<nvpair id="drbd_r0-instance_attributes-drbd_resource"
name="drbd_resource" value="r0"/>
</instance_attributes>
<operations>
<op id="drbd_r0-monitor-interval-60s" interval="60s"
name="monitor"/>
</operations>
</primitive>
<meta_attributes id="drbd_r0_Clone-meta_attributes">
<nvpair id="drbd_r0_Clone-meta_attributes-master-max"
name="master-max" value="2"/>
<nvpair id="drbd_r0_Clone-meta_attributes-master-node-max"
name="master-node-max" value="1"/>
<nvpair id="drbd_r0_Clone-meta_attributes-clone-max"
name="clone-max" value="2"/>
<nvpair id="drbd_r0_Clone-meta_attributes-clone-node-max"
name="clone-node-max" value="1"/>
<nvpair id="drbd_r0_Clone-meta_attributes-notify"
name="notify" value="true"/>
</meta_attributes>
</master>
<clone id="dlm-clone">
<primitive class="ocf" id="dlm" provider="pacemaker"
type="controld">
<instance_attributes id="dlm-instance_attributes"/>
<operations>
<op id="dlm-monitor-interval-60s" interval="60s"
name="monitor"/>
</operations>
</primitive>
<meta_attributes id="dlm-clone-meta">
<nvpair id="dlm-clone-max" name="clone-max" value="2"/>
<nvpair id="dlm-clone-node-max" name="clone-node-max" value="1"/>
</meta_attributes>
<meta_attributes id="dlm-clone-meta_attributes"/>
</clone>
</resources>
<constraints/>
</configuration>
<status>
<node_state id="1" uname="an-c03n01.alteeve.ca" in_ccm="true"
crmd="online" crm-debug-origin="do_update_resource" join="member"
expected="member">
<lrm id="1">
<lrm_resources>
<lrm_resource id="fence_n01_virsh" type="fence_virsh"
class="stonith">
<lrm_rsc_op id="fence_n01_virsh_last_0"
operation_key="fence_n01_virsh_start_0" operation="start"
crm-debug-origin="do_update_resource" crm_feature_set="3.0.7"
transition-key="13:0:0:fe2b9eba-421a-4f5b-a149-7a9d6e7541ae"
transition-magic="0:0;13:0:0:fe2b9eba-421a-4f5b-a149-7a9d6e7541ae"
call-id="20" rc-code="0" op-status="0" interval="0"
last-run="1390845789" last-rc-change="1390845789" exec-time="1872"
queue-time="0" op-digest="15a02edf953b80ee2ea10742bc05d033"/>
<lrm_rsc_op id="fence_n01_virsh_monitor_60000"
operation_key="fence_n01_virsh_monitor_60000" operation="monitor"
crm-debug-origin="do_update_resource" crm_feature_set="3.0.7"
transition-key="7:1:0:fe2b9eba-421a-4f5b-a149-7a9d6e7541ae"
transition-magic="0:0;7:1:0:fe2b9eba-421a-4f5b-a149-7a9d6e7541ae"
call-id="24" rc-code="0" op-status="0" interval="60000"
last-rc-change="1390845791" exec-time="849" queue-time="1"
op-digest="228a38704a0c537c1dc62cdef20db0d2"/>
</lrm_resource>
<lrm_resource id="fence_n02_virsh" type="fence_virsh"
class="stonith">
<lrm_rsc_op id="fence_n02_virsh_last_0"
operation_key="fence_n02_virsh_monitor_0" operation="monitor"
crm-debug-origin="do_update_resource" crm_feature_set="3.0.7"
transition-key="5:0:7:fe2b9eba-421a-4f5b-a149-7a9d6e7541ae"
transition-magic="0:7;5:0:7:fe2b9eba-421a-4f5b-a149-7a9d6e7541ae"
call-id="9" rc-code="7" op-status="0" interval="0" last-run="1390845788"
last-rc-change="1390845788" exec-time="0" queue-time="0"
op-digest="f0f20524f9486801befcdf71b079b75a"/>
</lrm_resource>
<lrm_resource id="dlm" type="controld" class="ocf"
provider="pacemaker">
<lrm_rsc_op id="dlm_last_0" operation_key="dlm_start_0"
operation="start" crm-debug-origin="do_update_resource"
crm_feature_set="3.0.7"
transition-key="45:0:0:fe2b9eba-421a-4f5b-a149-7a9d6e7541ae"
transition-magic="0:0;45:0:0:fe2b9eba-421a-4f5b-a149-7a9d6e7541ae"
call-id="22" rc-code="0" op-status="0" interval="0"
last-run="1390845790" last-rc-change="1390845790" exec-time="1023"
queue-time="0" op-digest="f2317cad3d54cec5d7d7aa7d0bf35cf8"/>
<lrm_rsc_op id="dlm_monitor_60000"
operation_key="dlm_monitor_60000" operation="monitor"
crm-debug-origin="do_update_resource" crm_feature_set="3.0.7"
transition-key="43:1:0:fe2b9eba-421a-4f5b-a149-7a9d6e7541ae"
transition-magic="0:0;43:1:0:fe2b9eba-421a-4f5b-a149-7a9d6e7541ae"
call-id="25" rc-code="0" op-status="0" interval="60000"
last-rc-change="1390845791" exec-time="12" queue-time="0"
op-digest="4811cef7f7f94e3a35a70be7916cb2fd"/>
</lrm_resource>
<lrm_resource id="drbd_r0" type="drbd" class="ocf"
provider="linbit">
<lrm_rsc_op id="drbd_r0_last_0"
operation_key="drbd_r0_promote_0" operation="promote"
crm-debug-origin="do_update_resource" crm_feature_set="3.0.7"
transition-key="13:1:0:fe2b9eba-421a-4f5b-a149-7a9d6e7541ae"
transition-magic="0:0;13:1:0:fe2b9eba-421a-4f5b-a149-7a9d6e7541ae"
call-id="27" rc-code="0" op-status="0" interval="0"
last-run="1390845791" last-rc-change="1390845791" exec-time="1045"
queue-time="0" op-digest="c0e018b73fdf522b6cdd355e125af15e"/>
</lrm_resource>
</lrm_resources>
</lrm>
<transient_attributes id="1">
<instance_attributes id="status-1">
<nvpair id="status-1-probe_complete" name="probe_complete"
value="true"/>
<nvpair id="status-1-master-drbd_r0" name="master-drbd_r0"
value="10000"/>
</instance_attributes>
</transient_attributes>
</node_state>
<node_state id="2" uname="an-c03n02.alteeve.ca" in_ccm="true"
crmd="online" crm-debug-origin="do_update_resource" join="member"
expected="member">
<lrm id="2">
<lrm_resources>
<lrm_resource id="fence_n01_virsh" type="fence_virsh"
class="stonith">
<lrm_rsc_op id="fence_n01_virsh_last_0"
operation_key="fence_n01_virsh_monitor_0" operation="monitor"
crm-debug-origin="do_update_resource" crm_feature_set="3.0.7"
transition-key="9:0:7:fe2b9eba-421a-4f5b-a149-7a9d6e7541ae"
transition-magic="0:7;9:0:7:fe2b9eba-421a-4f5b-a149-7a9d6e7541ae"
call-id="5" rc-code="7" op-status="0" interval="0" last-run="1390845788"
last-rc-change="1390845788" exec-time="1" queue-time="0"
op-digest="15a02edf953b80ee2ea10742bc05d033"/>
</lrm_resource>
<lrm_resource id="fence_n02_virsh" type="fence_virsh"
class="stonith">
<lrm_rsc_op id="fence_n02_virsh_last_0"
operation_key="fence_n02_virsh_start_0" operation="start"
crm-debug-origin="do_update_resource" crm_feature_set="3.0.7"
transition-key="15:0:0:fe2b9eba-421a-4f5b-a149-7a9d6e7541ae"
transition-magic="0:0;15:0:0:fe2b9eba-421a-4f5b-a149-7a9d6e7541ae"
call-id="20" rc-code="0" op-status="0" interval="0"
last-run="1390845789" last-rc-change="1390845789" exec-time="1879"
queue-time="0" op-digest="f0f20524f9486801befcdf71b079b75a"/>
<lrm_rsc_op id="fence_n02_virsh_monitor_60000"
operation_key="fence_n02_virsh_monitor_60000" operation="monitor"
crm-debug-origin="do_update_resource" crm_feature_set="3.0.7"
transition-key="10:1:0:fe2b9eba-421a-4f5b-a149-7a9d6e7541ae"
transition-magic="0:0;10:1:0:fe2b9eba-421a-4f5b-a149-7a9d6e7541ae"
call-id="24" rc-code="0" op-status="0" interval="60000"
last-rc-change="1390845791" exec-time="859" queue-time="0"
op-digest="7fbb265a061a1e74066b22c34d8b2477"/>
</lrm_resource>
<lrm_resource id="dlm" type="controld" class="ocf"
provider="pacemaker">
<lrm_rsc_op id="dlm_last_0" operation_key="dlm_start_0"
operation="start" crm-debug-origin="do_update_resource"
crm_feature_set="3.0.7"
transition-key="47:0:0:fe2b9eba-421a-4f5b-a149-7a9d6e7541ae"
transition-magic="0:0;47:0:0:fe2b9eba-421a-4f5b-a149-7a9d6e7541ae"
call-id="22" rc-code="0" op-status="0" interval="0"
last-run="1390845790" last-rc-change="1390845790" exec-time="1024"
queue-time="0" op-digest="f2317cad3d54cec5d7d7aa7d0bf35cf8"/>
<lrm_rsc_op id="dlm_monitor_60000"
operation_key="dlm_monitor_60000" operation="monitor"
crm-debug-origin="do_update_resource" crm_feature_set="3.0.7"
transition-key="46:1:0:fe2b9eba-421a-4f5b-a149-7a9d6e7541ae"
transition-magic="0:0;46:1:0:fe2b9eba-421a-4f5b-a149-7a9d6e7541ae"
call-id="25" rc-code="0" op-status="0" interval="60000"
last-rc-change="1390845791" exec-time="25" queue-time="0"
op-digest="4811cef7f7f94e3a35a70be7916cb2fd"/>
</lrm_resource>
<lrm_resource id="drbd_r0" type="drbd" class="ocf"
provider="linbit">
<lrm_rsc_op id="drbd_r0_last_0"
operation_key="drbd_r0_promote_0" operation="promote"
crm-debug-origin="do_update_resource" crm_feature_set="3.0.7"
transition-key="16:1:0:fe2b9eba-421a-4f5b-a149-7a9d6e7541ae"
transition-magic="0:0;16:1:0:fe2b9eba-421a-4f5b-a149-7a9d6e7541ae"
call-id="27" rc-code="0" op-status="0" interval="0"
last-run="1390845791" last-rc-change="1390845791" exec-time="27"
queue-time="0" op-digest="c0e018b73fdf522b6cdd355e125af15e"/>
</lrm_resource>
</lrm_resources>
</lrm>
<transient_attributes id="2">
<instance_attributes id="status-2">
<nvpair id="status-2-probe_complete" name="probe_complete"
value="true"/>
<nvpair id="status-2-master-drbd_r0" name="master-drbd_r0"
value="10000"/>
</instance_attributes>
</transient_attributes>
</node_state>
</status>
</cib>
====
The same config in pcs syntax:
====
Cluster Name: an-cluster-03
Corosync Nodes:
an-c03n01.alteeve.ca an-c03n02.alteeve.ca
Pacemaker Nodes:
an-c03n01.alteeve.ca an-c03n02.alteeve.ca
Resources:
Master: drbd_r0_Clone
Meta Attrs: master-max=2 master-node-max=1 clone-max=2
clone-node-max=1 notify=true
Resource: drbd_r0 (class=ocf provider=linbit type=drbd)
Attributes: drbd_resource=r0
Operations: monitor interval=60s (drbd_r0-monitor-interval-60s)
Clone: dlm-clone
Meta Attrs: clone-max=2 clone-node-max=1
Resource: dlm (class=ocf provider=pacemaker type=controld)
Operations: monitor interval=60s (dlm-monitor-interval-60s)
Stonith Devices:
Resource: fence_n01_virsh (class=stonith type=fence_virsh)
Attributes: pcmk_host_list=an-c03n01.alteeve.ca ipaddr=192.168.122.1
action=reboot login=root passwd_script=/root/lemass.pw port=an-c03n01
delay=15
Operations: monitor interval=60s (fence_n01_virsh-monitor-interval-60s)
Resource: fence_n02_virsh (class=stonith type=fence_virsh)
Attributes: pcmk_host_list=an-c03n02.alteeve.ca ipaddr=192.168.122.1
action=reboot login=root passwd_script=/root/lemass.pw port=an-c03n02
Operations: monitor interval=60s (fence_n02_virsh-monitor-interval-60s)
Fencing Levels:
Location Constraints:
Ordering Constraints:
Colocation Constraints:
Cluster Properties:
cluster-infrastructure: corosync
dc-version: 1.1.10-19.el7-368c726
last-lrm-refresh: 1390843785
no-quorum-policy: ignore
====
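As you can see above, there are no ordering or colocation constraints
at all yet. If it turns out that clvmd (added below) needs to be
explicitly tied to dlm-clone, I believe the pcs syntax would be roughly
the following -- untested on my end, so treat it as a sketch:
====
# sketch only; not applied to my cluster yet
pcs constraint order start dlm-clone then clvmd-clone
pcs constraint colocation add clvmd-clone with dlm-clone
====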
I try to configure clvmd this way:
====
pcs cluster cib clvmd_cfg
pcs -f clvmd_cfg resource create clvmd lsb:clvmd params
daemon_timeout=30s op monitor interval=60s
pcs -f clvmd_cfg resource clone clvmd clone-max=2 clone-node-max=1
pcs cluster cib-push clvmd_cfg
====
Here is what happens when I run that:
====
[root@an-c03n01 ~]# pcs cluster cib-push clvmd_cfg
CIB updated
[root@an-c03n01 ~]# pcs status
Cluster name: an-cluster-03
Last updated: Mon Jan 27 13:08:07 2014
Last change: Mon Jan 27 13:07:27 2014 via cibadmin on an-c03n01.alteeve.ca
Stack: corosync
Current DC: an-c03n01.alteeve.ca (1) - partition with quorum
Version: 1.1.10-19.el7-368c726
2 Nodes configured
8 Resources configured
Online: [ an-c03n01.alteeve.ca an-c03n02.alteeve.ca ]
Full list of resources:
fence_n01_virsh (stonith:fence_virsh): Started an-c03n01.alteeve.ca
fence_n02_virsh (stonith:fence_virsh): Started an-c03n02.alteeve.ca
Master/Slave Set: drbd_r0_Clone [drbd_r0]
Masters: [ an-c03n01.alteeve.ca an-c03n02.alteeve.ca ]
Clone Set: dlm-clone [dlm]
Started: [ an-c03n01.alteeve.ca an-c03n02.alteeve.ca ]
Clone Set: clvmd-clone [clvmd]
Started: [ an-c03n01.alteeve.ca an-c03n02.alteeve.ca ]
PCSD Status:
an-c03n01.alteeve.ca:
an-c03n01.alteeve.ca: Online
an-c03n02.alteeve.ca:
an-c03n02.alteeve.ca: Online
Daemon Status:
corosync: active/disabled
pacemaker: active/disabled
pcsd: active/enabled
====
an-c03n01 logs:
====
Jan 27 13:07:27 an-c03n01 cibadmin[4779]: notice: crm_log_args: Invoked:
/usr/sbin/cibadmin -o resources -C -X <primitive class="lsb" id="clvmd"
type="clvmd"><instance_attributes id="clvmd-instance_attributes"><nvpair
id="clvmd-instance_attributes-daemon_timeout" name="daemon_timeout"
value="30s"/></instance_attributes><operations><op
id="clvmd-monitor-interval-60s" interval="60s"
name="monitor"/></operations></primitive>
Jan 27 13:07:27 an-c03n01 cibadmin[4784]: notice: crm_log_args: Invoked:
/usr/sbin/cibadmin -o resources -R -X <resources>
<primitive class="stonith" id="fence_n01_virsh" type="fence_virsh">
<instance_attributes id="fence_n01_virsh-instance_attributes">
<nvpair
id="fence_n01_virsh-instance_attributes-pcmk_host_list"
name="pcmk_host_list" value="an-c03n01.alteeve.ca"/>
<nvpair id="fence_n01_virsh-instance_attributes-ipaddr"
name="ipaddr" value="192.168.122.1"/>
<nvpair id="fence_n01_virsh-instance_attri
Jan 27 13:07:27 an-c03n01 cibadmin[4786]: notice: crm_log_args: Invoked:
/usr/sbin/cibadmin --replace -o constraints -X <constraints/>
Jan 27 13:07:27 an-c03n01 cibadmin[4788]: notice: crm_log_args: Invoked:
/usr/sbin/cibadmin --replace --xml-file clvmd_cfg
Jan 27 13:07:27 an-c03n01 cib[4003]: notice: cib:diff: Diff: --- 0.87.29
Jan 27 13:07:27 an-c03n01 cib[4003]: notice: cib:diff: Diff: +++ 0.90.1
0e4c7ba20e20c7db5f715cae1d1b464e
Jan 27 13:07:27 an-c03n01 cib[4003]: notice: cib:diff: -- <cib
epoch="87" num_updates="29" admin_epoch="0"/>
Jan 27 13:07:27 an-c03n01 cib[4003]: notice: cib:diff: ++ <clone
id="clvmd-clone">
Jan 27 13:07:27 an-c03n01 cib[4003]: notice: cib:diff: ++
<primitive class="lsb" id="clvmd" type="clvmd">
Jan 27 13:07:27 an-c03n01 cib[4003]: notice: cib:diff: ++
<instance_attributes id="clvmd-instance_attributes">
Jan 27 13:07:27 an-c03n01 cib[4003]: notice: cib:diff: ++
<nvpair id="clvmd-instance_attributes-daemon_timeout"
name="daemon_timeout" value="30s"/>
Jan 27 13:07:27 an-c03n01 cib[4003]: notice: cib:diff: ++
</instance_attributes>
Jan 27 13:07:27 an-c03n01 cib[4003]: notice: cib:diff: ++
<operations>
Jan 27 13:07:27 an-c03n01 cib[4003]: notice: cib:diff: ++
<op id="clvmd-monitor-interval-60s" interval="60s" name="monitor"/>
Jan 27 13:07:27 an-c03n01 cib[4003]: notice: cib:diff: ++
</operations>
Jan 27 13:07:27 an-c03n01 stonith-ng[4004]: notice: unpack_config: On
loss of CCM Quorum: Ignore
Jan 27 13:07:27 an-c03n01 crmd[4008]: notice: do_state_transition: State
transition S_IDLE -> S_POLICY_ENGINE [ input=I_PE_CALC
cause=C_FSA_INTERNAL origin=abort_transition_graph ]
Jan 27 13:07:27 an-c03n01 cib[4003]: notice: cib:diff: ++
</primitive>
Jan 27 13:07:27 an-c03n01 cib[4003]: notice: cib:diff: ++
<meta_attributes id="clvmd-clone-meta">
Jan 27 13:07:27 an-c03n01 cib[4003]: notice: cib:diff: ++
<nvpair id="clvmd-clone-max" name="clone-max" value="2"/>
Jan 27 13:07:27 an-c03n01 cib[4003]: notice: cib:diff: ++
<nvpair id="clvmd-clone-node-max" name="clone-node-max" value="1"/>
Jan 27 13:07:27 an-c03n01 cib[4003]: notice: cib:diff: ++
</meta_attributes>
Jan 27 13:07:27 an-c03n01 cib[4003]: notice: cib:diff: ++ </clone>
Jan 27 13:07:27 an-c03n01 crmd[4008]: notice: do_state_transition: State
transition S_ELECTION -> S_INTEGRATION [ input=I_ELECTION_DC
cause=C_FSA_INTERNAL origin=do_election_check ]
Jan 27 13:07:28 an-c03n01 stonith-ng[4004]: notice:
stonith_device_register: Device 'fence_n01_virsh' already existed in
device list (2 active devices)
Jan 27 13:07:28 an-c03n01 stonith-ng[4004]: notice:
stonith_device_register: Added 'fence_n02_virsh' to the device list (2
active devices)
Jan 27 13:07:29 an-c03n01 attrd[4006]: notice: attrd_local_callback:
Sending full refresh (origin=crmd)
Jan 27 13:07:29 an-c03n01 attrd[4006]: notice: attrd_trigger_update:
Sending flush op to all hosts for: master-drbd_r0 (10000)
Jan 27 13:07:29 an-c03n01 attrd[4006]: notice: attrd_trigger_update:
Sending flush op to all hosts for: probe_complete (true)
Jan 27 13:07:30 an-c03n01 pengine[4007]: notice: unpack_config: On loss
of CCM Quorum: Ignore
Jan 27 13:07:30 an-c03n01 pengine[4007]: notice: LogActions: Start
clvmd:0 (an-c03n01.alteeve.ca)
Jan 27 13:07:30 an-c03n01 pengine[4007]: notice: LogActions: Start
clvmd:1 (an-c03n02.alteeve.ca)
Jan 27 13:07:30 an-c03n01 pengine[4007]: notice: process_pe_message:
Calculated Transition 3: /var/lib/pacemaker/pengine/pe-input-64.bz2
Jan 27 13:07:30 an-c03n01 crmd[4008]: notice: te_rsc_command: Initiating
action 8: monitor clvmd:0_monitor_0 on an-c03n01.alteeve.ca (local)
Jan 27 13:07:30 an-c03n01 crmd[4008]: notice: te_rsc_command: Initiating
action 10: monitor clvmd:1_monitor_0 on an-c03n02.alteeve.ca
Jan 27 13:07:30 an-c03n01 crmd[4008]: notice: process_lrm_event: LRM
operation clvmd_monitor_0 (call=33, rc=7, cib-update=61, confirmed=true)
not running
Jan 27 13:07:30 an-c03n01 crmd[4008]: notice: process_lrm_event:
an-c03n01.alteeve.ca-clvmd_monitor_0:33 [ clvmd.service - LSB: This
service is Clusterd LVM Daemon.\n Loaded: loaded
(/etc/rc.d/init.d/clvmd)\n Active: inactive (dead)\n\n ]
Jan 27 13:07:30 an-c03n01 crmd[4008]: notice: te_rsc_command: Initiating
action 7: probe_complete probe_complete on an-c03n01.alteeve.ca (local)
- no waiting
Jan 27 13:07:30 an-c03n01 crmd[4008]: notice: te_rsc_command: Initiating
action 9: probe_complete probe_complete on an-c03n02.alteeve.ca - no waiting
Jan 27 13:07:30 an-c03n01 crmd[4008]: notice: te_rsc_command: Initiating
action 55: start clvmd:0_start_0 on an-c03n01.alteeve.ca (local)
Jan 27 13:07:30 an-c03n01 crmd[4008]: notice: te_rsc_command: Initiating
action 57: start clvmd:1_start_0 on an-c03n02.alteeve.ca
Jan 27 13:07:30 an-c03n01 systemd: Starting LSB: This service is
Clusterd LVM Daemon....
Jan 27 13:07:30 an-c03n01 kernel: [ 866.863794] dlm: Using TCP for
communications
Jan 27 13:07:30 an-c03n01 kernel: [ 866.871416] dlm: connecting to 2
Jan 27 13:07:31 an-c03n01 clvmd: Cluster LVM daemon started - connected
to Corosync
Jan 27 13:07:31 an-c03n01 kernel: [ 867.879072] device-mapper: uevent:
version 1.0.3
Jan 27 13:07:31 an-c03n01 kernel: [ 867.879133] device-mapper: ioctl:
4.26.0-ioctl (2013-08-15) initialised: dm-devel@redhat.com
Jan 27 13:07:31 an-c03n01 clvmd: Starting clvmd:
Jan 27 13:07:31 an-c03n01 kernel: [ 867.927893] bio: create slab
<bio-1> at 1
Jan 27 13:07:31 an-c03n01 clvmd: Activating VG(s): 1 logical volume(s)
in volume group "an-c03n01_vg0" now active
Jan 27 13:07:31 an-c03n01 clvmd: [ OK ]
Jan 27 13:07:31 an-c03n01 systemd: Started LSB: This service is Clusterd
LVM Daemon..
Jan 27 13:07:31 an-c03n01 crmd[4008]: notice: process_lrm_event: LRM
operation clvmd_start_0 (call=34, rc=0, cib-update=62, confirmed=true) ok
Jan 27 13:07:31 an-c03n01 crmd[4008]: notice: te_rsc_command: Initiating
action 56: monitor clvmd:0_monitor_60000 on an-c03n01.alteeve.ca (local)
Jan 27 13:07:31 an-c03n01 crmd[4008]: notice: te_rsc_command: Initiating
action 58: monitor clvmd:1_monitor_60000 on an-c03n02.alteeve.ca
Jan 27 13:07:31 an-c03n01 crmd[4008]: notice: process_lrm_event: LRM
operation clvmd_monitor_60000 (call=35, rc=0, cib-update=63,
confirmed=false) ok
Jan 27 13:07:31 an-c03n01 crmd[4008]: notice: run_graph: Transition 3
(Complete=11, Pending=0, Fired=0, Skipped=0, Incomplete=0,
Source=/var/lib/pacemaker/pengine/pe-input-64.bz2): Complete
Jan 27 13:07:31 an-c03n01 crmd[4008]: notice: do_state_transition: State
transition S_TRANSITION_ENGINE -> S_IDLE [ input=I_TE_SUCCESS
cause=C_FSA_INTERNAL origin=notify_crmd ]
====
an-c03n02 logs:
====
Jan 27 13:07:27 an-c03n02 crmd[5467]: notice: do_state_transition: State
transition S_NOT_DC -> S_PENDING [ input=I_PENDING cause=C_FSA_INTERNAL
origin=do_election_count_vote ]
Jan 27 13:07:27 an-c03n02 stonith-ng[5463]: notice: unpack_config: On
loss of CCM Quorum: Ignore
Jan 27 13:07:28 an-c03n02 stonith-ng[5463]: notice:
stonith_device_register: Added 'fence_n01_virsh' to the device list (2
active devices)
Jan 27 13:07:28 an-c03n02 stonith-ng[5463]: notice:
stonith_device_register: Device 'fence_n02_virsh' already existed in
device list (2 active devices)
Jan 27 13:07:29 an-c03n02 crmd[5467]: notice: do_state_transition: State
transition S_PENDING -> S_NOT_DC [ input=I_NOT_DC cause=C_HA_MESSAGE
origin=do_cl_join_finalize_respond ]
Jan 27 13:07:29 an-c03n02 attrd[5465]: notice: attrd_local_callback:
Sending full refresh (origin=crmd)
Jan 27 13:07:29 an-c03n02 attrd[5465]: notice: attrd_trigger_update:
Sending flush op to all hosts for: master-drbd_r0 (10000)
Jan 27 13:07:29 an-c03n02 attrd[5465]: notice: attrd_trigger_update:
Sending flush op to all hosts for: probe_complete (true)
Jan 27 13:07:30 an-c03n02 crmd[5467]: notice: process_lrm_event: LRM
operation clvmd_monitor_0 (call=33, rc=7, cib-update=20, confirmed=true)
not running
Jan 27 13:07:30 an-c03n02 crmd[5467]: notice: process_lrm_event:
an-c03n02.alteeve.ca-clvmd_monitor_0:33 [ clvmd.service - LSB: This
service is Clusterd LVM Daemon.\n Loaded: loaded
(/etc/rc.d/init.d/clvmd)\n Active: inactive (dead)\n\n ]
Jan 27 13:07:30 an-c03n02 systemd: Starting LSB: This service is
Clusterd LVM Daemon....
Jan 27 13:07:30 an-c03n02 kernel: [ 863.948873] dlm: Using TCP for
communications
Jan 27 13:07:30 an-c03n02 kernel: [ 863.952592] dlm: got connection from 1
Jan 27 13:07:31 an-c03n02 clvmd: Cluster LVM daemon started - connected
to Corosync
Jan 27 13:07:31 an-c03n02 kernel: [ 864.965742] device-mapper: uevent:
version 1.0.3
Jan 27 13:07:31 an-c03n02 kernel: [ 864.965813] device-mapper: ioctl:
4.26.0-ioctl (2013-08-15) initialised: dm-devel@redhat.com
Jan 27 13:07:31 an-c03n02 clvmd: Starting clvmd:
Jan 27 13:07:31 an-c03n02 kernel: [ 865.042111] bio: create slab
<bio-1> at 1
Jan 27 13:07:31 an-c03n02 clvmd: Activating VG(s): 1 logical volume(s)
in volume group "an-c03n01_vg0" now active
Jan 27 13:07:31 an-c03n02 clvmd: [ OK ]
Jan 27 13:07:31 an-c03n02 systemd: Started LSB: This service is Clusterd
LVM Daemon..
Jan 27 13:07:31 an-c03n02 crmd[5467]: notice: process_lrm_event: LRM
operation clvmd_start_0 (call=34, rc=0, cib-update=21, confirmed=true) ok
Jan 27 13:07:31 an-c03n02 crmd[5467]: notice: process_lrm_event: LRM
operation clvmd_monitor_60000 (call=35, rc=0, cib-update=22,
confirmed=false) ok
====
Thus far, it looks good:
====
[root@an-c03n01 ~]# pvscan; vgscan; lvscan
PV /dev/drbd0 VG an-c03n01_vg0 lvm2 [20.00 GiB / 10.00 GiB free]
Total: 1 [20.00 GiB] / in use: 1 [20.00 GiB] / in no VG: 0 [0 ]
Reading all physical volumes. This may take a while...
Found volume group "an-c03n01_vg0" using metadata type lvm2
ACTIVE '/dev/an-c03n01_vg0/shared' [10.00 GiB] inherit
[root@an-c03n02 ~]# pvscan; vgscan; lvscan
PV /dev/drbd0 VG an-c03n01_vg0 lvm2 [20.00 GiB / 10.00 GiB free]
Total: 1 [20.00 GiB] / in use: 1 [20.00 GiB] / in no VG: 0 [0 ]
Reading all physical volumes. This may take a while...
Found volume group "an-c03n01_vg0" using metadata type lvm2
ACTIVE '/dev/an-c03n01_vg0/shared' [10.00 GiB] inherit
====
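In case it is relevant: clvmd needs cluster-aware locking in lvm.conf
(locking_type = 3, normally set by 'lvmconf --enable-cluster'). A quick
way to double-check that on both nodes would be something like:
====
# sketch; verify cluster locking is enabled for clvmd on each node
grep -E '^[[:space:]]*locking_type' /etc/lvm/lvm.conf
# expected for clvmd: locking_type = 3
====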
Here is the new cib:
====
<cib admin_epoch="0" cib-last-written="Mon Jan 27 13:07:27 2014"
crm_feature_set="3.0.7" epoch="90" have-quorum="1" num_updates="12"
update-client="cibadmin" update-origin="an-c03n01.alteeve.ca"
validate-with="pacemaker-1.2" dc-uuid="1">
<configuration>
<crm_config>
<cluster_property_set id="cib-bootstrap-options">
<nvpair id="cib-bootstrap-options-dc-version" name="dc-version"
value="1.1.10-19.el7-368c726"/>
<nvpair id="cib-bootstrap-options-cluster-infrastructure"
name="cluster-infrastructure" value="corosync"/>
<nvpair id="cib-bootstrap-options-no-quorum-policy"
name="no-quorum-policy" value="ignore"/>
<nvpair id="cib-bootstrap-options-last-lrm-refresh"
name="last-lrm-refresh" value="1390843785"/>
</cluster_property_set>
</crm_config>
<nodes>
<node id="1" uname="an-c03n01.alteeve.ca">
<instance_attributes id="nodes-1"/>
</node>
<node id="2" uname="an-c03n02.alteeve.ca">
<instance_attributes id="nodes-2"/>
</node>
</nodes>
<resources>
<primitive class="stonith" id="fence_n01_virsh" type="fence_virsh">
<instance_attributes id="fence_n01_virsh-instance_attributes">
<nvpair
id="fence_n01_virsh-instance_attributes-pcmk_host_list"
name="pcmk_host_list" value="an-c03n01.alteeve.ca"/>
<nvpair id="fence_n01_virsh-instance_attributes-ipaddr"
name="ipaddr" value="192.168.122.1"/>
<nvpair id="fence_n01_virsh-instance_attributes-action"
name="action" value="reboot"/>
<nvpair id="fence_n01_virsh-instance_attributes-login"
name="login" value="root"/>
<nvpair
id="fence_n01_virsh-instance_attributes-passwd_script"
name="passwd_script" value="/root/lemass.pw"/>
<nvpair id="fence_n01_virsh-instance_attributes-port"
name="port" value="an-c03n01"/>
<nvpair id="fence_n01_virsh-instance_attributes-delay"
name="delay" value="15"/>
</instance_attributes>
<operations>
<op id="fence_n01_virsh-monitor-interval-60s" interval="60s"
name="monitor"/>
</operations>
</primitive>
<primitive class="stonith" id="fence_n02_virsh" type="fence_virsh">
<instance_attributes id="fence_n02_virsh-instance_attributes">
<nvpair
id="fence_n02_virsh-instance_attributes-pcmk_host_list"
name="pcmk_host_list" value="an-c03n02.alteeve.ca"/>
<nvpair id="fence_n02_virsh-instance_attributes-ipaddr"
name="ipaddr" value="192.168.122.1"/>
<nvpair id="fence_n02_virsh-instance_attributes-action"
name="action" value="reboot"/>
<nvpair id="fence_n02_virsh-instance_attributes-login"
name="login" value="root"/>
<nvpair
id="fence_n02_virsh-instance_attributes-passwd_script"
name="passwd_script" value="/root/lemass.pw"/>
<nvpair id="fence_n02_virsh-instance_attributes-port"
name="port" value="an-c03n02"/>
</instance_attributes>
<operations>
<op id="fence_n02_virsh-monitor-interval-60s" interval="60s"
name="monitor"/>
</operations>
</primitive>
<master id="drbd_r0_Clone">
<primitive class="ocf" id="drbd_r0" provider="linbit" type="drbd">
<instance_attributes id="drbd_r0-instance_attributes">
<nvpair id="drbd_r0-instance_attributes-drbd_resource"
name="drbd_resource" value="r0"/>
</instance_attributes>
<operations>
<op id="drbd_r0-monitor-interval-60s" interval="60s"
name="monitor"/>
</operations>
</primitive>
<meta_attributes id="drbd_r0_Clone-meta_attributes">
<nvpair id="drbd_r0_Clone-meta_attributes-master-max"
name="master-max" value="2"/>
<nvpair id="drbd_r0_Clone-meta_attributes-master-node-max"
name="master-node-max" value="1"/>
<nvpair id="drbd_r0_Clone-meta_attributes-clone-max"
name="clone-max" value="2"/>
<nvpair id="drbd_r0_Clone-meta_attributes-clone-node-max"
name="clone-node-max" value="1"/>
<nvpair id="drbd_r0_Clone-meta_attributes-notify"
name="notify" value="true"/>
</meta_attributes>
</master>
<clone id="dlm-clone">
<primitive class="ocf" id="dlm" provider="pacemaker"
type="controld">
<instance_attributes id="dlm-instance_attributes"/>
<operations>
<op id="dlm-monitor-interval-60s" interval="60s"
name="monitor"/>
</operations>
</primitive>
<meta_attributes id="dlm-clone-meta">
<nvpair id="dlm-clone-max" name="clone-max" value="2"/>
<nvpair id="dlm-clone-node-max" name="clone-node-max" value="1"/>
</meta_attributes>
<meta_attributes id="dlm-clone-meta_attributes"/>
</clone>
<clone id="clvmd-clone">
<primitive class="lsb" id="clvmd" type="clvmd">
<instance_attributes id="clvmd-instance_attributes">
<nvpair id="clvmd-instance_attributes-daemon_timeout"
name="daemon_timeout" value="30s"/>
</instance_attributes>
<operations>
<op id="clvmd-monitor-interval-60s" interval="60s"
name="monitor"/>
</operations>
</primitive>
<meta_attributes id="clvmd-clone-meta">
<nvpair id="clvmd-clone-max" name="clone-max" value="2"/>
<nvpair id="clvmd-clone-node-max" name="clone-node-max"
value="1"/>
</meta_attributes>
</clone>
</resources>
<constraints/>
</configuration>
<status>
<node_state id="1" uname="an-c03n01.alteeve.ca" in_ccm="true"
crmd="online" crm-debug-origin="do_update_resource" join="member"
expected="member">
<transient_attributes id="1">
<instance_attributes id="status-1">
<nvpair id="status-1-probe_complete" name="probe_complete"
value="true"/>
<nvpair id="status-1-master-drbd_r0" name="master-drbd_r0"
value="10000"/>
</instance_attributes>
</transient_attributes>
<lrm id="1">
<lrm_resources>
<lrm_resource id="fence_n01_virsh" type="fence_virsh"
class="stonith">
<lrm_rsc_op id="fence_n01_virsh_last_0"
operation_key="fence_n01_virsh_start_0" operation="start"
crm-debug-origin="build_active_RAs" crm_feature_set="3.0.7"
transition-key="13:0:0:fe2b9eba-421a-4f5b-a149-7a9d6e7541ae"
transition-magic="0:0;13:0:0:fe2b9eba-421a-4f5b-a149-7a9d6e7541ae"
call-id="20" rc-code="0" op-status="0" interval="0"
last-run="1390845789" last-rc-change="1390845789" exec-time="1872"
queue-time="0" op-digest="15a02edf953b80ee2ea10742bc05d033"/>
<lrm_rsc_op id="fence_n01_virsh_monitor_60000"
operation_key="fence_n01_virsh_monitor_60000" operation="monitor"
crm-debug-origin="build_active_RAs" crm_feature_set="3.0.7"
transition-key="7:1:0:fe2b9eba-421a-4f5b-a149-7a9d6e7541ae"
transition-magic="0:0;7:1:0:fe2b9eba-421a-4f5b-a149-7a9d6e7541ae"
call-id="24" rc-code="0" op-status="0" interval="60000"
last-rc-change="1390845791" exec-time="849" queue-time="1"
op-digest="228a38704a0c537c1dc62cdef20db0d2"/>
</lrm_resource>
<lrm_resource id="fence_n02_virsh" type="fence_virsh"
class="stonith">
<lrm_rsc_op id="fence_n02_virsh_last_0"
operation_key="fence_n02_virsh_monitor_0" operation="monitor"
crm-debug-origin="build_active_RAs" crm_feature_set="3.0.7"
transition-key="5:0:7:fe2b9eba-421a-4f5b-a149-7a9d6e7541ae"
transition-magic="0:7;5:0:7:fe2b9eba-421a-4f5b-a149-7a9d6e7541ae"
call-id="9" rc-code="7" op-status="0" interval="0" last-run="1390845788"
last-rc-change="1390845788" exec-time="0" queue-time="0"
op-digest="f0f20524f9486801befcdf71b079b75a"/>
</lrm_resource>
<lrm_resource id="drbd_r0" type="drbd" class="ocf"
provider="linbit">
<lrm_rsc_op id="drbd_r0_last_0"
operation_key="drbd_r0_promote_0" operation="promote"
crm-debug-origin="build_active_RAs" crm_feature_set="3.0.7"
transition-key="13:1:0:fe2b9eba-421a-4f5b-a149-7a9d6e7541ae"
transition-magic="0:0;13:1:0:fe2b9eba-421a-4f5b-a149-7a9d6e7541ae"
call-id="27" rc-code="0" op-status="0" interval="0"
last-run="1390845791" last-rc-change="1390845791" exec-time="1045"
queue-time="0" op-digest="c0e018b73fdf522b6cdd355e125af15e"/>
</lrm_resource>
<lrm_resource id="dlm" type="controld" class="ocf"
provider="pacemaker">
<lrm_rsc_op id="dlm_last_0" operation_key="dlm_start_0"
operation="start" crm-debug-origin="build_active_RAs"
crm_feature_set="3.0.7"
transition-key="45:0:0:fe2b9eba-421a-4f5b-a149-7a9d6e7541ae"
transition-magic="0:0;45:0:0:fe2b9eba-421a-4f5b-a149-7a9d6e7541ae"
call-id="22" rc-code="0" op-status="0" interval="0"
last-run="1390845790" last-rc-change="1390845790" exec-time="1023"
queue-time="0" op-digest="f2317cad3d54cec5d7d7aa7d0bf35cf8"/>
<lrm_rsc_op id="dlm_monitor_60000"
operation_key="dlm_monitor_60000" operation="monitor"
crm-debug-origin="build_active_RAs" crm_feature_set="3.0.7"
transition-key="43:1:0:fe2b9eba-421a-4f5b-a149-7a9d6e7541ae"
transition-magic="0:0;43:1:0:fe2b9eba-421a-4f5b-a149-7a9d6e7541ae"
call-id="25" rc-code="0" op-status="0" interval="60000"
last-rc-change="1390845791" exec-time="12" queue-time="0"
op-digest="4811cef7f7f94e3a35a70be7916cb2fd"/>
</lrm_resource>
<lrm_resource id="clvmd" type="clvmd" class="lsb">
<lrm_rsc_op id="clvmd_last_0" operation_key="clvmd_start_0"
operation="start" crm-debug-origin="do_update_resource"
crm_feature_set="3.0.7"
transition-key="55:3:0:fe2b9eba-421a-4f5b-a149-7a9d6e7541ae"
transition-magic="0:0;55:3:0:fe2b9eba-421a-4f5b-a149-7a9d6e7541ae"
call-id="34" rc-code="0" op-status="0" interval="0"
last-run="1390846050" last-rc-change="1390846050" exec-time="1108"
queue-time="0" op-digest="794098a8c9b2fd6dfa0bfbe993e66d1a"/>
<lrm_rsc_op id="clvmd_monitor_60000"
operation_key="clvmd_monitor_60000" operation="monitor"
crm-debug-origin="do_update_resource" crm_feature_set="3.0.7"
transition-key="56:3:0:fe2b9eba-421a-4f5b-a149-7a9d6e7541ae"
transition-magic="0:0;56:3:0:fe2b9eba-421a-4f5b-a149-7a9d6e7541ae"
call-id="35" rc-code="0" op-status="0" interval="60000"
last-rc-change="1390846051" exec-time="51" queue-time="0"
op-digest="c3a8ed7a1c932ba83c4a1789e7a816b8"/>
</lrm_resource>
</lrm_resources>
</lrm>
</node_state>
<node_state id="2" uname="an-c03n02.alteeve.ca" in_ccm="true"
crmd="online" crm-debug-origin="do_update_resource" join="member"
expected="member">
<transient_attributes id="2">
<instance_attributes id="status-2">
<nvpair id="status-2-probe_complete" name="probe_complete"
value="true"/>
<nvpair id="status-2-master-drbd_r0" name="master-drbd_r0"
value="10000"/>
</instance_attributes>
</transient_attributes>
<lrm id="2">
<lrm_resources>
<lrm_resource id="fence_n01_virsh" type="fence_virsh"
class="stonith">
<lrm_rsc_op id="fence_n01_virsh_last_0"
operation_key="fence_n01_virsh_monitor_0" operation="monitor"
crm-debug-origin="build_active_RAs" crm_feature_set="3.0.7"
transition-key="9:0:7:fe2b9eba-421a-4f5b-a149-7a9d6e7541ae"
transition-magic="0:7;9:0:7:fe2b9eba-421a-4f5b-a149-7a9d6e7541ae"
call-id="5" rc-code="7" op-status="0" interval="0" last-run="1390845788"
last-rc-change="1390845788" exec-time="1" queue-time="0"
op-digest="15a02edf953b80ee2ea10742bc05d033"/>
</lrm_resource>
<lrm_resource id="fence_n02_virsh" type="fence_virsh"
class="stonith">
<lrm_rsc_op id="fence_n02_virsh_last_0"
operation_key="fence_n02_virsh_start_0" operation="start"
crm-debug-origin="build_active_RAs" crm_feature_set="3.0.7"
transition-key="15:0:0:fe2b9eba-421a-4f5b-a149-7a9d6e7541ae"
transition-magic="0:0;15:0:0:fe2b9eba-421a-4f5b-a149-7a9d6e7541ae"
call-id="20" rc-code="0" op-status="0" interval="0"
last-run="1390845789" last-rc-change="1390845789" exec-time="1879"
queue-time="0" op-digest="f0f20524f9486801befcdf71b079b75a"/>
<lrm_rsc_op id="fence_n02_virsh_monitor_60000"
operation_key="fence_n02_virsh_monitor_60000" operation="monitor"
crm-debug-origin="build_active_RAs" crm_feature_set="3.0.7"
transition-key="10:1:0:fe2b9eba-421a-4f5b-a149-7a9d6e7541ae"
transition-magic="0:0;10:1:0:fe2b9eba-421a-4f5b-a149-7a9d6e7541ae"
call-id="24" rc-code="0" op-status="0" interval="60000"
last-rc-change="1390845791" exec-time="859" queue-time="0"
op-digest="7fbb265a061a1e74066b22c34d8b2477"/>
</lrm_resource>
<lrm_resource id="drbd_r0" type="drbd" class="ocf"
provider="linbit">
<lrm_rsc_op id="drbd_r0_last_0"
operation_key="drbd_r0_promote_0" operation="promote"
crm-debug-origin="build_active_RAs" crm_feature_set="3.0.7"
transition-key="16:1:0:fe2b9eba-421a-4f5b-a149-7a9d6e7541ae"
transition-magic="0:0;16:1:0:fe2b9eba-421a-4f5b-a149-7a9d6e7541ae"
call-id="27" rc-code="0" op-status="0" interval="0"
last-run="1390845791" last-rc-change="1390845791" exec-time="27"
queue-time="0" op-digest="c0e018b73fdf522b6cdd355e125af15e"/>
</lrm_resource>
<lrm_resource id="dlm" type="controld" class="ocf"
provider="pacemaker">
<lrm_rsc_op id="dlm_last_0" operation_key="dlm_start_0"
operation="start" crm-debug-origin="build_active_RAs"
crm_feature_set="3.0.7"
transition-key="47:0:0:fe2b9eba-421a-4f5b-a149-7a9d6e7541ae"
transition-magic="0:0;47:0:0:fe2b9eba-421a-4f5b-a149-7a9d6e7541ae"
call-id="22" rc-code="0" op-status="0" interval="0"
last-run="1390845790" last-rc-change="1390845790" exec-time="1024"
queue-time="0" op-digest="f2317cad3d54cec5d7d7aa7d0bf35cf8"/>
<lrm_rsc_op id="dlm_monitor_60000"
operation_key="dlm_monitor_60000" operation="monitor"
crm-debug-origin="build_active_RAs" crm_feature_set="3.0.7"
transition-key="46:1:0:fe2b9eba-421a-4f5b-a149-7a9d6e7541ae"
transition-magic="0:0;46:1:0:fe2b9eba-421a-4f5b-a149-7a9d6e7541ae"
call-id="25" rc-code="0" op-status="0" interval="60000"
last-rc-change="1390845791" exec-time="25" queue-time="0"
op-digest="4811cef7f7f94e3a35a70be7916cb2fd"/>
</lrm_resource>
<lrm_resource id="clvmd" type="clvmd" class="lsb">
<lrm_rsc_op id="clvmd_last_0" operation_key="clvmd_start_0"
operation="start" crm-debug-origin="do_update_resource"
crm_feature_set="3.0.7"
transition-key="57:3:0:fe2b9eba-421a-4f5b-a149-7a9d6e7541ae"
transition-magic="0:0;57:3:0:fe2b9eba-421a-4f5b-a149-7a9d6e7541ae"
call-id="34" rc-code="0" op-status="0" interval="0"
last-run="1390846050" last-rc-change="1390846050" exec-time="1145"
queue-time="0" op-digest="794098a8c9b2fd6dfa0bfbe993e66d1a"/>
<lrm_rsc_op id="clvmd_monitor_60000"
operation_key="clvmd_monitor_60000" operation="monitor"
crm-debug-origin="do_update_resource" crm_feature_set="3.0.7"
transition-key="58:3:0:fe2b9eba-421a-4f5b-a149-7a9d6e7541ae"
transition-magic="0:0;58:3:0:fe2b9eba-421a-4f5b-a149-7a9d6e7541ae"
call-id="35" rc-code="0" op-status="0" interval="60000"
last-rc-change="1390846051" exec-time="38" queue-time="0"
op-digest="c3a8ed7a1c932ba83c4a1789e7a816b8"/>
</lrm_resource>
</lrm_resources>
</lrm>
</node_state>
</status>
</cib>
====
Now the fun part: if I try to disable the clvmd resource (or simply
stop the cluster), things go sideways.
Here I try to disable it from an-c03n01:
====
[root@an-c03n01 ~]# pcs resource disable clvmd-clone
====
At first it looks like it worked:
====
Cluster name: an-cluster-03
Last updated: Mon Jan 27 13:11:19 2014
Last change: Mon Jan 27 13:11:05 2014 via crm_resource on
an-c03n01.alteeve.ca
Stack: corosync
Current DC: an-c03n01.alteeve.ca (1) - partition with quorum
Version: 1.1.10-19.el7-368c726
2 Nodes configured
8 Resources configured
Online: [ an-c03n01.alteeve.ca an-c03n02.alteeve.ca ]
Full list of resources:
fence_n01_virsh (stonith:fence_virsh): Started
an-c03n01.alteeve.ca
fence_n02_virsh (stonith:fence_virsh): Started
an-c03n02.alteeve.ca
Master/Slave Set: drbd_r0_Clone [drbd_r0]
Masters: [ an-c03n01.alteeve.ca an-c03n02.alteeve.ca ]
Clone Set: dlm-clone [dlm]
Started: [ an-c03n01.alteeve.ca an-c03n02.alteeve.ca ]
Clone Set: clvmd-clone [clvmd]
Started: [ an-c03n02.alteeve.ca ]
Stopped: [ an-c03n01.alteeve.ca ]
PCSD Status:
an-c03n01.alteeve.ca:
an-c03n01.alteeve.ca: Online
an-c03n02.alteeve.ca:
an-c03n02.alteeve.ca: Online
Daemon Status:
corosync: active/disabled
pacemaker: active/disabled
pcsd: active/enabled
====
But then it fails:
====
Cluster name: an-cluster-03
Last updated: Mon Jan 27 13:12:37 2014
Last change: Mon Jan 27 13:11:05 2014 via crm_resource on
an-c03n01.alteeve.ca
Stack: corosync
Current DC: an-c03n01.alteeve.ca (1) - partition with quorum
Version: 1.1.10-19.el7-368c726
2 Nodes configured
8 Resources configured
Node an-c03n02.alteeve.ca (2): UNCLEAN (online)
Online: [ an-c03n01.alteeve.ca ]
Full list of resources:
fence_n01_virsh (stonith:fence_virsh): Started
an-c03n01.alteeve.ca
fence_n02_virsh (stonith:fence_virsh): Started
an-c03n02.alteeve.ca
Master/Slave Set: drbd_r0_Clone [drbd_r0]
Masters: [ an-c03n01.alteeve.ca an-c03n02.alteeve.ca ]
Clone Set: dlm-clone [dlm]
Started: [ an-c03n01.alteeve.ca an-c03n02.alteeve.ca ]
Clone Set: clvmd-clone [clvmd]
clvmd (lsb:clvmd): FAILED an-c03n02.alteeve.ca
Stopped: [ an-c03n01.alteeve.ca ]
Failed actions:
clvmd_stop_0 on an-c03n02.alteeve.ca 'unknown error' (1): call=37,
status=Timed Out, last-rc-change='Mon Jan 27 13:11:05 2014',
queued=20002ms, exec=0ms
PCSD Status:
an-c03n01.alteeve.ca:
an-c03n01.alteeve.ca: Online
an-c03n02.alteeve.ca:
an-c03n02.alteeve.ca: Online
Daemon Status:
corosync: active/disabled
pacemaker: active/disabled
pcsd: active/enabled
====
Shortly after, an-c03n02 gets fenced.
Here are the logs from both nodes, starting from the disable call until
after an-c03n02 is fenced:
an-c03n01's logs:
====
Jan 27 13:11:05 an-c03n01 cib[4003]: notice: cib:diff: Diff: --- 0.90.12
Jan 27 13:11:05 an-c03n01 cib[4003]: notice: cib:diff: Diff: +++ 0.91.1
d12511719b02a7dd194155882456dada
Jan 27 13:11:05 an-c03n01 cib[4003]: notice: cib:diff: -- <cib
admin_epoch="0" epoch="90" num_updates="12"/>
Jan 27 13:11:05 an-c03n01 cib[4003]: notice: cib:diff: ++
<meta_attributes id="clvmd-clone-meta_attributes">
Jan 27 13:11:05 an-c03n01 cib[4003]: notice: cib:diff: ++
<nvpair id="clvmd-clone-meta_attributes-target-role" name="target-role"
value="Stopped"/>
Jan 27 13:11:05 an-c03n01 cib[4003]: notice: cib:diff: ++
</meta_attributes>
Jan 27 13:11:05 an-c03n01 crmd[4008]: notice: do_state_transition: State
transition S_IDLE -> S_POLICY_ENGINE [ input=I_PE_CALC
cause=C_FSA_INTERNAL origin=abort_transition_graph ]
Jan 27 13:11:05 an-c03n01 pengine[4007]: notice: unpack_config: On loss
of CCM Quorum: Ignore
Jan 27 13:11:05 an-c03n01 pengine[4007]: notice: LogActions: Stop
clvmd:0 (an-c03n01.alteeve.ca)
Jan 27 13:11:05 an-c03n01 pengine[4007]: notice: LogActions: Stop
clvmd:1 (an-c03n02.alteeve.ca)
Jan 27 13:11:05 an-c03n01 pengine[4007]: notice: process_pe_message:
Calculated Transition 4: /var/lib/pacemaker/pengine/pe-input-65.bz2
Jan 27 13:11:05 an-c03n01 crmd[4008]: notice: te_rsc_command: Initiating
action 55: stop clvmd_stop_0 on an-c03n01.alteeve.ca (local)
Jan 27 13:11:05 an-c03n01 crmd[4008]: notice: te_rsc_command: Initiating
action 56: stop clvmd_stop_0 on an-c03n02.alteeve.ca
Jan 27 13:11:05 an-c03n01 systemd: Stopping LSB: This service is
Clusterd LVM Daemon....
Jan 27 13:11:05 an-c03n01 clvmd: Deactivating clustered VG(s): 0
logical volume(s) in volume group "an-c03n01_vg0" now active
Jan 27 13:11:05 an-c03n01 clvmd: [ OK ]
Jan 27 13:11:05 an-c03n01 clvmd: Signaling clvmd to exit [ OK ]
Jan 27 13:11:05 an-c03n01 dlm_controld[4122]: 1081 cpg_dispatch error 9
Jan 27 13:11:05 an-c03n01 clvmd: clvmd terminated[ OK ]
Jan 27 13:11:05 an-c03n01 systemd: Stopped LSB: This service is Clusterd
LVM Daemon..
Jan 27 13:11:05 an-c03n01 crmd[4008]: notice: process_lrm_event: LRM
operation clvmd_stop_0 (call=37, rc=0, cib-update=65, confirmed=true) ok
Jan 27 13:11:25 an-c03n01 crmd[4008]: warning: status_from_rc: Action 56
(clvmd_stop_0) on an-c03n02.alteeve.ca failed (target: 0 vs. rc: 1): Error
Jan 27 13:11:25 an-c03n01 crmd[4008]: warning: update_failcount:
Updating failcount for clvmd on an-c03n02.alteeve.ca after failed stop:
rc=1 (update=INFINITY, time=1390846285)
Jan 27 13:11:25 an-c03n01 crmd[4008]: warning: update_failcount:
Updating failcount for clvmd on an-c03n02.alteeve.ca after failed stop:
rc=1 (update=INFINITY, time=1390846285)
Jan 27 13:11:25 an-c03n01 crmd[4008]: notice: run_graph: Transition 4
(Complete=4, Pending=0, Fired=0, Skipped=1, Incomplete=0,
Source=/var/lib/pacemaker/pengine/pe-input-65.bz2): Stopped
Jan 27 13:11:25 an-c03n01 pengine[4007]: notice: unpack_config: On loss
of CCM Quorum: Ignore
Jan 27 13:11:25 an-c03n01 pengine[4007]: warning: unpack_rsc_op:
Processing failed op stop for clvmd:0 on an-c03n02.alteeve.ca: unknown
error (1)
Jan 27 13:11:25 an-c03n01 pengine[4007]: warning: pe_fence_node: Node
an-c03n02.alteeve.ca will be fenced because of resource failure(s)
Jan 27 13:11:25 an-c03n01 pengine[4007]: warning: stage6: Scheduling
Node an-c03n02.alteeve.ca for STONITH
Jan 27 13:11:25 an-c03n01 pengine[4007]: notice:
native_stop_constraints: Stop of failed resource clvmd:0 is implicit
after an-c03n02.alteeve.ca is fenced
Jan 27 13:11:25 an-c03n01 pengine[4007]: notice: LogActions: Move
fence_n02_virsh (Started an-c03n02.alteeve.ca -> an-c03n01.alteeve.ca)
Jan 27 13:11:25 an-c03n01 pengine[4007]: notice: LogActions: Demote
drbd_r0:1 (Master -> Stopped an-c03n02.alteeve.ca)
Jan 27 13:11:25 an-c03n01 pengine[4007]: notice: LogActions: Stop
dlm:1 (an-c03n02.alteeve.ca)
Jan 27 13:11:25 an-c03n01 pengine[4007]: notice: LogActions: Stop
clvmd:0 (an-c03n02.alteeve.ca)
Jan 27 13:11:25 an-c03n01 pengine[4007]: warning: process_pe_message:
Calculated Transition 5: /var/lib/pacemaker/pengine/pe-warn-5.bz2
Jan 27 13:11:25 an-c03n01 pengine[4007]: notice: unpack_config: On loss
of CCM Quorum: Ignore
Jan 27 13:11:25 an-c03n01 pengine[4007]: warning: unpack_rsc_op:
Processing failed op stop for clvmd:0 on an-c03n02.alteeve.ca: unknown
error (1)
Jan 27 13:11:25 an-c03n01 pengine[4007]: warning: pe_fence_node: Node
an-c03n02.alteeve.ca will be fenced because of resource failure(s)
Jan 27 13:11:25 an-c03n01 pengine[4007]: warning:
common_apply_stickiness: Forcing clvmd-clone away from
an-c03n02.alteeve.ca after 1000000 failures (max=1000000)
Jan 27 13:11:25 an-c03n01 pengine[4007]: warning:
common_apply_stickiness: Forcing clvmd-clone away from
an-c03n02.alteeve.ca after 1000000 failures (max=1000000)
Jan 27 13:11:25 an-c03n01 pengine[4007]: warning: stage6: Scheduling
Node an-c03n02.alteeve.ca for STONITH
Jan 27 13:11:25 an-c03n01 pengine[4007]: notice:
native_stop_constraints: Stop of failed resource clvmd:0 is implicit
after an-c03n02.alteeve.ca is fenced
Jan 27 13:11:25 an-c03n01 pengine[4007]: notice: LogActions: Move
fence_n02_virsh (Started an-c03n02.alteeve.ca -> an-c03n01.alteeve.ca)
Jan 27 13:11:25 an-c03n01 pengine[4007]: notice: LogActions: Demote
drbd_r0:1 (Master -> Stopped an-c03n02.alteeve.ca)
Jan 27 13:11:25 an-c03n01 pengine[4007]: notice: LogActions: Stop
dlm:1 (an-c03n02.alteeve.ca)
Jan 27 13:11:25 an-c03n01 pengine[4007]: notice: LogActions: Stop
clvmd:0 (an-c03n02.alteeve.ca)
Jan 27 13:11:25 an-c03n01 pengine[4007]: warning: process_pe_message:
Calculated Transition 6: /var/lib/pacemaker/pengine/pe-warn-6.bz2
Jan 27 13:11:25 an-c03n01 crmd[4008]: notice: te_fence_node: Executing
reboot fencing operation (57) on an-c03n02.alteeve.ca (timeout=60000)
Jan 27 13:11:25 an-c03n01 crmd[4008]: notice: te_rsc_command: Initiating
action 72: notify drbd_r0_pre_notify_demote_0 on an-c03n01.alteeve.ca
(local)
Jan 27 13:11:25 an-c03n01 stonith-ng[4004]: notice: handle_request:
Client crmd.4008.0580da9f wants to fence (reboot) 'an-c03n02.alteeve.ca'
with device '(any)'
Jan 27 13:11:25 an-c03n01 stonith-ng[4004]: notice:
initiate_remote_stonith_op: Initiating remote operation reboot for
an-c03n02.alteeve.ca: 2ca7afa0-1bcb-4bf1-a23c-f458d3eccf9d (0)
Jan 27 13:11:25 an-c03n01 crmd[4008]: notice: te_rsc_command: Initiating
action 74: notify drbd_r0_pre_notify_demote_0 on an-c03n02.alteeve.ca
Jan 27 13:11:25 an-c03n01 stonith-ng[4004]: notice:
can_fence_host_with_device: fence_n01_virsh can not fence
an-c03n02.alteeve.ca: static-list
Jan 27 13:11:25 an-c03n01 stonith-ng[4004]: notice:
can_fence_host_with_device: fence_n02_virsh can fence
an-c03n02.alteeve.ca: static-list
Jan 27 13:11:25 an-c03n01 crmd[4008]: notice: process_lrm_event: LRM
operation drbd_r0_notify_0 (call=38, rc=0, cib-update=0, confirmed=true) ok
Jan 27 13:12:37 an-c03n01 stonith-ng[4004]: notice:
can_fence_host_with_device: fence_n01_virsh can not fence
an-c03n02.alteeve.ca: static-list
Jan 27 13:12:37 an-c03n01 stonith-ng[4004]: notice:
can_fence_host_with_device: fence_n02_virsh can fence
an-c03n02.alteeve.ca: static-list
Jan 27 13:12:37 an-c03n01 fence_virsh: Parse error: Ignoring unknown
option 'nodename=an-c03n02.alteeve.ca
Jan 27 13:12:38 an-c03n01 corosync[3987]: [TOTEM ] A processor failed,
forming new configuration.
Jan 27 13:12:38 an-c03n01 stonith-ng[4004]: notice: log_operation:
Operation 'reboot' [5338] (call 2 from crmd.4008) for host
'an-c03n02.alteeve.ca' with device 'fence_n02_virsh' returned: 0 (OK)
Jan 27 13:12:39 an-c03n01 kernel: [ 1176.511609] dlm: closing connection
to node 2
Jan 27 13:12:39 an-c03n01 corosync[3987]: [TOTEM ] A new membership
(10.20.30.1:160) was formed. Members left: 2
Jan 27 13:12:39 an-c03n01 dlm_controld[4122]: 1176 fence request 2 pid
5352 nodedown time 1390846359 fence_all dlm_stonith
Jan 27 13:12:39 an-c03n01 crmd[4008]: notice: crm_update_peer_state:
pcmk_quorum_notification: Node an-c03n02.alteeve.ca[2] - state is now
lost (was member)
Jan 27 13:12:39 an-c03n01 crmd[4008]: notice:
fail_incompletable_actions: Action 75 (75) is scheduled for 2 (offline)
Jan 27 13:12:39 an-c03n01 crmd[4008]: notice:
fail_incompletable_actions: Action 69 (69) is scheduled for 2 (offline)
Jan 27 13:12:39 an-c03n01 crmd[4008]: warning:
fail_incompletable_actions: Node 2 shutdown resulted in un-runnable actions
Jan 27 13:12:39 an-c03n01 pacemakerd[4002]: notice:
crm_update_peer_state: pcmk_quorum_notification: Node
an-c03n02.alteeve.ca[2] - state is now lost (was member)
Jan 27 13:12:39 an-c03n01 corosync[3987]: [QUORUM] Members[1]: 1
Jan 27 13:12:39 an-c03n01 corosync[3987]: [MAIN ] Completed service
synchronization, ready to provide service.
Jan 27 13:12:39 an-c03n01 stonith-ng[4004]: notice: remote_op_done:
Operation reboot of an-c03n02.alteeve.ca by an-c03n01.alteeve.ca for
crmd.4008 at an-c03n01.alteeve.ca.2ca7afa0: OK
Jan 27 13:12:39 an-c03n01 crmd[4008]: notice: tengine_stonith_callback:
Stonith operation 2/57:6:0:fe2b9eba-421a-4f5b-a149-7a9d6e7541ae: OK (0)
Jan 27 13:12:39 an-c03n01 crmd[4008]: notice: tengine_stonith_notify:
Peer an-c03n02.alteeve.ca was terminated (reboot) by
an-c03n01.alteeve.ca for an-c03n01.alteeve.ca: OK
(ref=2ca7afa0-1bcb-4bf1-a23c-f458d3eccf9d) by client crmd.4008
Jan 27 13:12:39 an-c03n01 crmd[4008]: notice: run_graph: Transition 6
(Complete=7, Pending=0, Fired=0, Skipped=17, Incomplete=11,
Source=/var/lib/pacemaker/pengine/pe-warn-6.bz2): Stopped
Jan 27 13:12:39 an-c03n01 pengine[4007]: notice: unpack_config: On loss
of CCM Quorum: Ignore
Jan 27 13:12:39 an-c03n01 pengine[4007]: notice: LogActions: Start
fence_n02_virsh (an-c03n01.alteeve.ca)
Jan 27 13:12:39 an-c03n01 pengine[4007]: notice: process_pe_message:
Calculated Transition 7: /var/lib/pacemaker/pengine/pe-input-66.bz2
Jan 27 13:12:39 an-c03n01 crmd[4008]: notice: te_rsc_command: Initiating
action 8: start fence_n02_virsh_start_0 on an-c03n01.alteeve.ca (local)
Jan 27 13:12:39 an-c03n01 stonith-ng[4004]: notice:
stonith_device_register: Device 'fence_n02_virsh' already existed in
device list (2 active devices)
Jan 27 13:12:40 an-c03n01 dlm_controld[4122]: 1177 fence result 2 pid
5352 result 0 exit status
Jan 27 13:12:40 an-c03n01 dlm_controld[4122]: 1177 fence status 2
receive 0 from 1 walltime 1390846360 local 1177
Jan 27 13:12:41 an-c03n01 crmd[4008]: notice: process_lrm_event: LRM
operation fence_n02_virsh_start_0 (call=39, rc=0, cib-update=82,
confirmed=true) ok
Jan 27 13:12:41 an-c03n01 crmd[4008]: notice: te_rsc_command: Initiating
action 9: monitor fence_n02_virsh_monitor_60000 on an-c03n01.alteeve.ca
(local)
Jan 27 13:12:42 an-c03n01 crmd[4008]: notice: process_lrm_event: LRM
operation fence_n02_virsh_monitor_60000 (call=40, rc=0, cib-update=83,
confirmed=false) ok
Jan 27 13:12:42 an-c03n01 crmd[4008]: notice: run_graph: Transition 7
(Complete=2, Pending=0, Fired=0, Skipped=0, Incomplete=0,
Source=/var/lib/pacemaker/pengine/pe-input-66.bz2): Complete
Jan 27 13:12:42 an-c03n01 crmd[4008]: notice: do_state_transition: State
transition S_TRANSITION_ENGINE -> S_IDLE [ input=I_TE_SUCCESS
cause=C_FSA_INTERNAL origin=notify_crmd ]
Jan 27 13:12:42 an-c03n01 kernel: [ 1179.293064] drbd r0: PingAck did
not arrive in time.
Jan 27 13:12:42 an-c03n01 kernel: [ 1179.293094] drbd r0: peer( Primary
-> Unknown ) conn( Connected -> NetworkFailure ) pdsk( UpToDate ->
DUnknown ) susp( 0 -> 1 )
Jan 27 13:12:42 an-c03n01 kernel: [ 1179.293320] drbd r0: asender terminated
Jan 27 13:12:42 an-c03n01 kernel: [ 1179.293323] drbd r0: Terminating
drbd_a_r0
Jan 27 13:12:42 an-c03n01 kernel: [ 1179.293451] drbd r0: Connection closed
Jan 27 13:12:42 an-c03n01 kernel: [ 1179.293478] drbd r0: conn(
NetworkFailure -> Unconnected )
Jan 27 13:12:42 an-c03n01 kernel: [ 1179.293479] drbd r0: receiver
terminated
Jan 27 13:12:42 an-c03n01 kernel: [ 1179.293480] drbd r0: Restarting
receiver thread
Jan 27 13:12:42 an-c03n01 kernel: [ 1179.293481] drbd r0: receiver
(re)started
Jan 27 13:12:42 an-c03n01 kernel: [ 1179.293486] drbd r0: conn(
Unconnected -> WFConnection )
Jan 27 13:12:42 an-c03n01 kernel: [ 1179.293561] drbd r0: helper
command: /sbin/drbdadm fence-peer r0
Jan 27 13:12:42 an-c03n01 crm-fence-peer.sh[5380]: invoked for r0
Jan 27 13:12:42 an-c03n01 cibadmin[5411]: notice: crm_log_args: Invoked:
cibadmin -C -o constraints -X <rsc_location rsc="drbd_r0_Clone"
id="drbd-fence-by-handler-r0-drbd_r0_Clone">
<rule role="Master" score="-INFINITY"
id="drbd-fence-by-handler-r0-rule-drbd_r0_Clone">
<expression attribute="#uname" operation="ne"
value="an-c03n01.alteeve.ca"
id="drbd-fence-by-handler-r0-expr-drbd_r0_Clone"/>
</rule>
</rsc_location>
Jan 27 13:12:42 an-c03n01 cib[4003]: notice: cib:diff: Diff: --- 0.91.13
Jan 27 13:12:42 an-c03n01 cib[4003]: notice: cib:diff: Diff: +++ 0.92.1
159b3e3191219b6a7ca7c17e10ab87ab
Jan 27 13:12:42 an-c03n01 cib[4003]: notice: cib:diff: -- <cib
admin_epoch="0" epoch="91" num_updates="13"/>
Jan 27 13:12:42 an-c03n01 cib[4003]: notice: cib:diff: ++
<rsc_location rsc="drbd_r0_Clone"
id="drbd-fence-by-handler-r0-drbd_r0_Clone">
Jan 27 13:12:42 an-c03n01 cib[4003]: notice: cib:diff: ++ <rule
role="Master" score="-INFINITY"
id="drbd-fence-by-handler-r0-rule-drbd_r0_Clone">
Jan 27 13:12:42 an-c03n01 cib[4003]: notice: cib:diff: ++
<expression attribute="#uname" operation="ne"
value="an-c03n01.alteeve.ca"
id="drbd-fence-by-handler-r0-expr-drbd_r0_Clone"/>
Jan 27 13:12:42 an-c03n01 cib[4003]: notice: cib:diff: ++ </rule>
Jan 27 13:12:42 an-c03n01 cib[4003]: notice: cib:diff: ++
</rsc_location>
Jan 27 13:12:42 an-c03n01 crmd[4008]: notice: do_state_transition: State
transition S_IDLE -> S_POLICY_ENGINE [ input=I_PE_CALC
cause=C_FSA_INTERNAL origin=abort_transition_graph ]
Jan 27 13:12:42 an-c03n01 stonith-ng[4004]: notice: unpack_config: On
loss of CCM Quorum: Ignore
Jan 27 13:12:42 an-c03n01 pengine[4007]: notice: unpack_config: On loss
of CCM Quorum: Ignore
Jan 27 13:12:42 an-c03n01 pengine[4007]: notice: process_pe_message:
Calculated Transition 8: /var/lib/pacemaker/pengine/pe-input-67.bz2
Jan 27 13:12:42 an-c03n01 crmd[4008]: notice: run_graph: Transition 8
(Complete=0, Pending=0, Fired=0, Skipped=0, Incomplete=0,
Source=/var/lib/pacemaker/pengine/pe-input-67.bz2): Complete
Jan 27 13:12:42 an-c03n01 crmd[4008]: notice: do_state_transition: State
transition S_TRANSITION_ENGINE -> S_IDLE [ input=I_TE_SUCCESS
cause=C_FSA_INTERNAL origin=notify_crmd ]
Jan 27 13:12:42 an-c03n01 crm-fence-peer.sh[5380]: INFO peer is fenced,
my disk is UpToDate: placed constraint
'drbd-fence-by-handler-r0-drbd_r0_Clone'
Jan 27 13:12:42 an-c03n01 kernel: [ 1179.332722] drbd r0: helper
command: /sbin/drbdadm fence-peer r0 exit code 7 (0x700)
Jan 27 13:12:42 an-c03n01 kernel: [ 1179.332726] drbd r0: fence-peer
helper returned 7 (peer was stonithed)
Jan 27 13:12:42 an-c03n01 kernel: [ 1179.332741] drbd r0: pdsk( DUnknown
-> Outdated )
Jan 27 13:12:42 an-c03n01 kernel: [ 1179.333135] block drbd0: new
current UUID
90329390E90C0A61:3CDCA6D754EDF103:FA71AAD0BB08E04A:FA70AAD0BB08E04B
Jan 27 13:12:42 an-c03n01 kernel: [ 1179.340036] drbd r0: susp( 1 -> 0 )
Jan 27 13:12:42 an-c03n01 stonith-ng[4004]: notice:
stonith_device_register: Device 'fence_n01_virsh' already existed in
device list (2 active devices)
Jan 27 13:12:42 an-c03n01 stonith-ng[4004]: notice:
stonith_device_register: Device 'fence_n02_virsh' already existed in
device list (2 active devices)
====
an-c03n02's logs:
====
Jan 27 13:11:05 an-c03n02 systemd: Stopping LSB: This service is
Clusterd LVM Daemon....
Jan 27 13:11:25 an-c03n02 lrmd[5464]: warning: child_timeout_callback:
clvmd_stop_0 process (PID 7593) timed out
Jan 27 13:11:25 an-c03n02 lrmd[5464]: warning: operation_finished:
clvmd_stop_0:7593 - timed out after 20000ms
Jan 27 13:11:25 an-c03n02 crmd[5467]: error: process_lrm_event: LRM
operation clvmd_stop_0 (37) Timed Out (timeout=20000ms)
Jan 27 13:11:25 an-c03n02 crmd[5467]: notice: process_lrm_event:
an-c03n02.alteeve.ca-clvmd_stop_0:37 [ Stopping clvmd (via systemctl): ]
Jan 27 13:11:25 an-c03n02 attrd[5465]: notice: attrd_cs_dispatch: Update
relayed from an-c03n01.alteeve.ca
Jan 27 13:11:25 an-c03n02 attrd[5465]: notice: attrd_trigger_update:
Sending flush op to all hosts for: fail-count-clvmd (INFINITY)
Jan 27 13:11:25 an-c03n02 attrd[5465]: notice: attrd_perform_update:
Sent update 24: fail-count-clvmd=INFINITY
Jan 27 13:11:25 an-c03n02 attrd[5465]: notice: attrd_cs_dispatch: Update
relayed from an-c03n01.alteeve.ca
Jan 27 13:11:25 an-c03n02 attrd[5465]: notice: attrd_trigger_update:
Sending flush op to all hosts for: last-failure-clvmd (1390846285)
Jan 27 13:11:25 an-c03n02 attrd[5465]: notice: attrd_perform_update:
Sent update 27: last-failure-clvmd=1390846285
Jan 27 13:11:25 an-c03n02 attrd[5465]: notice: attrd_cs_dispatch: Update
relayed from an-c03n01.alteeve.ca
Jan 27 13:11:25 an-c03n02 attrd[5465]: notice: attrd_trigger_update:
Sending flush op to all hosts for: fail-count-clvmd (INFINITY)
Jan 27 13:11:25 an-c03n02 attrd[5465]: notice: attrd_perform_update:
Sent update 30: fail-count-clvmd=INFINITY
Jan 27 13:11:25 an-c03n02 attrd[5465]: notice: attrd_cs_dispatch: Update
relayed from an-c03n01.alteeve.ca
Jan 27 13:11:25 an-c03n02 attrd[5465]: notice: attrd_trigger_update:
Sending flush op to all hosts for: last-failure-clvmd (1390846285)
Jan 27 13:11:25 an-c03n02 attrd[5465]: notice: attrd_perform_update:
Sent update 33: last-failure-clvmd=1390846285
Jan 27 13:11:25 an-c03n02 stonith-ng[5463]: notice:
can_fence_host_with_device: fence_n01_virsh can not fence
an-c03n02.alteeve.ca: static-list
Jan 27 13:11:25 an-c03n02 stonith-ng[5463]: notice:
can_fence_host_with_device: fence_n02_virsh can fence
an-c03n02.alteeve.ca: static-list
Jan 27 13:11:25 an-c03n02 crmd[5467]: notice: process_lrm_event: LRM
operation drbd_r0_notify_0 (call=38, rc=0, cib-update=0, confirmed=true) ok
====
Post fence, this is the cluster status:
====
[root@an-c03n01 ~]# pcs status
Cluster name: an-cluster-03
Last updated: Mon Jan 27 13:14:32 2014
Last change: Mon Jan 27 13:12:42 2014 via cibadmin on an-c03n01.alteeve.ca
Stack: corosync
Current DC: an-c03n01.alteeve.ca (1) - partition with quorum
Version: 1.1.10-19.el7-368c726
2 Nodes configured
8 Resources configured
Online: [ an-c03n01.alteeve.ca ]
OFFLINE: [ an-c03n02.alteeve.ca ]
Full list of resources:
fence_n01_virsh (stonith:fence_virsh): Started an-c03n01.alteeve.ca
fence_n02_virsh (stonith:fence_virsh): Started an-c03n01.alteeve.ca
Master/Slave Set: drbd_r0_Clone [drbd_r0]
Masters: [ an-c03n01.alteeve.ca ]
Stopped: [ an-c03n02.alteeve.ca ]
Clone Set: dlm-clone [dlm]
Started: [ an-c03n01.alteeve.ca ]
Stopped: [ an-c03n02.alteeve.ca ]
Clone Set: clvmd-clone [clvmd]
Stopped: [ an-c03n01.alteeve.ca an-c03n02.alteeve.ca ]
PCSD Status:
an-c03n01.alteeve.ca:
an-c03n01.alteeve.ca: Online
an-c03n02.alteeve.ca:
an-c03n02.alteeve.ca: Online
Daemon Status:
corosync: active/disabled
pacemaker: active/disabled
pcsd: active/enabled
====
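So the failure is the clvmd stop on an-c03n02 timing out after 20
seconds, which in turn gets the node fenced. Unless someone spots
something obvious, my next step will probably be to reproduce the stop
by hand on a node where clvmd is running and watch what the init
script and DLM do; roughly (commands from memory, may need adjusting):
====
# sketch of what I plan to try next
dlm_tool ls                # is the clvmd lockspace still listed?
time systemctl stop clvmd  # does the LSB-wrapped stop hang past the 20s op timeout?
journalctl -n 50           # anything useful logged around the stop?
====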
I'm totally lost... I am sure the problem is simple, but I am missing it.
--
Digimer
Papers and Projects: https://alteeve.ca/w/
What if the cure for cancer is trapped in the mind of a person without
access to education?