[ClusterLabs] glusterfs 2 active primitives after reboot
lukas
lukas.kostyan at gmail.com
Sun May 3 14:06:40 CEST 2015
Hi,

I am testing a Debian Wheezy cluster with corosync 1.4.2 and pacemaker
1.1.7. The cluster uses GlusterFS as replicated storage, mounted at the
webserver's document root. To measure the downtime I ping the cluster IP.
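For reference, the ping test is just something along these lines (the
interval and the -D timestamp option are only illustrative):

# Ping the cluster IP and print a timestamp per reply; gaps in icmp_seq
# (or jumps in the timestamps) show how long the address was unreachable.
ping -D -i 0.2 192.168.122.200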
Let's assume node1 fails. There is no problem and virtually no
interruption while the cl_IP instance from node1 moves over (both
instances then run on node2). When node1 fails, node2 reboots it via
stonith. But as soon as node1 is back online, all apache and IP
instances stop and start again. This is where the interruption of
around 5 seconds happens. Is this behaviour to be expected? I thought
the instances already running on node2 shouldn't stop at all when node1
comes back online.

I guess it has something to do with the primitive being active on both
nodes (see the log below)? Why is the primitive p_apache started on the
failed node?
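One thing I have not ruled out yet (purely an assumption on my part) is
that the Debian init script starts apache2 at boot outside of
Pacemaker's control, which could explain apache being found active on
the rebooted node. This is the check I intend to run on both nodes:

# List the sysvinit runlevel links; if an S??apache2 link exists, apache2
# is started by init at boot rather than only by the cluster.
ls -l /etc/rc2.d/ | grep apache2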
The CIB looks as follows:
node vm-1 \
        attributes standby="off"
node vm-2 \
        attributes standby="off"
primitive IP ocf:heartbeat:IPaddr2 \
        params ip="192.168.122.200" nic="eth0" clusterip_hash="sourceip-sourceport" \
        op monitor interval="10s"
primitive p_apache ocf:heartbeat:apache \
        params configfile="/etc/apache2/apache2.conf" statusurl="http://localhost/server-status" \
        op monitor interval="60" timeout="20" \
        op start interval="0" timeout="40s" start-delay="0" \
        meta is-managed="true"
primitive p_fence_N1 stonith:external/libvirt \
        params hostlist="vm-1:N1" hypervisor_uri="qemu+tcp://192.168.122.1/system" pcmk_reboot_action="reboot" \
        op monitor interval="60"
primitive p_fence_N2 stonith:external/libvirt \
        params hostlist="vm-2:N2" hypervisor_uri="qemu+tcp://192.168.122.1/system" pcmk_reboot_action="reboot" \
        op monitor interval="60"
primitive p_gluster_mnt ocf:heartbeat:Filesystem \
        params device="localhost:/gvolrep" directory="/var/www/html" fstype="glusterfs" \
        op monitor interval="10"
primitive p_glusterd ocf:glusterfs:glusterd \
        op monitor interval="30"
primitive p_volume_gluster ocf:glusterfs:volume \
        params volname="gvolrep" \
        op monitor interval="10"
clone cl_IP IP \
        meta globally-unique="true" clone-max="2" clone-node-max="2" interleave="true" \
        params resource-stickiness="0"
clone cl_apache p_apache \
        meta globally-unique="true" target-role="Started"
clone cl_gluster_mnt p_gluster_mnt \
        meta globally-unique="true" interleave="true" target-role="Started"
clone cl_glusterd p_glusterd \
        meta interleave="true"
clone cl_glustervol p_volume_gluster \
        meta interleave="true"
location l_fence_N1 p_fence_N1 -inf: vm-1
location l_fence_N2 p_fence_N2 -inf: vm-2
colocation apache-with-ip inf: cl_apache cl_IP
colocation c_apache_gluster inf: cl_apache cl_gluster_mnt
colocation c_gluster_mnt_with_apache inf: cl_gluster_mnt cl_apache
colocation c_glusterd_with_glustervol inf: cl_glusterd cl_glustervol
colocation c_glustervol_with_gluster_mnt inf: cl_glustervol cl_gluster_mnt
order o_apacher_after_mnt inf: cl_gluster_mnt cl_apache
order o_gluster inf: cl_glusterd cl_glustervol cl_gluster_mnt cl_apache cl_IP
property $id="cib-bootstrap-options" \
        dc-version="1.1.7-ee0730e13d124c3d58f00016c3376a1de5323cff" \
        cluster-infrastructure="openais" \
        expected-quorum-votes="2" \
        stonith-enabled="true" \
        no-quorum-policy="ignore" \
        last-lrm-refresh="1430571170" \
        default-resource-stickiness="100"
op_defaults $id="op-options" \
        timeout="240s"
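In case it helps with reproducing this, these are the two commands I use
to sanity-check the configuration and to look at the placement scores
(standard pacemaker tools; flags as on 1.1.x here):

# Validate the live CIB and print warnings/errors verbosely.
crm_verify -L -V

# Show the current cluster state together with the allocation scores the
# policy engine computes, without changing anything.
crm_simulate -L -s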
################################################
root@vm-2:~# tail -f /var/log/corosync.log | grep ERROR
May 03 13:38:14 vm-2 pengine: [3285]: ERROR: native_create_actions: Resource p_apache:1 (ocf::apache) is active on 2 nodes attempting recovery
May 03 13:38:14 vm-2 pengine: [3285]: ERROR: process_pe_message: Transition 30: ERRORs found during PE processing. PEngine Input stored in: /var/lib/pengine/pe-error-0.bz2
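I still have the PE input mentioned above; if it is useful I can post
the result of replaying it (assuming crm_simulate can read the
compressed input directly):

# Replay the stored policy engine input from transition 30 to see exactly
# which stop/start actions were scheduled when node1 rejoined.
crm_simulate -S -x /var/lib/pengine/pe-error-0.bz2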