[Pacemaker] Help! Simultaneously Mount gfs2 in Pacemaker on RHEL6.1x64 HANGS!
xin.liang at cs2c.com.cn
Thu Sep 22 01:48:28 UTC 2011
Hi,

I hope everything is going well with you.

I am having problems with DRBD + dlm + gfs in Pacemaker. I followed this document: http://www.clusterlabs.org/doc/en-US/Pacemaker/1.1/html/Clusters_from_Scratch/index.html to build a DRBD-GFS2 cluster.

When I run DRBD (two primaries) + dlm + gfs + Filesystem, node-c hangs and reboots.

When I run only DRBD (two primaries) + dlm + gfs, it works fine and runs on both nodes. Then I run "mount -t gfs2 /dev/drbd0 /mnt" on node-b, and it is OK. BUT when I run the same "mount -t gfs2 /dev/drbd0 /mnt" on node-c, node-c hangs and reboots, with no error output on stderr.

If I do it the other way around and mount on node-c first, node-c is OK; then node-b hangs and reboots!
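In short, the sequence that triggers the hang is just this (a restatement of the steps above, nothing more):

  # on the first node (e.g. node-b): mounts fine
  mount -t gfs2 /dev/drbd0 /mnt

  # then on the second node (e.g. node-c): that node hangs and reboots, nothing on stderr
  mount -t gfs2 /dev/drbd0 /mnt

  # whichever node mounts second is the one that hangs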
#################################################################
$ crm configure show
node ha-b
node ha-c
primitive dlm ocf:pacemaker:controld \
        operations $id="dlm-operations" \
        op monitor interval="10" timeout="20" start-delay="0" \
        params args="-L -K -P -q 0"
primitive drbd ocf:linbit:drbd \
        operations $id="drbd-operations" \
        op monitor interval="20" role="Slave" timeout="20" \
        op monitor interval="10" role="Master" timeout="20" \
        params drbd_resource="drbd0"
primitive gfs ocf:pacemaker:controld \
        operations $id="gfs-operations" \
        op monitor interval="10" timeout="20" start-delay="0" \
        params daemon="gfs_controld.pcmk" args="-L -P -g 0"
group groups dlm gfs
ms ms-drbd drbd \
        meta master-max="2" notify="true" target-role="Started"
clone clone-set groups \
        meta interleave="true" target-role="Started"
colocation clone-on-drbd inf: clone-set:Started ms-drbd:Master
order clone-after-drbd inf: ms-drbd:promote clone-set:start symmetrical=true
property $id="cib-bootstrap-options" \
        dc-version="1.1.6-1.el6-9971ebba4494012a93c03b40a2c58ec0eb60f50c" \
        cluster-infrastructure="openais" \
        expected-quorum-votes="2" \
        no-quorum-policy="ignore" \
        stonith-enabled="false"
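(The configuration above is from the DRBD + dlm + gfs test. For the DRBD + dlm + gfs + Filesystem test, the Filesystem resource was defined roughly along the lines of the Clusters from Scratch example; this is a sketch only, not my exact definition, and the names "fs" and "clone-fs" are just placeholders:)

  # sketch only, not the exact resource from my cluster
  primitive fs ocf:heartbeat:Filesystem \
          params device="/dev/drbd0" directory="/mnt" fstype="gfs2"
  clone clone-fs fs \
          meta interleave="true"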
##########################################################################
gfs_controld.log on ha-b
##########################################################################
[root@ha-b ~]# cat /var/log/cluster/gfs_controld.log
Sep 22 09:08:13 gfs_controld gfs_controld 3.0.12 started
Sep 22 09:08:13 gfs_controld Connected as node 3393650954 to cluster 'cs2c'
Sep 22 09:08:13 gfs_controld logging mode 3 syslog f 160 p 6 logfile p 7 /var/log/cluster/gfs_controld.log
Sep 22 09:08:13 gfs_controld group_mode 3 compat 0
Sep 22 09:08:13 gfs_controld setup_cpg_daemon 11
Sep 22 09:08:13 gfs_controld gfs:controld conf 1 1 0 memb -901316342 join -901316342 left
Sep 22 09:08:13 gfs_controld set_protocol member_count 1 propose daemon 1.1.1 kernel 1.1.1
Sep 22 09:08:13 gfs_controld run protocol from nodeid -901316342
Sep 22 09:08:13 gfs_controld daemon run 1.1.1 max 1.1.1 kernel run 1.1.1 max 1.1.1
Sep 22 09:08:14 gfs_controld gfs:controld conf 2 1 0 memb -901316342 -884539126 join -884539126 left
Sep 22 09:11:57 gfs_controld client connection 5 fd 14
Sep 22 09:11:57 gfs_controld join: /mnt gfs2 lock_dlm cs2c:liang rw /dev/drbd0
Sep 22 09:11:57 gfs_controld liang join: cluster name matches: cs2c
Sep 22 09:11:57 gfs_controld liang process_dlmcontrol register 0
Sep 22 09:11:57 gfs_controld gfs:mount:liang conf 1 1 0 memb -901316342 join -901316342 left
Sep 22 09:11:57 gfs_controld liang add_change cg 1 joined nodeid -901316342
Sep 22 09:11:57 gfs_controld liang add_change cg 1 we joined
Sep 22 09:11:57 gfs_controld liang add_change cg 1 counts member 1 joined 1 remove 0 failed 0
Sep 22 09:11:57 gfs_controld liang wait_conditions skip for zero started_count
Sep 22 09:11:57 gfs_controld liang send_start cg 1 id_count 1 om 0 nm 1 oj 0 nj 0
Sep 22 09:11:57 gfs_controld liang receive_start -901316342:1 len 92
Sep 22 09:11:57 gfs_controld liang match_change -901316342:1 matches cg 1
Sep 22 09:11:57 gfs_controld liang wait_messages cg 1 got all 1
Sep 22 09:11:57 gfs_controld liang pick_first_recovery_master low -901316342 old 0
Sep 22 09:11:57 gfs_controld liang sync_state all_nodes_new first_recovery_needed master -901316342
Sep 22 09:11:57 gfs_controld liang create_old_nodes all new
Sep 22 09:11:57 gfs_controld liang create_new_nodes -901316342 ro 0 spect 0
Sep 22 09:11:57 gfs_controld liang create_failed_journals all new
Sep 22 09:11:57 gfs_controld liang apply_recovery first start_kernel
Sep 22 09:11:57 gfs_controld liang start_kernel cg 1 member_count 1
Sep 22 09:11:57 gfs_controld liang set /sys/fs/gfs2/cs2c:liang/lock_module/block to 0
Sep 22 09:11:57 gfs_controld liang set open /sys/fs/gfs2/cs2c:liang/lock_module/block error -1 2
Sep 22 09:11:57 gfs_controld liang client_reply_join_full ci 5 result 0 hostdata=jid=0:id=915250580:first=1
Sep 22 09:11:57 gfs_controld client_reply_join liang ci 5 result 0
Sep 22 09:11:57 gfs_controld uevent add gfs2 /fs/gfs2/cs2c:liang
Sep 22 09:11:57 gfs_controld liang ping_kernel_mount 0
Sep 22 09:11:57 gfs_controld uevent change gfs2 /fs/gfs2/cs2c:liang
Sep 22 09:11:57 gfs_controld liang recovery_uevent jid 0 first recovery done 0
Sep 22 09:11:57 gfs_controld uevent change gfs2 /fs/gfs2/cs2c:liang
Sep 22 09:11:57 gfs_controld liang recovery_uevent jid 1 first recovery done 0
Sep 22 09:11:57 gfs_controld uevent change gfs2 /fs/gfs2/cs2c:liang
Sep 22 09:11:57 gfs_controld liang recovery_uevent jid 1 first recovery done 0
Sep 22 09:11:57 gfs_controld liang recovery_uevent first_done
Sep 22 09:11:57 gfs_controld liang receive_first_recovery_done from -901316342 master -901316342 mount_client_notified 1
Sep 22 09:11:57 gfs_controld liang wait_recoveries done
Sep 22 09:11:57 gfs_controld uevent online gfs2 /fs/gfs2/cs2c:liang
Sep 22 09:11:57 gfs_controld liang ping_kernel_mount 0
Sep 22 09:11:57 gfs_controld mount_done: liang result 0
Sep 22 09:11:57 gfs_controld connection 5 read error -1
Sep 22 09:11:57 gfs_controld liang receive_mount_done from -901316342 result 0
Sep 22 09:11:57 gfs_controld liang wait_recoveries done
Sep 22 09:12:37 gfs_controld uevent remove gfs2 /fs/gfs2/cs2c:liang
Sep 22 09:12:37 gfs_controld do_leave liang mnterr 0
Sep 22 09:12:37 gfs_controld gfs:mount:liang conf 0 0 1 memb join left -901316342
Sep 22 09:12:37 gfs_controld liang confchg for our leave
##########################################################################
gfs_controld.log on ha-c
##########################################################################
[root@ha-c ~]# cat /var/log/cluster/gfs_controld.log
Sep 22 08:52:12 gfs_controld gfs_controld 3.0.12 started
Sep 22 08:52:12 gfs_controld Connected as node 3410428170 to cluster 'cs2c'
Sep 22 08:52:12 gfs_controld logging mode 3 syslog f 160 p 6 logfile p 7 /var/log/cluster/gfs_controld.log
Sep 22 08:52:12 gfs_controld group_mode 3 compat 0
Sep 22 08:52:12 gfs_controld setup_cpg_daemon 11
Sep 22 08:52:12 gfs_controld gfs:controld conf 2 1 0 memb -901316342 -884539126 join -884539126 left
Sep 22 08:52:12 gfs_controld run protocol from nodeid -901316342
Sep 22 08:52:12 gfs_controld daemon run 1.1.1 max 1.1.1 kernel run 1.1.1 max 1.1.1
Sep 22 08:56:52 gfs_controld client connection 5 fd 14
Sep 22 08:56:52 gfs_controld join: /mnt gfs2 lock_dlm cs2c:liang rw /dev/drbd0
Sep 22 08:56:52 gfs_controld liang join: cluster name matches: cs2c
Sep 22 08:56:52 gfs_controld liang process_dlmcontrol register 0
Sep 22 08:56:52 gfs_controld gfs:mount:liang conf 1 1 0 memb -884539126 join -884539126 left
Sep 22 08:56:52 gfs_controld liang add_change cg 1 joined nodeid -884539126
Sep 22 08:56:52 gfs_controld liang add_change cg 1 we joined
Sep 22 08:56:52 gfs_controld liang add_change cg 1 counts member 1 joined 1 remove 0 failed 0
Sep 22 08:56:52 gfs_controld liang wait_conditions skip for zero started_count
Sep 22 08:56:52 gfs_controld liang send_start cg 1 id_count 1 om 0 nm 1 oj 0 nj 0
Sep 22 08:56:52 gfs_controld liang receive_start -884539126:1 len 92
Sep 22 08:56:52 gfs_controld liang match_change -884539126:1 matches cg 1
Sep 22 08:56:52 gfs_controld liang wait_messages cg 1 got all 1
Sep 22 08:56:52 gfs_controld liang pick_first_recovery_master low -884539126 old 0
Sep 22 08:56:52 gfs_controld liang sync_state all_nodes_new first_recovery_needed master -884539126
Sep 22 08:56:52 gfs_controld liang create_old_nodes all new
Sep 22 08:56:52 gfs_controld liang create_new_nodes -884539126 ro 0 spect 0
Sep 22 08:56:52 gfs_controld liang create_failed_journals all new
Sep 22 08:56:52 gfs_controld liang apply_recovery first start_kernel
Sep 22 08:56:52 gfs_controld liang start_kernel cg 1 member_count 1
Sep 22 08:56:52 gfs_controld liang set /sys/fs/gfs2/cs2c:liang/lock_module/block to 0
Sep 22 08:56:52 gfs_controld liang set open /sys/fs/gfs2/cs2c:liang/lock_module/block error -1 2
Sep 22 08:56:52 gfs_controld liang client_reply_join_full ci 5 result 0 hostdata=jid=0:id=915250580:first=1
Sep 22 08:56:52 gfs_controld client_reply_join liang ci 5 result 0
Sep 22 08:56:53 gfs_controld uevent add gfs2 /fs/gfs2/cs2c:liang
Sep 22 08:56:53 gfs_controld liang ping_kernel_mount 0
Sep 22 08:56:53 gfs_controld uevent change gfs2 /fs/gfs2/cs2c:liang
Sep 22 08:56:53 gfs_controld liang recovery_uevent jid 0 first recovery done 0
Sep 22 08:56:53 gfs_controld uevent change gfs2 /fs/gfs2/cs2c:liang
Sep 22 08:56:53 gfs_controld liang recovery_uevent jid 1 first recovery done 0
Sep 22 08:56:53 gfs_controld uevent change gfs2 /fs/gfs2/cs2c:liang
Sep 22 08:56:53 gfs_controld liang recovery_uevent jid 1 first recovery done 0
Sep 22 08:56:53 gfs_controld liang recovery_uevent first_done
Sep 22 08:56:53 gfs_controld liang receive_first_recovery_done from -884539126 master -884539126 mount_client_notified 1
Sep 22 08:56:53 gfs_controld liang wait_recoveries done
Sep 22 08:56:53 gfs_controld uevent online gfs2 /fs/gfs2/cs2c:liang
Sep 22 08:56:53 gfs_controld liang ping_kernel_mount 0
Sep 22 08:56:53 gfs_controld mount_done: liang result 0
Sep 22 08:56:53 gfs_controld connection 5 read error -1
Sep 22 08:56:53 gfs_controld liang receive_mount_done from -884539126 result 0
Sep 22 08:56:53 gfs_controld liang wait_recoveries done
Sep 22 08:57:17 gfs_controld gfs:mount:liang conf 2 1 0 memb -901316342 -884539126 join -901316342 left
Sep 22 08:57:17 gfs_controld liang add_change cg 2 joined nodeid -901316342
Sep 22 08:57:17 gfs_controld liang add_change cg 2 counts member 2 joined 1 remove 0 failed 0
Sep 22 08:57:17 gfs_controld liang wait_conditions skip for zero journals_need_recovery
Sep 22 08:57:17 gfs_controld liang send_start cg 2 id_count 2 om 1 nm 1 oj 0 nj 0
Sep 22 08:57:17 gfs_controld liang receive_start -901316342:1 len 104
Sep 22 08:57:17 gfs_controld liang match_change -901316342:1 matches cg 2
Sep 22 08:57:17 gfs_controld liang wait_messages cg 2 need 1 of 2
Sep 22 08:57:17 gfs_controld liang receive_start -884539126:2 len 104
Sep 22 08:57:17 gfs_controld liang match_change -884539126:2 matches cg 2
Sep 22 08:57:17 gfs_controld liang wait_messages cg 2 got all 2
Sep 22 08:57:17 gfs_controld liang sync_state first_recovery_msg
Sep 22 08:57:17 gfs_controld liang create_new_nodes -901316342 ro 0 spect 0
Sep 22 08:57:17 gfs_controld liang wait_recoveries done
Sep 22 08:57:22 gfs_controld gfs:controld conf 1 0 1 memb -884539126 join left -901316342
Sep 22 08:57:22 gfs_controld gfs:mount:liang conf 1 0 1 memb -884539126 join left -901316342
Sep 22 08:57:22 gfs_controld liang add_change cg 3 remove nodeid -901316342 reason 3
Sep 22 08:57:22 gfs_controld liang add_change cg 3 counts member 1 joined 0 remove 1 failed 1
Sep 22 08:57:22 gfs_controld liang stop_kernel
Sep 22 08:57:22 gfs_controld liang set /sys/fs/gfs2/cs2c:liang/lock_module/block to 1
Sep 22 08:57:22 gfs_controld liang check_dlm_notify nodeid -901316342 begin
Sep 22 08:57:22 gfs_controld liang process_dlmcontrol notified nodeid -901316342 result 0
Sep 22 08:57:22 gfs_controld liang check_dlm_notify done
Sep 22 08:57:22 gfs_controld liang send_start cg 3 id_count 1 om 1 nm 0 oj 0 nj 0
Sep 22 08:57:22 gfs_controld liang receive_start -884539126:3 len 92
Sep 22 08:57:22 gfs_controld liang match_change -884539126:3 matches cg 3
Sep 22 08:57:22 gfs_controld liang wait_messages cg 3 got all 1
Sep 22 08:57:22 gfs_controld liang sync_state first_recovery_msg
Sep 22 08:57:22 gfs_controld liang set_failed_journals no journal for nodeid -901316342
Sep 22 08:57:22 gfs_controld liang wait_recoveries done
Sep 22 08:57:22 gfs_controld liang apply_recovery start_kernel
Sep 22 08:57:22 gfs_controld liang start_kernel cg 3 member_count 1
Sep 22 08:57:22 gfs_controld liang set /sys/fs/gfs2/cs2c:liang/lock_module/block to 0
##########################################################################
My experimental environment:
2 PCs: node-b and node-c
Both run RHEL 6.1 x86_64.
RPMS:
pacemaker-cli-1.1.6-1.el6.x86_64
pacemaker-doc-1.1.6-1.el6.x86_64
pacemaker-libs-1.1.6-1.el6.x86_64
pacemaker-1.1.6-1.el6.x86_64
pacemaker-cts-1.1.6-1.el6.x86_64
pacemaker-libs-devel-1.1.6-1.el6.x86_64
corosynclib-1.4.1-1.x86_64
corosync-1.4.1-1.x86_64
corosynclib-devel-1.4.1-1.x86_64
resource-agents-3.9.2-1.x86_64
cluster-glue-libs-devel-1.0.7-1.el6.x86_64
cluster-glue-libs-1.0.7-1.el6.x86_64
cluster-glue-1.0.7-1.el6.x86_64
openais-1.1.1-7.el6.x86_64
openaislib-1.1.1-7.el6.x86_64
dlm-pcmk-3.0.12-23.el6_0.6.x86_64
gfs-pcmk-3.0.12-23.el6_0.6.x86_64
gfs2-utils-3.0.12-41.el6.x86_64
clusterlib-3.0.12-41.el6.x86_64
drbd-udev-8.4.0-1.el6.x86_64
drbd-8.4.0-1.el6.x86_64
drbd-utils-8.4.0-1.el6.x86_64
drbd-heartbeat-8.4.0-1.el6.x86_64
drbd-pacemaker-8.4.0-1.el6.x86_64
drbd-bash-completion-8.4.0-1.el6.x86_64
drbd-xen-8.4.0-1.el6.x86_64
drbd-km-2.6.32_131.0.15.el6.x86_64-8.4.0-1.el6.x86_64
drbd-kernel-8.4.0-1.el6.x86_64
My conf:
$ cat /etc/corosync/corosync.conf
compatibility: whitetank

totem {
    version: 2
    secauth: off
    threads: 0
    rrp_mode: passive
    interface {
        ringnumber: 0
        bindnetaddr: 10.1.71.0
        mcastaddr: 235.3.4.5
        mcastport: 9876
    }
    interface {
        ringnumber: 1
        bindnetaddr: 10.10.10.0
        mcastaddr: 235.3.4.6
        mcastport: 9877
    }
}

logging {
    fileline: off
    to_stderr: no
    to_logfile: yes
    to_syslog: yes
    logfile: /var/log/cluster/corosync.log
    debug: off
    timestamp: on
    logger_subsys {
        subsys: AMF
        debug: off
    }
}

amf {
    mode: disabled
}

service {
    name: pacemaker
    var: 0
    use_logd: yes
    use_mgmtd: yes
    clustername: cs2c
}
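For completeness, I bring the stack up on both nodes in the usual RHEL 6 way; nothing special, just the standard init-script commands (shown here as a sketch):

  # on both nodes
  chkconfig corosync on
  service corosync start

  # then check that both nodes come online
  crm_mon -1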