[Pacemaker] cannot mount gfs2 filesystem
Soni Maula Harriz
soni.harriz at sangkuriang.co.id
Wed Oct 31 06:04:18 UTC 2012
and this is what happens in cluster1
[root at cluster1 ~]# ps axf
PID TTY STAT TIME COMMAND
2 ? S 0:00 [kthreadd]
3 ? S 0:00 \_ [migration/0]
4 ? S 0:00 \_ [ksoftirqd/0]
5 ? S 0:00 \_ [migration/0]
6 ? S 0:00 \_ [watchdog/0]
7 ? S 0:04 \_ [events/0]
8 ? S 0:00 \_ [cgroup]
9 ? S 0:00 \_ [khelper]
10 ? S 0:00 \_ [netns]
11 ? S 0:00 \_ [async/mgr]
12 ? S 0:00 \_ [pm]
13 ? S 0:00 \_ [sync_supers]
14 ? S 0:00 \_ [bdi-default]
15 ? S 0:00 \_ [kintegrityd/0]
16 ? S 0:00 \_ [kblockd/0]
17 ? S 0:00 \_ [kacpid]
18 ? S 0:00 \_ [kacpi_notify]
19 ? S 0:00 \_ [kacpi_hotplug]
20 ? S 0:00 \_ [ata/0]
21 ? S 0:00 \_ [ata_aux]
22 ? S 0:00 \_ [ksuspend_usbd]
23 ? S 0:00 \_ [khubd]
24 ? S 0:00 \_ [kseriod]
25 ? S 0:00 \_ [md/0]
26 ? S 0:00 \_ [md_misc/0]
27 ? S 0:00 \_ [khungtaskd]
28 ? S 0:00 \_ [kswapd0]
29 ? SN 0:00 \_ [ksmd]
30 ? SN 0:00 \_ [khugepaged]
31 ? S 0:00 \_ [aio/0]
32 ? S 0:00 \_ [crypto/0]
37 ? S 0:00 \_ [kthrotld/0]
39 ? S 0:00 \_ [kpsmoused]
40 ? S 0:00 \_ [usbhid_resumer]
71 ? S 0:00 \_ [kstriped]
198 ? S 0:00 \_ [scsi_eh_0]
199 ? S 0:00 \_ [scsi_eh_1]
210 ? S 0:00 \_ [scsi_eh_2]
266 ? S 0:00 \_ [kdmflush]
268 ? S 0:00 \_ [kdmflush]
287 ? S 0:00 \_ [jbd2/dm-0-8]
288 ? S 0:00 \_ [ext4-dio-unwrit]
845 ? S 0:00 \_ [kdmflush]
881 ? S 0:00 \_ [flush-253:0]
882 ? S 0:00 \_ [jbd2/sda1-8]
883 ? S 0:00 \_ [ext4-dio-unwrit]
941 ? S 0:00 \_ [kauditd]
1281 ? S 0:00 \_ [rpciod/0]
2246 ? S 0:00 \_ [cqueue]
2282 ? S 0:00 \_ [drbd1_worker]
2721 ? S 0:00 \_ [glock_workqueue]
2722 ? S 0:00 \_ [delete_workqueu]
2723 ? S< 0:00 \_ [kslowd001]
2724 ? S< 0:00 \_ [kslowd000]
1 ? Ss 0:01 /sbin/init
371 ? S<s 0:00 /sbin/udevd -d
832 ? S< 0:00 \_ /sbin/udevd -d
833 ? S< 0:00 \_ /sbin/udevd -d
1167 ? S<sl 0:00 auditd
1193 ? Sl 0:00 /sbin/rsyslogd -i /var/run/syslogd.pid -c 5
1235 ? Ss 0:00 rpcbind
1253 ? Ss 0:00 rpc.statd
1285 ? Ss 0:00 rpc.idmapd
1391 ? SLsl 0:05 corosync -f
1434 ? Ssl 0:00 fenced
1459 ? Ssl 0:00 dlm_controld
1507 ? Ssl 0:00 gfs_controld
1579 ? Ss 0:00 dbus-daemon --system
1590 ? S 0:00 avahi-daemon: running [cluster1.local]
1591 ? Ss 0:00 \_ avahi-daemon: chroot helper
1601 ? Ss 0:00 cupsd -C /etc/cups/cupsd.conf
1626 ? Ss 0:00 /usr/sbin/acpid
1635 ? Ss 0:00 hald
1636 ? S 0:00 \_ hald-runner
1677 ? S 0:00 \_ hald-addon-input: Listening on
/dev/input/event1 /dev/input/event0 /dev/input/event3
1680 ? S 0:00 \_ hald-addon-acpi: listening on acpid
socket /var/run/acpid.socket
1700 ? Ssl 0:00 automount --pid-file /var/run/autofs.pid
1725 ? Ss 0:00 /usr/sbin/sshd
10172 ? Ss 0:00 \_ sshd: root at pts/0
10177 pts/0 Ss 0:00 \_ -bash
10239 pts/0 R+ 0:00 \_ ps axf
1733 ? Ss 0:00 ntpd -u ntp:ntp -p /var/run/ntpd.pid -g
1813 ? Ss 0:00 /usr/libexec/postfix/master
1819 ? S 0:00 \_ pickup -l -t fifo -u
1820 ? S 0:00 \_ qmgr -l -t fifo -u
1837 ? Ss 0:00 /usr/sbin/abrtd
1845 ? Ss 0:00 abrt-dump-oops -d /var/spool/abrt -rwx
/var/log/messages
1855 ? Ss 0:00 crond
1869 ? Ss 0:00 /usr/sbin/atd
1881 ? Ss 0:00 /usr/sbin/certmonger -S -p
/var/run/certmonger.pid
1907 ? S 0:00 pacemakerd
1911 ? Ss 0:01 \_ /usr/libexec/pacemaker/cib
1912 ? Ss 0:00 \_ /usr/libexec/pacemaker/stonithd
1913 ? Ss 0:02 \_ /usr/lib64/heartbeat/lrmd
1914 ? Ss 0:00 \_ /usr/libexec/pacemaker/attrd
1915 ? Ss 0:00 \_ /usr/libexec/pacemaker/pengine
1916 ? Ss 0:00 \_ /usr/libexec/pacemaker/crmd
1980 ? Ss 0:00 /usr/sbin/gdm-binary -nodaemon
2117 ? S 0:00 \_ /usr/libexec/gdm-simple-slave --display-id
/org/gnome/DisplayManager/Display1 --force-active-vt
2129 tty1 Ss+ 0:02 \_ /usr/bin/Xorg :0 -nr -verbose -audit 4
-auth /var/run/gdm/auth-for-gdm-4wqtrX/database -nolisten tcp vt1
2416 ? Ssl 0:00 \_ /usr/bin/gnome-session
--autostart=/usr/share/gdm/autostart/LoginWindow/
2479 ? S 0:00 | \_ /usr/libexec/at-spi-registryd
2545 ? S 0:00 | \_ metacity
2568 ? S 0:00 | \_ gnome-power-manager
2573 ? S 0:00 | \_
/usr/libexec/polkit-gnome-authentication-agent-1
2574 ? S 0:00 | \_ /usr/libexec/gdm-simple-greeter
2666 ? S 0:00 \_ pam: gdm-password
2011 tty2 Ss+ 0:00 /sbin/mingetty /dev/tty2
2023 tty3 Ss+ 0:00 /sbin/mingetty /dev/tty3
2035 tty4 Ss+ 0:00 /sbin/mingetty /dev/tty4
2048 tty5 Ss+ 0:00 /sbin/mingetty /dev/tty5
2056 tty6 Ss+ 0:00 /sbin/mingetty /dev/tty6
2295 ? Sl 0:00 /usr/sbin/console-kit-daemon --no-daemon
2405 ? S 0:00 /usr/bin/dbus-launch --exit-with-session
2411 ? Ss 0:00 /bin/dbus-daemon --fork --print-pid 5
--print-address 7 --session
2437 ? S 0:00 /usr/libexec/devkit-power-daemon
2448 ? S 0:00 /usr/libexec/gconfd-2
2487 ? Ssl 0:00 /usr/libexec/gnome-settings-daemon
--gconf-prefix=/apps/gdm/simple-greeter/settings-manager-plugins
2499 ? Ssl 0:00 /usr/libexec/bonobo-activation-server
--ac-activate --ior-output-fd=12
2536 ? S 0:00 /usr/libexec/gvfsd
2595 ? S 0:00 /usr/libexec/polkit-1/polkitd
2616 ? S<sl 0:00 /usr/bin/pulseaudio --start --log-target=syslog
2619 ? SNl 0:00 /usr/libexec/rtkit-daemon
[root at cluster1 ~]# grep -i error /var/log/messages
Oct 31 11:04:58 cluster1 pengine[1921]: warning: unpack_rsc_op: Processing
failed op WebFS:0_last_failure_0 on cluster1: unknown exec error (-2)
Oct 31 11:10:44 cluster1 pengine[1921]: warning: unpack_rsc_op: Processing
failed op WebFS:0_last_failure_0 on cluster1: unknown exec error (-2)
Oct 31 11:10:49 cluster1 dlm_controld[1467]: open "/sys/kernel/dlm/web/id"
error -1 2
Oct 31 11:10:49 cluster1 dlm_controld[1467]: open
"/sys/kernel/dlm/web/control" error -1 2
Oct 31 11:10:49 cluster1 dlm_controld[1467]: open
"/sys/kernel/dlm/web/event_done" error -1 2
Oct 31 11:11:12 cluster1 pengine[1921]: warning: unpack_rsc_op: Processing
failed op WebFS:0_last_failure_0 on cluster1: unknown exec error (-2)
Oct 31 11:11:19 cluster1 crmd[1922]: warning: status_from_rc: Action 10
(WebSite:0_monitor_0) on cluster2 failed (target: 7 vs. rc: 0): Error
Oct 31 11:11:19 cluster1 pengine[1921]: warning: unpack_rsc_op: Processing
failed op WebFS:0_last_failure_0 on cluster1: unknown exec error (-2)
Oct 31 11:11:26 cluster1 kernel: dlm_new_lockspace error -512
Oct 31 11:11:26 cluster1 kernel: block drbd1: error receiving ReportState,
l: 4!
Oct 31 11:11:30 cluster1 pengine[1921]: warning: unpack_rsc_op: Processing
failed op WebFS:0_last_failure_0 on cluster1: unknown exec error (-2)
Oct 31 11:11:30 cluster1 kernel: block drbd1: error receiving ReportState,
l: 4!
Oct 31 11:11:37 cluster1 pengine[1921]: warning: unpack_rsc_op: Processing
failed op WebFS:0_last_failure_0 on cluster1: unknown exec error (-2)
Oct 31 11:11:40 cluster1 dlm_controld[1467]: open
"/sys/kernel/dlm/web/control" error -1 2
Oct 31 11:11:40 cluster1 dlm_controld[1467]: open
"/sys/kernel/dlm/web/control" error -1 2
Oct 31 11:11:58 cluster1 crmd[1922]: warning: status_from_rc: Action 47
(WebFS:0_start_0) on cluster2 failed (target: 0 vs. rc: -2): Error
Oct 31 11:11:58 cluster1 pengine[1921]: warning: unpack_rsc_op: Processing
failed op WebFS:0_last_failure_0 on cluster1: unknown exec error (-2)
Oct 31 11:11:58 cluster1 pengine[1921]: warning: unpack_rsc_op: Processing
failed op WebFS:0_last_failure_0 on cluster2: unknown exec error (-2)
Oct 31 11:12:18 cluster1 crmd[1922]: warning: status_from_rc: Action 3
(WebFS:0_stop_0) on cluster2 failed (target: 0 vs. rc: -2): Error
Oct 31 11:12:18 cluster1 pengine[1921]: warning: unpack_rsc_op: Processing
failed op WebFS:0_last_failure_0 on cluster1: unknown exec error (-2)
Oct 31 11:12:18 cluster1 pengine[1921]: warning: unpack_rsc_op: Processing
failed op WebFS:0_last_failure_0 on cluster2: unknown exec error (-2)
Oct 31 11:12:18 cluster1 pengine[1921]: warning: unpack_rsc_op: Processing
failed op WebFS:0_last_failure_0 on cluster1: unknown exec error (-2)
Oct 31 11:12:18 cluster1 pengine[1921]: warning: unpack_rsc_op: Processing
failed op WebFS:0_last_failure_0 on cluster2: unknown exec error (-2)
Oct 31 11:15:45 cluster1 pengine[1921]: warning: unpack_rsc_op: Processing
failed op WebFS:0_last_failure_0 on cluster1: unknown exec error (-2)
Oct 31 11:15:45 cluster1 pengine[1921]: warning: unpack_rsc_op: Processing
failed op WebFS:0_last_failure_0 on cluster2: unknown exec error (-2)
Oct 31 11:15:49 cluster1 pengine[1921]: warning: unpack_rsc_op: Processing
failed op WebFS:0_last_failure_0 on cluster1: unknown exec error (-2)
Oct 31 11:15:49 cluster1 pengine[1921]: warning: unpack_rsc_op: Processing
failed op WebFS:0_last_failure_0 on cluster2: unknown exec error (-2)
Oct 31 11:15:51 cluster1 pengine[1921]: warning: unpack_rsc_op: Processing
failed op WebFS:0_last_failure_0 on cluster1: unknown exec error (-2)
Oct 31 11:15:51 cluster1 pengine[1921]: warning: unpack_rsc_op: Processing
failed op WebFS:0_last_failure_0 on cluster2: unknown exec error (-2)
Oct 31 11:40:41 cluster1 kernel: block drbd1: error receiving ReportState,
l: 4!
Oct 31 11:40:44 cluster1 kernel: block drbd1: error receiving ReportState,
l: 4!
Oct 31 11:41:12 cluster1 crmd[1916]: error: process_lrm_event: LRM
operation WebFS:1_start_0 (14) Timed Out (timeout=20000ms)
On Wed, Oct 31, 2012 at 12:15 PM, Soni Maula Harriz <
soni.harriz at sangkuriang.co.id> wrote:
>
>
> On Tue, Oct 30, 2012 at 12:20 PM, Andrew Beekhof <andrew at beekhof.net>wrote:
>
>> On Mon, Oct 29, 2012 at 4:22 PM, Soni Maula Harriz
>> <soni.harriz at sangkuriang.co.id> wrote:
>> > dear all,
>> > i configured pacemaker and corosync on 2 CentOS 6.3 servers by following
>> > the instructions in 'Cluster from Scratch'.
>> > on the beginning, i follow 'Cluster from Scratch' edition 5. but, since
>> i
>> > use centos, i change to 'Cluster from Scratch' edition 3 to configure
>> > active/active servers.
>> > Now on 1st server (cluster1), the Filesystem resource cannot start. the
>> gfs2
>> > filesystem can't be mounted.
>> >
>> > this is the crm configuration
>> > [root at cluster2 ~]# crm configure show
>> > node cluster1 \
>> > attributes standby="off"
>> > node cluster2 \
>> > attributes standby="off"
>> > primitive ClusterIP ocf:heartbeat:IPaddr2 \
>> > params ip="xxx.xxx.xxx.229" cidr_netmask="32"
>> clusterip_hash="sourceip"
>> > \
>> > op monitor interval="30s"
>> > primitive WebData ocf:linbit:drbd \
>> > params drbd_resource="wwwdata" \
>> > op monitor interval="60s"
>> > primitive WebFS ocf:heartbeat:Filesystem \
>> > params device="/dev/drbd/by-res/wwwdata" directory="/var/www/html"
>> > fstype="gfs2"
>> > primitive WebSite ocf:heartbeat:apache \
>> > params configfile="/etc/httpd/conf/httpd.conf"
>> > statusurl="http://localhost/server-status" \
>> > op monitor interval="1min"
>> > ms WebDataClone WebData \
>> > meta master-max="2" master-node-max="1" clone-max="2"
>> clone-node-max="1"
>> > notify="true"
>> > clone WebFSClone WebFS
>> > clone WebIP ClusterIP \
>> > meta globally-unique="true" clone-max="2" clone-node-max="1"
>> > interleave="false"
>> > clone WebSiteClone WebSite \
>> > meta interleave="false"
>> > colocation WebSite-with-WebFS inf: WebSiteClone WebFSClone
>> > colocation colocation-WebSite-ClusterIP-INFINITY inf: WebSiteClone WebIP
>> > colocation fs_on_drbd inf: WebFSClone WebDataClone:Master
>> > order WebFS-after-WebData inf: WebDataClone:promote WebFSClone:start
>> > order WebSite-after-WebFS inf: WebFSClone WebSiteClone
>> > order order-ClusterIP-WebSite-mandatory : WebIP:start WebSiteClone:start
>> > property $id="cib-bootstrap-options" \
>> > dc-version="1.1.7-6.el6-148fccfd5985c5590cc601123c6c16e966b85d14" \
>> > cluster-infrastructure="cman" \
>> > expected-quorum-votes="2" \
>> > stonith-enabled="false" \
>> > no-quorum-policy="ignore"
>> > rsc_defaults $id="rsc-options" \
>> > resource-stickiness="100"
>> >
>> > when i want to mount the filesystem manually, this message appear :
>> > [root at cluster1 ~]# mount /dev/drbd1 /mnt/
>> > mount point already used or other mount in progress
>> > error mounting lockproto lock_dlm
>> >
>> > but when i check the mount, there is no mount from drbd
>>
>>
> This is what the system told me :
>
>
>> what does "ps axf" say? Is there another mount process running?
>>
>
> [root at cluster2 ~]# ps axf
> PID TTY STAT TIME COMMAND
> 2 ? S 0:00 [kthreadd]
> 3 ? S 0:00 \_ [migration/0]
> 4 ? S 0:00 \_ [ksoftirqd/0]
> 5 ? S 0:00 \_ [migration/0]
> 6 ? S 0:00 \_ [watchdog/0]
> 7 ? S 0:03 \_ [events/0]
> 8 ? S 0:00 \_ [cgroup]
> 9 ? S 0:00 \_ [khelper]
> 10 ? S 0:00 \_ [netns]
> 11 ? S 0:00 \_ [async/mgr]
> 12 ? S 0:00 \_ [pm]
> 13 ? S 0:00 \_ [sync_supers]
> 14 ? S 0:00 \_ [bdi-default]
> 15 ? S 0:00 \_ [kintegrityd/0]
> 16 ? S 0:03 \_ [kblockd/0]
> 17 ? S 0:00 \_ [kacpid]
> 18 ? S 0:00 \_ [kacpi_notify]
> 19 ? S 0:00 \_ [kacpi_hotplug]
> 20 ? S 0:00 \_ [ata/0]
> 21 ? S 0:00 \_ [ata_aux]
> 22 ? S 0:00 \_ [ksuspend_usbd]
> 23 ? S 0:00 \_ [khubd]
> 24 ? S 0:00 \_ [kseriod]
> 25 ? S 0:00 \_ [md/0]
> 26 ? S 0:00 \_ [md_misc/0]
> 27 ? S 0:00 \_ [khungtaskd]
> 28 ? S 0:00 \_ [kswapd0]
> 29 ? SN 0:00 \_ [ksmd]
> 30 ? SN 0:00 \_ [khugepaged]
> 31 ? S 0:00 \_ [aio/0]
> 32 ? S 0:00 \_ [crypto/0]
> 37 ? S 0:00 \_ [kthrotld/0]
> 39 ? S 0:00 \_ [kpsmoused]
> 40 ? S 0:00 \_ [usbhid_resumer]
> 71 ? S 0:00 \_ [kstriped]
> 188 ? S 0:00 \_ [scsi_eh_0]
> 190 ? S 0:00 \_ [scsi_eh_1]
> 220 ? S 0:00 \_ [scsi_eh_2]
> 272 ? S 0:00 \_ [kdmflush]
> 273 ? S 0:00 \_ [kdmflush]
> 293 ? S 0:00 \_ [jbd2/dm-0-8]
> 294 ? S 0:00 \_ [ext4-dio-unwrit]
> 853 ? S 0:00 \_ [kdmflush]
> 877 ? S 0:00 \_ [flush-253:0]
> 890 ? S 0:00 \_ [jbd2/sda1-8]
> 891 ? S 0:00 \_ [ext4-dio-unwrit]
> 949 ? S 0:00 \_ [kauditd]
> 1602 ? S 0:00 \_ [rpciod/0]
> 2344 ? S 0:00 \_ [cqueue]
> 2456 ? S 0:00 \_ [drbd1_worker]
> 2831 ? S 0:00 \_ [glock_workqueue]
> 2832 ? S 0:00 \_ [delete_workqueu]
> 2833 ? S< 0:00 \_ [kslowd001]
> 2834 ? S< 0:00 \_ [kslowd000]
> 2846 ? S 0:00 \_ [dlm_astd]
> 2847 ? S 0:00 \_ [dlm_scand]
> 2848 ? S 0:00 \_ [dlm_recv/0]
> 2849 ? S 0:00 \_ [dlm_send]
> 2850 ? S 0:00 \_ [dlm_recoverd]
> 1 ? Ss 0:01 /sbin/init
> 377 ? S<s 0:00 /sbin/udevd -d
> 840 ? S< 0:00 \_ /sbin/udevd -d
> 842 ? S< 0:00 \_ /sbin/udevd -d
> 1182 ? S<sl 0:00 auditd
> 1208 ? Sl 0:00 /sbin/rsyslogd -i /var/run/syslogd.pid -c 5
> 1250 ? Ss 0:00 rpcbind
> 1351 ? SLsl 0:06 corosync -f
> 1394 ? Ssl 0:00 fenced
> 1420 ? Ssl 0:00 dlm_controld
> 1467 ? Ssl 0:00 gfs_controld
> 1539 ? Ss 0:00 dbus-daemon --system
> 1550 ? S 0:00 avahi-daemon: running [cluster2.local]
> 1551 ? Ss 0:00 \_ avahi-daemon: chroot helper
> 1568 ? Ss 0:00 rpc.statd
> 1606 ? Ss 0:00 rpc.idmapd
> 1616 ? Ss 0:00 cupsd -C /etc/cups/cupsd.conf
> 1641 ? Ss 0:00 /usr/sbin/acpid
> 1650 ? Ss 0:00 hald
> 1651 ? S 0:00 \_ hald-runner
> 1692 ? S 0:00 \_ hald-addon-input: Listening on
> /dev/input/event3 /dev/input/event1 /dev/input/event0
> 1695 ? S 0:00 \_ hald-addon-acpi: listening on acpid
> socket /var/run/acpid.socket
> 1715 ? Ssl 0:00 automount --pid-file /var/run/autofs.pid
> 1740 ? Ss 0:00 /usr/sbin/sshd
> 1979 ? Ss 0:00 \_ sshd: root at pts/0
> 2207 pts/0 Ss 0:00 \_ -bash
> 8528 pts/0 R+ 0:00 \_ ps axf
> 1748 ? Ss 0:00 ntpd -u ntp:ntp -p /var/run/ntpd.pid -g
> 1828 ? Ss 0:00 /usr/libexec/postfix/master
> 1834 ? S 0:00 \_ pickup -l -t fifo -u
> 1835 ? S 0:00 \_ qmgr -l -t fifo -u
> 1852 ? Ss 0:00 /usr/sbin/abrtd
> 1860 ? Ss 0:00 abrt-dump-oops -d /var/spool/abrt -rwx
> /var/log/messages
> 1890 ? Ss 0:00 crond
> 1901 ? Ss 0:00 /usr/sbin/atd
> 1913 ? Ss 0:00 /usr/sbin/certmonger -S -p
> /var/run/certmonger.pid
> 1939 ? S 0:00 pacemakerd
> 1943 ? Ss 0:02 \_ /usr/libexec/pacemaker/cib
> 1944 ? Ss 0:00 \_ /usr/libexec/pacemaker/stonithd
> 1945 ? Ss 0:01 \_ /usr/lib64/heartbeat/lrmd
> 1946 ? Ss 0:00 \_ /usr/libexec/pacemaker/attrd
> 1947 ? Ss 0:00 \_ /usr/libexec/pacemaker/pengine
> 1948 ? Ss 0:01 \_ /usr/libexec/pacemaker/crmd
> 2005 ? Ss 0:00 /usr/sbin/gdm-binary -nodaemon
> 2136 ? S 0:00 \_ /usr/libexec/gdm-simple-slave --display-id
> /org/gnome/DisplayManager/Display1 --force-active-vt
> 2157 tty1 Ss+ 0:02 \_ /usr/bin/Xorg :0 -nr -verbose -audit 4
> -auth /var/run/gdm/auth-for-gdm-nrpPGF/database -nolisten tcp vt1
> 2485 ? Ssl 0:00 \_ /usr/bin/gnome-session
> --autostart=/usr/share/gdm/autostart/LoginWindow/
> 2595 ? S 0:00 | \_ /usr/libexec/at-spi-registryd
> 2683 ? S 0:00 | \_ metacity
> 2705 ? S 0:00 | \_ gnome-power-manager
> 2706 ? S 0:00 | \_ /usr/libexec/gdm-simple-greeter
> 2708 ? S 0:00 | \_
> /usr/libexec/polkit-gnome-authentication-agent-1
> 2788 ? S 0:00 \_ pam: gdm-password
> 2028 tty2 Ss+ 0:00 /sbin/mingetty /dev/tty2
> 2037 tty3 Ss+ 0:00 /sbin/mingetty /dev/tty3
> 2050 tty4 Ss+ 0:00 /sbin/mingetty /dev/tty4
> 2062 tty5 Ss+ 0:00 /sbin/mingetty /dev/tty5
> 2071 tty6 Ss+ 0:00 /sbin/mingetty /dev/tty6
> 2346 ? Sl 0:00 /usr/sbin/console-kit-daemon --no-daemon
> 2474 ? S 0:00 /usr/bin/dbus-launch --exit-with-session
> 2482 ? Ss 0:00 /bin/dbus-daemon --fork --print-pid 5
> --print-address 7 --session
> 2527 ? S 0:00 /usr/libexec/devkit-power-daemon
> 2546 ? S 0:00 /usr/libexec/gconfd-2
> 2609 ? Ssl 0:00 /usr/libexec/gnome-settings-daemon
> --gconf-prefix=/apps/gdm/simple-greeter/settings-manager-plugins
> 2615 ? Ssl 0:00 /usr/libexec/bonobo-activation-server
> --ac-activate --ior-output-fd=12
> 2672 ? S 0:00 /usr/libexec/gvfsd
> 2728 ? S 0:00 /usr/libexec/polkit-1/polkitd
> 2744 ? S<sl 0:00 /usr/bin/pulseaudio --start --log-target=syslog
> 2748 ? SNl 0:00 /usr/libexec/rtkit-daemon
> 2843 ? D 0:00 /sbin/mount.gfs2 /dev/drbd1 /var/www/html -o rw
> 3049 ? D 0:00 blockdev --flushbufs /dev/drbd/by-res/wwwdata
> 7881 ? Ss 0:00 /usr/sbin/anacron -s
>
>
>
>> Did crm_mon report any errors?
>
>
> [root at cluster2 ~]# crm status
> ============
> Last updated: Wed Oct 31 12:10:31 2012
> Last change: Mon Oct 29 17:01:09 2012 via cibadmin on cluster1
> Stack: cman
> Current DC: cluster2 - partition with quorum
> Version: 1.1.7-6.el6-148fccfd5985c5590cc601123c6c16e966b85d14
> 2 Nodes configured, 2 expected votes
> 8 Resources configured.
> ============
>
> Online: [ cluster1 cluster2 ]
>
> Master/Slave Set: WebDataClone [WebData]
> Masters: [ cluster1 cluster2 ]
> Clone Set: WebIP [ClusterIP] (unique)
> ClusterIP:0 (ocf::heartbeat:IPaddr2): Started cluster1
> ClusterIP:1 (ocf::heartbeat:IPaddr2): Started cluster2
> Clone Set: WebFSClone [WebFS]
> WebFS:0 (ocf::heartbeat:Filesystem): Started cluster2
> (unmanaged) FAILED
> Stopped: [ WebFS:1 ]
>
> Failed actions:
> WebFS:1_start_0 (node=cluster1, call=14, rc=-2, status=Timed Out):
> unknown exec error
> WebFS:0_stop_0 (node=cluster2, call=16, rc=-2, status=Timed Out):
> unknown exec error
>
>
>
>> Did you check the system logs?
>>
>
> [root at cluster2 ~]# crm_verify -L -V
> warning: unpack_rsc_op: Processing failed op WebFS:1_last_failure_0
> on cluster1: unknown exec error (-2)
> warning: unpack_rsc_op: Processing failed op WebFS:0_last_failure_0
> on cluster2: unknown exec error (-2)
> warning: common_apply_stickiness: Forcing WebFSClone away from
> cluster1 after 1000000 failures (max=1000000)
> warning: common_apply_stickiness: Forcing WebFSClone away from
> cluster1 after 1000000 failures (max=1000000)
> warning: common_apply_stickiness: Forcing WebFSClone away from
> cluster2 after 1000000 failures (max=1000000)
> warning: common_apply_stickiness: Forcing WebFSClone away from
> cluster2 after 1000000 failures (max=1000000)
> warning: should_dump_input: Ignoring requirement that WebFS:0_stop_0
> comeplete before WebFSClone_stopped_0: unmanaged failed resources cannot
> prevent clone shutdown
>
> [root at cluster2 ~]# grep -i error /var/log/messages
> Oct 31 11:12:25 cluster2 kernel: block drbd1: error receiving ReportState,
> l: 4!
> Oct 31 11:12:29 cluster2 kernel: block drbd1: error receiving ReportState,
> l: 4!
> Oct 31 11:12:56 cluster2 crmd[1948]: error: process_lrm_event: LRM
> operation WebFS:0_start_0 (15) Timed Out (timeout=20000ms)
> Oct 31 11:13:17 cluster2 crmd[1948]: error: process_lrm_event: LRM
> operation WebFS:0_stop_0 (16) Timed Out (timeout=20000ms)
> Oct 31 11:15:51 cluster2 pengine[1947]: warning: unpack_rsc_op:
> Processing failed op WebFS:0_last_failure_0 on cluster2: unknown exec error
> (-2)
> Oct 31 11:16:16 cluster2 pengine[1947]: warning: unpack_rsc_op:
> Processing failed op WebFS:0_last_failure_0 on cluster2: unknown exec error
> (-2)
> Oct 31 11:31:16 cluster2 pengine[1947]: warning: unpack_rsc_op:
> Processing failed op WebFS:0_last_failure_0 on cluster2: unknown exec error
> (-2)
> Oct 31 11:39:05 cluster2 pengine[1947]: warning: unpack_rsc_op:
> Processing failed op WebFS:0_last_failure_0 on cluster2: unknown exec error
> (-2)
> Oct 31 11:39:30 cluster2 pengine[1947]: warning: unpack_rsc_op:
> Processing failed op WebFS:0_last_failure_0 on cluster2: unknown exec error
> (-2)
> Oct 31 11:39:42 cluster2 kernel: block drbd1: error receiving ReportState,
> l: 4!
> Oct 31 11:39:44 cluster2 pengine[1947]: warning: unpack_rsc_op:
> Processing failed op WebFS:0_last_failure_0 on cluster2: unknown exec error
> (-2)
> Oct 31 11:39:44 cluster2 kernel: block drbd1: error receiving ReportState,
> l: 4!
> Oct 31 11:39:53 cluster2 pengine[1947]: warning: unpack_rsc_op:
> Processing failed op WebFS:0_last_failure_0 on cluster2: unknown exec error
> (-2)
> Oct 31 11:40:13 cluster2 crmd[1948]: warning: status_from_rc: Action 49
> (WebFS:1_start_0) on cluster1 failed (target: 0 vs. rc: -2): Error
> Oct 31 11:40:13 cluster2 pengine[1947]: warning: unpack_rsc_op:
> Processing failed op WebFS:1_last_failure_0 on cluster1: unknown exec error
> (-2)
> Oct 31 11:40:13 cluster2 pengine[1947]: warning: unpack_rsc_op:
> Processing failed op WebFS:0_last_failure_0 on cluster2: unknown exec error
> (-2)
> Oct 31 11:40:14 cluster2 pengine[1947]: warning: unpack_rsc_op:
> Processing failed op WebFS:1_last_failure_0 on cluster1: unknown exec error
> (-2)
> Oct 31 11:40:14 cluster2 pengine[1947]: warning: unpack_rsc_op:
> Processing failed op WebFS:0_last_failure_0 on cluster2: unknown exec error
> (-2)
> Oct 31 11:55:15 cluster2 pengine[1947]: warning: unpack_rsc_op:
> Processing failed op WebFS:1_last_failure_0 on cluster1: unknown exec error
> (-2)
> Oct 31 11:55:15 cluster2 pengine[1947]: warning: unpack_rsc_op:
> Processing failed op WebFS:0_last_failure_0 on cluster2: unknown exec error
> (-2)
> Oct 31 12:10:15 cluster2 pengine[1947]: warning: unpack_rsc_op:
> Processing failed op WebFS:1_last_failure_0 on cluster1: unknown exec error
> (-2)
> Oct 31 12:10:15 cluster2 pengine[1947]: warning: unpack_rsc_op:
> Processing failed op WebFS:0_last_failure_0 on cluster2: unknown exec error
> (-2)
>
>
>
>> >
>> > there is another strange thing, the 1st server (cluster1) cannot
>> reboot. it
>> > hangs with message 'please standby while rebooting the system'. in the
>> > reboot process, there are 2 failed actions which are related to fencing. i
>> > didn't configure any fencing yet. one of the failed actions is :
>> > 'stopping cluster
>> > leaving fence domain .... found dlm lockspace /sys/kernel/dlm/web
>> > fence_tool : cannot leave due to active system [FAILED]'
>> >
>> > please help me with this problem
>> >
>> > --
>> > Best Regards,
>> >
>> > Soni Maula Harriz
>> > Database Administrator
>> > PT. Data Aksara Sangkuriang
>> >
>> >
>> > _______________________________________________
>> > Pacemaker mailing list: Pacemaker at oss.clusterlabs.org
>> > http://oss.clusterlabs.org/mailman/listinfo/pacemaker
>> >
>> > Project Home: http://www.clusterlabs.org
>> > Getting started:
>> http://www.clusterlabs.org/doc/Cluster_from_Scratch.pdf
>> > Bugs: http://bugs.clusterlabs.org
>> >
>>
>> _______________________________________________
>> Pacemaker mailing list: Pacemaker at oss.clusterlabs.org
>> http://oss.clusterlabs.org/mailman/listinfo/pacemaker
>>
>> Project Home: http://www.clusterlabs.org
>> Getting started: http://www.clusterlabs.org/doc/Cluster_from_Scratch.pdf
>> Bugs: http://bugs.clusterlabs.org
>>
>
>
>
> --
> Best Regards,
>
> Soni Maula Harriz
> Database Administrator
> PT. Data Aksara Sangkuriang
>
>
--
Best Regards,
Soni Maula Harriz
Database Administrator
PT. Data Aksara Sangkuriang
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <https://lists.clusterlabs.org/pipermail/pacemaker/attachments/20121031/d21086af/attachment.htm>
More information about the Pacemaker
mailing list