[Pacemaker] Manual fsck required on passive node upon failover
Daniel Qian
daniel at bestningning.com
Thu Dec 10 16:01:17 UTC 2009
I installed a two-node cluster following this guide, linked from clusterlabs.org:
http://www.howtoforge.com/installation-and-setup-guide-for-drbd-openais-pacemaker-xen-on-opensuse-11.1
The guide is written for openSUSE, but I did it on CentOS 5 since all the
packages are available there.
Basically what I have so far is a Xen guest instance sitting on a
DRBD-backed file system. Everything seems to be working fine except for one
thing: when I shut down openais (service openais stop) on the active node
for failover testing, the passive node tries to take over all the
resources but gets stuck at the file system resource. The logs show the
file system needs a check (fsck), so manual intervention is required to
bring it online. However, if I do 'crm resource move xen_rsc passive_node'
instead, the failover transitions fine every time.
Can I borrow someone's sharp eyes for a clue about what might be causing
this?
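For reference, a minimal sketch of the two tests being compared (node and
resource names taken from the configuration below; assuming ilo141 is the
currently active node):

# Test 1: stop the cluster stack on the active node.
# Takeover stalls on the passive node at xen_fs, waiting for a manual fsck.
[root at ilo141 ~]# service openais stop

# Test 2: migrate the resource while both nodes are up. Works every time.
[root at ilo141 ~]# crm resource move ns1 ilo142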
[root at ilo141 ~]# crm configure show
node ilo141 \
attributes standby="off"
node ilo142 \
attributes standby="off"
primitive drbd_xen ocf:linbit:drbd \
params drbd_resource="r0" \
op monitor interval="15s"
primitive ns1 ocf:heartbeat:Xen \
params xmfile="/xen/ns1" \
op monitor interval="10s" \
op start interval="0s" timeout="30s" \
op stop interval="0s" timeout="300s" \
meta target-role="Started"
primitive xen_fs ocf:heartbeat:Filesystem \
params device="/dev/drbd0" directory="/xen" \
meta target-role="Started"
ms ms_drbd_xen drbd_xen \
meta master-max="1" master-node-max="1" clone-max="2" clone-node-max="1" notify="true"
location cli-prefer-ns1 ns1 \
rule $id="cli-prefer-rule-ns1" inf: #uname eq ilo142
colocation fs_on_drbd inf: xen_fs ms_drbd_xen:Master
colocation ns1-with-xen_fs inf: ns1 xen_fs
order fs_after_drbd inf: ms_drbd_xen:promote xen_fs:start
order ns1-after-xen_fs inf: xen_fs:start ns1:start
property $id="cib-bootstrap-options" \
dc-version="1.0.5-462f1569a43740667daf7b0f6b521742e9eb8fa7" \
cluster-infrastructure="openais" \
expected-quorum-votes="2" \
no-quorum-policy="ignore" \
stonith-enabled="false" \
default-resource-stickiness="1000" \
last-lrm-refresh="1260156983"
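Note in passing: the cli-prefer-ns1 constraint above is what 'crm resource
move' leaves behind; it pins ns1 to ilo142 with an infinite score until it
is cleared. Clearing it would look like this (a sketch, using the stock crm
shell):

[root at ilo141 ~]# crm resource unmove ns1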
[root at ilo141 ~]# cat /etc/drbd.conf
global {
    usage-count yes;
}
common {
    syncer { rate 100M; }
}
resource r0 {
    protocol C;
    handlers {
        pri-on-incon-degr "/usr/lib/drbd/notify-pri-on-incon-degr.sh; /usr/lib/drbd/notify-emergency-reboot.sh; echo b > /proc/sysrq-trigger ; reboot -f";
        pri-lost-after-sb "/usr/lib/drbd/notify-pri-lost-after-sb.sh; /usr/lib/drbd/notify-emergency-reboot.sh; echo b > /proc/sysrq-trigger ; reboot -f";
        local-io-error "/usr/lib/drbd/notify-io-error.sh; /usr/lib/drbd/notify-emergency-shutdown.sh; echo o > /proc/sysrq-trigger ; halt -f";
        fence-peer "/usr/lib/drbd/crm-fence-peer.sh";
        after-resync-target "/usr/lib/drbd/crm-unfence-peer.sh";
    }
    startup {
    }
    disk {
        on-io-error detach;
        fencing resource-only;
    }
    net {
        allow-two-primaries;
        after-sb-0pri disconnect;
        after-sb-1pri disconnect;
        after-sb-2pri disconnect;
        rr-conflict disconnect;
    }
    syncer {
        rate 100M;
        al-extents 257;
    }
    on ilo142 {
        device    /dev/drbd0;
        disk      /dev/VolGroup00/drbdr0;
        address   172.16.1.2:7788;
        meta-disk internal;
    }
    on ilo141 {
        device    /dev/drbd0;
        disk      /dev/VolGroup00/drbdr0;
        address   172.16.1.1:7788;
        meta-disk internal;
    }
}
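In case it helps with debugging, the DRBD state on the passive node while
the takeover hangs can be inspected like this (a sketch; standard DRBD 8.x
commands):

[root at ilo142 ~]# cat /proc/drbd      # overall status
[root at ilo142 ~]# drbdadm role r0     # should show Primary/... before xen_fs starts
[root at ilo142 ~]# drbdadm dstate r0   # disk state, UpToDate/UpToDate expected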
Thanks,
Daniel