[Pacemaker] cluster doesn't failover

Fri Oct 30 13:36:07 UTC 2009

Hello,

I'm trying to set up a cluster for shared storage with Pacemaker, DRBD, and
NFS.
I use two servers, and there are two network interfaces on each server: eth0
is connected to the network, and eth1 is a direct link between the two nodes
for DRBD replication:

                 Eth1: replication link
 --------------      ----------------
 | Server 1   |------| Server 2     |
 --------------      ----------------
      |                    |
     Eth0                 eth0
      |                    |  
         External Network
IPs
- server1 (storage01): eth0:10.1.1.70 eth1:192.168.0.2
- server2 (storage02): eth0:10.1.1.71 eth1:192.168.0.3

I can migrate the resource between the two nodes, but the problem is that
when I do a hard power-off of server01 (the server where the resource is
running), the second server (server02) doesn't fail over (the resource
doesn't start).
Could you take a look at my config files below?

Thanks for your help

Thomas Schneider

Drbd.conf
------------------------------------------------------------------------
global {
        usage-count no;
}

common {
        syncer { rate 35M; }
}

resource nfs {
        protocol C;
        handlers {
                fence-peer "/usr/lib/drbd/crm-fence-peer.sh";
                after-resync-target "/usr/lib/drbd/crm-unfence-peer.sh";
        }
        startup {
                degr-wfc-timeout 120;    # 2 minutes.
        }
        disk {
                on-io-error   detach;
                fencing resource-only;
        }
        net {
                max-buffers     2048;
                max-epoch-size  2048;
                }
        syncer {
                rate 35M;
                al-extents 257;
        }
        on storage01.myriapulse.local {
                device     /dev/drbd0;
                disk       /dev/sda4;
                address    192.168.0.2:7788;
                meta-disk  /dev/sda3[0];
        }
        on storage02.myriapulse.local {
                device    /dev/drbd0;
                disk      /dev/sda4;
                address   192.168.0.3:7788;
                meta-disk /dev/sda3[0];
        }
}
--------------------------------------------------------------------------------------

storage01:~# crm configure show
node storage01.myriapulse.local \
        attributes standby="off"
node storage02.myriapulse.local \
        attributes standby="off"
primitive drbd_nfs ocf:linbit:drbd \
        params drbd_resource="nfs" \
        op monitor interval="15s" \
        meta target-role="Started"
primitive fs_nfs ocf:heartbeat:Filesystem \
        params device="/dev/drbd/by-res/nfs" directory="/share"
fstype="ext3" \
        meta is-managed="true"
primitive ftp-server lsb:proftpd \
        op monitor interval="1min"
primitive ip_nfs ocf:heartbeat:IPaddr2 \
        params ip="10.1.1.69" nic="eth0"
primitive nfs-kernel-server lsb:nfs-kernel-server \
        op monitor interval="1min"
group nfs fs_nfs ip_nfs nfs-kernel-server \
        meta target-role="Started"
ms ms_drbd_nfs drbd_nfs \
        meta master-max="1" master-node-max="1" clone-max="2"
clone-node-max="1" notify="true"
location cli-standby-nfs nfs \
        rule $id="cli-standby-rule-nfs" -inf: #uname eq
storage02.myriapulse.local
colocation ftp_on_nfs inf: ftp-server nfs
colocation nfs_on_drbd inf: nfs ms_drbd_nfs:Master
order ftp_after_nfs inf: nfs ftp-server
order nfs_after_drbd inf: ms_drbd_nfs:promote nfs:start
property $id="cib-bootstrap-options" \
        dc-version="1.0.5-unknown" \
        cluster-infrastructure="openais" \
        expected-quorum-votes="2" \
        no-quorum-policy="ignore" \
        stonith-enabled="false"
storage01:~#
----------------------------------------------------------------------
Openais.conf

storage02:~# cat /etc/openais/openais.conf
# Please read the openais.conf.5 manual page

totem {
        version: 2

        # How long before declaring a token lost (ms)
        token: 3000

        # How many token retransmits before forming a new configuration
        token_retransmits_before_loss_const: 10

        # How long to wait for join messages in the membership protocol (ms)
        join: 60

        # How long to wait for consensus to be achieved before starting a
        # new round of membership configuration (ms)
        consensus: 1500

        # Turn off the virtual synchrony filter
        vsftype: none

        # Number of messages that may be sent by one processor on receipt of
        # the token
        max_messages: 20

        # Limit generated nodeids to 31-bits (positive signed integers)
        clear_node_high_bit: yes

        # Enable authentication and encryption of cluster traffic ("off" disables it)
        secauth: on

        # How many threads to use for encryption/decryption
        threads: 0

        # Optionally assign a fixed node id (integer)
        # nodeid: 1234

        # This specifies the mode of redundant ring, which may be none,
        # active, or passive.
        rrp_mode: passive

        interface {
                # The following values need to be set based on your
                # environment
                ringnumber: 0
                bindnetaddr: 10.1.1.0
                mcastaddr: 226.94.1.1
                mcastport: 5405
        }
        interface {
                # The following values need to be set based on your
                # environment
                ringnumber: 1
                bindnetaddr: 192.168.0.0
                mcastaddr: 226.94.2.1
                mcastport: 5405
        }
}

logging {
        to_stderr: yes
        debug: off
        timestamp: on
        to_file: /var/log/openais/openais.log
        to_syslog: yes
        syslog_facility: daemon
}

amf {
        mode: disabled
}

service {
        # Load the Pacemaker Cluster Resource Manager
        ver:       0
        name:      pacemaker
        use_mgmtd: yes
}

aisexec {
        user:   root
        group:  root
}