[Pacemaker] DRBD Outdated by Heartbeat/Pacemaker - node alive don't get Primary

Mon Aug 30 10:27:02 UTC 2010

Hi pacemaker group,

I am using Debian 5.0.5 Lenny, DRBD 8.3.7, Heartbeat 3.0.3 (backports),
pacemaker 1.0.9 (backports)

I have a problem when putting a node in standby mode or shutting one node
down:

When one node is offline or in standby (crm node standby), the other one
becomes Slave and DRBD goes Secondary / Outdated:

#crm_mon
============
Last updated: Mon Aug 30 11:50:45 2010
Stack: Heartbeat
Current DC: swmaster1 (2cd4bf30-7a63-4da7-9102-b4f49d91b9d0) - partition
with quorum
Version: 1.0.9-unknown
2 Nodes configured, unknown expected votes
2 Resources configured.
============

Online: [ swmaster1 ]
OFFLINE: [ swslave1 ]

 Master/Slave Set: ms_drbd_mysql
     Slaves: [ swmaster1 ]
     Stopped: [ drbd_mysql:0 ]

_________________________________

SWMaster1:~# cat /proc/drbd
version: 8.3.7 (api:88/proto:86-91)
built-in
1: cs:WFConnection ro:Secondary/Unknown ds:Outdated/DUnknown C r----
    ns:1104 nr:744 dw:1944 dr:67439479 al:44 bm:67 lo:0 pe:0 ua:0 ap:0 ep:1
wo:b oos:64

____________________________________________________________________________
___________

When both nodes are online, everything is ok, and I can switch resources
using 'crm resource migrate grp_mysql'  :

============
Last updated: Mon Aug 30 11:57:09 2010
Stack: Heartbeat
Current DC: swmaster1 (2cd4bf30-7a63-4da7-9102-b4f49d91b9d0) - partition
with quorum
Version: 1.0.9-unknown
2 Nodes configured, unknown expected votes
2 Resources configured.
============

Online: [ swslave1 swmaster1 ]

 Master/Slave Set: ms_drbd_mysql
     Masters: [ swmaster1 ]
     Slaves: [ swslave1 ]
 Resource Group: grp_mysql
     fs_mysql   (ocf::heartbeat:Filesystem):    Started swmaster1
     mysqld     (lsb:mysql):    Started swmaster1

_________________________________

Reconnecting...SWMaster1:~# cat /proc/drbd
version: 8.3.7 (api:88/proto:86-91)
built-in
1: cs:Connected ro:Primary/Secondary ds:UpToDate/UpToDate C r----
    ns:1520 nr:1136 dw:2704 dr:67449610 al:50 bm:79 lo:0 pe:0 ua:0 ap:0 ep:1
wo:b oos:0

____________________________________________________________________________
___________

Instead of having an HA infrastructure, I have a LA :).

When I use DRBD manually, with Heartbeat shut down (/etc/init.d/heartbeat
stop), I can stop DRBD on one side and the other node stays in the UpToDate
state, so I can make it primary (drbdadm primary all).

How can I make Heartbeat/Pacemaker not put DRBD in the Outdated state, and
make it run the services/resources on the other node?

Here are my configurations :

SWMaster1:~# crm configure show
node $id="2cd4bf30-7a63-4da7-9102-b4f49d91b9d0" swmaster1 \
        attributes standby="off"
node $id="e022eabd-ef7b-4049-b941-fc26d00c5cd1" swslave1 \
        attributes standby="off"
primitive drbd_mysql ocf:linbit:drbd \
        params drbd_resource="mysql" \
        op monitor interval="15s"
primitive fs_mysql ocf:heartbeat:Filesystem \
        params device="/dev/drbd/by-res/mysql" directory="/var/lib/mysql"
fstype="ext3"
primitive mysqld lsb:mysql
group grp_mysql fs_mysql mysqld
ms ms_drbd_mysql drbd_mysql \
        meta master-max="1" master-node-max="1" clone-max="2"
clone-node-max="1" notify="true"
location cli-prefer-mysqld mysqld \
        rule $id="cli-prefer-rule-mysqld" inf: #uname eq swmaster1
location cli-standby-grp_mysql grp_mysql \
        rule $id="cli-standby-rule-grp_mysql" -inf: #uname eq swslave1
colocation mysql_on_drbd inf: grp_mysql ms_drbd_mysql:Master
order mysql_after_drbd inf: ms_drbd_mysql:promote grp_mysql:start
property $id="cib-bootstrap-options" \
        dc-version="1.0.9-unknown" \
        cluster-infrastructure="Heartbeat" \
        stonith-enabled="false" \
        no-quorum-policy="ignore"

____________________________________________________________________________
___________

SWMaster1:~# cat /etc/ha.d/ha.cf

use_logd on

autojoin none

node SWMaster1
node SWSlave1

crm yes

compression bz2

warntime 10
deadtime 40
initdead 60

msgfmt netstring

ucast eth0 ip.serv.mas.ter
ucast eth0 ip.serv.sla.ve

____________________________________________________________________________
___________

cat /etc/drbd.conf

global {
        usage-count yes;
}

common {
        protocol C;
        syncer          {
                                #algorithme a utiliser et activation de la
possibilite verification de synchronisation on-line - drbdadm verify
[ressource|all]
                                verify-alg sha1;

                                #comparaison de blocs par checksum pour
verifier necessite ecriture
                                csums-alg sha1;

                                #vitesse de synchronisation - drbdsetup
/dev/drbdnum syncer -r 10M
                                rate 7M;
                        }

        disk            {
                                on-io-error detach;
                        }

        net             {
                                #
http://www.drbd.org/users-guide-emb/s-integrity-check.html
                                data-integrity-alg sha1;

                                after-sb-0pri discard-least-changes;
                                after-sb-1pri discard-secondary;
                                #after-sb-2pri
                        }

        handlers        {
                                # envoi d un mail si split-brain
                                split-brain
"/usr/lib/drbd/notify-split-brain.sh root";
                        }
}

resource mysql {
        on SWMaster1 {
                device /dev/drbd1;
                disk /dev/md2;
                address ip.serv.mas.ter:7789;
                meta-disk internal;
        }
        on SWSlave1 {
                device /dev/drbd1;
                disk /dev/vda6;
                address ip.serv.sla.ve:7789;
                meta-disk internal;
        }
}

There must be something I don't understand or I made a mistake in the
configuration.

Thanks for your help,

Raphael LOUIS