[Pacemaker] Corosync/Pacemaker cluster configuration for Postgresql

Mon Feb 10 19:19:49 EST 2014

Hi,
Good Day!!
I currently have a two-node PostgreSQL master/slave replication setup in a
VMware ESXi environment, and I plan to set up automatic failover: switch over
the virtual IP and promote the slave to master in case the active master goes down.

I am using a single network interface for everything (PostgreSQL replication,
Corosync communication, and other network connectivity). I am not sure whether
this is the reason Corosync is having issues.

The configuration and log files are attached for your reference. Kindly
advise me on what I am missing.

I followed the guides at these links:

http://clusterlabs.org/wiki/PgSQL_Replicated_Cluster

https://github.com/t-matsuo/resource-agents/wiki/Resource-Agent-for-PostgreSQL-9.1-streaming-replication

Thanks

Regards
Ramanan(Yee)
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.clusterlabs.org/pipermail/pacemaker/attachments/20140211/aab3e9df/attachment-0002.html>
-------------- next part --------------
corosync.conf
====================
compatibility: whitetank
totem {
        version: 2
        token: 3000
        token_retransmits_before_loss_const: 10
        join: 60
        consensus: 3600
        vsftype: none
        max_messages: 20
        secauth: off
        threads: 0
        clear_node_high_bit: yes
        rrp_mode: none
        interface {
                 ringnumber: 0
                 bindnetaddr: 192.168.0.0
                 mcastaddr: 224.0.0.1
                 mcastport: 5405
                 ttl: 1
                 }
         }

quorum {
        # Quorum for the Pacemaker Cluster Resource Manager
        provider: corosync_votequorum
        expected_votes: 1
}

aisexec {
        user:   root
        group:  root
}

logging {
        fileline: off
        to_stderr: no
        to_logfile: yes
        to_syslog: yes
        syslog_facility: daemon
        logfile: /var/log/corosync/corosync.log
        debug: on
        timestamp: on
        logger_subsys {
                subsys: AMF
                debug: off
                tags: enter|leave|trace1|trace2|trace3|trace4|trace6
                }
        }

 amf {
     mode: disabled
     }
====================

=====================================================================
node $id="822257856" UCS06-RDB-01
node $id="906143936" UCS07-RDB-02

primitive VIPv4 ocf:heartbeat:IPaddr2 \
        params ip="192.168.101.155" nic="eth0" cidr_netmask="24" \
        op start interval="0s" timeout="60s" \
        op monitor interval="20s" timeout="20s" on-fail="restart" \
        op stop interval="0s" timeout="60s" \
        meta target-role="Started"

primitive pgsql ocf:heartbeat:pgsql \
        params pgctl="/usr/lib/postgresql/9.2/bin/pg_ctl" psql="/usr/bin/psql" pgdata="/var/lib/postgresql/9.2/main" rep_mode="sync" node_list="UCS06-RDB-01 UCS07-RDB-02" restore_command="cp /var/lib/postgresql/9.2/archive/%f %p" primary_conninfo_opt="keepalives_idle=60 keepalives_interval=5 keepalives_count=5" stop_escalate="0" \
        op start interval="0s" timeout="120s" on-fail="restart" \
        op monitor interval="7s" timeout="60s" on-fail="restart" \
        op monitor interval="2s" role="Master" timeout="60s" on-fail="restart" \
        op promote interval="0s" timeout="120s" on-fail="restart" \
        op demote interval="0s" timeout="120s" on-fail="stop" \
        op stop interval="0s" timeout="120s" on-fail="block" \
        op notify interval="0s" timeout="90s"

primitive pingCheck ocf:pacemaker:ping \
        params host_list="192.168.101.206 192.168.101.151" multiplier="100" dampen="5s" \
        op start interval="0" timeout="90s" \
        op monitor interval="10s" timeout="60s" \
        op stop interval="0" timeout="100s"

ms msPostgresql pgsql \
        meta master-max="1" master-node-max="1" clone-max="2" clone-node-max="1" notify="true" target-role="Started"

clone clnPingCheck pingCheck \
        meta target-role="Started"

location rsc_location-1 msPostgresql \
        rule $id="rsc_location-1-rule" 200: pgsql-status eq HS:sync \
        rule $id="rsc_location-1-rule-0" 100: pgsql-status eq PRI \
        rule $id="rsc_location-1-rule-1" -inf: not_defined pgsql-status \
        rule $id="rsc_location-1-rule-2" -inf: pgsql-status ne HS:sync and pgsql-status ne PRI \
        rule $id="rsc_location-1-rule-3" 50: #uname eq UCS06-RDB-01

location rsc_location-2 VIPv4 \
        rule $id="rsc_location-2-rule" -inf: not_defined ping or ping lt 0 \
        rule $id="rsc_location-2-rule-1" 50: #uname eq UCS06-RDB-01

colocation rsc_colocation-1 inf: msPostgresql clnPingCheck
colocation rsc_colocation-2 inf: VIPv4 msPostgresql:Master

order rsc_order-1 0: clnPingCheck VIPv4 msPostgresql symmetrical=false
order rsc_order-2 0: msPostgresql:promote VIPv4:start symmetrical=false
order rsc_order-3 0: msPostgresql:demote VIPv4:stop symmetrical=false

property $id="cib-bootstrap-options" \
        dc-version="1.1.10-42f2063" \
        cluster-infrastructure="corosync" \
        stonith-enabled="false" \
        no-quorum-policy="ignore" \
        last-lrm-refresh="1392049060"

rsc_defaults $id="rsc-options" \
        resource-stickiness="INFINITY" \
        migration-threshold="1"
==================================================================================

crm(live)# status
Last updated: Tue Feb 11 08:06:54 2014
Last change: Tue Feb 11 01:04:58 2014 via crmd on UCS07-RDB-02
Stack: corosync
Current DC: UCS06-RDB-01 (822257856) - partition with quorum
Version: 1.1.10-42f2063
2 Nodes configured
5 Resources configured

Online: [ UCS06-RDB-01 UCS07-RDB-02 ]

 Clone Set: clnPingCheck [pingCheck]
     Started: [ UCS06-RDB-01 UCS07-RDB-02 ]
====================================================================================

crm(live)resource# status
 Clone Set: clnPingCheck [pingCheck]
     Started: [ UCS06-RDB-01 UCS07-RDB-02 ]
 VIPv4  (ocf::heartbeat:IPaddr2):       Stopped
 Master/Slave Set: msPostgresql [pgsql]
     Stopped: [ UCS06-RDB-01 UCS07-RDB-02 ]
=====================================================================================

-------------- next part --------------
Feb 11 08:04:58 [17884] UCS06-RDB-01    pengine:     info: unpack_rsc_op:       Operation monitor found resource pingCheck:1 active on UCS07-RDB-02
Feb 11 08:04:58 [17884] UCS06-RDB-01    pengine:    debug: find_anonymous_clone:        Internally renamed pgsql on UCS07-RDB-02 to pgsql:0
Feb 11 08:04:58 [17884] UCS06-RDB-01    pengine:     info: clone_print:          Clone Set: clnPingCheck [pingCheck]
Feb 11 08:04:58 [17884] UCS06-RDB-01    pengine:    debug: native_active:       Resource pingCheck:0 active on UCS06-RDB-01
Feb 11 08:04:58 [17884] UCS06-RDB-01    pengine:    debug: native_active:       Resource pingCheck:0 active on UCS06-RDB-01
Feb 11 08:04:58 [17884] UCS06-RDB-01    pengine:    debug: native_active:       Resource pingCheck:1 active on UCS07-RDB-02
Feb 11 08:04:58 [17884] UCS06-RDB-01    pengine:    debug: native_active:       Resource pingCheck:1 active on UCS07-RDB-02
Feb 11 08:04:58 [17884] UCS06-RDB-01    pengine:     info: short_print:              Started: [ UCS06-RDB-01 UCS07-RDB-02 ]
Feb 11 08:04:58 [17884] UCS06-RDB-01    pengine:     info: native_print:        VIPv4   (ocf::heartbeat:IPaddr2):       Stopped
Feb 11 08:04:58 [17884] UCS06-RDB-01    pengine:     info: clone_print:          Master/Slave Set: msPostgresql [pgsql]
Feb 11 08:04:58 [17884] UCS06-RDB-01    pengine:     info: short_print:              Stopped: [ UCS06-RDB-01 UCS07-RDB-02 ]
Feb 11 08:04:58 [17884] UCS06-RDB-01    pengine:    debug: common_apply_stickiness:     Resource pingCheck:0: preferring current location (node=UCS06-RDB-01, weight=1)
Feb 11 08:04:58 [17884] UCS06-RDB-01    pengine:    debug: common_apply_stickiness:     Resource pingCheck:1: preferring current location (node=UCS07-RDB-02, weight=1)
Feb 11 08:04:58 [17884] UCS06-RDB-01    pengine:    debug: native_assign_node:  Assigning UCS06-RDB-01 to pingCheck:0
Feb 11 08:04:58 [17884] UCS06-RDB-01    pengine:    debug: native_assign_node:  Assigning UCS07-RDB-02 to pingCheck:1
Feb 11 08:04:58 [17884] UCS06-RDB-01    pengine:    debug: clone_color:         Allocated 2 clnPingCheck instances of a possible 2
Feb 11 08:04:58 [17884] UCS06-RDB-01    pengine:     info: rsc_merge_weights:   msPostgresql: Rolling back scores from VIPv4
Feb 11 08:04:58 [17884] UCS06-RDB-01    pengine:    debug: native_assign_node:  All nodes for resource pgsql:0 are unavailable, unclean or shutting down (UCS06-RDB-01: 1, -1000000)
Feb 11 08:04:58 [17884] UCS06-RDB-01    pengine:    debug: native_assign_node:  Could not allocate a node for pgsql:0
Feb 11 08:04:58 [17884] UCS06-RDB-01    pengine:     info: native_color:        Resource pgsql:0 cannot run anywhere
Feb 11 08:04:58 [17884] UCS06-RDB-01    pengine:    debug: native_assign_node:  All nodes for resource pgsql:1 are unavailable, unclean or shutting down (UCS06-RDB-01: 1, -1000000)
Feb 11 08:04:58 [17884] UCS06-RDB-01    pengine:    debug: native_assign_node:  Could not allocate a node for pgsql:1
Feb 11 08:04:58 [17884] UCS06-RDB-01    pengine:     info: native_color:        Resource pgsql:1 cannot run anywhere
Feb 11 08:04:58 [17884] UCS06-RDB-01    pengine:    debug: clone_color:         Allocated 0 msPostgresql instances of a possible 2
Feb 11 08:04:58 [17884] UCS06-RDB-01    pengine:     info: rsc_merge_weights:   msPostgresql: Rolling back scores from VIPv4
Feb 11 08:04:58 [17884] UCS06-RDB-01    pengine:    debug: master_color:        pgsql:0 master score: 0
Feb 11 08:04:58 [17884] UCS06-RDB-01    pengine:    debug: master_color:        pgsql:1 master score: 0
Feb 11 08:04:58 [17884] UCS06-RDB-01    pengine:     info: master_color:        msPostgresql: Promoted 0 instances of a possible 1 to master
Feb 11 08:04:58 [17884] UCS06-RDB-01    pengine:    debug: native_assign_node:  All nodes for resource VIPv4 are unavailable, unclean or shutting down (UCS06-RDB-01: 1, -1000000)
Feb 11 08:04:58 [17884] UCS06-RDB-01    pengine:    debug: native_assign_node:  Could not allocate a node for VIPv4
Feb 11 08:04:58 [17884] UCS06-RDB-01    pengine:     info: native_color:        Resource VIPv4 cannot run anywhere
Feb 11 08:04:58 [17884] UCS06-RDB-01    pengine:    debug: master_create_actions:       Creating actions for msPostgresql
Feb 11 08:04:58 [17884] UCS06-RDB-01    pengine:     info: LogActions:  Leave   pingCheck:0     (Started UCS06-RDB-01)
Feb 11 08:04:58 [17884] UCS06-RDB-01    pengine:     info: LogActions:  Leave   pingCheck:1     (Started UCS07-RDB-02)
Feb 11 08:04:58 [17884] UCS06-RDB-01    pengine:     info: LogActions:  Leave   VIPv4   (Stopped)
Feb 11 08:04:58 [17884] UCS06-RDB-01    pengine:     info: LogActions:  Leave   pgsql:0 (Stopped)
Feb 11 08:04:58 [17884] UCS06-RDB-01    pengine:     info: LogActions:  Leave   pgsql:1 (Stopped)