[Pacemaker] new RA: http_ping

Dejan Muhamedagic dejanmm at fastmail.fm
Thu Aug 16 05:14:32 EDT 2012


Hi,

On Wed, Aug 15, 2012 at 12:53:33PM +0000, Nicolai Langfeldt wrote:
> Hi,
> 
> I've written a new RA based on what I learnt from the ping and nginx RAs
> for monitoring frontend-proxy-stacks.
> 
> It is attached here for your consideration - and indeed - critique.  I
> am hopeful that it makes it into the pacemaker distribution some time.

Did you consider using the existing monitor facility in the
apache RA? It can be sourced from
/usr/lib/ocf/lib/heartbeat/http-mon.sh
Somebody was already up to this, but it seems like they gave up.
More details here:
https://github.com/ClusterLabs/resource-agents/pull/22

Thanks,

Dejan

> Regards,
>   Nicolai

> #!/bin/sh
> #
> #	High-Availability httpd daemon monitoring OCF agent
> # 
> # nginx
> #
> # Description:	monitors http servers (no start, no stop).
> #
> # Author:       Nicolai Langfeldt, Broadnet AS
> #
> #               Started out as nginx agent.  Heavily repurposed.
> #
> # Nginx RA lists these authors: 
> #        	Alan Robertson
> #		Dejan Muhamedagic
> 
> #
> # Support:	linux-ha at lists.linux-ha.org
> #
> # License:	GNU General Public License (GPL)
> #
> # Copyright:
> #	Some parts (C) 2012 Broadnet AS
> #	Some other parts (C) 2002-2010 International Business Machines
> # 
> #
> # Patches are being accepted ;-)
> #
> # Requires *curl*; wget and GET are not sane/flexible enough.
> #
> # Usage example:
> #
> #  N-node proxy cluster.  Pacemaker manages production virtual IP
> #  (vip).  HAproxy started by init script on all N nodes.  HAproxy is
> #  used several times in the frontend stack and is needed on all nodes
> #  at all times for load distribution between the proxies.
> #     
> #  Production VIP must never be started on a node where HAproxy is not
> #  running but can run on any node where HAproxy does run.
> #
> #  My solution: Create this monitoring agent, inspired by the ping and
> #  nginx agents, and use it the same way as the ping agent to control
> #  where the VIP agent can be run.
> 
> # NOTE: This agent will not start or stop the resource.  It is assumed
> #  that the resource is managed by an init script and that warnings about
> #  failures are sent by something else (like nagios).
> 
> # 1. Configure a status URL in haproxy, using a randomized URL to hide
> #    the status page from random probers (I wanted the status to be
> #    available over the network too).  "pwgen" is useful for generating
> #    a random URL.
> #
> #      listen httpsservice 0.0.0.0:80
> #           ...
> #           stats uri /phei1SaeIevoh4eM
> #
> # 2. Check if working by directing a browser there
> #
> # 3. Configure pacemaker
> # 
> #      primitive vip ocf:heartbeat:IPaddr \
> #         params ip="192.168.5.8"
> #
> #      primitive happing ocf:pacemaker:http_ping \
> #         params name="happing" testurl="http://localhost/phei1SaeIevoh4eM" \
> #         op monitor interval="1s" depth="0"
> #
> #      clone happingall happing \
> #         meta target-role="Started"
> #
> #      location locVip vip \
> #         rule $id="locVipRule" -INF: not_defined happing
> #
> #    If your frontend runs for example a
> #    haproxy/nginx/varnish/whatever mix: set up http pings for all of
> #    the ones that _have_ to be running and combine in the location
> #    rule like this:
> #
> #      location locVip vip \
> #         rule $id="locVipRule" -INF: not_defined happing or not_defined nxping
> #
> # 4. Use crm_mon -A to monitor the vip and the happing token.  Document that the
> #    token is supposed to be defined on all nodes during normal operation.
> #
> #
> # OCF parameters:
> #  OCF_RESKEY_testurl
> #  OCF_RESKEY_bindaddr
> #  OCF_RESKEY_testregex
> #  OCF_RESKEY_name
> #  OCF_RESKEY_timeout
> #  OCF_RESKEY_dampen
> #  OCF_RESKEY_multiplier
> #  OCF_RESKEY_curlopts
> #  OCF_RESKEY_auth
> #  OCF_RESKEY_curl
> #
> 
> # Locate the OCF shell function library.  OCF_ROOT is defaulted when unset
> # or empty, OCF_FUNCTIONS_DIR only when unset.
> : "${OCF_ROOT:=/usr/lib/ocf}"
> : "${OCF_FUNCTIONS_DIR=$OCF_ROOT/lib/heartbeat}"
> 
> # Parameter defaults.  OCF_RESKEY_testurl has no default.
> # The expansions are quoted so the defaulted values are never subject to
> # word splitting or pathname expansion while being handed to ':'.
> : "${OCF_RESKEY_bindaddr:=lo}"
> : "${OCF_RESKEY_testregex:=}"
> : "${OCF_RESKEY_name:=httpping}"
> : "${OCF_RESKEY_timeout:=1s}"
> : "${OCF_RESKEY_dampen:=5s}"
> : "${OCF_RESKEY_multiplier:=1000}"
> : "${OCF_RESKEY_curlopts:=}"
> : "${OCF_RESKEY_auth:=}"
> : "${OCF_RESKEY_curl:=curl}"
> 
> # Pull in the OCF helpers (ocf_log and the OCF_* return codes are used below).
> . "${OCF_FUNCTIONS_DIR}/ocf-shellfuncs"
> HA_VARRUNDIR=${HA_VARRUN}
> 
> # Convert OCF_RESKEY_timeout to whole seconds.  Recognised suffixes are
> # ms/msec, m/min and h/hr; anything else is taken as seconds.
> # This kind of check/recalculation should be provided by ocf-shellfuncs.
> #
> # Only the first run of digits is used and leading zeros are dropped, so
> # the shell arithmetic below never sees an empty, multi-line or
> # octal-looking operand.  A value without any digits falls back to 1.
> integer=$(printf '%s\n' "${OCF_RESKEY_timeout}" |
>     sed -n 's/^[^0-9]*0*\([0-9][0-9]*\).*/\1/p')
> : "${integer:=1}"
> 
> case ${OCF_RESKEY_timeout} in
>     *[0-9]ms|*[0-9]msec) OCF_RESKEY_timeout=$(( $integer / 1000 ));;
>     *[0-9]m|*[0-9]min)   OCF_RESKEY_timeout=$(( $integer * 60 ));;
>     *[0-9]h|*[0-9]hr)    OCF_RESKEY_timeout=$(( $integer * 60 * 60 ));;
>     *)                   OCF_RESKEY_timeout=$integer;;
> esac
> 
> # Reduce the timeout by 10%, but never let it drop below one second: a
> # zero timeout means "infinite", which is exactly what must be avoided
> # (sub-second inputs such as "500ms" would otherwise end up as 0).
> NEW=$(( $OCF_RESKEY_timeout * 9 / 10 ))
> if [ "$NEW" -lt 1 ]; then
>     NEW=1
> fi
> OCF_RESKEY_timeout=$NEW
> 
> #######################################################################
> #
> #	Configuration options - usually you don't need to change these
> #
> #######################################################################
> 
> # Default options for the http client.
> # NB: We _always_ test a local resource, so it should be
> # safe to connect from the local interface.
> #
> # CURLOPTS is a single string on purpose: the callers expand it unquoted
> # so that it splits into separate curl arguments.
> 
> CURLOPTS="-Ssk --interface ${OCF_RESKEY_bindaddr} --max-time ${OCF_RESKEY_timeout} ${OCF_RESKEY_curlopts}"
> 
> #
> #	End of Configuration options
> #######################################################################
> 
> # Name this script was invoked as; used in log messages.
> CMD=$(basename "$0")
> 
> #	Print the list of supported actions and exit.
> #	$1 - exit status to terminate the script with (0 when omitted).
> usage() {
>   cat <<EOM
> usage: $0 action
> 
> action:
> 	start	"start" http_ping agent(or rather, if it's running report it as such)
> 
> 	stop	"stop" http_ping agent
> 
> 	status	human readable web server status
> 
> 	monitor return TRUE if the http server appears to be working.
>                 A testurl must be given and this URL must be configured 
>                 and working.
> 
> 	meta-data	show meta data message
> 
> 	validate-all	validate the instance parameters
> EOM
>   exit "${1:-0}"
> }
> 
> #
> # Run the http client with the given arguments.
> #
> # When OCF_RESKEY_auth is set, the credentials are fed to curl as a config
> # line on stdin (-K -) instead of being placed on the command line.
> # printf is used rather than echo so that backslashes or a leading dash in
> # the credentials are passed through untouched.
> #
> # OCF_RESKEY_curl is expanded unquoted, as in the original agent, so a
> # value carrying extra words still splits into command and arguments.
> #
> # Returns: the exit status of curl.
> #
> curl_func() {
>     if [ -z "$OCF_RESKEY_auth" ]; then
>         $OCF_RESKEY_curl "$@"
>     else
>         printf '%s\n' "-u $OCF_RESKEY_auth" |
>             $OCF_RESKEY_curl -K - "$@"
>     fi
> }
> 
> 
> silent_status() {
> 
>     case $OCF_RESKEY_testregex in
> 	'') HTTP_CODE=$(curl_func -o/dev/null $CURLOPTS \
> 		                  --write-out '%{http_code}\n' \
>                                   "$OCF_RESKEY_testurl" 2>/dev/null)
> 	    curlexit=$?
> 	    # Check headers file since we don't have any RE.  The last header should
>             # be a 200.  There can be redirects before that.
> 	    case $curlexit in
> 		0) case $HTTP_CODE in
> 		     200)   return 0;;
> 		   esac
> 	           return 1;;
> 		*) curlexit=$OCF_ERR_GENERIC;;
> 	    esac
> 	    ;;
> 
> 	*)  curl_func -o- $CURLOPTS "$OCF_RESKEY_testurl" |
> 	        grep -Eiq i"$OCF_RESKEY_testregex" >/dev/null
> 	    curlexit=$?
> 	    ;;
>     esac
> 
>     return $curlexit
> 
> }
> 
> 
> start() {
>     silent_status
>     rc=$?
>     case $rc in
> 
>        0) attrd_updater -U $OCF_RESKEY_multiplier -n $OCF_RESKEY_name -d $OCF_RESKEY_dampen
>           ocf_log info "start: test worked, set token."
>           # return $OCF_SUCCESS
> 	  ;;
> 
>        *) attrd_updater -D -n $OCF_RESKEY_name -d $OCF_RESKEY_dampen
> 	  ocf_log info "start: test failed, deleting token."
>           # return $OCF_ERR_GENERIC
> 	  ;;
> 
>     esac                                                                           
> 
>     return $OCF_SUCCESS
> }
> 
> 
> stop() {
>     ocf_log info "http_ping stoping"
>     attrd_updater -D -n $OCF_RESKEY_name -d $OCF_RESKEY_dampen
>     return $OCF_SUCCESS
> }
> 
> 
> status() {
>     silent_status
>     rc=$?
>     case $rc in
> 	0) ocf_log info "test ($OCF_RESKEY_testurl) worked";;
> 	*) ocf_log info "test ($OCF_RESKEY_testurl) failed"
>     esac
> 
>     return $OCF_SUCCESS
> }
> 
> 
> monitor() {
>   # Monitor action always succeeds.  It just adds or removes the named attribute.
> 
>   silent_status
>   if 
>     [ $? -ne 0 ]
>   then
>     ocf_log info "$CMD not running"
>     attrd_updater -D -n $OCF_RESKEY_name -d $OCF_RESKEY_dampen
>     return $OCF_SUCCESS # $OCF_ERR_GENERIC
>   fi
> 
>   attrd_updater -q -U $OCF_RESKEY_multiplier -n $OCF_RESKEY_name -d $OCF_RESKEY_dampen
>   return $OCF_SUCCESS
> }
> 
> metadata(){
> 	cat <<END
> <?xml version="1.0"?>
> <!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
> <resource-agent name="http_ping">
> <version>1.0</version>
> <longdesc lang="en">
> This is the resource _monitor_ agent for any httpd by polling a status
> page.
> 
> It provides only one level of testing, get a URL and optionaly look
> for a regular expression.  The HTTP GET should be on this side of
> instant, the default timeout is one second.  We allow a monitoring
> interval down to one second.
> </longdesc>
> <shortdesc lang="en">Monitors a http server</shortdesc>
> 
> <parameters>
> 
> <parameter name="testurl">
> <longdesc lang="en">
> URL to test.  There is no default.  You will need to configure a
> status or "ping" url in your http server.
> </longdesc>
> <shortdesc lang="en">test url</shortdesc>
> <content type="string" />
> </parameter>
> 
> <parameter name="testregex">
> <longdesc lang="en">
> Regular expression (egrep) to match in the output of testurl.  Case
> insensitive.  If no testregex is given then the HTTP status code is
> used.  It must be 200 otherwise the test fails.
> 
> If you want the test to succeed as long as the server responds in any
> way set testregex to ".".
> 
> </longdesc>
> <shortdesc lang="en">monitor regular expression</shortdesc>
> <content type="string" default=""/>
> </parameter>
> 
> <parameter name="bindaddr">
> <longdesc lang="en">
> By default curl is run with "--interface lo".  If you can't reach the
> web server from the loopback (URL containing "localhost") specify the
> interface name or address to bind to with this option.  Try
> 'bindaddr="0.0.0.0"' if the URL is not a localhost URL.
> </longdesc>
> <shortdesc lang="en">network bind</shortdesc>
> <content type="string" default="lo"/>
> </parameter>
> 
> <parameter name="name" unique="0">
> <longdesc lang="en">
> The name of the attribute to set.  This is the name to be used in the
> constraints.
> </longdesc>
> <shortdesc lang="en">Attribute name</shortdesc>
> <content type="string" default="httpping"/>
> </parameter>
> 
> <parameter name="multiplier" unique="0">
> <longdesc lang="en">
> The number by which to set if the httpd is up.
> </longdesc>
> <shortdesc lang="en">Value multiplier</shortdesc>
> <content type="integer" default="1000"/>
> </parameter>
> 
> <parameter name="timeout" unique="0">
> <longdesc lang="en">
> How long (in seconds) to wait before declaring a test lost
> </longdesc>
> <shortdesc lang="en">test timeout in seconds</shortdesc>
> <content type="integer" default="1s"/>
> </parameter>
> 
> <parameter name="dampen" unique="0">
> <longdesc lang="en">
> Amount of time to wait (dampen) before setting any new value.
> </longdesc>
> <shortdesc lang="en">Dampening interval</shortdesc>
> <content type="integer" default="5s"/>
> </parameter>
> 
> </parameters>
> 
> <actions>
> <action name="start"   timeout="1s" />
> <action name="stop"    timeout="1s" />
> <action name="status"  timeout="1s" />
> <action name="monitor" timeout="1s" depth="0" interval="1s" />
> <action name="meta-data"  timeout="5" />
> <action name="validate-all"  timeout="5" />
> </actions>
> </resource-agent>
> END
> 
>    exit $OCF_SUCCESS
> }
> 
> # #####################################################################
> 
> validate_all() {
>   if
>     [ -z $STATUSURL ]
>   then
>     ocf_log err "No testurl given!"
>     exit $OCF_ERR_PARAM
>   fi
> 
>   case $STATUSURL in
>       http://*/*) ;;
>       https://*/*) ;;
>       *) ocf_log err "Invalid STATUSURL $STATUSURL"
>          exit $OCF_ERR_ARGS ;;
>   esac
> 
>   if ! $OCF_RESKEY_curl --help >/dev/null 2>/dev/null; then
>       ocf_log err "curl ($OCF_RESKEY_curl) binary not found! Please verify that you've installed it"
>       exit $OCF_ERR_INSTALLED
>   fi
> 
> }
> 
> # ########################### MAIN ###########################
> 
> # Exactly one argument, the action name, is accepted.
> if [ $# -ne 1 ]; then
>     usage $OCF_ERR_ARGS
> fi
> COMMAND=$1
> 
> STATUSURL="$OCF_RESKEY_testurl"
> 
> # Dispatch.  The action functions are called directly (no eval needed) and
> # the script exits with whatever status the action returned instead of a
> # hard-coded 0.
> case "$COMMAND" in
>   meta-data)			metadata; exit $?;;
>   validate-all)			validate_all; exit $?;;
>   start|stop|status|monitor)	validate_all; "$COMMAND"; exit $?;;
>   *usage|*help) 		usage $OCF_SUCCESS;; # "help" as well as "--help"
>   *)				usage $OCF_ERR_UNIMPLEMENTED;;
> esac
> 
> # Every arm above exits; getting here means something is badly wrong.
> ocf_log err "$0: Running off end of script?!"
> 
> exit $OCF_ERR_GENERIC

> _______________________________________________
> Pacemaker mailing list: Pacemaker at oss.clusterlabs.org
> http://oss.clusterlabs.org/mailman/listinfo/pacemaker
> 
> Project Home: http://www.clusterlabs.org
> Getting started: http://www.clusterlabs.org/doc/Cluster_from_Scratch.pdf
> Bugs: http://bugs.clusterlabs.org





More information about the Pacemaker mailing list