[ClusterLabs] Azure Resource Agent
    Eric Robinson 
    eric.robinson at psmnv.com
       
    Sat Sep 16 00:56:03 CEST 2017
    
    
  
Greetings, all --
If anyone's interested, I wrote a resource agent that works with Microsoft Azure. I'm no expert at shell scripting, so I'm certain it needs a great deal of improvement, but I've done some testing and it works with a 2-node cluster in my Azure environment. Offhand, I don't know any reason why it wouldn't work with larger clusters, too.
My colocation stack looks like this:
mysql -> azure_ip -> cluster_ip -> filesystem -> drbd
Failover takes up to 4 minutes because it takes that long for the Azure IP address de-association and re-association to complete. None of the delay is the fault of the cluster itself.
Right now the script writes a lot of debug output to syslog. That is helpful when you feel like you're waiting forever for the cluster to fail over: you can look at /var/log/messages and see that you're waiting for the Azure cloud to finish something. To eliminate the debug messages, set DEBUG_LEVEL to 0.
The agent requires the Azure client to be installed and the nodes to have been logged into the cloud. It currently only works with one NIC per VM, and two ipconfigs per NIC (one of which is the floating cluster IP).
This is obviously beta as it currently only works with a manual failover. I need to add some code to handle an actual node crash or power-plug test.
Feedback, suggestions, improvements are welcome. If someone who knows awk wants to clean up my azure client calls, that would be a good place to start.
--
#!/bin/bash
#
# AZaddr2 - OCF resource agent that manages a floating private IP address
# as an additional ipconfig on this VM's Azure NIC, using the Azure CLI (az).
#
# The interpreter is bash so that the agent does not depend on /bin/sh
# happening to be bash (on some distributions /bin/sh is dash).
#
#             OCF parameters are as below
#             OCF_RESKEY_ip
#######################################################################
# Initialization:
# Locate and load the OCF shell function library (return codes, helpers).
: "${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}"
. "${OCF_FUNCTIONS_DIR}/ocf-shellfuncs"
# Verbosity of logIt(): "debugN:" messages are written only when
# N <= DEBUG_LEVEL; set to 0 to silence all debug messages.
DEBUG_LEVEL=2
# Short host name; passed to the Azure CLI as the VM name.
MY_HOSTNAME=$(hostname -s)
# Name of this script, used as the prefix of every log line.
SCRIPT_NAME=$(basename -- "$0")
#######################################################################
#######################################
# Print the OCF resource-agent metadata (XML) for this agent on stdout.
# Globals:   OCF_SUCCESS (read)
# Arguments: none
# Outputs:   metadata XML on stdout; trace messages through logIt
# Returns:   $OCF_SUCCESS
#######################################
meta_data() {
    logIt "debug1: entered: meta_data()"
    # Quoted delimiter: the XML is emitted literally, without shell expansion.
    # start/stop advertise 300s because attaching or detaching an ipconfig in
    # Azure can take minutes rather than seconds.
    cat <<'END'
<?xml version="1.0"?>
<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
<resource-agent name="AZaddr2">
<version>1.0</version>
<longdesc lang="en">
Resource agent for managing IP configs in Azure.
</longdesc>
<shortdesc lang="en">Manages a floating IP address as an Azure NIC ipconfig</shortdesc>
<parameters>
<parameter name="ip" unique="1" required="1">
<longdesc lang="en">
The IPv4 (dotted quad notation)
example IPv4 "192.168.1.1".
</longdesc>
<shortdesc lang="en">IPv4 address</shortdesc>
<content type="string" default="" />
</parameter>
</parameters>
<actions>
<action name="start"   timeout="300s" />
<action name="stop"    timeout="300s" />
<action name="status" depth="0"  timeout="20s" interval="10s" />
<action name="monitor" depth="0"  timeout="20s" interval="10s" />
<action name="meta-data"  timeout="5s" />
<action name="validate-all"  timeout="20s" />
</actions>
</resource-agent>
END
    # Same "debug1:" prefix as every other function's exit trace, so that it
    # is silenced together with them when DEBUG_LEVEL is 0.
    logIt "debug1: exiting: meta_data()"
    return "$OCF_SUCCESS"
}
#######################################
# Ask Azure whether the ipconfig for the cluster IP exists on this NIC.
# Globals:   AZ_IPCONFIG_NAME, AZ_NIC_NAME, AZ_RG_NAME (read)
#            OCF_SUCCESS, OCF_NOT_RUNNING, OCF_ERR_GENERIC (read)
# Arguments: none
# Outputs:   trace messages through logIt
# Returns:   $OCF_SUCCESS     if the az output contains "Succeeded"
#            $OCF_NOT_RUNNING if the az output contains "does not exist"
#            $OCF_ERR_GENERIC for any other output
#######################################
azip_query() {
    local out rc
    logIt "debug1: entered: azip_query()"
    logIt "debug1: checking to determine if an Azure ipconfig named '$AZ_IPCONFIG_NAME' exists for the interface"
    logIt "debug1: executing: az network nic ip-config show --name $AZ_IPCONFIG_NAME --nic-name $AZ_NIC_NAME -g $AZ_RG_NAME 2>&1"
    # stderr is captured too: the "does not exist" text arrives as an error.
    out=$(az network nic ip-config show --name "$AZ_IPCONFIG_NAME" --nic-name "$AZ_NIC_NAME" -g "$AZ_RG_NAME" 2>&1)
    logIt "debug2: $out"
    if printf '%s\n' "$out" | grep -q "does not exist"; then
        logIt "debug1: ipconfig named '$AZ_IPCONFIG_NAME' does not exist"
        rc=$OCF_NOT_RUNNING
    elif printf '%s\n' "$out" | grep -q "Succeeded"; then
        logIt "debug1: ipconfig '$AZ_IPCONFIG_NAME' exists"
        rc=$OCF_SUCCESS
    else
        # Neither marker was found: the state is unknown. Log unconditionally
        # (no debug prefix) so the cause is visible even with DEBUG_LEVEL=0.
        logIt "unexpected response from Azure while querying ipconfig '$AZ_IPCONFIG_NAME': $out"
        rc=$OCF_ERR_GENERIC
    fi
    logIt "debug1: exiting: azip_query()"
    return "$rc"
}
#######################################
# Print a short usage summary for the agent.
# Globals:   OCF_SUCCESS (read)
# Arguments: none
# Outputs:   two lines of usage text on stdout
# Returns:   $OCF_SUCCESS
#######################################
azip_usage() {
    printf 'usage: %s {start|stop|status|monitor|validate-all|meta-data}\n' "$0"
    printf '%s\n' "Expects to have a fully populated OCF RA-compliant environment set."
    return $OCF_SUCCESS
}
#######################################
# Start the resource: make sure the ipconfig for the cluster IP exists on
# this VM's NIC, creating it when azip_query does not report it present.
# Globals:   OCF_RESKEY_ip, AZ_IPCONFIG_NAME, AZ_NIC_NAME, AZ_RG_NAME,
#            AZ_SUBNET_NAME, AZ_VNET_NAME (read)
#            OCF_SUCCESS, OCF_ERR_GENERIC (read)
# Arguments: none
# Outputs:   trace and error messages through logIt
# Returns:   $OCF_SUCCESS     if the ipconfig already exists or was created
#            $OCF_ERR_GENERIC if the create output lacks "Succeeded"
#######################################
azip_start() {
    local out rc
    logIt "debug1: entered: azip_start()"
    #--if a matching ipconfig already exists in Azure, return success
    if azip_query; then
        logIt "debug1: $OCF_RESKEY_ip is already associated"
        logIt "debug1: exiting: azip_start()"
        return "$OCF_SUCCESS"
    fi
    #--create an interface ipconfig in Azure
    logIt "debug1: creating ipconfig '$AZ_IPCONFIG_NAME'"
    logIt "debug1: executing: az network nic ip-config create --name $AZ_IPCONFIG_NAME --nic-name $AZ_NIC_NAME --resource-group $AZ_RG_NAME --private-ip-address $OCF_RESKEY_ip --subnet $AZ_SUBNET_NAME --vnet-name $AZ_VNET_NAME"
    # Capture stderr as well, so a failure reason ends up in the log.
    out=$(az network nic ip-config create \
        --name "$AZ_IPCONFIG_NAME" \
        --nic-name "$AZ_NIC_NAME" \
        --resource-group "$AZ_RG_NAME" \
        --private-ip-address "$OCF_RESKEY_ip" \
        --subnet "$AZ_SUBNET_NAME" \
        --vnet-name "$AZ_VNET_NAME" 2>&1)
    logIt "debug2: output: $out"
    if printf '%s\n' "$out" | grep -q "Succeeded"; then
        logIt "debug1: $AZ_IPCONFIG_NAME created successfully with ip address $OCF_RESKEY_ip"
        rc=$OCF_SUCCESS
    else
        # Logged without a debug prefix so the failure is never filtered out.
        logIt "failed to create ipconfig $AZ_IPCONFIG_NAME: $out"
        rc=$OCF_ERR_GENERIC
    fi
    logIt "debug1: exiting: azip_start()"
    return "$rc"
}
#######################################
# Stop the resource: delete the ipconfig for the cluster IP from this VM's
# NIC and verify that it is gone.
#
# Only a definite "not running" answer from azip_query counts as stopped.
# If the query fails for another reason the state is unknown, so the delete
# is still attempted, and success is reported only once a later query
# confirms the ipconfig is absent.
#
# Globals:   OCF_RESKEY_ip, AZ_IPCONFIG_NAME, AZ_NIC_NAME, AZ_RG_NAME (read)
#            OCF_SUCCESS, OCF_NOT_RUNNING, OCF_ERR_GENERIC (read)
# Arguments: none
# Outputs:   trace and error messages through logIt
# Returns:   $OCF_SUCCESS     if the ipconfig is confirmed absent
#            $OCF_ERR_GENERIC otherwise
#######################################
azip_stop() {
    local out rc
    logIt "debug1: entered: azip_stop()"
    #--if there is no matching ipconfig in Azure, exit with success
    azip_query
    rc=$?
    if [ "$rc" -eq "$OCF_NOT_RUNNING" ]; then
        logIt "debug1: $OCF_RESKEY_ip is not associated"
        logIt "debug1: exiting: azip_stop()"
        return "$OCF_SUCCESS"
    fi
    #--delete it
    logIt "deleting ipconfig '$AZ_IPCONFIG_NAME'"
    out=$(az network nic ip-config delete \
        --name "$AZ_IPCONFIG_NAME" \
        --nic-name "$AZ_NIC_NAME" \
        --resource-group "$AZ_RG_NAME" 2>&1)
    logIt "debug2: output: $out"
    #--verify that it was deleted
    logIt "verifying that ipconfig '$AZ_IPCONFIG_NAME' got removed"
    azip_query
    rc=$?
    if [ "$rc" -eq "$OCF_NOT_RUNNING" ]; then
        logIt "debug1: ipconfig $AZ_IPCONFIG_NAME successfully removed"
        rc=$OCF_SUCCESS
    else
        logIt "failed to remove $AZ_IPCONFIG_NAME."
        rc=$OCF_ERR_GENERIC
    fi
    logIt "debug1: exiting: azip_stop()"
    return "$rc"
}
#######################################
# Monitor the resource by delegating to azip_query.
# Globals:   RC (written: status returned by azip_query)
#            OCF_SUCCESS, OCF_NOT_RUNNING (read)
# Arguments: none
# Outputs:   trace messages through logIt
# Returns:   $OCF_SUCCESS     when azip_query returned 0
#            $OCF_NOT_RUNNING for every other azip_query status
#######################################
azip_monitor() {
    logIt "debug1: entered: azip_monitor()"
    azip_query
    RC=$?
    logIt "debug1: function azip_query() returned $RC"
    # Anything other than a clean 0 from the query is reported as not running.
    case $RC in
        0) return $OCF_SUCCESS ;;
    esac
    return $OCF_NOT_RUNNING
}
#######################################
# Write a message to syslog via logger, filtered by debug level.
#
# A message of the form "debugN..." (N a single digit) is a debug message of
# level N: it is written only when N <= DEBUG_LEVEL, and levels above 1 are
# framed by two banner lines. Every other message, including an empty one or
# a "debug" prefix without a digit, is always written.
#
# Globals:   DEBUG_LEVEL, SCRIPT_NAME, OCF_RESOURCE_INSTANCE (read)
# Arguments: $1 - the message
# Outputs:   one or three syslog entries through logger
# Returns:   0 when the message is filtered out, otherwise logger's status
#######################################
logIt() {
    local msg rest level prefix banner
    msg=$1
    prefix="$SCRIPT_NAME ($OCF_RESOURCE_INSTANCE)"
    banner="####################################################################"
    case "$msg" in
    debug[0-9]*)
        # The level is the single character that follows "debug".
        rest=${msg#debug}
        level=${rest%"${rest#?}"}
        if [ "$level" -gt "${DEBUG_LEVEL:-0}" ]; then
            return 0
        fi
        if [ "$level" -gt 1 ]; then
            logger "$prefix: debug$level $banner"
            logger "$prefix: $msg"
            logger "$prefix: debug$level $banner"
        else
            logger "$prefix: $msg"
        fi
        ;;
    *)
        logger "$prefix: $msg"
        ;;
    esac
}
logIt "debug1: agent was asked to $__OCF_ACTION resource $OCF_RESOURCE_INSTANCE with IP address $OCF_RESKEY_ip"
######################################################################
#  Azure-independent actions
######################################################################
# meta-data and usage/help only print static text, so they are answered here,
# before anything below requires a working Azure CLI login.
case "$__OCF_ACTION" in
meta-data)
        meta_data
        exit $?
        ;;
usage|help)
        azip_usage
        exit $?
        ;;
esac
######################################################################
#  Azure Initialization
######################################################################
# Discovers, through the Azure CLI, the names needed by the action functions:
# AZ_IPCONFIG_NAME, AZ_RG_NAME, AZ_NIC_NAME, AZ_VNET_NAME, AZ_SUBNET_NAME.
# Any value that cannot be determined aborts the agent with an error code.
#--the ip parameter is required: the ipconfig name is derived from it
if [ -z "$OCF_RESKEY_ip" ]
then
        logIt "parameter 'ip' (OCF_RESKEY_ip) is not set"
        exit "$OCF_ERR_CONFIGURED"
fi
#--check if azure is enabled
AZ_ENABLED=$(az account show|grep Enabled|sed "s/  *//g"|cut -d":" -f2|sed "s/\"//g"|sed "s/,//g")
if [ -z "$AZ_ENABLED" ]
then
        logIt "Azure account not detected"
        logIt "debug1: exiting $SCRIPT_NAME"
        exit "$OCF_ERR_GENERIC"
else
        logIt "debug1: AZ_ENABLED=$AZ_ENABLED"
fi
#--set the ipconfig name
AZ_IPCONFIG_NAME="ipconfig-$OCF_RESKEY_ip"
logIt "debug1: AZ_IPCONFIG_NAME=$AZ_IPCONFIG_NAME"
#--get the resource group name
# NOTE(review): this keeps every "name" line of the listing; it presumably
# relies on exactly one resource group being visible - confirm.
AZ_RG_NAME=$(az group list|grep name|cut -d":" -f2|sed "s/  *//g"|sed "s/\"//g"|sed "s/,//g")
if [ -z "$AZ_RG_NAME" ]
then
        logIt "could not determine the Azure resource group name"
        exit "$OCF_ERR_GENERIC"
else
        logIt "debug1: AZ_RG_NAME=$AZ_RG_NAME"
fi
#--get the nic name
AZ_NIC_NAME=$(az vm nic list -g "$AZ_RG_NAME" --vm-name "$MY_HOSTNAME"|grep networkInterfaces|cut -d"/" -f9|sed "s/\",//g")
if [ -z "$AZ_NIC_NAME" ]
then
        logIt "could not determine the Azure NIC name"
        exit "$OCF_ERR_GENERIC"
else
        logIt "debug1: AZ_NIC_NAME=$AZ_NIC_NAME"
fi
#--get the vnet and subnet names
# The pipeline yields "<vnet>/<subnet>" (fields 9 and 11 of the subnet id).
R=$(az network nic show --name "$AZ_NIC_NAME" --resource-group "$AZ_RG_NAME"|grep -i subnets|head -1|sed "s/  */ /g"|cut -d"/" -f9,11|sed "s/\",//g")
# Split on the "/" with parameter expansion; IFS is left untouched so later
# word splitting in the script keeps its default behaviour.
case "$R" in
*/*)
        AZ_VNET_NAME=${R%%/*}
        AZ_SUBNET_NAME=${R#*/}
        ;;
*)
        AZ_VNET_NAME=$R
        AZ_SUBNET_NAME=
        ;;
esac
if [ -z "$AZ_VNET_NAME" ]
then
        logIt "could not determine Azure vnet name"
        exit "$OCF_ERR_GENERIC"
else
        logIt "debug1: AZ_VNET_NAME=$AZ_VNET_NAME"
fi
if [ -z "$AZ_SUBNET_NAME" ]
then
        logIt "could not determine the Azure subnet name"
        exit "$OCF_ERR_GENERIC"
else
        logIt "debug1: AZ_SUBNET_NAME=$AZ_SUBNET_NAME"
fi
######################################################################
#  Actions
######################################################################
# Run the function for the requested action and exit with its status.
# RC is assigned in every branch, so the final exit code is always defined.
case "$__OCF_ACTION" in
meta-data)
        meta_data
        RC=$?
        ;;
usage|help)
        azip_usage
        RC=$?
        ;;
start)
        azip_start
        RC=$?
        ;;
stop)
        azip_stop
        RC=$?
        ;;
status)
        azip_query
        RC=$?
        ;;
monitor)
        azip_monitor
        RC=$?
        ;;
validate-all)
        # No checks of its own: getting this far means the Azure
        # initialization above completed without aborting.
        RC=$OCF_SUCCESS
        ;;
*)
        azip_usage
        RC=$OCF_ERR_UNIMPLEMENTED
        ;;
esac
#--exit with return code
logIt "debug1: exiting $SCRIPT_NAME with code $RC"
exit "$RC"
#--end
--
Eric Robinson
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.clusterlabs.org/pipermail/users/attachments/20170915/f0eb6718/attachment-0001.html>
    
    
More information about the Users
mailing list