[ClusterLabs] Azure Resource Agent
Eric Robinson
eric.robinson at psmnv.com
Fri Sep 15 18:56:03 EDT 2017
Greetings, all --
If anyone's interested, I wrote a resource agent that works with Microsoft Azure. I'm no expert at shell scripting, so I'm certain it needs a great deal of improvement, but I've done some testing and it works with a 2-node cluster in my Azure environment. Offhand, I don't know any reason why it wouldn't work with larger clusters, too.
My colocation stack looks like this:
mysql -> azure_ip -> cluster_ip -> filesystem -> drbd
Failover takes up to 4 minutes because it takes that long for the Azure IP address de-association and re-association to complete. None of the delay is the fault of the cluster itself.
Right now the script burps a bunch of debug output to syslog. That is helpful when you feel like you're waiting forever for the cluster to fail over: you can look at /var/log/messages and see that you're waiting for the Azure cloud to finish something. To eliminate the debug messages, set DEBUG_LEVEL to 0.
The agent requires the Azure client to be installed and the nodes to have been logged into the cloud. It currently only works with one NIC per VM, and two ipconfigs per NIC (one of which is the floating cluster IP).
This is obviously beta as it currently only works with a manual failover. I need to add some code to handle an actual node crash or power-plug test.
Feedback, suggestions, improvements are welcome. If someone who knows awk wants to clean up my azure client calls, that would be a good place to start.
--
#!/bin/sh
#
# OCF resource agent that manages an Azure NIC ip-config for a floating IP.
#
# OCF parameters are as below
# OCF_RESKEY_ip
#######################################################################
# Initialization:
#--load the OCF shell function library; the path is quoted so that an
#--OCF_ROOT containing whitespace is not word-split
: "${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}"
. "${OCF_FUNCTIONS_DIR}/ocf-shellfuncs"
#--verbosity threshold read by logIt(): a "debugN" message is logged only
#--when N <= DEBUG_LEVEL, so 0 silences all debug1/debug2 output
DEBUG_LEVEL=2
#--short host name; passed to az as the VM name when looking up the NIC
MY_HOSTNAME=$(hostname -s)
#--prefix for every syslog line written by logIt()
SCRIPT_NAME=$(basename "$0")
#######################################################################
#######################################################################
#######################################################################
# meta_data: print the OCF resource-agent metadata XML on stdout.
# Globals:  OCF_SUCCESS (read)
# Returns:  OCF_SUCCESS
#######################################################################
meta_data() {
  logIt "debug1: entered: meta_data()"
  #--start/stop advertise 300s: the Azure address association and
  #--de-association this agent waits for can take minutes, so a 20s
  #--default would time out during a normal failover
  cat <<END
<?xml version="1.0"?>
<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
<resource-agent name="AZaddr2">
<version>1.0</version>
<longdesc lang="en">
Resource agent for managing IP configs in Azure.
</longdesc>
<shortdesc lang="en">Manages an Azure NIC IP config</shortdesc>
<parameters>
<parameter name="ip" unique="1" required="1">
<longdesc lang="en">
The IPv4 (dotted quad notation)
example IPv4 "192.168.1.1".
</longdesc>
<shortdesc lang="en">IPv4 address</shortdesc>
<content type="string" default="" />
</parameter>
</parameters>
<actions>
<action name="start" timeout="300s" />
<action name="stop" timeout="300s" />
<action name="status" depth="0" timeout="20s" interval="10s" />
<action name="monitor" depth="0" timeout="20s" interval="10s" />
<action name="meta-data" timeout="5s" />
<action name="validate-all" timeout="20s" />
</actions>
</resource-agent>
END
  #--tagged debug1 like every other entry/exit trace so DEBUG_LEVEL=0 hides it
  logIt "debug1: exiting: meta_data()"
  return $OCF_SUCCESS
}
#######################################################################
# azip_query: ask Azure whether the ipconfig $AZ_IPCONFIG_NAME exists.
# Globals:  AZ_IPCONFIG_NAME, AZ_NIC_NAME, AZ_RG_NAME (read)
# Returns:  OCF_NOT_RUNNING if the az output contains "does not exist",
#           OCF_SUCCESS     if it contains "Succeeded",
#           OCF_ERR_GENERIC for any other output.
#######################################################################
azip_query() {
  local out rc
  logIt "debug1: entered: azip_query()"
  logIt "debug1: checking to determine if an Azure ipconfig named '$AZ_IPCONFIG_NAME' exists for the interface"
  logIt "debug1: executing: az network nic ip-config show --name $AZ_IPCONFIG_NAME --nic-name $AZ_NIC_NAME -g $AZ_RG_NAME 2>&1"
  #--stderr is merged in because the "does not exist" text is matched below;
  #--arguments are quoted so each value reaches az as exactly one word
  out=$(az network nic ip-config show --name "$AZ_IPCONFIG_NAME" --nic-name "$AZ_NIC_NAME" -g "$AZ_RG_NAME" 2>&1)
  logIt "debug2: $out"
  #--"does not exist" is tested first, as a missing ipconfig takes precedence
  case $out in
    *"does not exist"*)
      logIt "debug1: ipconfig named '$AZ_IPCONFIG_NAME' does not exist"
      rc=$OCF_NOT_RUNNING
      ;;
    *Succeeded*)
      logIt "debug1: ipconfig '$AZ_IPCONFIG_NAME' exists"
      rc=$OCF_SUCCESS
      ;;
    *)
      logIt "debug1: not sure how this happens"
      rc=$OCF_ERR_GENERIC
      ;;
  esac
  #--logged before the single return so the exit trace is actually reachable
  logIt "debug1: exiting: azip_query()"
  return $rc
}
#######################################################################
# azip_usage: print the supported actions on stdout.
# Returns:  OCF_SUCCESS
#######################################################################
azip_usage() {
  printf '%s\n' \
    "usage: $0 {start|stop|status|monitor|validate-all|meta-data}" \
    "Expects to have a fully populated OCF RA-compliant environment set."
  return $OCF_SUCCESS
}
#######################################################################
# azip_start: make sure the ipconfig for $OCF_RESKEY_ip exists on the NIC.
# Globals:  OCF_RESKEY_ip, AZ_IPCONFIG_NAME, AZ_NIC_NAME, AZ_RG_NAME,
#           AZ_SUBNET_NAME, AZ_VNET_NAME (read)
# Returns:  OCF_SUCCESS if the ipconfig already exists or the create
#           output contains "Succeeded"; OCF_ERR_GENERIC otherwise.
#######################################################################
azip_start() {
  local out rc
  logIt "debug1: entered: azip_start()"
  #--if a matching ipconfig already exists in Azure, return success
  if azip_query; then
    logIt "debug1: $OCF_RESKEY_ip is already associated"
    rc=$OCF_SUCCESS
  else
    #--create an interface ipconfig in Azure
    logIt "debug1: creating ipconfig '$AZ_IPCONFIG_NAME'"
    logIt "debug1: executing: az network nic ip-config create --name $AZ_IPCONFIG_NAME --nic-name $AZ_NIC_NAME --resource-group $AZ_RG_NAME --private-ip-address $OCF_RESKEY_ip --subnet $AZ_SUBNET_NAME --vnet-name $AZ_VNET_NAME"
    #--every value is quoted so it reaches az as exactly one argument
    out=$(az network nic ip-config create --name "$AZ_IPCONFIG_NAME" --nic-name "$AZ_NIC_NAME" --resource-group "$AZ_RG_NAME" --private-ip-address "$OCF_RESKEY_ip" --subnet "$AZ_SUBNET_NAME" --vnet-name "$AZ_VNET_NAME")
    logIt "debug2: output: $out"
    case $out in
      *Succeeded*)
        logIt "debug1: $AZ_IPCONFIG_NAME created successfully with ip address $OCF_RESKEY_ip"
        rc=$OCF_SUCCESS
        ;;
      *)
        logIt "debug1: failed to create ipconfig $AZ_IPCONFIG_NAME"
        rc=$OCF_ERR_GENERIC
        ;;
    esac
  fi
  #--logged before the single return so the exit trace is actually reachable
  logIt "debug1: exiting: azip_start()"
  return $rc
}
#######################################################################
# azip_stop: remove the ipconfig for $OCF_RESKEY_ip from the NIC.
# Globals:  OCF_RESKEY_ip, AZ_IPCONFIG_NAME, AZ_NIC_NAME, AZ_RG_NAME (read)
# Returns:  OCF_SUCCESS if azip_query reports non-zero before or after
#           the delete; OCF_ERR_GENERIC if the ipconfig is still found.
# NOTE(review): any non-zero azip_query result, including its generic
# error, is treated as "not associated" -- confirm this is intended.
#######################################################################
azip_stop() {
  local out rc
  logIt "debug1: entered: azip_stop()"
  #--if there is no matching ipconfig in Azure, exit with success
  if ! azip_query; then
    logIt "debug1: $OCF_RESKEY_ip is not associated"
    rc=$OCF_SUCCESS
  else
    #--delete it
    logIt "deleting ipconfig '$AZ_IPCONFIG_NAME'"
    out=$(az network nic ip-config delete --name "$AZ_IPCONFIG_NAME" --nic-name "$AZ_NIC_NAME" --resource-group "$AZ_RG_NAME")
    #--keep the az output in the debug2 trace, as azip_start does
    logIt "debug2: output: $out"
    #--verify that it was deleted
    logIt "verifying that ipconfig '$AZ_IPCONFIG_NAME' got removed"
    if ! azip_query; then
      logIt "debug1: ipconfig $AZ_IPCONFIG_NAME successfully removed"
      rc=$OCF_SUCCESS
    else
      logIt "failed to remove $AZ_IPCONFIG_NAME."
      rc=$OCF_ERR_GENERIC
    fi
  fi
  #--logged before the single return so the exit trace is actually reachable
  logIt "debug1: exiting: azip_stop()"
  return $rc
}
#######################################################################
# azip_monitor: report whether the ipconfig is currently present.
# Globals:  OCF_SUCCESS, OCF_NOT_RUNNING (read)
# Returns:  OCF_SUCCESS when azip_query returns 0; OCF_NOT_RUNNING for
#           every other azip_query result.
#######################################################################
azip_monitor() {
  local query_rc rc
  logIt "debug1: entered: azip_monitor()"
  azip_query
  query_rc=$?
  logIt "debug1: function azip_query() returned $query_rc"
  if [ "$query_rc" -eq 0 ]; then
    rc=$OCF_SUCCESS
  else
    rc=$OCF_NOT_RUNNING
  fi
  #--logged before the single return so the exit trace is actually reachable
  logIt "debug1: exiting: azip_monitor()"
  return $rc
}
#######################################################################
# logIt: write a message to syslog through logger.
# Arguments: $1 - message text. A message starting with "debug" plus a
#                 digit is a debug message; that digit is its level.
# Globals:   DEBUG_LEVEL, SCRIPT_NAME, OCF_RESOURCE_INSTANCE (read)
# Behaviour: debug messages are logged only when level <= DEBUG_LEVEL;
#            levels above 1 are framed by two banner lines. Any other
#            message is always logged.
# Returns:   0 when a message is suppressed, else logger's status.
#######################################################################
logIt() {
  local msg level tag
  msg=$1
  tag="$SCRIPT_NAME ($OCF_RESOURCE_INSTANCE)"
  #--case pattern matching replaces the bash-only substring test, which
  #--made [ complain for empty messages or ones with early whitespace
  case $msg in
    debug[0-9]*)
      #--isolate the single character that follows "debug"
      level=${msg#debug}
      level=${level%"${level#?}"}
      #--too verbose for the configured threshold (or threshold not a number)
      if ! [ "$level" -le "${DEBUG_LEVEL:-0}" ] 2>/dev/null; then
        return 0
      fi
      if [ "$level" -gt 1 ]; then
        logger "$tag: debug$level ####################################################################"
        logger "$tag: $msg"
        logger "$tag: debug$level ####################################################################"
      else
        logger "$tag: $msg"
      fi
      ;;
    *)
      #--not a levelled debug message: always log it
      logger "$tag: $msg"
      ;;
  esac
}
logIt "debug1: agent was asked to $__OCF_ACTION resource $OCF_RESOURCE_INSTANCE with IP address $OCF_RESKEY_ip"
######################################################################
# Azure Initialization
#
# Uses the az client to fill in the globals the action functions read:
#   AZ_IPCONFIG_NAME, AZ_RG_NAME, AZ_NIC_NAME, AZ_VNET_NAME, AZ_SUBNET_NAME
# and exits with OCF_ERR_GENERIC as soon as one cannot be determined.
######################################################################
case $__OCF_ACTION in
meta-data|usage|help)
  #--these actions only print static text, so they must keep working
  #--on a node that has no Azure login
  ;;
*)
  #--check if azure is enabled
  AZ_ENABLED=$(az account show | grep Enabled | cut -d":" -f2 | tr -d ' ",')
  if [ -z "$AZ_ENABLED" ]; then
    logIt "Azure account not detected"
    logIt "debug1: exiting $SCRIPT_NAME"
    exit $OCF_ERR_GENERIC
  fi
  logIt "debug1: AZ_ENABLED=$AZ_ENABLED"

  #--set the ipconfig name
  AZ_IPCONFIG_NAME="ipconfig-$OCF_RESKEY_ip"
  logIt "debug1: AZ_IPCONFIG_NAME=$AZ_IPCONFIG_NAME"

  #--get the resource group name
  #--NOTE(review): takes every "name" line of the listing, so this looks
  #--like it assumes exactly one resource group is visible -- confirm
  AZ_RG_NAME=$(az group list | grep name | cut -d":" -f2 | tr -d ' ",')
  if [ -z "$AZ_RG_NAME" ]; then
    logIt "could not determine the Azure resource group name"
    exit $OCF_ERR_GENERIC
  fi
  logIt "debug1: AZ_RG_NAME=$AZ_RG_NAME"

  #--get the nic name (9th "/"-separated field of the networkInterfaces id)
  AZ_NIC_NAME=$(az vm nic list -g "$AZ_RG_NAME" --vm-name "$MY_HOSTNAME" | grep networkInterfaces | cut -d"/" -f9 | tr -d '",')
  if [ -z "$AZ_NIC_NAME" ]; then
    #--reported through logIt like every other failure here, not echo
    logIt "could not determine the Azure NIC name"
    exit $OCF_ERR_GENERIC
  fi
  logIt "debug1: AZ_NIC_NAME=$AZ_NIC_NAME"

  #--get the vnet and subnet names: fields 9 and 11 of the first subnet id,
  #--which cut emits as "vnet/subnet"
  R=$(az network nic show --name "$AZ_NIC_NAME" --resource-group "$AZ_RG_NAME" | grep -i subnets | head -1 | cut -d"/" -f9,11 | tr -d '", ')
  #--split with parameter expansion: no bash array needed, and IFS is left
  #--untouched for the rest of the script
  case $R in
    */*)
      AZ_VNET_NAME=${R%%/*}
      AZ_SUBNET_NAME=${R#*/}
      ;;
    *)
      AZ_VNET_NAME=$R
      AZ_SUBNET_NAME=
      ;;
  esac
  if [ -z "$AZ_VNET_NAME" ]; then
    logIt "could not determine Azure vnet name"
    exit $OCF_ERR_GENERIC
  fi
  logIt "debug1: AZ_VNET_NAME=$AZ_VNET_NAME"
  if [ -z "$AZ_SUBNET_NAME" ]; then
    logIt "could not determine the Azure subnet name"
    exit $OCF_ERR_GENERIC
  fi
  logIt "debug1: AZ_SUBNET_NAME=$AZ_SUBNET_NAME"
  ;;
esac
######################################################################
# Actions
######################################################################
#--run the requested action; RC always ends up holding the agent's exit code
case "$__OCF_ACTION" in
  meta-data)
    meta_data
    RC=$?
    ;;
  usage|help)
    azip_usage
    RC=$?
    ;;
  start)
    azip_start
    RC=$?
    ;;
  stop)
    azip_stop
    RC=$?
    ;;
  status)
    azip_query
    RC=$?
    ;;
  monitor)
    azip_monitor
    RC=$?
    ;;
  validate-all)
    #--nothing further to check here; set RC explicitly so the exit code
    #--does not depend on whatever the final logIt call returns
    RC=$OCF_SUCCESS
    ;;
  *)
    azip_usage
    RC=$OCF_ERR_UNIMPLEMENTED
    ;;
esac
#--exit with return code
logIt "debug1: exiting $SCRIPT_NAME with code $RC"
exit $RC
#--end
--
Eric Robinson
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.clusterlabs.org/pipermail/users/attachments/20170915/f0eb6718/attachment-0002.html>
More information about the Users
mailing list