[Pacemaker] [RFC] [Patch] DC node preferences (dc-priority)
Lars Ellenberg
lars.ellenberg at linbit.com
Thu May 3 07:38:18 UTC 2012
People sometimes think they have a use case
for influencing which node will be the DC.
Sometimes it is latency (certain CLI commands work faster
when run on the DC); sometimes they add a "mostly quorum"
node which may not be quite up to the task of being DC.
Prohibiting a node from becoming DC completely would
mean it cannot even be cleanly shut down (with 1.0.x, no MCP),
or act on its own resources for certain no-quorum policies.
So here is a patch I have been asked to present for discussion,
against Pacemaker 1.0, that introduces a "dc-prio" configuration
parameter, which will add some skew to the election algorithm.
Open questions:
* does it make sense at all?
* election algorithm compatibility, stability:
will the election be correct if some nodes have this patch
and some don't?
* How can it be improved so that a node with dc-prio=0 will
"give up" its DC-role as soon as there is at least one other node
with dc-prio > 0?
Lars
--- ./crmd/election.c.orig 2011-05-11 11:36:05.577329600 +0200
+++ ./crmd/election.c 2011-05-12 13:49:04.671484200 +0200
@@ -29,6 +29,7 @@
GHashTable *voted = NULL;
uint highest_born_on = -1;
static int current_election_id = 1;
+static int our_dc_prio = -1;
/* A_ELECTION_VOTE */
void
@@ -55,6 +56,20 @@
break;
}
+ if (our_dc_prio < 0) {
+ char * dc_prio_str = getenv("HA_dc_prio");
+
+ if (dc_prio_str == NULL) {
+ our_dc_prio = 1;
+ } else {
+ our_dc_prio = atoi(dc_prio_str);
+ }
+ }
+
+ if (!our_dc_prio) {
+ not_voting = TRUE;
+ }
+
if(not_voting == FALSE) {
if(is_set(fsa_input_register, R_STARTING)) {
not_voting = TRUE;
@@ -72,12 +87,13 @@
}
vote = create_request(
- CRM_OP_VOTE, NULL, NULL,
+ our_dc_prio?CRM_OP_VOTE:CRM_OP_NOVOTE, NULL, NULL,
CRM_SYSTEM_CRMD, CRM_SYSTEM_CRMD, NULL);
current_election_id++;
crm_xml_add(vote, F_CRM_ELECTION_OWNER, fsa_our_uuid);
crm_xml_add_int(vote, F_CRM_ELECTION_ID, current_election_id);
+ crm_xml_add_int(vote, F_CRM_DC_PRIO, our_dc_prio);
send_cluster_message(NULL, crm_msg_crmd, vote, TRUE);
free_xml(vote);
@@ -188,6 +204,7 @@
fsa_data_t *msg_data)
{
int election_id = -1;
+ int your_dc_prio = 1;
int log_level = LOG_INFO;
gboolean done = FALSE;
gboolean we_loose = FALSE;
@@ -216,6 +233,17 @@
your_version = crm_element_value(vote->msg, F_CRM_VERSION);
election_owner = crm_element_value(vote->msg, F_CRM_ELECTION_OWNER);
crm_element_value_int(vote->msg, F_CRM_ELECTION_ID, &election_id);
+ crm_element_value_int(vote->msg, F_CRM_DC_PRIO, &your_dc_prio);
+
+ if (our_dc_prio < 0) {
+ char * dc_prio_str = getenv("HA_dc_prio");
+
+ if (dc_prio_str == NULL) {
+ our_dc_prio = 1;
+ } else {
+ our_dc_prio = atoi(dc_prio_str);
+ }
+ }
CRM_CHECK(vote_from != NULL, vote_from = fsa_our_uname);
@@ -269,6 +297,13 @@
reason = "Recorded";
done = TRUE;
+ } else if(our_dc_prio < your_dc_prio) {
+ reason = "DC Prio";
+ we_loose = TRUE;
+
+ } else if(our_dc_prio > your_dc_prio) {
+ reason = "DC Prio";
+
} else if(compare_version(your_version, CRM_FEATURE_SET) < 0) {
reason = "Version";
we_loose = TRUE;
@@ -328,6 +363,7 @@
crm_xml_add(novote, F_CRM_ELECTION_OWNER, election_owner);
crm_xml_add_int(novote, F_CRM_ELECTION_ID, election_id);
+ crm_xml_add_int(novote, F_CRM_DC_PRIO, our_dc_prio);
send_cluster_message(vote_from, crm_msg_crmd, novote, TRUE);
free_xml(novote);
--- ./include/crm/msg_xml.h.orig 2011-05-11 18:22:08.061726000 +0200
+++ ./include/crm/msg_xml.h 2011-05-11 18:24:17.405132000 +0200
@@ -32,6 +32,7 @@
#define F_CRM_ORIGIN "origin"
#define F_CRM_JOIN_ID "join_id"
#define F_CRM_ELECTION_ID "election-id"
+#define F_CRM_DC_PRIO "dc-prio"
#define F_CRM_ELECTION_OWNER "election-owner"
#define F_CRM_TGRAPH "crm-tgraph"
#define F_CRM_TGRAPH_INPUT "crm-tgraph-in"
--- ./lib/ais/plugin.c.orig 2011-05-11 11:29:38.496116000 +0200
+++ ./lib/ais/plugin.c 2011-05-11 17:28:32.385425300 +0200
@@ -421,6 +421,9 @@
get_config_opt(pcmk_api, local_handle, "use_logd", &value, "no");
pcmk_env.use_logd = value;
+ get_config_opt(pcmk_api, local_handle, "dc_prio", &value, "1");
+ pcmk_env.dc_prio = value;
+
get_config_opt(pcmk_api, local_handle, "use_mgmtd", &value, "no");
if(ais_get_boolean(value) == FALSE) {
int lpc = 0;
@@ -584,6 +587,7 @@
pcmk_env.logfile = NULL;
pcmk_env.use_logd = "false";
pcmk_env.syslog = "daemon";
+ pcmk_env.dc_prio = "1";
if(cs_uid != root_uid) {
ais_err("Corosync must be configured to start as 'root',"
--- ./lib/ais/utils.c.orig 2011-05-11 11:27:08.460183200 +0200
+++ ./lib/ais/utils.c 2011-05-11 17:29:09.182064800 +0200
@@ -171,6 +171,7 @@
setenv("HA_logfacility", pcmk_env.syslog, 1);
setenv("HA_LOGFACILITY", pcmk_env.syslog, 1);
setenv("HA_use_logd", pcmk_env.use_logd, 1);
+ setenv("HA_dc_prio", pcmk_env.dc_prio, 1);
if(pcmk_env.logfile) {
setenv("HA_debugfile", pcmk_env.logfile, 1);
}
--- ./lib/ais/utils.h.orig 2011-05-11 11:26:12.757414700 +0200
+++ ./lib/ais/utils.h 2011-05-11 17:36:34.194841700 +0200
@@ -226,6 +226,7 @@
const char *syslog;
const char *logfile;
const char *use_logd;
+ const char *dc_prio;
};
extern struct pcmk_env_s pcmk_env;
--
: Lars Ellenberg
: LINBIT | Your Way to High Availability
: DRBD/HA support and consulting http://www.linbit.com
DRBD® and LINBIT® are registered trademarks of LINBIT, Austria.
More information about the Pacemaker
mailing list