[Pacemaker] RFC: What part of the XML configuration do you hate the most?
Andrew Beekhof
beekhof at gmail.com
Thu Nov 27 07:55:43 UTC 2008
I'm going to fix this properly today.
On Nov 27, 2008, at 8:27 AM, Satomi TANIGUCHI wrote:
> Hi Andrew,
>
> I found another behavior that is caused by the cluster forgetting
> that the resource is supposed to stay stopped.
>
> For example, consider a node which has a primitive and a master/
> slave resource.
> Their on-fail settings are both "standby".
> When the master/slave resource fails, all resources on the failed
> node are stopped, and the master/slave resource's fail-count is
> increased.
> But then, only the primitive resource restarts on the failed node, because
> its fail-count has not been increased and the cluster forgets that the
> resource is supposed to stay stopped...
>
> When a failover (F/O) occurs,
> in the case of a resource that is _not_ master/slave,
> pengine creates a single graph to stop and restart the resource.
> In the case of a master/slave resource, it creates two graphs:
> one for the resource's stop-process and another for the restart-
> process.
> And when it creates the graph for the restart-process,
> nothing remembers that resources are supposed to stay stopped on
> the failed node.
>
> This behavior is the same as (or similar to) what you are worried
> about, isn't it?
>
> To avoid this behavior, the status of the node needs to be updated
> before the restart-process.
> As a trial, I created a patch (for pacemaker-dev 366b14d79780).
> And I attached the graph produced with the patched pacemaker.
> It's not a general solution, just for reference...
>
>
> Regards,
> Satomi TANIGUCHI
> diff -urN pacemaker-dev/crmd/te_actions.c pacemaker-dev.mod/crmd/
> te_actions.c
> --- pacemaker-dev/crmd/te_actions.c 2008-11-26 10:47:46.000000000
> +0900
> +++ pacemaker-dev.mod/crmd/te_actions.c 2008-11-26
> 10:48:47.000000000 +0900
> @@ -175,6 +175,42 @@
> return TRUE;
> }
>
> +static gboolean
> +te_standby_node(crm_graph_t *graph, crm_action_t *action)
> +{
> + const char *id = NULL;
> + const char *uuid = NULL;
> + const char *target = NULL;
> +
> + id = ID(action->xml);
> + target = crm_element_value(action->xml, XML_LRM_ATTR_TARGET);
> + uuid = crm_element_value(action->xml, XML_LRM_ATTR_TARGET_UUID);
> +
> + CRM_CHECK(id != NULL,
> + crm_log_xml_warn(action->xml, "BadAction");
> + return FALSE);
> + CRM_CHECK(uuid != NULL,
> + crm_log_xml_warn(action->xml, "BadAction");
> + return FALSE);
> + CRM_CHECK(target != NULL,
> + crm_log_xml_warn(action->xml, "BadAction");
> + return FALSE);
> +
> + te_log_action(LOG_INFO,
> + "Executing standby operation (%s) on %s", id, target);
> +
> + if (cib_ok > set_standby(fsa_cib_conn, uuid, XML_CIB_TAG_NODES,
> "on")) {
> + crm_err("Cannot standby %s: set_standby() call failed.", target);
> + }
> +
> + crm_info("Skipping wait for %d", action->id);
> + action->confirmed = TRUE;
> + update_graph(graph, action);
> + trigger_graph();
> +
> + return TRUE;
> +}
> +
> static int get_target_rc(crm_action_t *action)
> {
> const char *target_rc_s = crm_meta_value(action->params,
> XML_ATTR_TE_TARGET_RC);
> @@ -500,7 +536,8 @@
> te_pseudo_action,
> te_rsc_command,
> te_crm_command,
> - te_fence_node
> + te_fence_node,
> + te_standby_node
> };
>
> void
> diff -urN pacemaker-dev/include/crm/crm.h pacemaker-dev.mod/include/
> crm/crm.h
> --- pacemaker-dev/include/crm/crm.h 2008-11-26 10:47:46.000000000
> +0900
> +++ pacemaker-dev.mod/include/crm/crm.h 2008-11-26
> 10:48:47.000000000 +0900
> @@ -146,6 +146,7 @@
> #define CRM_OP_SHUTDOWN_REQ "req_shutdown"
> #define CRM_OP_SHUTDOWN "do_shutdown"
> #define CRM_OP_FENCE "stonith"
> +#define CRM_OP_STANDBY "standby"
> #define CRM_OP_EVENTCC "event_cc"
> #define CRM_OP_TEABORT "te_abort"
> #define CRM_OP_TEABORTED "te_abort_confirmed" /* we asked */
> diff -urN pacemaker-dev/include/crm/pengine/common.h pacemaker-
> dev.mod/include/crm/pengine/common.h
> --- pacemaker-dev/include/crm/pengine/common.h 2008-11-26
> 10:47:46.000000000 +0900
> +++ pacemaker-dev.mod/include/crm/pengine/common.h 2008-11-26
> 10:48:47.000000000 +0900
> @@ -52,6 +52,7 @@
> action_demote,
> action_demoted,
> shutdown_crm,
> + standby_node,
> stonith_node
> };
>
> diff -urN pacemaker-dev/include/crm/pengine/status.h pacemaker-
> dev.mod/include/crm/pengine/status.h
> --- pacemaker-dev/include/crm/pengine/status.h 2008-11-26
> 10:47:46.000000000 +0900
> +++ pacemaker-dev.mod/include/crm/pengine/status.h 2008-11-26
> 10:48:47.000000000 +0900
> @@ -104,6 +104,7 @@
> const char *uname;
> gboolean online;
> gboolean standby;
> + gboolean action_standby;
> gboolean pending;
> gboolean unclean;
> gboolean shutdown;
> diff -urN pacemaker-dev/include/crm/transition.h pacemaker-dev.mod/
> include/crm/transition.h
> --- pacemaker-dev/include/crm/transition.h 2008-11-26
> 10:47:46.000000000 +0900
> +++ pacemaker-dev.mod/include/crm/transition.h 2008-11-26
> 10:48:47.000000000 +0900
> @@ -115,6 +115,7 @@
> gboolean (*rsc)(crm_graph_t *graph, crm_action_t *action);
> gboolean (*crmd)(crm_graph_t *graph, crm_action_t *action);
> gboolean (*stonith)(crm_graph_t *graph, crm_action_t *action);
> + gboolean (*standby)(crm_graph_t *graph, crm_action_t *action);
> } crm_graph_functions_t;
>
> enum transition_status {
> diff -urN pacemaker-dev/lib/pengine/common.c pacemaker-dev.mod/lib/
> pengine/common.c
> --- pacemaker-dev/lib/pengine/common.c 2008-11-26 10:47:46.000000000
> +0900
> +++ pacemaker-dev.mod/lib/pengine/common.c 2008-11-26
> 10:48:47.000000000 +0900
> @@ -178,6 +178,8 @@
> return shutdown_crm;
> } else if(safe_str_eq(task, CRM_OP_FENCE)) {
> return stonith_node;
> + } else if(safe_str_eq(task, CRM_OP_STANDBY)) {
> + return standby_node;
> } else if(safe_str_eq(task, CRMD_ACTION_STATUS)) {
> return monitor_rsc;
> } else if(safe_str_eq(task, CRMD_ACTION_NOTIFY)) {
> @@ -245,6 +247,9 @@
> case stonith_node:
> result = CRM_OP_FENCE;
> break;
> + case standby_node:
> + result = CRM_OP_STANDBY;
> + break;
> case monitor_rsc:
> result = CRMD_ACTION_STATUS;
> break;
> diff -urN pacemaker-dev/lib/pengine/unpack.c pacemaker-dev.mod/lib/
> pengine/unpack.c
> --- pacemaker-dev/lib/pengine/unpack.c 2008-11-26 10:47:46.000000000
> +0900
> +++ pacemaker-dev.mod/lib/pengine/unpack.c 2008-11-26
> 10:48:47.000000000 +0900
> @@ -240,6 +240,7 @@
> */
> new_node->details->unclean = TRUE;
> }
> + new_node->details->action_standby = FALSE;
>
> if(type == NULL
> || safe_str_eq(type, "member")
> @@ -832,7 +833,7 @@
> stop_action(rsc, node, FALSE);
>
> } else if(on_fail == action_fail_standby) {
> - node->details->standby = TRUE;
> + node->details->action_standby = TRUE;
>
> } else if(on_fail == action_fail_block) {
> /* is_managed == FALSE will prevent any
> diff -urN pacemaker-dev/lib/transition/graph.c pacemaker-dev.mod/lib/
> transition/graph.c
> --- pacemaker-dev/lib/transition/graph.c 2008-11-26
> 10:47:46.000000000 +0900
> +++ pacemaker-dev.mod/lib/transition/graph.c 2008-11-26
> 10:48:47.000000000 +0900
> @@ -188,6 +188,11 @@
> crm_debug_2("Executing STONITH-event: %d",
> action->id);
> return graph_fns->stonith(graph, action);
> +
> + } else if(safe_str_eq(task, CRM_OP_STANDBY)) {
> + crm_debug_2("Executing STANDBY-event: %d",
> + action->id);
> + return graph_fns->standby(graph, action);
> }
>
> crm_debug_2("Executing crm-event: %d", action->id);
> diff -urN pacemaker-dev/lib/transition/utils.c pacemaker-dev.mod/lib/
> transition/utils.c
> --- pacemaker-dev/lib/transition/utils.c 2008-11-26
> 10:47:46.000000000 +0900
> +++ pacemaker-dev.mod/lib/transition/utils.c 2008-11-26
> 10:48:47.000000000 +0900
> @@ -41,6 +41,7 @@
> pseudo_action_dummy,
> pseudo_action_dummy,
> pseudo_action_dummy,
> + pseudo_action_dummy,
> pseudo_action_dummy
> };
>
> @@ -61,6 +62,7 @@
> CRM_ASSERT(graph_fns->crmd != NULL);
> CRM_ASSERT(graph_fns->pseudo != NULL);
> CRM_ASSERT(graph_fns->stonith != NULL);
> + CRM_ASSERT(graph_fns->standby != NULL);
> }
>
> const char *
> diff -urN pacemaker-dev/pengine/allocate.c pacemaker-dev.mod/pengine/
> allocate.c
> --- pacemaker-dev/pengine/allocate.c 2008-11-26 10:47:46.000000000
> +0900
> +++ pacemaker-dev.mod/pengine/allocate.c 2008-11-26
> 10:48:47.000000000 +0900
> @@ -774,6 +774,15 @@
> last_stonith = stonith_op;
> }
>
> + } else if(node->details->online && node->details->action_standby) {
> + action_t *standby_op = NULL;
> +
> + standby_op = custom_action(
> + NULL, crm_strdup(CRM_OP_STANDBY),
> + CRM_OP_STANDBY, node, FALSE, TRUE, data_set);
> +
> + order_actions(standby_op, all_stopped, pe_order_implies_left);
> +
> } else if(node->details->online && node->details->shutdown) {
> action_t *down_op = NULL;
> crm_info("Scheduling Node %s for shutdown",
> diff -urN pacemaker-dev/pengine/graph.c pacemaker-dev.mod/pengine/
> graph.c
> --- pacemaker-dev/pengine/graph.c 2008-11-26 10:47:46.000000000 +0900
> +++ pacemaker-dev.mod/pengine/graph.c 2008-11-26 10:48:47.000000000
> +0900
> @@ -368,7 +368,10 @@
> if(safe_str_eq(action->task, CRM_OP_FENCE)) {
> action_xml = create_xml_node(NULL, XML_GRAPH_TAG_CRM_EVENT);
> /* needs_node_info = FALSE; */
> -
> +
> + } else if(safe_str_eq(action->task, CRM_OP_STANDBY)) {
> + action_xml = create_xml_node(NULL, XML_GRAPH_TAG_CRM_EVENT);
> +
> } else if(safe_str_eq(action->task, CRM_OP_SHUTDOWN)) {
> action_xml = create_xml_node(NULL, XML_GRAPH_TAG_CRM_EVENT);
>
> diff -urN pacemaker-dev/pengine/group.c pacemaker-dev.mod/pengine/
> group.c
> --- pacemaker-dev/pengine/group.c 2008-11-26 10:47:46.000000000 +0900
> +++ pacemaker-dev.mod/pengine/group.c 2008-11-26 10:48:47.000000000
> +0900
> @@ -423,6 +423,7 @@
> case action_notified:
> case shutdown_crm:
> case stonith_node:
> + case standby_node:
> break;
> case stop_rsc:
> case stopped_rsc:
> diff -urN pacemaker-dev/pengine/utils.c pacemaker-dev.mod/pengine/
> utils.c
> --- pacemaker-dev/pengine/utils.c 2008-11-26 10:47:49.000000000 +0900
> +++ pacemaker-dev.mod/pengine/utils.c 2008-11-26 10:49:54.000000000
> +0900
> @@ -338,6 +338,7 @@
> case monitor_rsc:
> case shutdown_crm:
> case stonith_node:
> + case standby_node:
> task = no_action;
> break;
> default:
> @@ -430,6 +431,7 @@
>
> switch(text2task(action->task)) {
> case stonith_node:
> + case standby_node:
> case shutdown_crm:
> do_crm_log_unlikely(log_level,
> "%s%s%sAction %d: %s%s%s%s%s%s",
> <pe-warn-0.left.gif>_______________________________________________
> Pacemaker mailing list
> Pacemaker at clusterlabs.org
> http://list.clusterlabs.org/mailman/listinfo/pacemaker
More information about the Pacemaker
mailing list