[Pacemaker] RFC: What part of the XML configuration do you hate the most?
Andrew Beekhof
beekhof at gmail.com
Thu Nov 27 15:13:23 UTC 2008
Done:
http://hg.clusterlabs.org/pacemaker/stable-1.0/rev/9919f48d3313
On Thu, Nov 27, 2008 at 08:55, Andrew Beekhof <beekhof at gmail.com> wrote:
> I'm going to fix this properly today.
>
> On Nov 27, 2008, at 8:27 AM, Satomi TANIGUCHI wrote:
>
>> Hi Andrew,
>>
>> I found another behavior that is caused by the cluster forgetting that the
>> resource is supposed to stay stopped.
>>
>> For example, consider a node which has a primitive resource and a
>> master/slave resource.
>> Their on-fail settings are "standby".
>> When the master/slave resource fails, all resources on the failed node are
>> going to stop, and the master/slave resource's fail-count is increased.
>> But then, only the primitive resource restarts on the failed node, because its
>> fail-count is not increased and the cluster forgets the resource is
>> supposed to stay stopped...
>>
>> When a failover (F/O) occurs,
>> in the case of a resource that is _not_ master/slave,
>> pengine creates one graph to stop and restart the resource.
>> In the case of a master/slave resource, it creates a graph twice:
>> one for the resource's stop-process and another for the restart-process.
>> And when it creates the graph for the restart-process,
>> nothing remembers that resources are supposed to stay stopped on the failed
>> node.
>>
>> This behavior is the same as (or similar to) what you are worried about, isn't it?
>>
>> To avoid this behavior, the status of the node needs to be updated before the
>> restart-process.
>> As a trial, I created a patch (for pacemaker-dev 366b14d79780),
>> and I attached the graph produced with the patched pacemaker.
>> It's not a "general" solution, just for reference...
>>
>>
>> Regards,
>> Satomi TANIGUCHI
>> diff -urN pacemaker-dev/crmd/te_actions.c
>> pacemaker-dev.mod/crmd/te_actions.c
>> --- pacemaker-dev/crmd/te_actions.c 2008-11-26 10:47:46.000000000
>> +0900
>> +++ pacemaker-dev.mod/crmd/te_actions.c 2008-11-26 10:48:47.000000000
>> +0900
>> @@ -175,6 +175,42 @@
>> return TRUE;
>> }
>>
>> +static gboolean
>> +te_standby_node(crm_graph_t *graph, crm_action_t *action)
>> +{
>> + const char *id = NULL;
>> + const char *uuid = NULL;
>> + const char *target = NULL;
>> +
>> + id = ID(action->xml);
>> + target = crm_element_value(action->xml, XML_LRM_ATTR_TARGET);
>> + uuid = crm_element_value(action->xml, XML_LRM_ATTR_TARGET_UUID);
>> +
>> + CRM_CHECK(id != NULL,
>> + crm_log_xml_warn(action->xml, "BadAction");
>> + return FALSE);
>> + CRM_CHECK(uuid != NULL,
>> + crm_log_xml_warn(action->xml, "BadAction");
>> + return FALSE);
>> + CRM_CHECK(target != NULL,
>> + crm_log_xml_warn(action->xml, "BadAction");
>> + return FALSE);
>> +
>> + te_log_action(LOG_INFO,
>> + "Executing standby operation (%s) on %s", id,
>> target);
>> +
>> + if (cib_ok > set_standby(fsa_cib_conn, uuid, XML_CIB_TAG_NODES,
>> "on")) {
>> + crm_err("Cannot standby %s: set_standby() call failed.",
>> target);
>> + }
>> +
>> + crm_info("Skipping wait for %d", action->id);
>> + action->confirmed = TRUE;
>> + update_graph(graph, action);
>> + trigger_graph();
>> +
>> + return TRUE;
>> +}
>> +
>> static int get_target_rc(crm_action_t *action)
>> {
>> const char *target_rc_s = crm_meta_value(action->params,
>> XML_ATTR_TE_TARGET_RC);
>> @@ -500,7 +536,8 @@
>> te_pseudo_action,
>> te_rsc_command,
>> te_crm_command,
>> - te_fence_node
>> + te_fence_node,
>> + te_standby_node
>> };
>>
>> void
>> diff -urN pacemaker-dev/include/crm/crm.h
>> pacemaker-dev.mod/include/crm/crm.h
>> --- pacemaker-dev/include/crm/crm.h 2008-11-26 10:47:46.000000000
>> +0900
>> +++ pacemaker-dev.mod/include/crm/crm.h 2008-11-26 10:48:47.000000000
>> +0900
>> @@ -146,6 +146,7 @@
>> #define CRM_OP_SHUTDOWN_REQ "req_shutdown"
>> #define CRM_OP_SHUTDOWN "do_shutdown"
>> #define CRM_OP_FENCE "stonith"
>> +#define CRM_OP_STANDBY "standby"
>> #define CRM_OP_EVENTCC "event_cc"
>> #define CRM_OP_TEABORT "te_abort"
>> #define CRM_OP_TEABORTED "te_abort_confirmed" /* we asked */
>> diff -urN pacemaker-dev/include/crm/pengine/common.h
>> pacemaker-dev.mod/include/crm/pengine/common.h
>> --- pacemaker-dev/include/crm/pengine/common.h 2008-11-26
>> 10:47:46.000000000 +0900
>> +++ pacemaker-dev.mod/include/crm/pengine/common.h 2008-11-26
>> 10:48:47.000000000 +0900
>> @@ -52,6 +52,7 @@
>> action_demote,
>> action_demoted,
>> shutdown_crm,
>> + standby_node,
>> stonith_node
>> };
>>
>> diff -urN pacemaker-dev/include/crm/pengine/status.h
>> pacemaker-dev.mod/include/crm/pengine/status.h
>> --- pacemaker-dev/include/crm/pengine/status.h 2008-11-26
>> 10:47:46.000000000 +0900
>> +++ pacemaker-dev.mod/include/crm/pengine/status.h 2008-11-26
>> 10:48:47.000000000 +0900
>> @@ -104,6 +104,7 @@
>> const char *uname;
>> gboolean online;
>> gboolean standby;
>> + gboolean action_standby;
>> gboolean pending;
>> gboolean unclean;
>> gboolean shutdown;
>> diff -urN pacemaker-dev/include/crm/transition.h
>> pacemaker-dev.mod/include/crm/transition.h
>> --- pacemaker-dev/include/crm/transition.h 2008-11-26
>> 10:47:46.000000000 +0900
>> +++ pacemaker-dev.mod/include/crm/transition.h 2008-11-26
>> 10:48:47.000000000 +0900
>> @@ -115,6 +115,7 @@
>> gboolean (*rsc)(crm_graph_t *graph, crm_action_t *action);
>> gboolean (*crmd)(crm_graph_t *graph, crm_action_t *action);
>> gboolean (*stonith)(crm_graph_t *graph, crm_action_t
>> *action);
>> + gboolean (*standby)(crm_graph_t *graph, crm_action_t
>> *action);
>> } crm_graph_functions_t;
>>
>> enum transition_status {
>> diff -urN pacemaker-dev/lib/pengine/common.c
>> pacemaker-dev.mod/lib/pengine/common.c
>> --- pacemaker-dev/lib/pengine/common.c 2008-11-26 10:47:46.000000000
>> +0900
>> +++ pacemaker-dev.mod/lib/pengine/common.c 2008-11-26
>> 10:48:47.000000000 +0900
>> @@ -178,6 +178,8 @@
>> return shutdown_crm;
>> } else if(safe_str_eq(task, CRM_OP_FENCE)) {
>> return stonith_node;
>> + } else if(safe_str_eq(task, CRM_OP_STANDBY)) {
>> + return standby_node;
>> } else if(safe_str_eq(task, CRMD_ACTION_STATUS)) {
>> return monitor_rsc;
>> } else if(safe_str_eq(task, CRMD_ACTION_NOTIFY)) {
>> @@ -245,6 +247,9 @@
>> case stonith_node:
>> result = CRM_OP_FENCE;
>> break;
>> + case standby_node:
>> + result = CRM_OP_STANDBY;
>> + break;
>> case monitor_rsc:
>> result = CRMD_ACTION_STATUS;
>> break;
>> diff -urN pacemaker-dev/lib/pengine/unpack.c
>> pacemaker-dev.mod/lib/pengine/unpack.c
>> --- pacemaker-dev/lib/pengine/unpack.c 2008-11-26 10:47:46.000000000
>> +0900
>> +++ pacemaker-dev.mod/lib/pengine/unpack.c 2008-11-26
>> 10:48:47.000000000 +0900
>> @@ -240,6 +240,7 @@
>> */
>> new_node->details->unclean = TRUE;
>> }
>> + new_node->details->action_standby = FALSE;
>>
>> if(type == NULL
>> || safe_str_eq(type, "member")
>> @@ -832,7 +833,7 @@
>> stop_action(rsc, node, FALSE);
>>
>> } else if(on_fail == action_fail_standby) {
>> - node->details->standby = TRUE;
>> + node->details->action_standby = TRUE;
>>
>> } else if(on_fail == action_fail_block) {
>> /* is_managed == FALSE will prevent any
>> diff -urN pacemaker-dev/lib/transition/graph.c
>> pacemaker-dev.mod/lib/transition/graph.c
>> --- pacemaker-dev/lib/transition/graph.c 2008-11-26
>> 10:47:46.000000000 +0900
>> +++ pacemaker-dev.mod/lib/transition/graph.c 2008-11-26
>> 10:48:47.000000000 +0900
>> @@ -188,6 +188,11 @@
>> crm_debug_2("Executing STONITH-event: %d",
>> action->id);
>> return graph_fns->stonith(graph, action);
>> +
>> + } else if(safe_str_eq(task, CRM_OP_STANDBY)) {
>> + crm_debug_2("Executing STANDBY-event: %d",
>> + action->id);
>> + return graph_fns->standby(graph, action);
>> }
>>
>> crm_debug_2("Executing crm-event: %d", action->id);
>> diff -urN pacemaker-dev/lib/transition/utils.c
>> pacemaker-dev.mod/lib/transition/utils.c
>> --- pacemaker-dev/lib/transition/utils.c 2008-11-26
>> 10:47:46.000000000 +0900
>> +++ pacemaker-dev.mod/lib/transition/utils.c 2008-11-26
>> 10:48:47.000000000 +0900
>> @@ -41,6 +41,7 @@
>> pseudo_action_dummy,
>> pseudo_action_dummy,
>> pseudo_action_dummy,
>> + pseudo_action_dummy,
>> pseudo_action_dummy
>> };
>>
>> @@ -61,6 +62,7 @@
>> CRM_ASSERT(graph_fns->crmd != NULL);
>> CRM_ASSERT(graph_fns->pseudo != NULL);
>> CRM_ASSERT(graph_fns->stonith != NULL);
>> + CRM_ASSERT(graph_fns->standby != NULL);
>> }
>>
>> const char *
>> diff -urN pacemaker-dev/pengine/allocate.c
>> pacemaker-dev.mod/pengine/allocate.c
>> --- pacemaker-dev/pengine/allocate.c 2008-11-26 10:47:46.000000000
>> +0900
>> +++ pacemaker-dev.mod/pengine/allocate.c 2008-11-26
>> 10:48:47.000000000 +0900
>> @@ -774,6 +774,15 @@
>> last_stonith = stonith_op;
>>
>> }
>>
>> + } else if(node->details->online &&
>> node->details->action_standby) {
>> + action_t *standby_op = NULL;
>> +
>> + standby_op = custom_action(
>> + NULL, crm_strdup(CRM_OP_STANDBY),
>> + CRM_OP_STANDBY, node, FALSE, TRUE,
>> data_set);
>> +
>> + order_actions(standby_op, all_stopped,
>> pe_order_implies_left);
>> +
>> } else if(node->details->online && node->details->shutdown)
>> {
>> action_t *down_op = NULL;
>> crm_info("Scheduling Node %s for shutdown",
>> diff -urN pacemaker-dev/pengine/graph.c pacemaker-dev.mod/pengine/graph.c
>> --- pacemaker-dev/pengine/graph.c 2008-11-26 10:47:46.000000000
>> +0900
>> +++ pacemaker-dev.mod/pengine/graph.c 2008-11-26 10:48:47.000000000
>> +0900
>> @@ -368,7 +368,10 @@
>> if(safe_str_eq(action->task, CRM_OP_FENCE)) {
>> action_xml = create_xml_node(NULL,
>> XML_GRAPH_TAG_CRM_EVENT);
>> /* needs_node_info = FALSE; */
>> -
>> +
>> + } else if(safe_str_eq(action->task, CRM_OP_STANDBY)) {
>> + action_xml = create_xml_node(NULL,
>> XML_GRAPH_TAG_CRM_EVENT);
>> +
>> } else if(safe_str_eq(action->task, CRM_OP_SHUTDOWN)) {
>> action_xml = create_xml_node(NULL,
>> XML_GRAPH_TAG_CRM_EVENT);
>>
>> diff -urN pacemaker-dev/pengine/group.c pacemaker-dev.mod/pengine/group.c
>> --- pacemaker-dev/pengine/group.c 2008-11-26 10:47:46.000000000
>> +0900
>> +++ pacemaker-dev.mod/pengine/group.c 2008-11-26 10:48:47.000000000
>> +0900
>> @@ -423,6 +423,7 @@
>> case action_notified:
>> case shutdown_crm:
>> case stonith_node:
>> + case standby_node:
>> break;
>> case stop_rsc:
>> case stopped_rsc:
>> diff -urN pacemaker-dev/pengine/utils.c pacemaker-dev.mod/pengine/utils.c
>> --- pacemaker-dev/pengine/utils.c 2008-11-26 10:47:49.000000000
>> +0900
>> +++ pacemaker-dev.mod/pengine/utils.c 2008-11-26 10:49:54.000000000
>> +0900
>> @@ -338,6 +338,7 @@
>> case monitor_rsc:
>> case shutdown_crm:
>> case stonith_node:
>> + case standby_node:
>> task = no_action;
>> break;
>> default:
>> @@ -430,6 +431,7 @@
>>
>> switch(text2task(action->task)) {
>> case stonith_node:
>> + case standby_node:
>> case shutdown_crm:
>> do_crm_log_unlikely(log_level,
>> "%s%s%sAction %d: %s%s%s%s%s%s",
>> <pe-warn-0.left.gif>_______________________________________________
>> Pacemaker mailing list
>> Pacemaker at clusterlabs.org
>> http://list.clusterlabs.org/mailman/listinfo/pacemaker
>
>
More information about the Pacemaker
mailing list