Gitweb: http://git.fedorahosted.org/git/cluster.git?p=cluster.git;a=commitdiff;h=02e5f7ae8f8f49e0facc33d202e48ae7041d5fe0
Commit: 02e5f7ae8f8f49e0facc33d202e48ae7041d5fe0
Parent: 6292a2220fc4c34967d56035b7dce3ce99690198
Author: Fabio M. Di Nitto <fdinitto(a)redhat.com>
AuthorDate: Tue Jul 27 10:09:13 2010 +0200
Committer: Fabio M. Di Nitto <fdinitto(a)redhat.com>
CommitterDate: Tue Jul 27 10:30:56 2010 +0200
config: more cman_tool config reload cleanup
when issuing cman_tool version on the local node, and the local node
cannot read the new config, there is very little point in continuing to
try to load it (since we are the issuing node).
make sure that if the local node cannot read the config, we stop immediately
and report a proper error down to cman_tool to display.
Resolves: rhbz#617161, rhbz#617163
Signed-off-by: Fabio M. Di Nitto <fdinitto(a)redhat.com>
---
cman/cman_tool/main.c | 13 +++++++++++--
cman/daemon/commands.c | 26 ++++++++++++++++----------
2 files changed, 27 insertions(+), 12 deletions(-)
diff --git a/cman/cman_tool/main.c b/cman/cman_tool/main.c
index a731323..af1cc18 100644
--- a/cman/cman_tool/main.c
+++ b/cman/cman_tool/main.c
@@ -783,8 +783,17 @@ static void version(commandline_t *comline)
ver.cv_config = comline->config_version;
- if ((result = cman_set_version(h, &ver)))
- die("can't set version: %s", cman_error(errno));
+ result = cman_set_version(h, &ver);
+
+ switch(result) {
+ case 0:
+ if (comline->verbose)
+ printf("Configuration succesfully updated or already running\n");
+ break;
+ default:
+ die("Error loading configuration in corosync/cman");
+ break;
+ }
out:
cman_finish(h);
}
diff --git a/cman/daemon/commands.c b/cman/daemon/commands.c
index 36bb519..3bff57a 100644
--- a/cman/daemon/commands.c
+++ b/cman/daemon/commands.c
@@ -485,9 +485,7 @@ static int do_cmd_set_version(char *cmdbuf, int *retlen)
version->patch != CNXMAN_PATCH_VERSION)
return -EINVAL;
- reload_config(version->config, 1);
-
- return 0;
+ return reload_config(version->config, 1);
}
static int do_cmd_get_extrainfo(char *cmdbuf, char **retbuf, int retsize, int *retlen, int offset)
@@ -1193,15 +1191,23 @@ static int reload_config(int new_version, int should_broadcast)
config_error = read_cman_nodes(corosync, &config_version, 0);
if (config_error) {
- log_printf(LOG_ERR, "Can't get updated config version %d: %s. Activity suspended on this node\n",
+ log_printf(LOG_ERR, "Can't get updated config version %d: %s.\n",
wanted_config_version, reload_err?reload_err:"version mismatch on this node");
- if (!ccsd_timer_active) {
- log_printf(LOG_ERR, "Error reloading the configuration, will retry every second\n");
- ccsd_timer_should_broadcast = should_broadcast;
- corosync->timer_add_duration((unsigned long long)ccsd_poll_interval*1000000, NULL,
- ccsd_timer_fn, &ccsd_timer);
- ccsd_timer_active = 1;
+ if (should_broadcast) {
+ log_printf(LOG_ERR, "Continuing activity with old configuration\n");
+ config_error=0;
+ return -2;
+ } else {
+ log_printf(LOG_ERR, "Activity suspended on this node\n");
+
+ if (!ccsd_timer_active) {
+ log_printf(LOG_ERR, "Error reloading the configuration, will retry every second\n");
+ ccsd_timer_should_broadcast = should_broadcast;
+ corosync->timer_add_duration((unsigned long long)ccsd_poll_interval*1000000, NULL,
+ ccsd_timer_fn, &ccsd_timer);
+ ccsd_timer_active = 1;
+ }
}
} else {
Gitweb: http://git.fedorahosted.org/git/cluster.git?p=cluster.git;a=commitdiff;h=050b8f584c9721a63ab093ec34599eb37502723b
Commit: 050b8f584c9721a63ab093ec34599eb37502723b
Parent: 2f5475b2be1e7ead3501400aea9887b181246e20
Author: Fabio M. Di Nitto <fdinitto(a)redhat.com>
AuthorDate: Mon Jul 26 14:35:32 2010 +0200
Committer: Fabio M. Di Nitto <fdinitto(a)redhat.com>
CommitterDate: Tue Jul 27 10:29:49 2010 +0200
config: fix several issues with reload operation
cman-preconfig: in some cases it was possible to trigger cman to load
a config with a random version, trashing the old valid configuration.
- Enforce now a configuration version check before removing the old config
from the objdb and return an error back to cman/corosync instead.
cman corosync plugin: several config reload corner cases were not handled
properly, resulting in various unnecessary config reload attempts, and
unnecessary notifications to all cluster daemons.
- move all config reload handling code in one location to avoid duplication.
- add more sanity checks on the configuration reload requests.
- send notification to the cluster only when we are able to load the config
ourselves.
- don't re-notify cluster nodes of config changes, unless the change
request started from our node.
- notify cluster daemons only if we successfully loaded the configuration.
- better handle some startup corner cases (cluster node joins with a newer
or older configuration).
- attempt to do better detection of first cluster transition.
- add some debugging output just in case....
Resolves: rhbz#617161, rhbz#617163
Signed-off-by: Fabio M. Di Nitto <fdinitto(a)redhat.com>
---
cman/daemon/cman-preconfig.c | 29 +++++++
cman/daemon/commands.c | 166 +++++++++++++++++++++---------------------
2 files changed, 113 insertions(+), 82 deletions(-)
diff --git a/cman/daemon/cman-preconfig.c b/cman/daemon/cman-preconfig.c
index 36620c6..f5dbcd7 100644
--- a/cman/daemon/cman-preconfig.c
+++ b/cman/daemon/cman-preconfig.c
@@ -1151,6 +1151,8 @@ static int cmanpre_reloadconfig(struct objdb_iface_ver0 *objdb, int flush, const
hdb_handle_t object_handle;
hdb_handle_t find_handle;
hdb_handle_t cluster_parent_handle_new;
+ unsigned int config_version = 0, config_version_new = 0;
+ char *config_value = NULL;
/* don't reload if we've been told to run configless */
if (getenv("CMAN_NOCONFIG")) {
@@ -1173,6 +1175,33 @@ static int cmanpre_reloadconfig(struct objdb_iface_ver0 *objdb, int flush, const
}
objdb->object_find_destroy(find_handle);
+ if (!objdb->object_key_get(cluster_parent_handle, "config_version", strlen("config_version"), (void *)&config_value, NULL)) {
+ if (config_value) {
+ config_version = atoi(config_value);
+ } else {
+ /* it should never ever happen.. */
+ sprintf (error_reason, "%s", "Cannot find old /cluster/config_version key in configuration\n");
+ goto err;
+ }
+ }
+
+ config_value = NULL;
+
+ if (!objdb->object_key_get(cluster_parent_handle_new, "config_version", strlen("config_version"), (void *)&config_value, NULL)) {
+ if (config_value) {
+ config_version_new = atoi(config_value);
+ } else {
+ sprintf (error_reason, "%s", "Cannot find new /cluster/config_version key in configuration\n");
+ goto err;
+ }
+ }
+
+ if (config_version_new <= config_version) {
+ objdb->object_destroy(cluster_parent_handle_new);
+ sprintf (error_reason, "%s", "New configuration version has to be newer than current running configuration\n");
+ goto err;
+ }
+
/* destroy the old one */
objdb->object_destroy(cluster_parent_handle);
diff --git a/cman/daemon/commands.c b/cman/daemon/commands.c
index 604e400..36bb519 100644
--- a/cman/daemon/commands.c
+++ b/cman/daemon/commands.c
@@ -86,6 +86,7 @@ static int shutdown_yes;
static int shutdown_no;
static int shutdown_expected;
static int ccsd_timer_active = 0;
+static int ccsd_timer_should_broadcast = 0;
static struct cluster_node *find_node_by_nodeid(int nodeid);
static struct cluster_node *find_node_by_name(char *name);
@@ -98,7 +99,7 @@ static void recalculate_quorum(int allow_decrease, int by_current_nodes);
static void send_kill(int nodeid, uint16_t reason);
static const char *killmsg_reason(int reason);
static void ccsd_timer_fn(void *arg);
-static int reread_config(int new_version);
+static int reload_config(int new_version, int should_broadcast);
static void set_port_bit(struct cluster_node *node, uint8_t port)
{
@@ -484,23 +485,8 @@ static int do_cmd_set_version(char *cmdbuf, int *retlen)
version->patch != CNXMAN_PATCH_VERSION)
return -EINVAL;
- if (config_version == version->config)
- return 0;
-
- /* If the passed-in version number is 0 then read the file now, then
- * tell the other nodes to look for that version number.
- * That means we also have to send the notification here, because it will
- * be skipped when we get our own RECONFIGURE message back, as the version
- * number will match.
- */
- if (!version->config) {
- if (!reread_config(0))
- notify_listeners(NULL, EVENT_REASON_CONFIG_UPDATE, config_version);
- version->config = config_version;
- }
+ reload_config(version->config, 1);
- /* We will re-read CCS when we get our own message back */
- send_reconfigure(us->node_id, RECONFIG_PARAM_CONFIG_VERSION, version->config);
return 0;
}
@@ -1179,81 +1165,94 @@ static int do_cmd_unregister_quorum_device(char *cmdbuf, int *retlen)
return 0;
}
-static int reread_config(int new_version)
+static int reload_config(int new_version, int should_broadcast)
{
- int read_err;
const char *reload_err = NULL;
+ if (config_version == new_version) {
+ log_printf(LOG_DEBUG, "We are already using config version [%d]\n",
+ config_version);
+ return 0;
+ }
+
+ if (new_version > 0 && new_version < config_version) {
+ log_printf(LOG_ERR, "Requested version [%d] older than running version [%d]\n",
+ new_version, config_version);
+ return -1;
+ }
+
wanted_config_version = new_version;
/* Tell objdb to reload */
- read_err = corosync->object_reload_config(1, &reload_err);
+ config_error = corosync->object_reload_config(1, &reload_err);
+ if (config_error)
+ log_printf(LOG_ERR, "Unable to load new config in corosync: %s\n",
+ reload_err);
- /* Now get our bits */
- if (!read_err)
- read_err = read_cman_nodes(corosync, &config_version, 0);
+ if (!config_error)
+ config_error = read_cman_nodes(corosync, &config_version, 0);
- if (read_err) {
- config_error = 1;
+ if (config_error) {
log_printf(LOG_ERR, "Can't get updated config version %d: %s. Activity suspended on this node\n",
wanted_config_version, reload_err?reload_err:"version mismatch on this node");
- }
-
- /* Still too old?? */
- if (new_version && config_version < wanted_config_version) {
- log_printf(LOG_ERR, "Can't get updated config version %d, config file is version %d.\n",
- wanted_config_version, config_version);
- }
- /* Keep looking */
- if (read_err || config_version < wanted_config_version) {
- if (!ccsd_timer_active)
+ if (!ccsd_timer_active) {
+ log_printf(LOG_ERR, "Error reloading the configuration, will retry every second\n");
+ ccsd_timer_should_broadcast = should_broadcast;
corosync->timer_add_duration((unsigned long long)ccsd_poll_interval*1000000, NULL,
ccsd_timer_fn, &ccsd_timer);
- ccsd_timer_active = 1;
- }
- else {
+ ccsd_timer_active = 1;
+ }
+ } else {
+
+ /*
+ * at this point we know:
+ * config is loaded in objdb with a newer version than the previous one
+ * we have been able to activate it in cman (via read_cman_nodes)
+ */
+
+ if (should_broadcast) {
+ log_printf(LOG_DEBUG, "Sending reconfigure message to all nodes\n");
+ send_reconfigure(us->node_id, RECONFIG_PARAM_CONFIG_VERSION, config_version);
+ }
+
+ log_printf(LOG_DEBUG, "Recalculating quorum\n");
recalculate_quorum(1, 0);
- send_transition_msg(0,0);
- }
- return read_err;
+ log_printf(LOG_DEBUG, "Notify all listeners\n");
+ notify_listeners(NULL, EVENT_REASON_CONFIG_UPDATE, config_version);
+ }
+ return config_error;
}
static void ccsd_timer_fn(void *arg)
{
log_printf(LOG_DEBUG, "Polling configuration for updated information\n");
- ccsd_timer_active = 0;
+ ccsd_timer_active = 0;
- if (!reread_config(wanted_config_version) && config_version >= wanted_config_version) {
- log_printf(LOG_ERR, "Now got config information version %d, continuing\n", config_version);
+ if (!reload_config(wanted_config_version, ccsd_timer_should_broadcast) &&
+ config_version >= wanted_config_version) {
+ log_printf(LOG_DEBUG, "ccsd_timer_fn got the new config\n");
config_error = 0;
- recalculate_quorum(0, 0);
- notify_listeners(NULL, EVENT_REASON_CONFIG_UPDATE, config_version);
+ return;
}
- else {
- time_t now;
- now = time(NULL);
- log_printf(LOG_DEBUG, "Checking for startup failure: local_first_trans=%d, time=%d\n",
- local_first_trans, (int)(now - join_time));
- /*
- * If we haven't got the 'right' configuration at startup before (default) 30s
- * then quit so the node can boot
- */
+ if (local_first_trans) {
+ time_t now;
+ now = time(NULL);
- if (local_first_trans && now > join_time+startup_config_timeout) {
- log_printf(LOG_ERR, "Failed to get an up-to-date config file, wanted %d, only got %d. Will exit\n",
- wanted_config_version, config_version);
- log_printf(LOG_ERR, "Check your configuration distribution method is working correctly\n");
- cman_finish();
- corosync_shutdown();
- }
+ if (now > join_time+startup_config_timeout) {
+ log_printf(LOG_ERR, "Checking for startup failure: time=%d\n", (int)(now - join_time));
+ log_printf(LOG_ERR, "Failed to get an up-to-date config file, wanted %d, only got %d. Will exit\n",
+ wanted_config_version, config_version);
+ log_printf(LOG_ERR, "Check your configuration distribution method is working correctly\n");
+ cman_finish();
+ corosync_shutdown();
+ }
}
}
-
static void quorum_device_timer_fn(void *arg)
{
struct timeval now;
@@ -1736,25 +1735,31 @@ static int valid_transition_msg(int nodeid, struct cl_transmsg *msg)
return -1;
}
- /* New config version - try to read new file */
- if (msg->config_version > config_version) {
+ if (local_first_trans) {
+ time_t now;
+ now = time(NULL);
- if (!reread_config(msg->config_version)) {
+ if (now > join_time+startup_config_timeout) {
+ log_printf(LOG_DEBUG, "ccs: disable startup transition check\n");
+ local_first_trans = 0;
+ }
+ }
- if (config_version > msg->config_version) {
- /* Tell everyone else to update */
- send_reconfigure(us->node_id, RECONFIG_PARAM_CONFIG_VERSION, config_version);
+ /* New config version - try to read new file */
+ if (msg->config_version > config_version) {
+ log_printf(LOG_DEBUG, "Reloading config from TRANSITION message\n");
+ if (reload_config(msg->config_version, 0)) {
+ if (msg->config_version != config_version) {
+ log_printf(LOG_ERR, "Node %d conflict, remote config version id=%d, local=%d\n",
+ nodeid, msg->config_version, config_version);
+ return -1;
}
- recalculate_quorum(0, 0);
- notify_listeners(NULL, EVENT_REASON_CONFIG_UPDATE, config_version);
}
}
-
- if (msg->config_version != config_version) {
- log_printf(LOG_ERR, "Node %d conflict, remote config version id=%d, local=%d\n",
- nodeid, msg->config_version, config_version);
- return -1;
+ if ((msg->config_version == config_version) && (nodeid != us->node_id)) {
+ log_printf(LOG_DEBUG, "Completed first transition with nodes on the same config versions\n");
+ local_first_trans = 0;
}
return 0;
@@ -1897,10 +1902,7 @@ static void do_reconfigure_msg(void *data)
break;
case RECONFIG_PARAM_CONFIG_VERSION:
- if (config_version != msg->value) {
- if (!reread_config(msg->value))
- notify_listeners(NULL, EVENT_REASON_CONFIG_UPDATE, config_version);
- }
+ reload_config(msg->value, 0);
break;
}
}
Gitweb: http://git.fedorahosted.org/git/cluster.git?p=cluster.git;a=commitdiff;h=2f5475b2be1e7ead3501400aea9887b181246e20
Commit: 2f5475b2be1e7ead3501400aea9887b181246e20
Parent: 2dbba8f54346a849a8675c5abf14f39edac42a6f
Author: Lon Hohberger <lhh(a)redhat.com>
AuthorDate: Thu Jul 22 14:44:17 2010 -0400
Committer: Fabio M. Di Nitto <fdinitto(a)redhat.com>
CommitterDate: Tue Jul 27 10:28:38 2010 +0200
cman: Deprecate specifying config version to cman_tool
Historically, updating the configuration file required
two discrete steps:
1) send configuration to all nodes, and
2) tell CMAN about the new configuration version
There has always been a possibility that an incorrect
version number could be sent to CMAN, causing it to have
a different version than the one in cluster.conf.
In the STABLE3 and later branches as of commit 75fb0324,
the necessity for specifying the version # has been
removed; users could instead just use '-r 0' to use the
configuration version in the current cluster.conf.
This patch deprecates specification of the configuration
version number from the cman_tool command line while
attempting to retain command-line tool compatibility.
That is, you may specify '-r123', but it will be ignored
and a warning will be printed.
Resolves: rhbz#617161, rhbz#617163
Signed-off-by: Lon Hohberger <lhh(a)redhat.com>
---
cman/cman_tool/main.c | 27 ++++++++++++++++++++++-----
1 files changed, 22 insertions(+), 5 deletions(-)
diff --git a/cman/cman_tool/main.c b/cman/cman_tool/main.c
index d8b08dd..a731323 100644
--- a/cman/cman_tool/main.c
+++ b/cman/cman_tool/main.c
@@ -10,7 +10,7 @@
#define DEFAULT_CONFIG_MODULE "xmlconfig"
-#define OPTION_STRING ("m:n:v:e:2p:c:r:i:N:t:o:k:F:C:VAPwfqah?XD::Sd::")
+#define OPTION_STRING ("m:n:v:e:2p:c:i:N:t:o:k:F:C:VAPwfqah?XD::Sd::r::")
#define OP_JOIN 1
#define OP_LEAVE 2
#define OP_EXPECTED 3
@@ -118,7 +118,7 @@ static void print_usage(int subcmd)
if (!subcmd || subcmd == OP_VERSION) {
printf("version\n");
- printf(" -r <config> A new config version to set on all members\n");
+ printf(" -r Reload cluster.conf and update config version.\n");
printf(" -D <fail,warn,none> What to do about the config. Default (without -D) is to\n");
printf(" validate the config. with -D no validation will be done. -Dwarn will print errors\n");
printf(" but allow the operation to continue\n");
@@ -876,6 +876,7 @@ static void decode_arguments(int argc, char *argv[], commandline_t *comline)
int optchar, i;
int suboptchar;
int show_help = 0;
+ char buf[16];
while (cont) {
optchar = getopt(argc, argv, OPTION_STRING);
@@ -943,8 +944,13 @@ static void decode_arguments(int argc, char *argv[], commandline_t *comline)
break;
case 'r':
- comline->config_version = get_int_arg(optchar, optarg);
+ comline->config_version = 0;
comline->config_version_opt = TRUE;
+ if (optarg) {
+ fprintf(stderr, "Warning: specifying a "
+ "version for the -r flag is "
+ "deprecated and no longer used\n");
+ }
break;
case 'v':
@@ -1092,8 +1098,19 @@ static void decode_arguments(int argc, char *argv[], commandline_t *comline)
comline->remove = TRUE;
} else if (strcmp(argv[optind], "force") == 0) {
comline->force = TRUE;
- } else
- die("unknown option %s", argv[optind]);
+ } else {
+ snprintf(buf, sizeof(buf),
+ "%d", atoi(argv[optind]));
+ if (!strcmp(buf, argv[optind]) &&
+ (comline->config_version_opt == TRUE) &&
+ comline->operation == OP_VERSION) {
+ fprintf(stderr, "Warning: specifying a "
+ "version for the -r flag is "
+ "deprecated and no longer used\n");
+ } else {
+ die("unknown option %s", argv[optind]);
+ }
+ }
optind++;
}
Gitweb: http://git.fedorahosted.org/git/cluster.git?p=cluster.git;a=commitdiff;h=198c4860e2c04a979c4ee7500aaf49c7f97d1efe
Commit: 198c4860e2c04a979c4ee7500aaf49c7f97d1efe
Parent: 4eb4d4067f5c3b015090a849122795064eb6ef2d
Author: Fabio M. Di Nitto <fdinitto(a)redhat.com>
AuthorDate: Tue Jul 27 10:09:13 2010 +0200
Committer: Fabio M. Di Nitto <fdinitto(a)redhat.com>
CommitterDate: Tue Jul 27 10:09:13 2010 +0200
config: more cman_tool config reload cleanup
when issuing cman_tool version on the local node, and the local node
cannot read the new config, there is very little point in continuing to
try to load it (since we are the issuing node).
make sure that if the local node cannot read the config, we stop immediately
and report a proper error down to cman_tool to display.
Signed-off-by: Fabio M. Di Nitto <fdinitto(a)redhat.com>
---
cman/cman_tool/main.c | 13 +++++++++++--
cman/daemon/commands.c | 26 ++++++++++++++++----------
2 files changed, 27 insertions(+), 12 deletions(-)
diff --git a/cman/cman_tool/main.c b/cman/cman_tool/main.c
index a731323..af1cc18 100644
--- a/cman/cman_tool/main.c
+++ b/cman/cman_tool/main.c
@@ -783,8 +783,17 @@ static void version(commandline_t *comline)
ver.cv_config = comline->config_version;
- if ((result = cman_set_version(h, &ver)))
- die("can't set version: %s", cman_error(errno));
+ result = cman_set_version(h, &ver);
+
+ switch(result) {
+ case 0:
+ if (comline->verbose)
+ printf("Configuration succesfully updated or already running\n");
+ break;
+ default:
+ die("Error loading configuration in corosync/cman");
+ break;
+ }
out:
cman_finish(h);
}
diff --git a/cman/daemon/commands.c b/cman/daemon/commands.c
index 36bb519..3bff57a 100644
--- a/cman/daemon/commands.c
+++ b/cman/daemon/commands.c
@@ -485,9 +485,7 @@ static int do_cmd_set_version(char *cmdbuf, int *retlen)
version->patch != CNXMAN_PATCH_VERSION)
return -EINVAL;
- reload_config(version->config, 1);
-
- return 0;
+ return reload_config(version->config, 1);
}
static int do_cmd_get_extrainfo(char *cmdbuf, char **retbuf, int retsize, int *retlen, int offset)
@@ -1193,15 +1191,23 @@ static int reload_config(int new_version, int should_broadcast)
config_error = read_cman_nodes(corosync, &config_version, 0);
if (config_error) {
- log_printf(LOG_ERR, "Can't get updated config version %d: %s. Activity suspended on this node\n",
+ log_printf(LOG_ERR, "Can't get updated config version %d: %s.\n",
wanted_config_version, reload_err?reload_err:"version mismatch on this node");
- if (!ccsd_timer_active) {
- log_printf(LOG_ERR, "Error reloading the configuration, will retry every second\n");
- ccsd_timer_should_broadcast = should_broadcast;
- corosync->timer_add_duration((unsigned long long)ccsd_poll_interval*1000000, NULL,
- ccsd_timer_fn, &ccsd_timer);
- ccsd_timer_active = 1;
+ if (should_broadcast) {
+ log_printf(LOG_ERR, "Continuing activity with old configuration\n");
+ config_error=0;
+ return -2;
+ } else {
+ log_printf(LOG_ERR, "Activity suspended on this node\n");
+
+ if (!ccsd_timer_active) {
+ log_printf(LOG_ERR, "Error reloading the configuration, will retry every second\n");
+ ccsd_timer_should_broadcast = should_broadcast;
+ corosync->timer_add_duration((unsigned long long)ccsd_poll_interval*1000000, NULL,
+ ccsd_timer_fn, &ccsd_timer);
+ ccsd_timer_active = 1;
+ }
}
} else {