Author: tmckay
Date: 2011-11-08 02:13:01 +0000 (Tue, 08 Nov 2011)
New Revision: 5119
Modified:
trunk/cumin/bin/cumin
trunk/cumin/bin/cumin-data
trunk/cumin/bin/cumin-web
Log:
Merge --init-only case into the main process monitoring loop in bin/cumin.
Use the low bit of the return value from cumin-data and cumin-web to
indicate an error that happened during init checks.
Make cumin exit and terminate all processes if any child fails init checks.
These changes let the preliminary --init-only checks performed by the cumin
sysvinit script run in parallel (much faster), and also prepare cumin for
migration to systemd scripts under Fedora (and someday RHEL).
Modified: trunk/cumin/bin/cumin
===================================================================
--- trunk/cumin/bin/cumin 2011-11-07 18:47:07 UTC (rev 5118)
+++ trunk/cumin/bin/cumin 2011-11-08 02:13:01 UTC (rev 5119)
@@ -33,7 +33,8 @@
msg = os.fdopen(r, "r").readlines()
if len(msg) > 0:
log.error("".join(msg))
- return True
+ return 1
+ return 0
def get_args(app, section, init_only, console, extra_options=""):
args = [app, "--section="+section.strip()]
@@ -124,34 +125,47 @@
for instance in options.datas.split(','):
args, prog_string = get_args("cumin-data", instance,
options.init_only, console, options.data_options)
apps.append([None, args, prog_string])
+
+ # Launch and babysit, do not restart if options.init_only is set
+ complete = 0
+ return_code = 0
+ sleep_time = 0.25
+ slow_down = 40
+ try:
+ for app in apps:
+ log.info("Starting:" + app[PROG_STRING])
+ app[PROCESS] = subprocess.Popen(app[ARGS])
+ while complete != len(apps):
+ sleep(sleep_time)
+ if slow_down > 0:
+ slow_down -= 1
+ if slow_down == 0:
+ print "set sleep to 5 seconds"
+ sleep_time = 5
- # If we are just checking startup flags, invoke each instance
- # with "--init-only" and return status to caller.
- if options.init_only:
- for app in apps:
- if subprocess.Popen(app[ARGS]).wait() != 0:
- log.error("Subprocess failed init check:" + app[PROG_STRING])
- return 1
- else:
- # Launch and babysit
- try:
for app in apps:
- log.info("Starting:" + app[PROG_STRING])
- app[PROCESS] = subprocess.Popen(app[ARGS])
-
- while True:
- sleep(5)
- for app in apps:
- return_code = app[PROCESS].poll()
- if return_code != None:
- if return_code != 0:
- log.warn("Subprocess exited with status " +
str(return_code))
+ poll = app[PROCESS] and app[PROCESS].poll()
+ if poll is not None:
+ log.warn("Subprocess exited with status " + str(poll))
+ # If the low bit is set on the return code, the
+ # process got an error during init checks.
+ # Exit and shut down any processes that have already
+ # been started, do not start the remaining.
+ if poll & 1:
+ app[PROCESS] = None
+ return_code = 2
+ complete = len(apps)
+ break
+ elif options.init_only:
+ app[PROCESS] = None
+ complete += 1
+ else:
log.info("Restarting:" + app[PROG_STRING])
- app[PROCESS] = subprocess.Popen(app[ARGS])
- finally:
- for app in apps:
- if app[PROCESS]:
- os.kill(app[PROCESS].pid, signal.SIGTERM)
+ app[PROCESS] = subprocess.Popen(app[ARGS])
+ finally:
+ for app in apps:
+ app[PROCESS] and os.kill(app[PROCESS].pid, signal.SIGTERM)
+ return return_code
if __name__ == "__main__":
# This is necessary so that on a SIGTERM we can call sys.exit()
@@ -161,4 +175,5 @@
try:
sys.exit(main())
except KeyboardInterrupt:
- pass
+ sys.exit(0)
+
Modified: trunk/cumin/bin/cumin-data
===================================================================
--- trunk/cumin/bin/cumin-data 2011-11-07 18:47:07 UTC (rev 5118)
+++ trunk/cumin/bin/cumin-data 2011-11-08 02:13:01 UTC (rev 5119)
@@ -85,7 +85,15 @@
return return_code, agents
+def adjust_return(passed_init, ret):
+ # Shift non-zer0 return codes left 1 bit
+ # and OR in whether or not init passed
+ if ret != 0:
+ ret = ret << 1 | passed_init
+ return ret
+
def main():
+ passed_init = 1
# Do our own simple option check so we can redirect IO early
# without worrying about other options or the behavior of optParse
@@ -190,33 +198,37 @@
return_code, agents = process_agents(values.agents, opts.section)
mint.qmf_agents = agents
- if opts.init_only or return_code:
- return
+ # If init_only was set or we failed init, don't proceed...
+ if not opts.init_only and not return_code:
+ passed_init = 0
- mint.start()
+ mint.start()
+ stats = mint.update_thread.stats
+ count = 0
- stats = mint.update_thread.stats
- count = 0
+ if opts.print_stats:
+ print "[Starred columns are the number of events per second]"
- if opts.print_stats:
- print "[Starred columns are the number of events per second]"
+ while True:
+ if count % 20 == 0:
+ stats.print_headings()
- while True:
- if count % 20 == 0:
- stats.print_headings()
+ count += 1
- count += 1
+ stats.print_values()
- stats.print_values()
+ sleep(5)
+ else:
+ while True:
+ sleep(86400)
- sleep(5)
- else:
- while True:
- sleep(86400)
-
- except (KeyboardInterrupt, SystemExit):
+ except KeyboardInterrupt:
pass
+ except SystemExit:
+ if "--help" not in sys.argv:
+ return_code = 1
+
except:
print_exc()
return_code = 1
@@ -227,10 +239,10 @@
if pipeThread:
pipeThread.stop()
logging.shutdown()
- sys.exit(return_code)
+ return adjust_return(passed_init, return_code)
if __name__ == "__main__":
try:
- main()
+ sys.exit(main())
except KeyboardInterrupt:
- pass
+ sys.exit(0)
Modified: trunk/cumin/bin/cumin-web
===================================================================
--- trunk/cumin/bin/cumin-web 2011-11-07 18:47:07 UTC (rev 5118)
+++ trunk/cumin/bin/cumin-web 2011-11-08 02:13:01 UTC (rev 5119)
@@ -39,7 +39,16 @@
if cumin.wallaby_refresh == 0:
cumin.wallaby_refresh = None
+def adjust_return(passed_init, ret):
+ # Shift non-zer0 return codes left 1 bit
+ # and OR in whether or not init passed
+ if ret != 0:
+ ret = ret << 1 | passed_init
+ return ret
+
def main():
+ passed_init = 1
+
# Do our own simple option check so we can redirect IO early
# without worrying about other options or the behavior of optParse
opts = check_for_options(["--section", "--daemon"],
sys.argv[1:])
@@ -132,22 +141,25 @@
cumin.check()
cumin.init()
- if opts.init_only:
- return
+ if not opts.init_only and not return_code:
+ passed_init = 0
- cumin.start()
+ cumin.start()
+ while True:
+ # print_threads()
+ sleep(1)
+ if not cumin.server_alive():
+ print "web server has stopped, exiting..."
+ return_code = 1
+ break
- while True:
- # print_threads()
- sleep(1)
- if not cumin.server_alive():
- print "web server has stopped, exiting..."
- return_code = 1
- break
-
- except (KeyboardInterrupt, SystemExit):
+ except KeyboardInterrupt:
pass
+ except SystemExit:
+ if "--help" not in sys.argv:
+ return_code = 1
+
except:
print_exc()
return_code = 1
@@ -158,10 +170,10 @@
if pipeThread:
pipeThread.stop()
logging.shutdown()
- sys.exit(return_code)
+ return adjust_return(passed_init, return_code)
if __name__ == "__main__":
try:
- main()
+ sys.exit(main())
except KeyboardInterrupt:
- pass
+ sys.exit(0)