RUN_MODE_KEY = 'MONO_BABYSITTER_NUNIT_RUN_MODE' # Equal to either RUN or AFTER
# Keys used for script configuration (see --help text)
-LOGGING_DIR_KEY = 'WORKSPACE' # Path
+LOG_FILE_KEY = 'MONO_BABYSITTER_LOG_FILE' # Path
RETRY_KEY = 'MONO_BABYSITTER_RETRY' # Equal to an integer
VERBOSE_KEY = 'MONO_BABYSITTER_VERBOSE' # "Undocumented"-- used for debugging babysitter
INVOKE_JSON = 'invocation'
COUNT_JSON = 'iteration' # How many times was command executed?
LIMIT_JSON = 'failure_max'
-SUPPORT_JSON = 'retry_support' # Was the test suite running with a babysitter-aware nunit?
+SUPPORT_JSON = 'babysitter_protocol' # Was the test suite running with a babysitter-aware nunit?
FINAL_CODE_JSON = 'final_code'
TESTS_JSON = 'tests' # Holds dictionary of (test case name)->(dict with TEST_ keys below)
TEST_FAILURES = 'normal_failures'
'h' for hours
'd' for days\n
supported environment variables:
- %s: Directory path to save logs into
+ %s: File to write logs to (as line-delimited JSON)
%s: If set to a number, failed test cases will be rerun this many times (NUnit test suites only)""" %
- (LOGGING_DIR_KEY, RETRY_KEY),
+ (LOG_FILE_KEY, RETRY_KEY),
formatter_class=argparse.RawTextHelpFormatter)
argparser.add_argument('-s', '--signal', dest='signal', metavar='signal', default='TERM',
help="Send this signal to the command on timeout, instead of TERM.")
global_env = copy.deepcopy( os.environ )
verbose = VERBOSE_KEY in global_env
-logging = LOGGING_DIR_KEY in global_env
-logging_dir = global_env[LOGGING_DIR_KEY] if logging else None
-logfile = os.path.join(logging_dir, LOGGING_FILE) if logging else None
+logging = LOG_FILE_KEY in global_env
+logfile = global_env[LOG_FILE_KEY] if logging else None
crash_resuming = True # TODO: Consider exposing this option, or adding a retry_on_crash option.
failmax = int(global_env[RETRY_KEY]) if RETRY_KEY in global_env else 0
-babysitting = logging or failmax
+babysitting = True # If false, babysitter becomes a timeout clone with no env manipulation or anything.
if babysitting:
babysitter_dir = tempfile.mkdtemp()
global_env[CURRENT_TEST_KEY] = os.path.join(babysitter_dir, CURRENT_TEST_FILE)
global_env[RAN_TEST_KEY] = os.path.join(babysitter_dir, RAN_TEST_FILE)
global_env[FAILED_TEST_KEY] = os.path.join(babysitter_dir, FAILED_TEST_FILE)
-### Utility functions
+have_unix_process_groups = 'killpg' in os.__dict__ # True when this Python's os module provides killpg
+have_windows_process_groups = 'CREATE_NEW_PROCESS_GROUP' in subprocess.__dict__ # True when subprocess exposes the CREATE_NEW_PROCESS_GROUP flag
+
+### Timeout implementation
def wait(proc, duration):
# TODO: If we detect Python 3.3, Popen objects have a wait(timeout) method we can use
return None
time.sleep(0.05)
+# Popen and send_signal can't be called in their basic forms because we want to
+# send signals to all children, not just to the immediately spawned process.
+# Unfortunately the way to do this varies by operating system.
+def popen(*args, **kwargs):
+ if have_unix_process_groups: # Call function on spawn to become process group leader
+ kwargs['preexec_fn'] = os.setsid
+ elif have_windows_process_groups: # Set magic flag for Windows process groups
+ kwargs['creationflags'] = subprocess.CREATE_NEW_PROCESS_GROUP
+ return subprocess.Popen(*args, **kwargs)
+
+def send_signal(proc, sig):
+ if have_unix_process_groups: # UNIX
+ # For compatibility with GNU timeout, pre-send the signal to just the monitored process
+ os.kill(proc.pid, sig)
+ # Send signal to entire group
+ os.killpg(proc.pid, sig)
+ # For compatibility with GNU Timeout, send a SIGCONT after the signal
+ # (so delivery has a chance to occur even for stopped processes)
+ if sig != signal.SIGKILL and sig != signal.SIGCONT:
+ os.kill(proc.pid, signal.SIGCONT)
+ elif have_windows_process_groups: # Windows with Python 2.7 or better
+ os.kill(proc.pid, sig) # Becuase CREATE_NEW_PROCESS_GROUP, will go to entire group
+ else: # Windows with Python 2.6-- CREATE_NEW_PROCESS_GROUP not supported
+ proc.send_signal(sig) # No way to contact group, just kill process
+
+### Utility functions
+
def attemptDelete(path):
try:
os.remove(path)
code = None
# Set up logging
- if logging:
- log = {DATE_JSON: posixtime(), COUNT_JSON:0, LIMIT_JSON:failmax, SUPPORT_JSON:False,
- INVOKE_JSON: " ".join(command)}
- else:
- log = None
+ log = {DATE_JSON: posixtime(), COUNT_JSON:0, LIMIT_JSON:failmax, SUPPORT_JSON:False,
+ INVOKE_JSON: " ".join(command)}
def log_value(key, set=None, add=None, target=log): # Call to add toplevel value to log
- if not logging:
- return
if add is not None:
if key not in target:
target[key] = 0
target[key] = set
def log_test(testname, key, set=None, add=None): # Call to add test-case-level value to log
- if not logging:
- return
if TESTS_JSON not in log:
log[TESTS_JSON] = {}
if testname not in log[TESTS_JSON]:
# Run test suite
try:
- proc = subprocess.Popen(command, env=env)
+ proc = popen(command, env=env)
except OSError:
died_politely = True
sys.stderr.write("%s: Could not execute command `%s`\n" % (scriptname, command[0]))
code = wait(proc, duration)
timed_out = code is None
if timed_out: # Initial timeout
- proc.send_signal(timeout_signal)
+ send_signal(proc, timeout_signal)
if kill_after is not None: # Kill-after timeout
code = wait(proc, kill_after)
if code is None:
- proc.send_signal(9)
+ send_signal(proc, signal.SIGKILL)
code = proc.wait() # Waits forever
sys.stderr.write("%s: Command `%s` timed out\n" % (scriptname, command[0]))
died_politely = True
bailout = True
code = 124 # See GNU timeout manpage
- if (code or crashed_at) and babysitting: # Process failures
+ if code or crashed_at: # Process failures
# Handle crash failures
if crashed_at and not timed_out:
log_test(crashed_at, TEST_CRASH_FAILURES, add=1)
message += " Saw crash in test case %s." % (failure_annotate(crashed_at))
if failed_tests:
message += " Saw test failure in test case%s %s." % (pluralize(failed_tests), "; ".join(map(failure_annotate, failed_tests)))
- if bailout:
+ if not (timed_out or crashed_at or failed_tests):
+ message += " Test suite terminated with code %d, " % (code)
+ if log[SUPPORT_JSON]:
+ message += "but failure did not occur during a test case. Halting."
+ else:
+ message += "and suite cannot report test case data. Halting."
+ elif bailout:
message += " Will halt testing."
print(message)
- if bailout or not (babysitting and (resume_after or retry_next)): # If not retrying
+ if bailout or not (resume_after or retry_next): # If not retrying
return code
# If we got here, a retry is occurring
finally:
# Emergency: Ensure command does not outlive this script
if proc is not None and not died_politely:
- proc.send_signal(9)
+ send_signal(proc, signal.SIGKILL)
# Write out logs
log_value(FINAL_CODE_JSON, "EXCEPTION" if code is None else code)