#!/usr/bin/env python

# Mimics GNU timeout, but does some fancy tracking based on custom features in mono nunit24.

import argparse
import subprocess
import re
import signal
import time
import sys
import os.path
import copy
import tempfile
import calendar
import json

### Constants

# How communication with nunit works, given two constraints:
# - We don't invoke nunit directly. We invoke some Makefile which invokes some other Makefile
#   and at some point down the line someone calls nunit.
# - nunit has to be able to report back to us even if (especially if) it terminates improperly.
# Therefore: babysitter->nunit communication rides on environment variables, and
# nunit->babysitter communication is done by leaving behind files at known paths.

# Filenames for the nunit->babysitter report files
CURRENT_TEST_FILE = "babysitter_report_current_test_file.txt"
RAN_TEST_FILE = "babysitter_report_ran_test_file.txt"
FAILED_TEST_FILE = "babysitter_report_failed_test_file.txt"
LOGGING_FILE = "babysitter_report.json_lines"

# Environment keys

# Keys used for Babysitter<->Nunit IPC
CURRENT_TEST_KEY = 'MONO_BABYSITTER_NUNIT_CURRENT_TEST_FILE' # Tell nunit where to leave files
RAN_TEST_KEY = 'MONO_BABYSITTER_NUNIT_RAN_TEST_FILE'
FAILED_TEST_KEY = 'MONO_BABYSITTER_NUNIT_FAILED_TEST_FILE'
RUN_KEY = 'MONO_BABYSITTER_NUNIT_RUN_TEST' # Semicolon-separated list of test names
RUN_MODE_KEY = 'MONO_BABYSITTER_NUNIT_RUN_MODE' # Equal to either RUN or AFTER

# Keys used for script configuration (see --help text)
LOG_FILE_KEY = 'MONO_BABYSITTER_LOG_FILE' # Path
RETRY_KEY = 'MONO_BABYSITTER_RETRY' # Equal to an integer
VERBOSE_KEY = 'MONO_BABYSITTER_VERBOSE' # "Undocumented"-- used for debugging babysitter

# JSON keys (fields of each line in the line-delimited JSON log)
DATE_JSON = 'date' # POSIX timestamp of test suite run
INVOKE_JSON = 'invocation'
COUNT_JSON = 'iteration' # How many times was command executed?
LIMIT_JSON = 'failure_max'
SUPPORT_JSON = 'babysitter_protocol' # Was the test suite running with a babysitter-aware nunit?
FINAL_CODE_JSON = 'final_code'
TESTS_JSON = 'tests' # Holds dictionary of (test case name)->(dict with TEST_ keys below)
TEST_FAILURES = 'normal_failures'
TEST_CRASH_FAILURES = 'crash_failures'
TEST_TIMEOUT_FAILURES = 'timeout_failures'

### Interpret arguments

scriptname = sys.argv[0]

# This is very silly: the program needs to switch from argparse to manual parsing
# after the second positional argument, but argparse doesn't let you do this.
# Work around it with a custom subclass which can selectively swallow errors:

class Hesitate(Exception):
    """Raised by HesitantParser in place of argparse's normal error handling."""
    pass

class HesitantParser(argparse.ArgumentParser):
    """An ArgumentParser whose errors raise Hesitate (silently) while `hesitating` is True."""

    def __init__(self, *args, **kwargs):
        self.hesitating = True # Errors are swallowed until this is set False
        super(HesitantParser, self).__init__(*args, **kwargs)

    def error(self, *args, **kwargs):
        if self.hesitating:
            # Bail out before any error text is printed; caller will retry.
            raise Hesitate()
        super(HesitantParser, self).error(*args, **kwargs)

# Define args
argparser = HesitantParser(description="""\
Run a test suite with a timeout.\n
Durations are floating point numbers followed by an optional unit:\n
's' for seconds (the default)
'm' for minutes
'h' for hours
'd' for days\n
supported environment variables:
%s: File to write logs to (as line-delimited JSON)
%s: If set to a number, failed test cases will be rerun this many times (NUnit test suites only)""" % (LOG_FILE_KEY, RETRY_KEY),
    formatter_class=argparse.RawTextHelpFormatter)
argparser.add_argument('-s', '--signal', dest='signal', metavar='signal', default='TERM',
    help="Send this signal to the command on timeout, instead of TERM.")
argparser.add_argument('-k', '--kill-after-duration', dest='kill_after', metavar='duration',
    help="If process continues running after signal, send KILL after this duration.")
argparser.add_argument('duration',
    help="Time to run before sending signal.")
argparser.add_argument('command', nargs="+",
    help="Command+args to run.")

# Repeatedly parse the given args until we find the shortest prefix for which parsing succeeds.
# Parse longer and longer prefixes of argv until parsing first succeeds; the
# first positional swallowed as `command` marks where the command's own
# arguments begin, and everything after that prefix is passed through verbatim.
argc = len(sys.argv)
extra_args = []
for limit in range(1, argc + 1):
    try:
        if limit == argc: # On the final pass, parse for real
            argparser.hesitating = False
        args = argparser.parse_args(sys.argv[1:limit])
        # If we're still here, parse_args succeeded.
        # The final parsed arg is the command; remaining argv items are its args.
        extra_args = sys.argv[limit:]
        break
    except Hesitate: # Before the final pass, if parse_args fails, skip
        pass

argparser.hesitating = False # Just act like a normal argparser from here

# A duration is an integer or decimal count followed by an optional unit suffix.
# The decimal part honors the --help promise of floating point durations (the old
# r'(\d+)([smhd]?)' silently parsed "1.5h" as 1 second), and the $ anchor rejects
# trailing garbage such as "5q" instead of silently ignoring it.
durationre = re.compile(r'(\d+(?:\.\d+)?)([smhd]?)$')

def parse_duration(duration):
    """Convert a duration string such as "30", "1.5m" or "2h" into seconds.

    Calls argparser.error() (which exits the script) on malformed input.
    """
    match = durationre.match(duration)
    if not match:
        argparser.error("Could not understand duration %s" % duration)
    value, units = float(match.group(1)), match.group(2) # Don't shadow the `time` module
    if units == 'm':
        value *= 60
    elif units == 'h':
        value *= 60 * 60
    elif units == 'd':
        value *= 60 * 60 * 24
    return value

def parse_signal(sig):
    """Convert a signal spec such as "9" or "KILL" into a signal number.

    Calls argparser.error() (which exits the script) on unrecognized names.
    """
    if sig.isdigit():
        return int(sig)
    for key, value in signal.__dict__.items(): # items(), not py2-only iteritems()
        if key == ("SIG%s" % sig):
            return value
    argparser.error("Could not understand signal name %s" % sig)

# Final interpretation of arguments
duration = parse_duration(args.duration)
kill_after = parse_duration(args.kill_after) if args.kill_after is not None else None
timeout_signal = parse_signal(args.signal)
command = args.command + extra_args

# Process environment
global_env = copy.deepcopy(os.environ)

verbose = VERBOSE_KEY in global_env
logging = LOG_FILE_KEY in global_env # NOTE: shadows the stdlib `logging` module name (not imported here)
logfile = global_env[LOG_FILE_KEY] if logging else None
crash_resuming = True # TODO: Consider exposing this option, or adding a retry_on_crash option.
failmax = int(global_env[RETRY_KEY]) if RETRY_KEY in global_env else 0
babysitting = True # If false, babysitter becomes a timeout clone with no env manipulation or anything.
if babysitting:
    # Create a scratch directory for the nunit->babysitter report files and
    # point the IPC environment variables at fresh paths inside it.
    babysitter_dir = tempfile.mkdtemp()
    global_env[CURRENT_TEST_KEY] = os.path.join(babysitter_dir, CURRENT_TEST_FILE)
    global_env[RAN_TEST_KEY] = os.path.join(babysitter_dir, RAN_TEST_FILE)
    global_env[FAILED_TEST_KEY] = os.path.join(babysitter_dir, FAILED_TEST_FILE)

# Feature detection: os.killpg exists on UNIX; subprocess.CREATE_NEW_PROCESS_GROUP
# exists on Windows with Python 2.7 or newer.
have_unix_process_groups = 'killpg' in os.__dict__
have_windows_process_groups = 'CREATE_NEW_PROCESS_GROUP' in subprocess.__dict__

### Timeout implementation

def wait(proc, duration):
    # Poll-based wait: returns proc's exit code, or None if `duration` seconds
    # elapse first (in which case the process is left running).
    # TODO: If we detect Python 3.3, Popen objects have a wait(timeout) method we can use
    start = time.time()
    while True:
        code = proc.poll()
        if code is not None:
            return code
        if time.time() - start > duration:
            return None
        time.sleep(0.05)

# Popen and send_signal can't be called in their basic forms because we want to
# send signals to all children, not just to the immediately spawned process.
# Unfortunately the way to do this varies by operating system.

def popen(*args, **kwargs):
    # Like subprocess.Popen, but arranges for the child to lead its own process
    # group so send_signal() below can reach the entire process tree.
    if have_unix_process_groups: # Call function on spawn to become process group leader
        kwargs['preexec_fn'] = os.setsid
    elif have_windows_process_groups: # Set magic flag for Windows process groups
        kwargs['creationflags'] = subprocess.CREATE_NEW_PROCESS_GROUP
    return subprocess.Popen(*args, **kwargs)

def send_signal(proc, sig):
    # Deliver `sig` to proc and, where the platform allows it, proc's whole group.
    if have_unix_process_groups: # UNIX
        # For compatibility with GNU timeout, pre-send the signal to just the monitored process
        os.kill(proc.pid, sig)
        # Send signal to entire group
        os.killpg(proc.pid, sig)
        # For compatibility with GNU Timeout, send a SIGCONT after the signal
        # (so delivery has a chance to occur even for stopped processes)
        if sig != signal.SIGKILL and sig != signal.SIGCONT:
            os.kill(proc.pid, signal.SIGCONT)
    elif have_windows_process_groups: # Windows with Python 2.7 or better
        os.kill(proc.pid, sig) # Because of CREATE_NEW_PROCESS_GROUP, will go to entire group
    else: # Windows with Python 2.6-- CREATE_NEW_PROCESS_GROUP not supported
        proc.send_signal(sig) # No way to contact group, just kill process

### Utility functions
functions def attemptDelete(path): try: os.remove(path) except OSError: pass def attemptLines(path): try: with open(path) as f: return map(lambda s: s.strip('\r\n'), f.readlines()) except (OSError, IOError): return [] def attemptFirstLine(path): lines = attemptLines(path) if lines: return lines[0] return None def posixtime(): # Amazingly, time.time() does not guarantee an epoch in the docs. However this does: return calendar.timegm(time.gmtime()) failcount = {} def failure_may_retry(test): if test not in failcount: failcount[test] = 0 failcount[test] += 1 return failcount[test] < failmax def verbose_print(arg): if (verbose): print(arg) def failure_annotate(test): return "%s (failure #%d of %d allowed)" % (test, failcount[test], failmax) def pluralize(lst): return "s" if len(lst) > 1 else "" ### Run command def run(): # Returns exit code resume_after = [] retry_these = [] ever_completed = False died_politely = False proc = None code = None # Set up logging log = {DATE_JSON: posixtime(), COUNT_JSON:0, LIMIT_JSON:failmax, SUPPORT_JSON:False, INVOKE_JSON: " ".join(command)} def log_value(key, set=None, add=None, target=log): # Call to add toplevel value to log if add is not None: if key not in target: target[key] = 0 target[key] += add else: target[key] = set def log_test(testname, key, set=None, add=None): # Call to add test-case-level value to log if TESTS_JSON not in log: log[TESTS_JSON] = {} if testname not in log[TESTS_JSON]: log[TESTS_JSON][testname] = {} log_value(key, set=set, add=add, target=log[TESTS_JSON][testname]) # Ready to run tests try: while True: env = copy.copy(global_env) if ever_completed: retry_next = [] else: # Persist reported failures list until first non-crash run retry_next = retry_these log_value(COUNT_JSON, add=1) # Prepare environment/filesystem if babysitting: for key in [CURRENT_TEST_KEY, RAN_TEST_KEY, FAILED_TEST_KEY]: attemptDelete(env[key]) if resume_after: env[RUN_KEY] = ";".join(resume_after) env[RUN_MODE_KEY] = "EXCLUDE" elif 
retry_these: env[RUN_KEY] = ";".join(retry_these) env[RUN_MODE_KEY] = "RUN" # Run test suite try: proc = popen(command, env=env) except OSError: died_politely = True sys.stderr.write("%s: Could not execute command `%s`\n" % (scriptname, command[0])) sys.exit(127) code = wait(proc, duration) timed_out = code is None if timed_out: # Initial timeout send_signal(proc, timeout_signal) if kill_after is not None: # Kill-after timeout code = wait(proc, kill_after) if code is None: send_signal(proc, signal.SIGKILL) code = proc.wait() # Waits forever sys.stderr.write("%s: Command `%s` timed out\n" % (scriptname, command[0])) died_politely = True # The test suite has now run, and what happens next varies: # 1. The suite either completed fully without failures, or timed out: Just quit. # 2. The suite crashed (halted without completing): # Remember any failures for later and rerun, using a blacklist of testcases we have completed. # 3. The suite completed, but there were failures reported: # Rerun, using a whitelist of only reported-failed testcases. # 4. The suite crashed partway through a run with a whitelist: # Rerun, using a whitelist consisting of the previous whitelist minus successful testcases. 
crashed_at = attemptFirstLine(env[CURRENT_TEST_KEY]) failed_tests = attemptLines(env[FAILED_TEST_KEY]) ran_tests = attemptLines(env[RAN_TEST_KEY]) bailout = False if crashed_at or failed_tests or ran_tests: # Test suite follows the babysitter protocol log_value(SUPPORT_JSON, True) if not crashed_at and not ever_completed: # The resume_after whitelist is only resume_after = [] # used before the first noncrashing run ever_completed = True if timed_out: # Currently no retries after timeout bailout = True code = 124 # See GNU timeout manpage if code or crashed_at: # Process failures # Handle crash failures if crashed_at and not timed_out: log_test(crashed_at, TEST_CRASH_FAILURES, add=1) if not crash_resuming: bailout = True if failure_may_retry(crashed_at): if ever_completed: # Rerun with whitelist next time for test in retry_these: # Prepopulate with last whitelist minus run testcases if test == crashed_at or test not in ran_tests: # (plus crashed testcase) retry_next.append(test) else: # Rerun with blacklist next time for test in ran_tests: # Add testcases we just ran to blacklist if test != crashed_at: # (except for the crashed testcase) resume_after.append(test) else: bailout = True # Handle reported failures for test in failed_tests: log_test(test, TEST_FAILURES, add=1) if failure_may_retry(test): retry_next.append(test) else: bailout = True # Report human-readable failures for stdout log. message = "%s:" % (scriptname) if timed_out: message += " Saw timeout in test case %s (never allowed)." % (crashed_at) elif crashed_at: message += " Saw crash in test case %s." % (failure_annotate(crashed_at)) if failed_tests: message += " Saw test failure in test case%s %s." % (pluralize(failed_tests), "; ".join(map(failure_annotate, failed_tests))) if not (timed_out or crashed_at or failed_tests): message += " Test suite terminated with code %d, " % (code) if log[SUPPORT_JSON]: message += "but failure did not occur during a test case. Halting." 
else: message += "and suite cannot report test case data. Halting." elif bailout: message += " Will halt testing." print(message) if bailout or not (resume_after or retry_next): # If not retrying return code # If we got here, a retry is occurring retry_these = retry_next # Report human-readable retry notice for stdout log. message = "%s: Will rerun test suite" % (scriptname) if log[COUNT_JSON] > 1: message += " (iteration #%d)" % (log[COUNT_JSON]) if resume_after: message += ", resuming at crashed testcase %s." % (crashed_at) else: message += ", running previously failed testcase%s: %s." % (pluralize(retry_these), "; ".join(retry_these)) print(message) finally: # Emergency: Ensure command does not outlive this script if proc is not None and not died_politely: send_signal(proc, signal.SIGKILL) # Write out logs log_value(FINAL_CODE_JSON, "EXCEPTION" if code is None else code) if logging: with open(logfile, "a") as f: f.write(json.dumps(log) + os.linesep) sys.exit( run() )