#!/usr/bin/env python # Mimics GNU timeout, but has special modes which gather test result data and retry failed tests. ######################################### How this works ########################################## # # Because we have several different test harnesses and we don't invoke them directly, communication # between this script and the harness is done through the simplest means possible (environment # variables to communicate babysitter->harness, files in standard locations harness->babysitter). # # The script supports three different ways of extracting test data from the invoked test suite: # # 1. "The babysitter protocol": The babysitter sets five environment variables (see below): # "Ran test file": A path to a file where the harness should write a line-delimited list of # tests which ran to completion. # "Failed test file": A path to a file where the harness should write a line-delimited list # of tests that failed. # "Current test file": A path to a file where the harness should write the currently running # test before a test begins, then delete afterward (used to detect early termination). # "Run test": A list of test names, used by: # "Run mode": This is either RUN or EXCLUDE. If RUN, the test list is a whitelist; run only # those tests. If EXCLUDE, the list is a blacklist; run all except those tests. # This is the most featureful mode: It can report where we failed in the case of timeouts or # crashes that take down the harness, and if the feature is enabled it can retry failed tests. # However, it requires modification to the test harness. # # 2. NUnit XML: The babysitter also sets a sixth environment variable: # "XML list file": A path to a file where the harness should write a line-delimited list of # paths to NUnit-format XML result files it created. # This also requires modification to the test harness, but less of it. # # 3. NUnit XML (manually specified): If the test harness can't be modified, but the caller of the # babysitter script happens to know where the harness writes its result XML files, the caller # can specify those paths in the "Extra XML" environment variable (see --help) # # A single babysitter invocation can currently handle either the babysitter protocol or the XML, # but never a mix of the two. # ################################################################################################### import argparse import subprocess import re import signal import time import sys import os.path import copy import tempfile import calendar import json import platform from xml.dom.minidom import parse as xmlparse ### Constants # Filenames CURRENT_TEST_FILE = "babysitter_report_current_test_file.txt" RAN_TEST_FILE = "babysitter_report_ran_test_file.txt" FAILED_TEST_FILE = "babysitter_report_failed_test_file.txt" XML_LIST_FILE = "babysitter_report_xml_list_file.txt" LOGGING_FILE = "babysitter_report.json_lines" # Environment keys # Keys used for Babysitter<->Nunit IPC CURRENT_TEST_KEY = 'MONO_BABYSITTER_NUNIT_CURRENT_TEST_FILE' # Tell nunit where to leave files RAN_TEST_KEY = 'MONO_BABYSITTER_NUNIT_RAN_TEST_FILE' FAILED_TEST_KEY = 'MONO_BABYSITTER_NUNIT_FAILED_TEST_FILE' XML_LIST_KEY = 'MONO_BABYSITTER_NUNIT_XML_LIST_FILE' RUN_KEY = 'MONO_BABYSITTER_NUNIT_RUN_TEST' # Semicolon-separated list of test names RUN_MODE_KEY = 'MONO_BABYSITTER_NUNIT_RUN_MODE' # Equal to either RUN or EXCLUDE # Keys used for script configuration (see --help text) LOG_FILE_KEY = 'MONO_BABYSITTER_LOG_FILE' # Path EXTRA_XML_KEY = 'MONO_BABYSITTER_EXTRA_XML' # Semicolon-separated list of paths RETRY_KEY = 'MONO_BABYSITTER_RETRY' # Equal to an integer VERBOSE_KEY = 'MONO_BABYSITTER_VERBOSE' # "Undocumented"-- used for debugging babysitter # JSON keys DATE_JSON = 'date' # POSIX timestamp of test suite run INVOKE_JSON = 'invocation' COUNT_JSON = 'iteration' # How many times was command executed? LIMIT_JSON = 'failure_max' SUPPORT_JSON = 'babysitter_protocol' # Was the test suite running with a babysitter-aware nunit? LOADED_XML_JSON = 'loaded_xml' # True if we loaded result XML from the test suite FINAL_CODE_JSON = 'final_code' TESTS_JSON = 'tests' # Holds dictionary of (test case name)->(dict with TEST_ keys below) TEST_FAILURES = 'normal_failures' TEST_CRASH_FAILURES = 'crash_failures' TEST_TIMEOUT_FAILURES = 'timeout_failures' ### Interpret arguments scriptname = sys.argv[0] # This is very silly: This program needs to switch from argparse to manual parsing # after the second positional argument, but argparse doesn't let you do this. # I work around this with a custom subclass which can selectively swallow errors: class Hesitate(Exception): pass class HesitantParser(argparse.ArgumentParser): def __init__(s, *args, **kwargs): s.hesitating = True # Errors will be swallowed until this is set false argparse.ArgumentParser.__init__(s, *args, **kwargs) def error(s, *args, **kwargs): if s.hesitating: raise Hesitate() # Bail out before errors are printed. argparse.ArgumentParser.error(s, *args, **kwargs) # Define args argparser = HesitantParser(description="""\ Run a test suite with a timeout.\n Durations are floating point numbers followed by an optional unit:\n 's' for seconds (the default) 'm' for minutes 'h' for hours 'd' for days\n supported environment variables: %s: File to write logs to (as line-delimited JSON) %s: If set to a number, failed test cases will be rerun this many times (NUnit test suites only) %s: Semicolon-separated list of additional NUnit XMLs to check for errors""" % (LOG_FILE_KEY, RETRY_KEY, EXTRA_XML_KEY), formatter_class=argparse.RawTextHelpFormatter) argparser.add_argument('-s', '--signal', dest='signal', metavar='signal', default='TERM', help="Send this signal to the command on timeout, instead of TERM.") argparser.add_argument('-k', '--kill-after-duration', dest='kill_after', metavar='duration', help="If process continues running after signal, send KILL after this duration.") argparser.add_argument('duration', help="Time to run before sending signal.") argparser.add_argument('command', nargs="+", help="Command+args to run.") # Repeatedly parse the given args until we find the shortest prefix for which parsing succeeds. argc = len(sys.argv) extra_args = [] for limit in range(1,argc+1): try: if limit == argc: # On the final pass, parse for real argparser.hesitating = False args = argparser.parse_args(sys.argv[1:limit]) # If we're still here, parse_args succeeded. # The final parsed arg is the command; remaining argv items are its args. extra_args = sys.argv[limit:] break except Hesitate: # Before the final pass, if parse_args fails, skip pass argparser.hesitating = False # Just act like a normal argparser from here durationre = re.compile(r'(\d+)([smhd]?)') def parse_duration(duration): # Accept units match = durationre.match(duration) if not match: argparser.error("Could not understand duration %s" % duration) time, units = match.group(1), match.group(2) time = int(time) if units == 'm': time *= 60 elif units == 'h': time *= 60*60 elif units == 'd': time *= 60*60*24 return time def parse_signal(sig): # Accept names if sig.isdigit(): return int(sig) for k,v in signal.__dict__.items(): if k == ("SIG%s" % sig): return v argparser.error("Could not understand signal name %s" % sig) # Final interpretation of arguments duration = parse_duration(args.duration) kill_after = parse_duration(args.kill_after) if args.kill_after is not None else None timeout_signal = parse_signal(args.signal) command = args.command + extra_args # If we are running in Cygwin, Python will believe it is a UNIX application but Mono will be Windows. cygwin = platform.system().startswith("CYGWIN") def outgoingPath(path): if cygwin: # Invoke cygpath and strip newline return subprocess.check_output(["cygpath", "-w", path])[:-1] return path def incomingPath(path): if cygwin: return subprocess.check_output(["cygpath", path])[:-1] return path # Some of the things we put in global_env are paths. If we're in cygwin, we have to keep separate # local-use and env (mono use) copies of these keys. env_source = {} # Process environment global_env = copy.deepcopy( os.environ ) verbose = VERBOSE_KEY in global_env logging = LOG_FILE_KEY in global_env logfile = global_env[LOG_FILE_KEY] if logging else None xml_list = global_env[EXTRA_XML_KEY].split(";") if EXTRA_XML_KEY in global_env and global_env[EXTRA_XML_KEY] else [] crash_resuming = True # TODO: Consider exposing this option, or adding a retry_on_crash option. failmax = int(global_env[RETRY_KEY]) if RETRY_KEY in global_env else 0 babysitting = True # If false, babysitter becomes a timeout clone with no env manipulation or anything. if babysitting: babysitter_dir = tempfile.mkdtemp() env_source[CURRENT_TEST_KEY] = os.path.join(babysitter_dir, CURRENT_TEST_FILE) env_source[RAN_TEST_KEY] = os.path.join(babysitter_dir, RAN_TEST_FILE) env_source[FAILED_TEST_KEY] = os.path.join(babysitter_dir, FAILED_TEST_FILE) env_source[XML_LIST_KEY] = os.path.join(babysitter_dir, XML_LIST_FILE) env_source_keys = [CURRENT_TEST_KEY, RAN_TEST_KEY, FAILED_TEST_KEY, XML_LIST_KEY] for key in env_source_keys: global_env[key] = outgoingPath(env_source[key]) have_unix_process_groups = 'killpg' in os.__dict__ have_windows_process_groups = 'CREATE_NEW_PROCESS_GROUP' in subprocess.__dict__ ### Timeout implementation def wait(proc, duration): # TODO: If we detect Python 3.3, Popen objects have a wait(timeout) method we can use start = time.time() while True: code = proc.poll() if code is not None: return code if time.time()-start > duration: return None time.sleep(0.05) # Popen and send_signal can't be called in their basic forms because we want to # send signals to all children, not just to the immediately spawned process. # Unfortunately the way to do this varies by operating system. def popen(*args, **kwargs): if have_unix_process_groups: # Call function on spawn to become process group leader kwargs['preexec_fn'] = os.setsid elif have_windows_process_groups: # Set magic flag for Windows process groups kwargs['creationflags'] = subprocess.CREATE_NEW_PROCESS_GROUP return subprocess.Popen(*args, **kwargs) def send_signal(proc, sig): if have_unix_process_groups: # UNIX # For compatibility with GNU timeout, pre-send the signal to just the monitored process os.kill(proc.pid, sig) # Send signal to entire group try: os.killpg(proc.pid, sig) except OSError as e: sys.stderr.write("%s: Warning, could not kill process group %s because %s\n" % (scriptname, proc.pid, e)) # For compatibility with GNU Timeout, send a SIGCONT after the signal # (so delivery has a chance to occur even for stopped processes) if sig != signal.SIGKILL and sig != signal.SIGCONT: os.kill(proc.pid, signal.SIGCONT) elif have_windows_process_groups: # Windows with Python 2.7 or better os.kill(proc.pid, sig) # Becuase CREATE_NEW_PROCESS_GROUP, will go to entire group else: # Windows with Python 2.6-- CREATE_NEW_PROCESS_GROUP not supported proc.send_signal(sig) # No way to contact group, just kill process ### Utility functions def attemptDelete(path): try: os.remove(path) except OSError: pass def attemptLines(path): try: with open(path) as f: return map(lambda s: s.strip('\r\n'), f.readlines()) except (OSError, IOError): return [] def attemptFirstLine(path): lines = attemptLines(path) if lines: return lines[0] return None def posixtime(): # Amazingly, time.time() does not guarantee an epoch in the docs. However this does: return calendar.timegm(time.gmtime()) failcount = {} def failure_may_retry(test): if test not in failcount: failcount[test] = 0 failcount[test] += 1 return failcount[test] < failmax def verbose_print(arg): if (verbose): print(arg) def failure_annotate(test): return "%s (failure #%d of %d allowed)" % (test, failcount[test], failmax) def pluralize(lst): return "s" if len(lst) > 1 else "" ### Run command def run(): # Returns exit code resume_after = [] retry_these = [] ever_completed = False died_politely = False proc = None code = None # Set up logging log = {DATE_JSON: posixtime(), COUNT_JSON:0, LIMIT_JSON:failmax, SUPPORT_JSON:False, LOADED_XML_JSON:False, INVOKE_JSON: " ".join(command)} def log_value(key, set=None, add=None, target=log): # Call to add toplevel value to log if add is not None: if key not in target: target[key] = 0 target[key] += add else: target[key] = set def log_test(testname, key, set=None, add=None): # Call to add test-case-level value to log if TESTS_JSON not in log: log[TESTS_JSON] = {} if testname not in log[TESTS_JSON]: log[TESTS_JSON][testname] = {} log_value(key, set=set, add=add, target=log[TESTS_JSON][testname]) # Ready to run tests try: while True: env = copy.copy(global_env) if ever_completed: retry_next = [] else: # Persist reported failures list until first non-crash run retry_next = retry_these log_value(COUNT_JSON, add=1) # Prepare environment/filesystem if babysitting: for key in env_source_keys: # Clear all paths intended for use by mono attemptDelete(env_source[key]) if resume_after: env[RUN_KEY] = ";".join(resume_after) env[RUN_MODE_KEY] = "EXCLUDE" elif retry_these: env[RUN_KEY] = ";".join(retry_these) env[RUN_MODE_KEY] = "RUN" # Run test suite try: proc = popen(command, env=env) except OSError: died_politely = True sys.stderr.write("%s: Could not execute command `%s`\n" % (scriptname, command[0])) sys.exit(127) code = wait(proc, duration) timed_out = code is None if timed_out: # Initial timeout send_signal(proc, timeout_signal) if kill_after is not None: # Kill-after timeout code = wait(proc, kill_after) if code is None: send_signal(proc, signal.SIGKILL) code = proc.wait() # Waits forever sys.stderr.write("%s: Command `%s` timed out\n" % (scriptname, command[0])) died_politely = True # The test suite has now run, and what happens next varies: # 1. The suite either completed fully without failures, or timed out: Just quit. # 2. The suite crashed (halted without completing): # Remember any failures for later and rerun, using a blacklist of testcases we have completed. # 3. The suite completed, but there were failures reported: # Rerun, using a whitelist of only reported-failed testcases. # 4. The suite crashed partway through a run with a whitelist: # Rerun, using a whitelist consisting of the previous whitelist minus successful testcases. crashed_at = attemptFirstLine(env_source[CURRENT_TEST_KEY]) failed_tests = attemptLines(env_source[FAILED_TEST_KEY]) ran_tests = attemptLines(env_source[RAN_TEST_KEY]) wrote_xml = attemptLines(env_source[XML_LIST_KEY]) bailout = False if crashed_at or failed_tests or ran_tests: # Test suite follows the babysitter protocol log_value(SUPPORT_JSON, True) if not crashed_at and not ever_completed: # The resume_after whitelist is only resume_after = [] # used before the first noncrashing run ever_completed = True if timed_out: # Currently no retries after timeout bailout = True code = 124 # See GNU timeout manpage if code or crashed_at: # Process failures # Handle crash failures if crashed_at and not timed_out: log_test(crashed_at, TEST_CRASH_FAILURES, add=1) if not crash_resuming: bailout = True if failure_may_retry(crashed_at): if ever_completed: # Rerun with whitelist next time for test in retry_these: # Prepopulate with last whitelist minus run testcases if test == crashed_at or test not in ran_tests: # (plus crashed testcase) retry_next.append(test) else: # Rerun with blacklist next time for test in ran_tests: # Add testcases we just ran to blacklist if test != crashed_at: # (except for the crashed testcase) resume_after.append(test) else: bailout = True # Handle reported failures for test in failed_tests: log_test(test, TEST_FAILURES, add=1) if failure_may_retry(test): retry_next.append(test) else: bailout = True # Report human-readable failures for stdout log. message = "%s:" % (scriptname) if timed_out: message += " Saw timeout in test case %s (never allowed)." % (crashed_at) elif crashed_at: message += " Saw crash in test case %s." % (failure_annotate(crashed_at)) if failed_tests: message += " Saw test failure in test case%s %s." % (pluralize(failed_tests), "; ".join(map(failure_annotate, failed_tests))) if not (timed_out or crashed_at or failed_tests): message += " Test suite terminated with code %d, " % (code) if log[SUPPORT_JSON]: message += "but failure did not occur during a test case. Halting." elif xml_list or wrote_xml: message += "will extract test results from XML. Halting." else: message += "and suite cannot report test case data. Halting." elif bailout: message += " Will halt testing." print(message) if not log[SUPPORT_JSON]: for xml in (xml_list + [incomingPath(xml) for xml in wrote_xml]): verbose_print("Will attempt to load XML from %s" % (xml)) try: data = xmlparse(xml).documentElement if data.nodeName != 'test-results': raise ValueError("Toplevel element was not ") log_value(LOADED_XML_JSON, True) search = [data] while search: nextsearch = [] for node in search: for child in node.childNodes: if child.nodeName == 'test-suite' or child.nodeName == 'results': nextsearch.append(child) # Descend elif child.nodeName == 'test-case': name = child.getAttribute("name") if child.getAttribute("executed") == "True" and child.getAttribute("success") != "True": log_test(name, TEST_FAILURES, add=1) search = nextsearch except Exception as e: print("Could not load XML file %s. Reason: %s" % (xml, e)) data if bailout or not (resume_after or retry_next): # If not retrying return code # If we got here, a retry is occurring retry_these = retry_next # Report human-readable retry notice for stdout log. message = "%s: Will rerun test suite" % (scriptname) if log[COUNT_JSON] > 1: message += " (iteration #%d)" % (log[COUNT_JSON]) if resume_after: message += ", resuming at crashed testcase %s." % (crashed_at) else: message += ", running previously failed testcase%s: %s." % (pluralize(retry_these), "; ".join(retry_these)) print(message) finally: # Emergency: Ensure command does not outlive this script if proc is not None and not died_politely: send_signal(proc, signal.SIGKILL) # Write out logs log_value(FINAL_CODE_JSON, "EXCEPTION" if code is None else code) if logging: with open(logfile, "a") as f: f.write(json.dumps(log) + os.linesep) sys.exit( run() )