3 # Mimics GNU timeout, but has special modes which gather test result data and retry failed tests.
5 ######################################### How this works ##########################################
7 # Because we have several different test harnesses and we don't invoke them directly, communication
8 # between this script and the harness is done through the simplest means possible (environment
9 # variables to communicate babysitter->harness, files in standard locations harness->babysitter).
11 # The script supports three different ways of extracting test data from the invoked test suite:
13 # 1. "The babysitter protocol": The babysitter sets five environment variables (see below):
14 # "Ran test file": A path to a file where the harness should write a line-delimited list of
15 # tests which ran to completion.
16 # "Failed test file": A path to a file where the harness should write a line-delimited list
17 # of tests that failed.
18 # "Current test file": A path to a file where the harness should write the currently running
19 # test before a test begins, then delete afterward (used to detect early termination).
20 # "Run test": A list of test names, used by:
21 # "Run mode": This is either RUN or EXCLUDE. If RUN, the test list is a whitelist; run only
22 # those tests. If EXCLUDE, the list is a blacklist; run all except those tests.
23 # This is the most featureful mode: It can report where we failed in the case of timeouts or
24 # crashes that take down the harness, and if the feature is enabled it can retry failed tests.
25 # However, it requires modification to the test harness.
27 # 2. NUnit XML: The babysitter also sets a sixth environment variable:
28 # "XML list file": A path to a file where the harness should write a line-delimited list of
29 # paths to NUnit-format XML result files it created.
30 # This also requires modification to the test harness, but less of it.
32 # 3. NUnit XML (manually specified): If the test harness can't be modified, but the caller of the
33 # babysitter script happens to know where the harness writes its result XML files, the caller
34 # can specify those paths in the "Extra XML" environment variable (see --help)
36 # A single babysitter invocation can currently handle either the babysitter protocol or the XML,
37 # but never a mix of the two.
39 ###################################################################################################
52 from xml.dom.minidom import parse as xmlparse
58 CURRENT_TEST_FILE = "babysitter_report_current_test_file.txt"
59 RAN_TEST_FILE = "babysitter_report_ran_test_file.txt"
60 FAILED_TEST_FILE = "babysitter_report_failed_test_file.txt"
61 XML_LIST_FILE = "babysitter_report_xml_list_file.txt"
62 LOGGING_FILE = "babysitter_report.json_lines"
66 # Keys used for Babysitter<->Nunit IPC
67 CURRENT_TEST_KEY = 'MONO_BABYSITTER_NUNIT_CURRENT_TEST_FILE' # Tell nunit where to leave files
68 RAN_TEST_KEY = 'MONO_BABYSITTER_NUNIT_RAN_TEST_FILE'
69 FAILED_TEST_KEY = 'MONO_BABYSITTER_NUNIT_FAILED_TEST_FILE'
70 XML_LIST_KEY = 'MONO_BABYSITTER_NUNIT_XML_LIST_FILE'
71 RUN_KEY = 'MONO_BABYSITTER_NUNIT_RUN_TEST' # Semicolon-separated list of test names
72 RUN_MODE_KEY = 'MONO_BABYSITTER_NUNIT_RUN_MODE' # Equal to either RUN or EXCLUDE
74 # Keys used for script configuration (see --help text)
75 LOG_FILE_KEY = 'MONO_BABYSITTER_LOG_FILE' # Path
76 EXTRA_XML_KEY = 'MONO_BABYSITTER_EXTRA_XML' # Semicolon-separated list of paths
77 RETRY_KEY = 'MONO_BABYSITTER_RETRY' # Equal to an integer
78 VERBOSE_KEY = 'MONO_BABYSITTER_VERBOSE' # "Undocumented"-- used for debugging babysitter
82 DATE_JSON = 'date' # POSIX timestamp of test suite run
83 INVOKE_JSON = 'invocation'
84 COUNT_JSON = 'iteration' # How many times was command executed?
85 LIMIT_JSON = 'failure_max'
86 SUPPORT_JSON = 'babysitter_protocol' # Was the test suite running with a babysitter-aware nunit?
87 LOADED_XML_JSON = 'loaded_xml' # True if we loaded result XML from the test suite
88 FINAL_CODE_JSON = 'final_code'
89 TESTS_JSON = 'tests' # Holds dictionary of (test case name)->(dict with TEST_ keys below)
90 TEST_FAILURES = 'normal_failures'
91 TEST_CRASH_FAILURES = 'crash_failures'
92 TEST_TIMEOUT_FAILURES = 'timeout_failures'
94 ### Interpret arguments
96 scriptname = sys.argv[0]
98 # This is very silly: This program needs to switch from argparse to manual parsing
99 # after the second positional argument, but argparse doesn't let you do this.
100 # I work around this with a custom subclass which can selectively swallow errors:
101 class Hesitate(Exception):
103 class HesitantParser(argparse.ArgumentParser):
104 def __init__(s, *args, **kwargs):
105 s.hesitating = True # Errors will be swallowed until this is set false
106 argparse.ArgumentParser.__init__(s, *args, **kwargs)
107 def error(s, *args, **kwargs):
109 raise Hesitate() # Bail out before errors are printed.
110 argparse.ArgumentParser.error(s, *args, **kwargs)
113 argparser = HesitantParser(description="""\
114 Run a test suite with a timeout.\n
115 Durations are floating point numbers followed by an optional unit:\n
116 's' for seconds (the default)
120 supported environment variables:
121 %s: File to write logs to (as line-delimited JSON)
122 %s: If set to a number, failed test cases will be rerun this many times (NUnit test suites only)
123 %s: Semicolon-separated list of additional NUnit XMLs to check for errors""" %
124 (LOG_FILE_KEY, RETRY_KEY, EXTRA_XML_KEY),
125 formatter_class=argparse.RawTextHelpFormatter)
126 argparser.add_argument('-s', '--signal', dest='signal', metavar='signal', default='TERM',
127 help="Send this signal to the command on timeout, instead of TERM.")
128 argparser.add_argument('-k', '--kill-after-duration', dest='kill_after', metavar='duration',
129 help="If process continues running after signal, send KILL after this duration.")
130 argparser.add_argument('duration',
131 help="Time to run before sending signal.")
132 argparser.add_argument('command', nargs="+", help="Command+args to run.")
134 # Repeatedly parse the given args until we find the shortest prefix for which parsing succeeds.
137 for limit in range(1,argc+1):
139 if limit == argc: # On the final pass, parse for real
140 argparser.hesitating = False
141 args = argparser.parse_args(sys.argv[1:limit])
142 # If we're still here, parse_args succeeded.
143 # The final parsed arg is the command; remaining argv items are its args.
144 extra_args = sys.argv[limit:]
146 except Hesitate: # Before the final pass, if parse_args fails, skip
149 argparser.hesitating = False # Just act like a normal argparser from here
151 durationre = re.compile(r'(\d+)([smhd]?)')
152 def parse_duration(duration): # Accept units
153 match = durationre.match(duration)
155 argparser.error("Could not understand duration %s" % duration)
156 time, units = match.group(1), match.group(2)
166 def parse_signal(sig): # Accept names
169 for k,v in signal.__dict__.items():
170 if k == ("SIG%s" % sig):
172 argparser.error("Could not understand signal name %s" % sig)
174 # Final interpretation of arguments
175 duration = parse_duration(args.duration)
176 kill_after = parse_duration(args.kill_after) if args.kill_after is not None else None
177 timeout_signal = parse_signal(args.signal)
178 command = args.command + extra_args
180 # Process environment
181 global_env = copy.deepcopy( os.environ )
183 verbose = VERBOSE_KEY in global_env
184 logging = LOG_FILE_KEY in global_env
185 logfile = global_env[LOG_FILE_KEY] if logging else None
186 xml_list = global_env[EXTRA_XML_KEY].split(";") if EXTRA_XML_KEY in global_env and global_env[EXTRA_XML_KEY] else []
187 crash_resuming = True # TODO: Consider exposing this option, or adding a retry_on_crash option.
188 failmax = int(global_env[RETRY_KEY]) if RETRY_KEY in global_env else 0
189 babysitting = True # If false, babysitter becomes a timeout clone with no env manipulation or anything.
191 babysitter_dir = tempfile.mkdtemp()
192 global_env[CURRENT_TEST_KEY] = os.path.join(babysitter_dir, CURRENT_TEST_FILE)
193 global_env[RAN_TEST_KEY] = os.path.join(babysitter_dir, RAN_TEST_FILE)
194 global_env[FAILED_TEST_KEY] = os.path.join(babysitter_dir, FAILED_TEST_FILE)
195 global_env[XML_LIST_KEY] = os.path.join(babysitter_dir, XML_LIST_FILE)
197 have_unix_process_groups = 'killpg' in os.__dict__
198 have_windows_process_groups = 'CREATE_NEW_PROCESS_GROUP' in subprocess.__dict__
200 ### Timeout implementation
202 def wait(proc, duration):
203 # TODO: If we detect Python 3.3, Popen objects have a wait(timeout) method we can use
209 if time.time()-start > duration:
213 # Popen and send_signal can't be called in their basic forms because we want to
214 # send signals to all children, not just to the immediately spawned process.
215 # Unfortunately the way to do this varies by operating system.
216 def popen(*args, **kwargs):
217 if have_unix_process_groups: # Call function on spawn to become process group leader
218 kwargs['preexec_fn'] = os.setsid
219 elif have_windows_process_groups: # Set magic flag for Windows process groups
220 kwargs['creationflags'] = subprocess.CREATE_NEW_PROCESS_GROUP
221 return subprocess.Popen(*args, **kwargs)
223 def send_signal(proc, sig):
224 if have_unix_process_groups: # UNIX
225 # For compatibility with GNU timeout, pre-send the signal to just the monitored process
226 os.kill(proc.pid, sig)
227 # Send signal to entire group
228 os.killpg(proc.pid, sig)
229 # For compatibility with GNU Timeout, send a SIGCONT after the signal
230 # (so delivery has a chance to occur even for stopped processes)
231 if sig != signal.SIGKILL and sig != signal.SIGCONT:
232 os.kill(proc.pid, signal.SIGCONT)
233 elif have_windows_process_groups: # Windows with Python 2.7 or better
234 os.kill(proc.pid, sig) # Becuase CREATE_NEW_PROCESS_GROUP, will go to entire group
235 else: # Windows with Python 2.6-- CREATE_NEW_PROCESS_GROUP not supported
236 proc.send_signal(sig) # No way to contact group, just kill process
238 ### Utility functions
240 def attemptDelete(path):
246 def attemptLines(path):
248 with open(path) as f:
249 return map(lambda s: s.strip('\r\n'), f.readlines())
250 except (OSError, IOError):
253 def attemptFirstLine(path):
254 lines = attemptLines(path)
259 def posixtime(): # Amazingly, time.time() does not guarantee an epoch in the docs. However this does:
260 return calendar.timegm(time.gmtime())
263 def failure_may_retry(test):
264 if test not in failcount:
267 return failcount[test] < failmax
269 def verbose_print(arg):
273 def failure_annotate(test):
274 return "%s (failure #%d of %d allowed)" % (test, failcount[test], failmax)
277 return "s" if len(lst) > 1 else ""
281 def run(): # Returns exit code
284 ever_completed = False
285 died_politely = False
290 log = {DATE_JSON: posixtime(), COUNT_JSON:0, LIMIT_JSON:failmax, SUPPORT_JSON:False,
291 LOADED_XML_JSON:False, INVOKE_JSON: " ".join(command)}
293 def log_value(key, set=None, add=None, target=log): # Call to add toplevel value to log
295 if key not in target:
301 def log_test(testname, key, set=None, add=None): # Call to add test-case-level value to log
302 if TESTS_JSON not in log:
304 if testname not in log[TESTS_JSON]:
305 log[TESTS_JSON][testname] = {}
306 log_value(key, set=set, add=add, target=log[TESTS_JSON][testname])
311 env = copy.copy(global_env)
314 else: # Persist reported failures list until first non-crash run
315 retry_next = retry_these
317 log_value(COUNT_JSON, add=1)
319 # Prepare environment/filesystem
321 for key in [CURRENT_TEST_KEY, RAN_TEST_KEY, FAILED_TEST_KEY, XML_LIST_KEY]:
322 attemptDelete(env[key])
324 env[RUN_KEY] = ";".join(resume_after)
325 env[RUN_MODE_KEY] = "EXCLUDE"
327 env[RUN_KEY] = ";".join(retry_these)
328 env[RUN_MODE_KEY] = "RUN"
332 proc = popen(command, env=env)
335 sys.stderr.write("%s: Could not execute command `%s`\n" % (scriptname, command[0]))
338 code = wait(proc, duration)
339 timed_out = code is None
340 if timed_out: # Initial timeout
341 send_signal(proc, timeout_signal)
342 if kill_after is not None: # Kill-after timeout
343 code = wait(proc, kill_after)
345 send_signal(proc, signal.SIGKILL)
346 code = proc.wait() # Waits forever
347 sys.stderr.write("%s: Command `%s` timed out\n" % (scriptname, command[0]))
350 # The test suite has now run, and what happens next varies:
351 # 1. The suite either completed fully without failures, or timed out: Just quit.
352 # 2. The suite crashed (halted without completing):
353 # Remember any failures for later and rerun, using a blacklist of testcases we have completed.
354 # 3. The suite completed, but there were failures reported:
355 # Rerun, using a whitelist of only reported-failed testcases.
356 # 4. The suite crashed partway through a run with a whitelist:
357 # Rerun, using a whitelist consisting of the previous whitelist minus successful testcases.
359 crashed_at = attemptFirstLine(env[CURRENT_TEST_KEY])
360 failed_tests = attemptLines(env[FAILED_TEST_KEY])
361 ran_tests = attemptLines(env[RAN_TEST_KEY])
362 wrote_xml = attemptLines(env[XML_LIST_KEY])
365 if crashed_at or failed_tests or ran_tests: # Test suite follows the babysitter protocol
366 log_value(SUPPORT_JSON, True)
368 if not crashed_at and not ever_completed: # The resume_after whitelist is only
369 resume_after = [] # used before the first noncrashing run
370 ever_completed = True
372 if timed_out: # Currently no retries after timeout
374 code = 124 # See GNU timeout manpage
376 if code or crashed_at: # Process failures
377 # Handle crash failures
378 if crashed_at and not timed_out:
379 log_test(crashed_at, TEST_CRASH_FAILURES, add=1)
380 if not crash_resuming:
383 if failure_may_retry(crashed_at):
384 if ever_completed: # Rerun with whitelist next time
385 for test in retry_these: # Prepopulate with last whitelist minus run testcases
386 if test == crashed_at or test not in ran_tests: # (plus crashed testcase)
387 retry_next.append(test)
388 else: # Rerun with blacklist next time
389 for test in ran_tests: # Add testcases we just ran to blacklist
390 if test != crashed_at: # (except for the crashed testcase)
391 resume_after.append(test)
395 # Handle reported failures
396 for test in failed_tests:
397 log_test(test, TEST_FAILURES, add=1)
398 if failure_may_retry(test):
399 retry_next.append(test)
403 # Report human-readable failures for stdout log.
404 message = "%s:" % (scriptname)
406 message += " Saw timeout in test case %s (never allowed)." % (crashed_at)
408 message += " Saw crash in test case %s." % (failure_annotate(crashed_at))
410 message += " Saw test failure in test case%s %s." % (pluralize(failed_tests), "; ".join(map(failure_annotate, failed_tests)))
411 if not (timed_out or crashed_at or failed_tests):
412 message += " Test suite terminated with code %d, " % (code)
413 if log[SUPPORT_JSON]:
414 message += "but failure did not occur during a test case. Halting."
415 elif xml_list or wrote_xml:
416 message += "will extract test results from XML. Halting."
418 message += "and suite cannot report test case data. Halting."
420 message += " Will halt testing."
423 if not log[SUPPORT_JSON]:
424 for xml in (xml_list + list(wrote_xml)):
425 verbose_print("Will attempt to load XML from %s" % (xml))
427 data = xmlparse(xml).documentElement
428 if data.nodeName != 'test-results':
429 raise ValueError("Toplevel element was not <test-results />")
431 log_value(LOADED_XML_JSON, True)
437 for child in node.childNodes:
438 if child.nodeName == 'test-suite' or child.nodeName == 'results':
439 nextsearch.append(child) # Descend
440 elif child.nodeName == 'test-case':
441 name = child.getAttribute("name")
442 if child.getAttribute("executed") == "True" and child.getAttribute("success") != "True":
443 log_test(name, TEST_FAILURES, add=1)
446 except Exception as e:
447 print("Could not load XML file %s. Reason: %s" % (xml, e))
450 if bailout or not (resume_after or retry_next): # If not retrying
453 # If we got here, a retry is occurring
454 retry_these = retry_next
456 # Report human-readable retry notice for stdout log.
457 message = "%s: Will rerun test suite" % (scriptname)
458 if log[COUNT_JSON] > 1:
459 message += " (iteration #%d)" % (log[COUNT_JSON])
461 message += ", resuming at crashed testcase %s." % (crashed_at)
463 message += ", running previously failed testcase%s: %s." % (pluralize(retry_these), "; ".join(retry_these))
466 # Emergency: Ensure command does not outlive this script
467 if proc is not None and not died_politely:
468 send_signal(proc, signal.SIGKILL)
471 log_value(FINAL_CODE_JSON, "EXCEPTION" if code is None else code)
473 with open(logfile, "a") as f:
474 f.write(json.dumps(log) + os.linesep)