scripts/ci/babysitter

   1 #!/usr/bin/env python
   2
   3 # Mimics GNU timeout, but has special modes which gather test result data and retry failed tests.
   4
   5 ######################################### How this works ##########################################
   6 #
   7 # Because we have several different test harnesses and we don't invoke them directly, communication
   8 # between this script and the harness is done through the simplest means possible (environment
   9 # variables to communicate babysitter->harness, files in standard locations harness->babysitter).
  10 #
  11 # The script supports three different ways of extracting test data from the invoked test suite:
  12 #
  13 # 1. "The babysitter protocol": The babysitter sets five environment variables (see below):
  14 #        "Ran test file": A path to a file where the harness should write a line-delimited list of
  15 #            tests which ran to completion.
  16 #        "Failed test file": A path to a file where the harness should write a line-delimited list
  17 #            of tests that failed.
  18 #        "Current test file": A path to a file where the harness should write the currently running
  19 #            test before a test begins, then delete afterward (used to detect early termination).
  20 #        "Run test": A list of test names, used by:
  21 #        "Run mode": This is either RUN or EXCLUDE. If RUN, the test list is a whitelist; run only
  22 #            those tests. If EXCLUDE, the list is a blacklist; run all except those tests.
  23 #    This is the most featureful mode: It can report where we failed in the case of timeouts or
  24 #    crashes that take down the harness, and if the feature is enabled it can retry failed tests.
  25 #    However, it requires modification to the test harness.
  26 #
  27 # 2. NUnit XML: The babysitter also sets a sixth environment variable:
  28 #        "XML list file": A path to a file where the harness should write a line-delimited list of
  29 #        paths to NUnit-format XML result files it created.
  30 #    This also requires modification to the test harness, but less of it.
  31 #
  32 # 3. NUnit XML (manually specified): If the test harness can't be modified, but the caller of the
  33 #    babysitter script happens to know where the harness writes its result XML files, the caller
  34 #    can specify those paths in the "Extra XML" environment variable (see --help)
  35 #
  36 # A single babysitter invocation can currently handle either the babysitter protocol or the XML,
  37 # but never a mix of the two.
  38 #
  39 ###################################################################################################
  40
  41 import argparse
  42 import subprocess
  43 import re
  44 import signal
  45 import time
  46 import sys
  47 import os.path
  48 import copy
  49 import tempfile
  50 import calendar
  51 import json
  52 from xml.dom.minidom import parse as xmlparse
  53
  54 ### Constants
  55
  56 # Filenames
  57
  58 CURRENT_TEST_FILE = "babysitter_report_current_test_file.txt"
  59 RAN_TEST_FILE     = "babysitter_report_ran_test_file.txt"
  60 FAILED_TEST_FILE  = "babysitter_report_failed_test_file.txt"
  61 XML_LIST_FILE     = "babysitter_report_xml_list_file.txt"
  62 LOGGING_FILE      = "babysitter_report.json_lines"
  63
  64 # Environment keys
  65
  66 # Keys used for Babysitter<->Nunit IPC
  67 CURRENT_TEST_KEY  = 'MONO_BABYSITTER_NUNIT_CURRENT_TEST_FILE' # Tell nunit where to leave files
  68 RAN_TEST_KEY      = 'MONO_BABYSITTER_NUNIT_RAN_TEST_FILE'
  69 FAILED_TEST_KEY   = 'MONO_BABYSITTER_NUNIT_FAILED_TEST_FILE'
  70 XML_LIST_KEY      = 'MONO_BABYSITTER_NUNIT_XML_LIST_FILE'
  71 RUN_KEY           = 'MONO_BABYSITTER_NUNIT_RUN_TEST' # Semicolon-separated list of test names
  72 RUN_MODE_KEY      = 'MONO_BABYSITTER_NUNIT_RUN_MODE' # Equal to either RUN or EXCLUDE
  73
  74 # Keys used for script configuration (see --help text)
  75 LOG_FILE_KEY      = 'MONO_BABYSITTER_LOG_FILE'       # Path
  76 EXTRA_XML_KEY     = 'MONO_BABYSITTER_EXTRA_XML'      # Semicolon-separated list of paths
  77 RETRY_KEY         = 'MONO_BABYSITTER_RETRY'          # Equal to an integer
  78 VERBOSE_KEY       = 'MONO_BABYSITTER_VERBOSE'        # "Undocumented"-- used for debugging babysitter
  79
  80 # JSON keys
  81
  82 DATE_JSON       = 'date'          # POSIX timestamp of test suite run
  83 INVOKE_JSON     = 'invocation'
  84 COUNT_JSON      = 'iteration'        # How many times was command executed?
  85 LIMIT_JSON      = 'failure_max'
  86 SUPPORT_JSON    = 'babysitter_protocol' # Was the test suite running with a babysitter-aware nunit?
  87 LOADED_XML_JSON = 'loaded_xml'          # True if we loaded result XML from the test suite
  88 FINAL_CODE_JSON = 'final_code'
  89 TESTS_JSON      = 'tests'         # Holds dictionary of (test case name)->(dict with TEST_ keys below)
  90 TEST_FAILURES         = 'normal_failures'
  91 TEST_CRASH_FAILURES   = 'crash_failures'
  92 TEST_TIMEOUT_FAILURES = 'timeout_failures'
  93
  94 ### Interpret arguments
  95
  96 scriptname = sys.argv[0]
  97
  98 # This is very silly: This program needs to switch from argparse to manual parsing
  99 # after the second positional argument, but argparse doesn't let you do this.
 100 # I work around this with a custom subclass which can selectively swallow errors:
 101 class Hesitate(Exception):
 102         pass
 103 class HesitantParser(argparse.ArgumentParser):
 104         def __init__(s, *args, **kwargs):
 105                 s.hesitating = True  # Errors will be swallowed until this is set false
 106                 argparse.ArgumentParser.__init__(s, *args, **kwargs)
 107         def error(s, *args, **kwargs):
 108                 if s.hesitating:
 109                         raise Hesitate() # Bail out before errors are printed.
 110                 argparse.ArgumentParser.error(s, *args, **kwargs)
 111
 112 # Define args
 113 argparser = HesitantParser(description="""\
 114 Run a test suite with a timeout.\n
 115 Durations are floating point numbers followed by an optional unit:\n
 116 's' for seconds (the default)
 117 'm' for minutes
 118 'h' for hours
 119 'd' for days\n
 120 supported environment variables:
 121   %s: File to write logs to (as line-delimited JSON)
 122   %s: If set to a number, failed test cases will be rerun this many times (NUnit test suites only)
 123   %s: Semicolon-separated list of additional NUnit XMLs to check for errors""" %
 124                 (LOG_FILE_KEY, RETRY_KEY, EXTRA_XML_KEY),
 125         formatter_class=argparse.RawTextHelpFormatter)
 126 argparser.add_argument('-s', '--signal', dest='signal', metavar='signal', default='TERM',
 127         help="Send this signal to the command on timeout, instead of TERM.")
 128 argparser.add_argument('-k', '--kill-after-duration', dest='kill_after', metavar='duration',
 129         help="If process continues running after signal, send KILL after this duration.")
 130 argparser.add_argument('duration',
 131         help="Time to run before sending signal.")
 132 argparser.add_argument('command', nargs="+", help="Command+args to run.")
 133
 134 # Repeatedly parse the given args until we find the shortest prefix for which parsing succeeds.
 135 argc = len(sys.argv)
 136 extra_args = []
 137 for limit in range(1,argc+1):
 138         try:
 139                 if limit == argc: # On the final pass, parse for real
 140                         argparser.hesitating = False
 141                 args = argparser.parse_args(sys.argv[1:limit])
 142                 # If we're still here, parse_args succeeded.
 143                 # The final parsed arg is the command; remaining argv items are its args.
 144                 extra_args = sys.argv[limit:]
 145                 break
 146         except Hesitate: # Before the final pass, if parse_args fails, skip
 147                 pass
 148
 149 argparser.hesitating = False # Just act like a normal argparser from here
 150
 151 durationre = re.compile(r'(\d+)([smhd]?)')
 152 def parse_duration(duration): # Accept units
 153         match = durationre.match(duration)
 154         if not match:
 155                 argparser.error("Could not understand duration %s" % duration)
 156         time, units = match.group(1), match.group(2)
 157         time = int(time)
 158         if units == 'm':
 159                 time *= 60
 160         elif units == 'h':
 161                 time *= 60*60
 162         elif units == 'd':
 163                 time *= 60*60*24
 164         return time
 165
 166 def parse_signal(sig):        # Accept names
 167         if sig.isdigit():
 168                 return int(sig)
 169         for k,v in signal.__dict__.items():
 170                 if k == ("SIG%s" % sig):
 171                         return v
 172         argparser.error("Could not understand signal name %s" % sig)
 173
 174 # Final interpretation of arguments
 175 duration = parse_duration(args.duration)
 176 kill_after = parse_duration(args.kill_after) if args.kill_after is not None else None
 177 timeout_signal = parse_signal(args.signal)
 178 command = args.command + extra_args
 179
 180 # Process environment
 181 global_env = copy.deepcopy( os.environ )
 182
 183 verbose = VERBOSE_KEY in global_env
 184 logging = LOG_FILE_KEY in global_env
 185 logfile = global_env[LOG_FILE_KEY] if logging else None
 186 xml_list = global_env[EXTRA_XML_KEY].split(";") if EXTRA_XML_KEY in global_env and global_env[EXTRA_XML_KEY] else []
 187 crash_resuming = True # TODO: Consider exposing this option, or adding a retry_on_crash option.
 188 failmax = int(global_env[RETRY_KEY]) if RETRY_KEY in global_env else 0
 189 babysitting = True # If false, babysitter becomes a timeout clone with no env manipulation or anything.
 190 if babysitting:
 191         babysitter_dir = tempfile.mkdtemp()
 192         global_env[CURRENT_TEST_KEY] = os.path.join(babysitter_dir, CURRENT_TEST_FILE)
 193         global_env[RAN_TEST_KEY]     = os.path.join(babysitter_dir, RAN_TEST_FILE)
 194         global_env[FAILED_TEST_KEY]  = os.path.join(babysitter_dir, FAILED_TEST_FILE)
 195         global_env[XML_LIST_KEY]     = os.path.join(babysitter_dir, XML_LIST_FILE)
 196
 197 have_unix_process_groups = 'killpg' in os.__dict__
 198 have_windows_process_groups = 'CREATE_NEW_PROCESS_GROUP' in subprocess.__dict__
 199
 200 ### Timeout implementation
 201
 202 def wait(proc, duration):
 203         # TODO: If we detect Python 3.3, Popen objects have a wait(timeout) method we can use
 204         start = time.time()
 205         while True:
 206                 code = proc.poll()
 207                 if code is not None:
 208                         return code
 209                 if time.time()-start > duration:
 210                         return None
 211                 time.sleep(0.05)
 212
 213 # Popen and send_signal can't be called in their basic forms because we want to
 214 # send signals to all children, not just to the immediately spawned process.
 215 # Unfortunately the way to do this varies by operating system.
 216 def popen(*args, **kwargs):
 217         if have_unix_process_groups: # Call function on spawn to become process group leader
 218                 kwargs['preexec_fn'] = os.setsid
 219         elif have_windows_process_groups: # Set magic flag for Windows process groups
 220                 kwargs['creationflags'] = subprocess.CREATE_NEW_PROCESS_GROUP
 221         return subprocess.Popen(*args, **kwargs)
 222
 223 def send_signal(proc, sig):
 224         if have_unix_process_groups: # UNIX
 225                 # For compatibility with GNU timeout, pre-send the signal to just the monitored process
 226                 os.kill(proc.pid, sig)
 227                 # Send signal to entire group
 228                 os.killpg(proc.pid, sig)
 229                 # For compatibility with GNU Timeout, send a SIGCONT after the signal
 230                 # (so delivery has a chance to occur even for stopped processes)
 231                 if sig != signal.SIGKILL and sig != signal.SIGCONT:
 232                         os.kill(proc.pid, signal.SIGCONT)
 233         elif have_windows_process_groups: # Windows with Python 2.7 or better
 234                 os.kill(proc.pid, sig) # Becuase CREATE_NEW_PROCESS_GROUP, will go to entire group
 235         else: # Windows with Python 2.6-- CREATE_NEW_PROCESS_GROUP not supported
 236                 proc.send_signal(sig) # No way to contact group, just kill process
 237
 238 ### Utility functions
 239
 240 def attemptDelete(path):
 241         try:
 242                 os.remove(path)
 243         except OSError:
 244                 pass
 245
 246 def attemptLines(path):
 247         try:
 248                 with open(path) as f:
 249                         return map(lambda s: s.strip('\r\n'), f.readlines())
 250         except (OSError, IOError):
 251                 return []
 252
 253 def attemptFirstLine(path):
 254         lines = attemptLines(path)
 255         if lines:
 256                 return lines[0]
 257         return None
 258
 259 def posixtime(): # Amazingly, time.time() does not guarantee an epoch in the docs. However this does:
 260         return calendar.timegm(time.gmtime())
 261
 262 failcount = {}
 263 def failure_may_retry(test):
 264         if test not in failcount:
 265                 failcount[test] = 0
 266         failcount[test] += 1
 267         return failcount[test] < failmax
 268
 269 def verbose_print(arg):
 270         if (verbose):
 271                 print(arg)
 272
 273 def failure_annotate(test):
 274         return "%s (failure #%d of %d allowed)" % (test, failcount[test], failmax)
 275
 276 def pluralize(lst):
 277         return "s" if len(lst) > 1 else ""
 278
 279 ### Run command
 280
 281 def run(): # Returns exit code
 282         resume_after = []
 283         retry_these = []
 284         ever_completed = False
 285         died_politely = False
 286         proc = None
 287         code = None
 288
 289         # Set up logging
 290         log = {DATE_JSON: posixtime(), COUNT_JSON:0, LIMIT_JSON:failmax, SUPPORT_JSON:False,
 291                 LOADED_XML_JSON:False, INVOKE_JSON: " ".join(command)}
 292
 293         def log_value(key, set=None, add=None, target=log): # Call to add toplevel value to log
 294                 if add is not None:
 295                         if key not in target:
 296                                 target[key] = 0
 297                         target[key] += add
 298                 else:
 299                         target[key] = set
 300
 301         def log_test(testname, key, set=None, add=None):   # Call to add test-case-level value to log
 302                 if TESTS_JSON not in log:
 303                         log[TESTS_JSON] = {}
 304                 if testname not in log[TESTS_JSON]:
 305                         log[TESTS_JSON][testname] = {}
 306                 log_value(key, set=set, add=add, target=log[TESTS_JSON][testname])
 307
 308         # Ready to run tests
 309         try:
 310                 while True:
 311                         env = copy.copy(global_env)
 312                         if ever_completed:
 313                                 retry_next = []
 314                         else: # Persist reported failures list until first non-crash run
 315                                 retry_next = retry_these
 316
 317                         log_value(COUNT_JSON, add=1)
 318
 319                         # Prepare environment/filesystem
 320                         if babysitting:
 321                                 for key in [CURRENT_TEST_KEY, RAN_TEST_KEY, FAILED_TEST_KEY, XML_LIST_KEY]:
 322                                         attemptDelete(env[key])
 323                                 if resume_after:
 324                                         env[RUN_KEY] = ";".join(resume_after)
 325                                         env[RUN_MODE_KEY] = "EXCLUDE"
 326                                 elif retry_these:
 327                                         env[RUN_KEY] = ";".join(retry_these)
 328                                         env[RUN_MODE_KEY] = "RUN"
 329
 330                         # Run test suite
 331                         try:
 332                                 proc = popen(command, env=env)
 333                         except OSError:
 334                                 died_politely = True
 335                                 sys.stderr.write("%s: Could not execute command `%s`\n" % (scriptname, command[0]))
 336                                 sys.exit(127)
 337
 338                         code = wait(proc, duration)
 339                         timed_out = code is None
 340                         if timed_out:                  # Initial timeout
 341                                 send_signal(proc, timeout_signal)
 342                                 if kill_after is not None: # Kill-after timeout
 343                                         code = wait(proc, kill_after)
 344                                         if code is None:
 345                                                 send_signal(proc, signal.SIGKILL)
 346                                 code = proc.wait()         # Waits forever
 347                                 sys.stderr.write("%s: Command `%s` timed out\n" % (scriptname, command[0]))
 348                         died_politely = True
 349
 350                         # The test suite has now run, and what happens next varies:
 351                         # 1. The suite either completed fully without failures, or timed out: Just quit.
 352                         # 2. The suite crashed (halted without completing):
 353                         #   Remember any failures for later and rerun, using a blacklist of testcases we have completed.
 354                         # 3. The suite completed, but there were failures reported:
 355                         #   Rerun, using a whitelist of only reported-failed testcases.
 356                         # 4. The suite crashed partway through a run with a whitelist:
 357                         #   Rerun, using a whitelist consisting of the previous whitelist minus successful testcases.
 358
 359                         crashed_at = attemptFirstLine(env[CURRENT_TEST_KEY])
 360                         failed_tests = attemptLines(env[FAILED_TEST_KEY])
 361                         ran_tests = attemptLines(env[RAN_TEST_KEY])
 362                         wrote_xml = attemptLines(env[XML_LIST_KEY])
 363                         bailout = False
 364
 365                         if crashed_at or failed_tests or ran_tests: # Test suite follows the babysitter protocol
 366                                 log_value(SUPPORT_JSON, True)
 367
 368                         if not crashed_at and not ever_completed:  # The resume_after whitelist is only
 369                                 resume_after = []                      # used before the first noncrashing run
 370                                 ever_completed = True
 371
 372                         if timed_out:       # Currently no retries after timeout
 373                                 bailout = True
 374                                 code = 124      # See GNU timeout manpage
 375
 376                         if code or crashed_at: # Process failures
 377                                 # Handle crash failures
 378                                 if crashed_at and not timed_out:
 379                                         log_test(crashed_at, TEST_CRASH_FAILURES, add=1)
 380                                         if not crash_resuming:
 381                                                 bailout = True
 382
 383                                         if failure_may_retry(crashed_at):
 384                                                 if ever_completed:           # Rerun with whitelist next time
 385                                                         for test in retry_these: # Prepopulate with last whitelist minus run testcases
 386                                                                 if test == crashed_at or test not in ran_tests: # (plus crashed testcase)
 387                                                                         retry_next.append(test)
 388                                                 else:                            # Rerun with blacklist next time
 389                                                         for test in ran_tests:       # Add testcases we just ran to blacklist
 390                                                                 if test != crashed_at:   # (except for the crashed testcase)
 391                                                                         resume_after.append(test)
 392                                         else:
 393                                                 bailout = True
 394
 395                                 # Handle reported failures
 396                                 for test in failed_tests:
 397                                         log_test(test, TEST_FAILURES, add=1)
 398                                         if failure_may_retry(test):
 399                                                 retry_next.append(test)
 400                                         else:
 401                                                 bailout = True
 402
 403                                 # Report human-readable failures for stdout log.
 404                                 message = "%s:" % (scriptname)
 405                                 if timed_out:
 406                                         message += " Saw timeout in test case %s (never allowed)." % (crashed_at)
 407                                 elif crashed_at:
 408                                         message += " Saw crash in test case %s." % (failure_annotate(crashed_at))
 409                                 if failed_tests:
 410                                         message += " Saw test failure in test case%s %s." % (pluralize(failed_tests), "; ".join(map(failure_annotate, failed_tests)))
 411                                 if not (timed_out or crashed_at or failed_tests):
 412                                         message += " Test suite terminated with code %d, " % (code)
 413                                         if log[SUPPORT_JSON]:
 414                                                 message += "but failure did not occur during a test case. Halting."
 415                                         elif xml_list or wrote_xml:
 416                                                 message += "will extract test results from XML. Halting."
 417                                         else:
 418                                                 message += "and suite cannot report test case data. Halting."
 419                                 elif bailout:
 420                                         message += " Will halt testing."
 421                                 print(message)
 422
 423                         if not log[SUPPORT_JSON]:
 424                                 for xml in (xml_list + list(wrote_xml)):
 425                                         verbose_print("Will attempt to load XML from %s" % (xml))
 426                                         try:
 427                                                 data = xmlparse(xml).documentElement
 428                                                 if data.nodeName != 'test-results':
 429                                                         raise ValueError("Toplevel element was not <test-results />")
 430
 431                                                 log_value(LOADED_XML_JSON, True)
 432
 433                                                 search = [data]
 434                                                 while search:
 435                                                         nextsearch = []
 436                                                         for node in search:
 437                                                                 for child in node.childNodes:
 438                                                                         if child.nodeName == 'test-suite' or child.nodeName == 'results':
 439                                                                                 nextsearch.append(child) # Descend
 440                                                                         elif child.nodeName == 'test-case':
 441                                                                                 name = child.getAttribute("name")
 442                                                                                 if child.getAttribute("executed") == "True" and child.getAttribute("success") != "True":
 443                                                                                         log_test(name, TEST_FAILURES, add=1)
 444
 445                                                         search = nextsearch
 446                                         except Exception as e:
 447                                                 print("Could not load XML file %s. Reason: %s" % (xml, e))
 448                                         data
 449
 450                         if bailout or not (resume_after or retry_next): # If not retrying
 451                                 return code
 452
 453                         # If we got here, a retry is occurring
 454                         retry_these = retry_next
 455
 456                         # Report human-readable retry notice for stdout log.
 457                         message = "%s: Will rerun test suite" % (scriptname)
 458                         if log[COUNT_JSON] > 1:
 459                                 message += " (iteration #%d)" % (log[COUNT_JSON])
 460                         if resume_after:
 461                                 message += ", resuming at crashed testcase %s." % (crashed_at)
 462                         else:
 463                                 message += ", running previously failed testcase%s: %s." % (pluralize(retry_these), "; ".join(retry_these))
 464                         print(message)
 465         finally:
 466                 # Emergency: Ensure command does not outlive this script
 467                 if proc is not None and not died_politely:
 468                         send_signal(proc, signal.SIGKILL)
 469
 470                 # Write out logs
 471                 log_value(FINAL_CODE_JSON, "EXCEPTION" if code is None else code)
 472                 if logging:
 473                         with open(logfile, "a") as f:
 474                                 f.write(json.dumps(log) + os.linesep)
 475
 476 sys.exit( run() )