#!/usr/bin/env python
# Script to analyze code and arrange ld sections.
#
# Copyright (C) 2008  Kevin O'Connor <kevin@koconnor.net>
#
# This file may be distributed under the terms of the GNU GPLv3 license.

import sys

# Round 'pos' up to the next multiple of 'alignbytes'.  A 'pos' that
# is already aligned is returned unchanged.  The mask arithmetic is
# only meaningful when 'alignbytes' is a power of two.
def alignpos(pos, alignbytes):
    lowbits = alignbytes - 1
    bumped = pos + lowbits
    return bumped & ~lowbits

# Text placed at the start and end of every generated LD script.  The
# header opens the SECTIONS block; the trailer closes it.
COMMONHEADER = (
    '\n'
    '/* DO NOT EDIT!  This is an autogenerated file.'
    '  See tools/layoutrom.py. */\n'
    'OUTPUT_FORMAT("elf32-i386")\n'
    'OUTPUT_ARCH("i386")\n'
    'SECTIONS\n'
    '{\n'
)
COMMONTRAILER = '\n}\n'


######################################################################
# 16bit fixed address section fitting
######################################################################

# Get the maximum start position for a list of sections that end at an
# address.
#
# sections: iterable of (size, align, name) tuples, packed in order.
# endaddr:  address the packed sections must not extend past.
# minalign: minimum alignment of the returned start position; it is
#           raised to the largest alignment found in 'sections'.
#
# Returns the highest position that is a multiple of the resulting
# alignment and still leaves room for every section below 'endaddr'.
def getSectionsStart(sections, endaddr, minalign=1):
    totspace = 0
    for size, align, name in sections:
        minalign = max(minalign, align)
        totspace = alignpos(totspace, align) + size
    # Round down with floor division.  A plain '/' only truncates under
    # Python 2 integer semantics; under Python 3 (or with
    # "from __future__ import division") it produces a float and no
    # rounding happens at all.
    return (endaddr - totspace) // minalign * minalign

# Emit one LD input-section pattern ("*(name)") per entry of
# 'sections' to 'file'.  Only the name of each (size, align, name)
# tuple is used.
def outSections(file, sections):
    for _size, _align, secname in sections:
        pattern = "*(%s)\n" % (secname,)
        file.write(pattern)

# The 16bit code can't exceed 64K of space, so offset 0x10000 marks
# the end of the area available to the 16bit layout.
MAXPOS = 0x10000

# Layout the 16bit code.  This ensures sections with fixed offset
# requirements are placed in the correct location.  It also places the
# 16bit code as high as possible in the f-segment.
#
# sections: list of (size, align, name) tuples for the 16bit object.
# outname:  filename of the LD script to write.
#
# Returns the start position chosen for the .data16 output section.
# Exits the program with status 1 if a fixed address section has an
# alignment other than 1, or if there are no fixed address sections.
def doLayout16(sections, outname):
    textsections = []
    rodatasections = []
    datasections = []
    # fixedsections = [(addr, sectioninfo, extrasectionslist), ...]
    fixedsections = []
    # canrelocate = [(sectioninfo, list), ...]
    canrelocate = []

    # Find desired sections.  Only '.text.' sections are offered for
    # relocation into the gaps of the fixed area; rodata and data16
    # sections always stay in their regular output section.
    for section in sections:
        size, align, name = section
        if name.startswith('.fixedaddr.'):
            # The text after the prefix is the hex address to place at.
            addr = int(name[11:], 16)
            fixedsections.append((addr, section, []))
            if align != 1:
                print("Error: Fixed section %s has non-zero alignment (%d)" % (
                    name, align))
                sys.exit(1)
        if name.startswith('.text.'):
            textsections.append(section)
            canrelocate.append((section, textsections))
        if name.startswith('.rodata.__func__.') or name == '.rodata.str1.1':
            rodatasections.append(section)
        if name.startswith('.data16.'):
            datasections.append(section)

    # Everything below is positioned relative to the lowest fixed
    # section, so fail with a clear message instead of an IndexError.
    if not fixedsections:
        print("Error: No fixed address sections found")
        sys.exit(1)

    # Find freespace in fixed address area
    fixedsections.sort()
    # fixedAddr = [(freespace, sectioninfo), ...]
    fixedAddr = []
    for i, fixedsectioninfo in enumerate(fixedsections):
        addr, section, extrasectionslist = fixedsectioninfo
        if i + 1 < len(fixedsections):
            nextaddr = fixedsections[i+1][0]
        else:
            # The last fixed section is bounded by the end of the area.
            nextaddr = MAXPOS
        avail = nextaddr - addr - section[0]
        fixedAddr.append((avail, fixedsectioninfo))

    # Attempt to fit other sections into fixed area.  Gaps are visited
    # from smallest to largest and candidates are ordered by size, so
    # the inner scan can stop at the first candidate that is too big.
    fixedAddr.sort()
    canrelocate.sort()
    totalused = 0
    for freespace, fixedsectioninfo in fixedAddr:
        fixedaddr, fixedsection, extrasections = fixedsectioninfo
        addpos = fixedaddr + fixedsection[0]
        totalused += fixedsection[0]
        nextfixedaddr = addpos + freespace
        while True:
            canfit = None
            for fixedaddrinfo in canrelocate:
                fitsection, inlist = fixedaddrinfo
                fitsize, fitalign, fitname = fitsection
                if addpos + fitsize > nextfixedaddr:
                    # Can't fit and nothing else will fit.
                    break
                fitnextaddr = alignpos(addpos, fitalign) + fitsize
                if fitnextaddr > nextfixedaddr:
                    # This item can't fit.
                    continue
                # Keep scanning: the last candidate that fits (the
                # largest one) is the one that gets placed.
                canfit = (fitnextaddr, fixedaddrinfo)
            if canfit is None:
                break
            # Found a section that can fit.  Move it from its regular
            # list to the extra list of this fixed section.
            fitnextaddr, fixedaddrinfo = canfit
            canrelocate.remove(fixedaddrinfo)
            fitsection, inlist = fixedaddrinfo
            inlist.remove(fitsection)
            extrasections.append(fitsection)
            addpos = fitnextaddr
            totalused += fitsection[0]
    firstfixed = fixedsections[0][0]

    # Report stats
    total = MAXPOS-firstfixed
    slack = total - totalused
    print("Fixed space: 0x%x-0x%x  total: %d  slack: %d"
          "  Percent slack: %.1f%%" % (
              firstfixed, MAXPOS, total, slack,
              (float(slack) / total) * 100.0))

    # Find start positions
    text16_start = getSectionsStart(textsections, firstfixed)
    data16_start = getSectionsStart(rodatasections + datasections, text16_start)

    # Only text is written, so open in text mode (Python 3 refuses to
    # write str to a binary-mode file).  Always close the file so the
    # script is fully flushed even if a write fails.
    output = open(outname, 'w')
    try:
        # Write header and regular sections
        output.write(COMMONHEADER + """
        data16_start = 0x%x ;
        .data16 data16_start : {
                freespace_end = . ;
""" % data16_start)
        outSections(output, datasections)
        output.write("code16_rodata = . ;\n")
        outSections(output, rodatasections)
        output.write("""
        }

        text16_start = 0x%x ;
        .text16 text16_start : {
""" % text16_start)
        outSections(output, textsections)

        # Write fixed sections, each followed by the sections that
        # were fitted into the gap after it.
        for addr, section, extrasections in fixedsections:
            name = section[2]
            output.write(". = ( 0x%x - text16_start ) ;\n" % (addr,))
            output.write("*(%s)\n" % (name,))
            for extrasection in extrasections:
                output.write("*(%s)\n" % (extrasection[2],))

        # Write trailer
        output.write("""
                text16_end = ABSOLUTE(.) ;
        }

        /* Discard regular data sections to force a link error if
         * 16bit code attempts to access data not marked with VAR16
         */
        /DISCARD/ : { *(.text*) *(.rodata*) *(.data*) *(.bss*) *(COMMON) }
""" + COMMONTRAILER)
    finally:
        output.close()

    return data16_start


######################################################################
# 32bit section outputting
######################################################################

# Select the (size, align, name) entries of 'sections' whose name
# begins with 'prefix'.  Input order is kept and a new list of tuples
# is returned.
def getSectionsPrefix(sections, prefix):
    return [(size, align, secname)
            for size, align, secname in sections
            if secname.startswith(prefix)]

# Layout the 32bit code.  This places the code as high as possible.
#
# sections: list of (size, align, name) tuples for the 32bit object.
# outname:  filename of the LD script to write.
# start16:  start position of the 16bit data, as returned by
#           doLayout16(); the 32bit sections are placed to end at or
#           below it.
def doLayout32(sections, outname, start16):
    # NOTE(review): 0xf0000 looks like the base address of the
    # f-segment that the 16bit positions are relative to -- confirm.
    start16 += 0xf0000
    # Find sections to output
    textsections = getSectionsPrefix(sections, '.text.')
    rodatasections = getSectionsPrefix(sections, '.rodata')
    datasections = getSectionsPrefix(sections, '.data.')
    bsssections = getSectionsPrefix(sections, '.bss.')
    # The start address is aligned to at least 512 bytes.
    start32 = getSectionsStart(
        textsections + rodatasections + datasections + bsssections, start16, 512)

    # Write sections.  Only text is written, so open in text mode
    # (Python 3 refuses to write str to a binary-mode file), and always
    # close the file so the script is fully flushed.
    output = open(outname, 'w')
    try:
        output.write(COMMONHEADER + """
        .text32 0x%x : {
                code32_start = ABSOLUTE(.) ;
""" % start32)

        outSections(output, textsections)
        output.write("code32_rodata = . ;\n")
        outSections(output, rodatasections)
        outSections(output, datasections)
        outSections(output, bsssections)

        output.write("""
                freespace_start = . ;
                code32_end = ABSOLUTE(.) ;
        }
""" + COMMONTRAILER)
    finally:
        output.close()


######################################################################
# Section garbage collection
######################################################################

# Mark section 'name' as required, then recursively mark every section
# that it references through a relocation.
#
# 'pri' and 'alt' are (sections, symbols, relocs, keptnames) tuples.
# 'name' belongs to 'pri'; a referenced symbol is looked up in 'pri'
# first and, failing that, in 'alt' (with the roles swapped for the
# recursive call).  The 'keptnames' lists are extended in place.
def keepsection(name, pri, alt):
    keptnames = pri[3]
    if name in keptnames:
        # Seen before - its references were already followed.
        return
    keptnames.append(name)
    referenced = pri[2].get(name)
    if referenced is None:
        return
    prisymbols = pri[1]
    altsymbols = alt[1]
    nowhere = (None, None)
    for symbol in referenced:
        _addr, target = prisymbols.get(symbol, nowhere)
        if (target is not None and '*' not in target
                and not target.startswith('.discard.')):
            keepsection(target, pri, alt)
        else:
            # Not usable from the primary side - it may be a cross
            # 16/32 reference.
            _addr, target = altsymbols.get(symbol, nowhere)
            if target is not None and '*' not in target:
                keepsection(target, alt, pri)

# Work out which sections are actually referenced and therefore have
# to be placed into the output files.
#
# info16 / info32 are (sections, symbols, relocs) tuples as returned
# by parseObjDump().  Returns (sections16, sections32): the entries of
# each input section list that were found to be required, in their
# original order.
def gc(info16, info32):
    sects16, syms16, relocs16 = info16[0], info16[1], info16[2]
    sects32, syms32, relocs32 = info32[0], info32[1], info32[2]
    # state = (sections, symbols, relocs, keep sections)
    state16 = (sects16, syms16, relocs16, [])
    state32 = (sects32, syms32, relocs32, [])
    # Seed the search with the 16bit sections that are globally
    # visible; everything else must be reachable from one of them.
    for _size, _align, secname in sects16:
        if secname.startswith('.fixedaddr.') or '.export.' in secname:
            keepsection(secname, state16, state32)
    kept16 = state16[3]
    kept32 = state32[3]
    # Return sections found.
    sections16 = [info for info in sects16 if info[2] in kept16]
    sections32 = [info for info in sects32 if info[2] in kept32]
    return sections16, sections32


######################################################################
# Startup and input parsing
######################################################################

# Read in output from objdump
#
# file: iterable of text lines (e.g. an open file) holding objdump
#       output with section headers, the symbol table and relocation
#       records.
#
# Returns (sections, symbols, relocs).  Lines that do not have the
# expected shape (column headers, flag lines, blank lines, ...) are
# skipped.
def parseObjDump(file):
    # sections = [(size, align, section), ...]
    sections = []
    # symbols[symbol] = (addr, section)
    symbols = {}
    # relocs[section] = [symbol, ...]
    relocs = {}

    state = None
    for line in file:
        line = line.rstrip()
        if line == 'Sections:':
            state = 'section'
            continue
        if line == 'SYMBOL TABLE:':
            state = 'symbol'
            continue
        if line[:24] == 'RELOCATION RECORDS FOR [':
            state = 'reloc'
            # Strip the trailing "]:" to get the section name.
            relocsection = line[24:-2]
            continue

        # In each state a malformed line shows up as a ValueError, from
        # either the tuple unpacking or int().  Only that exception is
        # treated as "not a record"; anything else is a real error and
        # is allowed to propagate.
        if state == 'section':
            try:
                idx, name, size, vma, lma, fileoff, align = line.split()
                if align[:3] != '2**':
                    continue
                sections.append((int(size, 16), 2**int(align[3:]), name))
            except ValueError:
                pass
            continue
        if state == 'symbol':
            try:
                section, off, symbol = line[17:].split()
                # Parsed only to reject lines whose second field is
                # not a hex number.
                int(off, 16)
                addr = int(line[:8], 16)
                symbols[symbol] = addr, section
            except ValueError:
                pass
            continue
        if state == 'reloc':
            try:
                off, reloctype, symbol = line.split()
                # Rejects the "OFFSET TYPE VALUE" column header line.
                int(off, 16)
                relocs.setdefault(relocsection, []).append(symbol)
            except ValueError:
                pass
    return sections, symbols, relocs

# Program entry point.  Expects four command-line arguments: the
# objdump output of the 16bit and 32bit objects, followed by the names
# of the 16bit and 32bit LD scripts to write.
def main():
    # Get input and output names
    if len(sys.argv) != 5:
        sys.stderr.write("Usage: %s <in16> <in32> <out16> <out32>\n"
                         % (sys.argv[0],))
        sys.exit(1)
    in16, in32, out16, out32 = sys.argv[1:]

    # The parser compares lines against str literals, so read the
    # inputs in text mode, and close each file once it is parsed.
    infile16 = open(in16, 'r')
    try:
        info16 = parseObjDump(infile16)
    finally:
        infile16.close()
    infile32 = open(in32, 'r')
    try:
        info32 = parseObjDump(infile32)
    finally:
        infile32.close()

    # Drop sections that nothing references.
    sections16, sections32 = gc(info16, info32)

    # The 32bit layout is placed relative to the 16bit start position.
    start16 = doLayout16(sections16, out16)
    doLayout32(sections32, out32, start16)

# Run the layout only when executed as a script, not when imported.
if __name__ == '__main__':
    main()