#!/usr/bin/env python
# Script to analyze code and arrange ld sections.
#
# Copyright (C) 2008-2010  Kevin O'Connor <kevin@koconnor.net>
#
# This file may be distributed under the terms of the GNU GPLv3 license.

import sys

# LD script headers/trailers
# COMMONHEADER is the text placed at the start of every generated
# linker script: it selects the i386 ELF output format/architecture and
# opens the SECTIONS block.
COMMONHEADER = """
/* DO NOT EDIT!  This is an autogenerated file.  See tools/layoutrom.py. */
OUTPUT_FORMAT("elf32-i386")
OUTPUT_ARCH("i386")
SECTIONS
{
"""
# COMMONTRAILER is the text placed after the output section of every
# generated linker script: it adds a /DISCARD/ rule for the listed
# regular sections and closes the SECTIONS block opened by COMMONHEADER.
COMMONTRAILER = """

        /* Discard regular data sections to force a link error if
         * code attempts to access data not marked with VAR16 (or other
         * appropriate macro)
         */
        /DISCARD/ : {
                *(.text*) *(.data*) *(.bss*) *(.rodata*)
                *(COMMON) *(.discard*) *(.eh_frame)
                }
}
"""


######################################################################
# Determine section locations
######################################################################

# Round 'pos' up to the next multiple of 'alignbytes'.  The bit masking
# used here only gives a multiple when 'alignbytes' is a power of two.
def alignpos(pos, alignbytes):
    # -alignbytes has the same bit pattern as ~(alignbytes - 1)
    return (pos + alignbytes - 1) & -alignbytes

# Determine the final addresses for a list of sections that end at an
# address.
#
# The sections are packed in list order (each one aligned to its own
# 'align') so that the group ends at or before 'endaddr'.  The start of
# the group is rounded down to the largest alignment found ('minalign'
# gives a lower bound for that alignment).  Every section gets its
# 'finalloc' attribute set; the start address of the group is returned.
def setSectionsStart(sections, endaddr, minalign=1):
    totspace = 0
    for section in sections:
        if section.align > minalign:
            minalign = section.align
        totspace = alignpos(totspace, section.align) + section.size
    # Use floor division: plain '/' is true division on Python 3 and
    # would produce an unaligned floating point address.
    startaddr = (endaddr - totspace) // minalign * minalign
    curaddr = startaddr
    for section in sections:
        curaddr = alignpos(curaddr, section.align)
        section.finalloc = curaddr
        curaddr += section.size
    return startaddr

# The 16bit code can't exceed 64K of space.
# BUILD_BIOS_ADDR is added to 16bit/32seg section offsets to form the
# addresses used from the 32bit flat code.  BUILD_BIOS_SIZE (64K) is
# the end offset of the area the 16bit code is placed in.
BUILD_BIOS_ADDR = 0xf0000
BUILD_BIOS_SIZE = 0x10000

# Layout the 16bit code.  This ensures sections with fixed offset
# requirements are placed in the correct location.  It also places the
# 16bit code as high as possible in the f-segment.
#
# 'sections' holds the candidate fixed sections; those named
# '.fixedaddr.<hexaddr>' are pinned to <hexaddr>.  'fillsections' are
# sections that may be placed in the gaps between the fixed sections.
# Every placed section gets its 'finalloc' attribute set.  Returns the
# address of the lowest fixed section.  Exits the program if a fixed
# section has an alignment other than 1 or if there are no fixed
# sections at all.
def fitSections(sections, fillsections):
    # fixedsections = [(addr, section), ...]
    fixedsections = []
    for section in sections:
        if section.name.startswith('.fixedaddr.'):
            addr = int(section.name[11:], 16)
            section.finalloc = addr
            fixedsections.append((addr, section))
            if section.align != 1:
                print("Error: Fixed section %s has non-zero alignment (%d)" % (
                    section.name, section.align))
                sys.exit(1)
    if not fixedsections:
        print("Error: No fixed address sections found")
        sys.exit(1)
    # Sort on the address only - comparing the section objects on a tie
    # is arbitrary on Python 2 and a TypeError on Python 3.
    fixedsections.sort(key=lambda item: item[0])
    firstfixed = fixedsections[0][0]

    # Find freespace in fixed address area
    # fixedAddr = [(freespace, section), ...]
    fixedAddr = []
    for i, (addr, section) in enumerate(fixedsections):
        if i == len(fixedsections) - 1:
            nextaddr = BUILD_BIOS_SIZE
        else:
            nextaddr = fixedsections[i+1][0]
        avail = nextaddr - addr - section.size
        fixedAddr.append((avail, section))
    # Smallest gaps first; the stable sort keeps address order on ties.
    fixedAddr.sort(key=lambda item: item[0])

    # Attempt to fit other sections into fixed area
    canrelocate = sorted(
        fillsections,
        key=lambda section: (section.size, section.align, section.name))
    totalused = 0
    for freespace, fixedsection in fixedAddr:
        addpos = fixedsection.finalloc + fixedsection.size
        totalused += fixedsection.size
        nextfixedaddr = addpos + freespace
        while 1:
            # Pick the largest section that still fits in the gap.
            canfit = None
            for fitsection in canrelocate:
                if addpos + fitsection.size > nextfixedaddr:
                    # Can't fit and nothing else will fit.
                    break
                fitaddr = alignpos(addpos, fitsection.align)
                fitnextaddr = fitaddr + fitsection.size
                if fitnextaddr > nextfixedaddr:
                    # This item can't fit.
                    continue
                canfit = (fitaddr, fitnextaddr, fitsection)
            if canfit is None:
                break
            # Found a section that can fit.
            fitaddr, fitnextaddr, fitsection = canfit
            canrelocate.remove(fitsection)
            # Record the aligned address - the space was reserved from
            # there, so using the unaligned 'addpos' would misreport the
            # location of a section with an alignment requirement.
            fitsection.finalloc = fitaddr
            addpos = fitnextaddr
            totalused += fitsection.size

    # Report stats
    total = BUILD_BIOS_SIZE-firstfixed
    slack = total - totalused
    print("Fixed space: 0x%x-0x%x  total: %d  slack: %d"
          "  Percent slack: %.1f%%" % (
              firstfixed, BUILD_BIOS_SIZE, total, slack,
              (float(slack) / total) * 100.0))

    return firstfixed

# Return, in their original order, the sections that belong to
# 'category' and whose name begins with 'prefix'.
def getSectionsPrefix(sections, category, prefix):
    matches = []
    for section in sections:
        if section.category != category:
            continue
        if section.name.startswith(prefix):
            matches.append(section)
    return matches

# Determine the final location of every section.  The 16bit sections are
# placed first (fixed sections, then the remainder directly below them),
# followed in descending address order by the 32seg, 32flat runtime and
# 32flat init sections.  Each section's 'finalloc' is set by the helper
# functions called here; size statistics are printed at the end.
def doLayout(sections):
    # Determine 16bit positions
    textsections = getSectionsPrefix(sections, '16', '.text.')
    rodatasections = (
        getSectionsPrefix(sections, '16', '.rodata.str1.1')
        + getSectionsPrefix(sections, '16', '.rodata.__func__.')
        + getSectionsPrefix(sections, '16', '.rodata.__PRETTY_FUNCTION__.'))
    datasections = getSectionsPrefix(sections, '16', '.data16.')
    fixedsections = getSectionsPrefix(sections, '16', '.fixedaddr.')

    firstfixed = fitSections(fixedsections, textsections)
    # Sections not placed between the fixed sections go just below them
    remsections = [s for s in textsections+rodatasections+datasections
                   if s.finalloc is None]
    code16_start = setSectionsStart(remsections, firstfixed)

    # Determine 32seg positions
    textsections = getSectionsPrefix(sections, '32seg', '.text.')
    rodatasections = (
        getSectionsPrefix(sections, '32seg', '.rodata.str1.1')
        + getSectionsPrefix(sections, '32seg', '.rodata.__func__.')
        + getSectionsPrefix(sections, '32seg', '.rodata.__PRETTY_FUNCTION__.'))
    datasections = getSectionsPrefix(sections, '32seg', '.data32seg.')

    code32seg_start = setSectionsStart(
        textsections + rodatasections + datasections, code16_start)

    # Determine 32flat runtime positions
    textsections = getSectionsPrefix(sections, '32flat', '.text.')
    rodatasections = getSectionsPrefix(sections, '32flat', '.rodata')
    datasections = getSectionsPrefix(sections, '32flat', '.data.')
    bsssections = getSectionsPrefix(sections, '32flat', '.bss.')

    code32flat_start = setSectionsStart(
        textsections + rodatasections + datasections + bsssections
        , code32seg_start + BUILD_BIOS_ADDR, 16)

    # Determine 32flat init positions
    textsections = getSectionsPrefix(sections, '32init', '.text.')
    rodatasections = getSectionsPrefix(sections, '32init', '.rodata')
    datasections = getSectionsPrefix(sections, '32init', '.data.')
    bsssections = getSectionsPrefix(sections, '32init', '.bss.')

    code32init_start = setSectionsStart(
        textsections + rodatasections + datasections + bsssections
        , code32flat_start, 16)

    # Print statistics (parenthesized so the code parses on both
    # Python 2 and Python 3)
    size16 = BUILD_BIOS_SIZE - code16_start
    size32seg = code16_start - code32seg_start
    size32flat = code32seg_start + BUILD_BIOS_ADDR - code32flat_start
    size32init = code32flat_start - code32init_start
    print("16bit size:           %d" % size16)
    print("32bit segmented size: %d" % size32seg)
    print("32bit flat size:      %d" % size32flat)
    print("32bit flat init size: %d" % size32init)


######################################################################
# Linker script output
######################################################################

# Build the linker script symbol assignments ("name = 0xaddr ;") for
# the symbols referenced by relocations in 'sections'.  Symbols without
# a section, symbols defined under the same name in the same input file,
# and symbols already emitted are skipped.
def outXRefs(sections):
    seen = {}
    lines = []
    for section in sections:
        for reloc in section.relocs:
            symname = reloc.symbolname
            symbol = reloc.symbol
            target = symbol.section
            if target is None:
                continue
            if target.fileid == section.fileid and symbol.name == symname:
                continue
            if symname in seen:
                continue
            seen[symname] = 1
            addr = target.finalloc + symbol.offset
            # 32flat code refers to 16bit/32seg symbols by full address
            if (section.fileid == '32flat'
                and target.fileid in ('16', '32seg')):
                addr += BUILD_BIOS_ADDR
            lines.append("%s = 0x%x ;\n" % (symname, addr))
    return "".join(lines)

# Build the linker script lines that place each of 'sections' at its
# final location, expressed as an offset from the symbol 'startsym'.
# A "_rodata" symbol is defined just before the '.rodata.str1.1' section.
def outRelSections(sections, startsym):
    lines = []
    for section in sections:
        lines.append(". = ( 0x%x - %s ) ;\n" % (section.finalloc, startsym))
        if section.name == '.rodata.str1.1':
            lines.append("_rodata = . ;\n")
        lines.append("*(%s)\n" % (section.name,))
    return "".join(lines)

# Return the sections that came from input file 'fileid', sorted by
# final location, along with the lowest final location among them
# ('defaddr' if there are no such sections).
def getSectionsFile(sections, fileid, defaddr=0):
    sections = [section for section in sections if section.fileid == fileid]
    # Sort on the address alone.  Sorting (addr, section) tuples compares
    # the section objects whenever two addresses are equal, which is
    # arbitrary on Python 2 and raises TypeError on Python 3.  The sort
    # is stable, so equal addresses keep their input order.
    sections.sort(key=lambda section: section.finalloc)
    pos = defaddr
    if sections:
        pos = sections[0].finalloc
    return sections, pos

# Write 'text' to the file 'filename', closing the file even if the
# write fails.  Text mode is used because the scripts are built as str.
def _writeScript(filename, text):
    output = open(filename, 'w')
    try:
        output.write(text)
    finally:
        output.close()

# Write the three linker scripts (16bit, 32seg and 32flat) for the
# already laid out 'sections'.
#   entrysym  - symbol used for ENTRY() in the 32flat script
#   genreloc  - if true, emit the relocation tables from genRelocs() and
#               lower code32flat_start by the size of those tables
#   out16, out32seg, out32flat - names of the script files to write
def writeLinkerScripts(sections, entrysym, genreloc, out16, out32seg, out32flat):
    # Write 16bit linker script
    sections16, code16_start = getSectionsFile(sections, '16')
    _writeScript(out16, COMMONHEADER + outXRefs(sections16) + """
    code16_start = 0x%x ;
    .text16 code16_start : {
""" % (code16_start)
                 + outRelSections(sections16, 'code16_start')
                 + """
    }
"""
                 + COMMONTRAILER)

    # Write 32seg linker script
    sections32seg, code32seg_start = getSectionsFile(
        sections, '32seg', code16_start)
    _writeScript(out32seg, COMMONHEADER + outXRefs(sections32seg) + """
    code32seg_start = 0x%x ;
    .text32seg code32seg_start : {
""" % (code32seg_start)
                 + outRelSections(sections32seg, 'code32seg_start')
                 + """
    }
"""
                 + COMMONTRAILER)

    # Write 32flat linker script
    sections32flat, code32flat_start = getSectionsFile(
        sections, '32flat', code32seg_start)
    relocstr = ""
    relocminalign = 0
    if genreloc:
        # Generate relocations
        relocstr, size, relocminalign = genRelocs(sections)
        # The relocation tables are emitted ahead of the init code
        code32flat_start -= size
    _writeScript(out32flat, COMMONHEADER
                 + outXRefs(sections32flat) + """
    %s = 0x%x ;
    _reloc_min_align = 0x%x ;
    code32flat_start = 0x%x ;
    .text code32flat_start : {
""" % (entrysym.name,
       entrysym.section.finalloc + entrysym.offset + BUILD_BIOS_ADDR,
       relocminalign, code32flat_start)
                 + relocstr
                 + """
        code32init_start = ABSOLUTE(.) ;
"""
                 + outRelSections(getSectionsPrefix(sections32flat, '32init', '')
                                  , 'code32flat_start')
                 + """
        code32init_end = ABSOLUTE(.) ;
"""
                 + outRelSections(getSectionsPrefix(sections32flat, '32flat', '')
                                  , 'code32flat_start')
                 + """
        . = ( 0x%x - code32flat_start ) ;
        *(.text32seg)
        . = ( 0x%x - code32flat_start ) ;
        *(.text16)
        code32flat_end = ABSOLUTE(.) ;
    } :text
""" % (code32seg_start + BUILD_BIOS_ADDR, code16_start + BUILD_BIOS_ADDR)
                 + COMMONTRAILER
                 + """
ENTRY(%s)
PHDRS
{
        text PT_LOAD AT ( code32flat_start ) ;
}
""" % (entrysym.name,))


######################################################################
# Detection of init code
######################################################################

# Determine init section relocations.
#
# Returns (script text, table size in bytes, minimum alignment).  Three
# tables of 4 byte entries are emitted:
#   abs  - R_386_32 relocs inside init code that refer to init code
#   rel  - R_386_PC32 relocs inside init code that refer to other code
#   init - relocs in non-init code that refer to init code
# The alignment is the largest 'align' of any '32init' section, but is
# never less than 16.
def genRelocs(sections):
    minalign = 16
    absrelocs, relrelocs, initrelocs = [], [], []
    for section in sections:
        frominit = section.category == '32init'
        if frominit and section.align > minalign:
            minalign = section.align
        for reloc in section.relocs:
            target = reloc.symbol.section
            if target is None:
                continue
            relocpos = section.finalloc + reloc.offset
            toinit = target.category == '32init'
            if frominit:
                if toinit and reloc.type == 'R_386_32':
                    # Absolute relocation
                    absrelocs.append(relocpos)
                elif not toinit and reloc.type == 'R_386_PC32':
                    # Relative relocation
                    relrelocs.append(relocpos)
            elif toinit:
                # Relocation to the init section
                if section.fileid in ('16', '32seg'):
                    relocpos += BUILD_BIOS_ADDR
                initrelocs.append(relocpos)

    pieces = ["        _reloc_abs_start = ABSOLUTE(.) ;\n"]
    for pos in sorted(absrelocs):
        pieces.append("LONG(0x%x - code32init_start)\n" % (pos,))
    pieces.append("        _reloc_abs_end = ABSOLUTE(.) ;\n")
    pieces.append("        _reloc_rel_start = ABSOLUTE(.) ;\n")
    for pos in sorted(relrelocs):
        pieces.append("LONG(0x%x - code32init_start)\n" % (pos,))
    pieces.append("        _reloc_rel_end = ABSOLUTE(.) ;\n")
    pieces.append("        _reloc_init_start = ABSOLUTE(.) ;\n")
    for pos in sorted(initrelocs):
        pieces.append("LONG(0x%x - code32flat_start)\n" % (pos,))
    pieces.append("        _reloc_init_end = ABSOLUTE(.) ;\n")

    count = len(absrelocs) + len(relrelocs) + len(initrelocs)
    return "".join(pieces), count * 4, minalign

# Set the category of 'section' to '32flat' (runtime code) and do the
# same for every section it references.  Only kept, not yet categorized
# sections from the '32flat' input whose name lacks '.init.' are marked.
def markRuntime(section, sections):
    if section is None or not section.keep:
        return
    if section.category is not None:
        # Already categorized (this also ends reference cycles)
        return
    if '.init.' in section.name:
        return
    if section.fileid != '32flat':
        return
    section.category = '32flat'
    # Recursively mark all sections this section points to
    for reloc in section.relocs:
        markRuntime(reloc.symbol.section, sections)

# Assign a category to every section.  Sections reachable from a
# '.runtime.' or '.export.' section are handed to markRuntime().  Any
# section still without a category becomes '32init' if it came from the
# '32flat' input, otherwise it takes the name of its input file.
def findInit(sections):
    # Recursively find and mark all "runtime" sections.
    for section in sections:
        name = section.name
        if '.runtime.' in name or '.export.' in name:
            markRuntime(section, sections)
    # Everything left uncategorized is init code or non-32flat code.
    for section in sections:
        if section.category is None:
            category = section.fileid
            if category == '32flat':
                category = '32init'
            section.category = category


######################################################################
# Section garbage collection
######################################################################

# Symbol name prefixes that request a C function from a specific input
# file, paired with the index of that file's entry in 'infos'.
CFUNCPREFIX = [('_cfunc16_', 0), ('_cfunc32seg_', 1), ('_cfunc32flat_', 2)]

# Find and keep the section associated with a symbol (if available).
# The symbol named by 'reloc' is looked up in infos[pos].  On success
# reloc.symbol is set, the symbol's section is kept, and 0 is returned;
# otherwise -1 is returned.  'isxref' is true when looking in a file
# other than the one containing the reloc.
def keepsymbol(reloc, infos, pos, isxref):
    symbolname = reloc.symbolname
    mustbecfunc = 0
    for symprefix, needpos in CFUNCPREFIX:
        if not symbolname.startswith(symprefix):
            continue
        if needpos != pos:
            # Prefixed symbols only resolve in their own file
            return -1
        symbolname = symbolname[len(symprefix):]
        mustbecfunc = 1
        break

    symbol = infos[pos][1].get(symbolname)
    if symbol is None:
        return -1
    target = symbol.section
    if target is None or target.name.startswith('.discard.'):
        return -1

    isdestcfunc = (target.name.startswith('.text.')
                   and not target.name.startswith('.text.asm.'))
    if mustbecfunc:
        if not isdestcfunc:
            return -1
    elif isdestcfunc and isxref:
        # C functions in another file need an explicit prefix
        return -1

    reloc.symbol = symbol
    keepsection(target, infos, pos)
    return 0

# Note required section, and recursively set all referenced sections
# as required.  'pos' is the index in 'infos' of the input file that
# 'section' came from.
def keepsection(section, infos, pos=0):
    if section.keep:
        # Already kept - nothing to do.
        return
    section.keep = 1
    # Look for each referenced symbol in the section's own file first.
    # If it is not there it may be a cross 16/32 reference, so the two
    # other files are then tried in turn.
    candidates = ((pos, 0), ((pos+1)%3, 1), ((pos+2)%3, 1))
    # Keep all sections that this section points to
    for reloc in section.relocs:
        for trypos, isxref in candidates:
            if not keepsymbol(reloc, infos, trypos, isxref):
                # Found and kept - no need to look further
                break

# Determine which sections are actually referenced and need to be
# placed into the output file.  Each infoX argument is a
# (sections, symbols) pair; the kept sections of all three are returned
# in 16bit, 32seg, 32flat order.
def gc(info16, info32seg, info32flat):
    # infos = ((sections16, symbols16), (sect32seg, sym32seg)
    #          , (sect32flat, sym32flat))
    infos = (info16, info32seg, info32flat)
    # Start by keeping sections that are globally visible.
    for section in info16[0]:
        name = section.name
        if name.startswith('.fixedaddr.') or '.export.' in name:
            keepsection(section, infos)
    kept = []
    for section in info16[0]+info32seg[0]+info32flat[0]:
        if section.keep:
            kept.append(section)
    return kept


######################################################################
# Startup and input parsing
######################################################################

# Information on one section read from the objdump output.  All
# attributes default to None until they are filled in.
class Section:
    # 'align' is the attribute the parser sets and the layout code reads.
    name = size = align = fileid = relocs = None
    finalloc = category = keep = None
    # Earlier spelling of 'align' that nothing in this file assigns;
    # retained so existing users of the attribute keep working.
    alignment = None
# Information on one relocation record read from the objdump output.
# All attributes default to None until they are filled in.
class Reloc:
    offset = None
    type = None
    symbolname = None
    symbol = None
# Information on one symbol read from the objdump output.  All
# attributes default to None until they are filled in.  'size' is
# declared here because the parser assigns it on every symbol.
class Symbol:
    name = offset = size = section = None

# Read in output from objdump.
#
# 'file' is an open file object with the objdump text and 'fileid' is
# stored on every section found.  Returns (sections, symbols) where
# 'sections' is a list of Section objects and 'symbols' maps a symbol
# name to its Symbol.  Lines that do not have the expected layout are
# silently skipped.
def parseObjDump(file, fileid):
    # sections = [section, ...]
    sections = []
    # sectionmap[sectionname] = section
    sectionmap = {}
    # symbols[symbolname] = symbol
    symbols = {}
    relocheader = 'RELOCATION RECORDS FOR ['

    state = None
    for line in file.readlines():
        line = line.rstrip()

        # Header lines select which part of the dump is being read
        if line == 'Sections:':
            state = 'section'
        elif line == 'SYMBOL TABLE:':
            state = 'symbol'
        elif line.startswith(relocheader):
            # Strip the header text and the trailing "]:"
            sectionname = line[len(relocheader):-2]
            if sectionname.startswith('.debug_'):
                # Skip debugging sections (to reduce parsing time)
                state = None
            else:
                state = 'reloc'
                relocsection = sectionmap[sectionname]

        elif state == 'section':
            # idx name size vma lma fileoff align
            fields = line.split()
            if len(fields) != 7:
                continue
            name, hexsize, align = fields[1], fields[2], fields[6]
            if align[:3] != '2**':
                continue
            try:
                size = int(hexsize, 16)
                alignbytes = 2**int(align[3:])
            except ValueError:
                continue
            section = Section()
            section.name = name
            section.size = size
            section.align = alignbytes
            section.fileid = fileid
            section.relocs = []
            sections.append(section)
            sectionmap[name] = section

        elif state == 'symbol':
            # The text after the flag columns is: sectionname size name
            fields = line[17:].split()
            if len(fields) != 3:
                continue
            sectionname, hexsize, name = fields
            try:
                size = int(hexsize, 16)
                offset = int(line[:8], 16)
            except ValueError:
                continue
            symbol = Symbol()
            symbol.size = size
            symbol.offset = offset
            symbol.name = name
            symbol.section = sectionmap.get(sectionname)
            symbols[name] = symbol

        elif state == 'reloc':
            # offset type symbolname
            fields = line.split()
            if len(fields) != 3:
                continue
            hexoff, reloctype, symbolname = fields
            try:
                offset = int(hexoff, 16)
            except ValueError:
                continue
            reloc = Reloc()
            reloc.offset = offset
            reloc.type = reloctype
            reloc.symbolname = symbolname
            reloc.symbol = symbols.get(symbolname)
            if reloc.symbol is None:
                # Some binutils (2.20.1) give section name instead
                # of a symbol - create a dummy symbol.
                dummy = Symbol()
                dummy.size = 0
                dummy.offset = 0
                dummy.name = symbolname
                dummy.section = sectionmap.get(symbolname)
                symbols[symbolname] = dummy
                reloc.symbol = dummy
            relocsection.relocs.append(reloc)
    return sections, symbols

# Parse the objdump output stored in 'filename', closing the file even
# if parsing fails.  Returns the (sections, symbols) pair.
def _readObjDump(filename, fileid):
    infile = open(filename, 'r')
    try:
        return parseObjDump(infile, fileid)
    finally:
        infile.close()

# Program entry point.  Expects six command line arguments: the three
# objdump output files (16bit, 32seg, 32flat) followed by the names of
# the three linker scripts to write.  Exits with status 1 on a wrong
# argument count or if the 'entry_elf' symbol can't be located.
def main():
    # Get output name
    args = sys.argv[1:]
    if len(args) != 6:
        sys.stderr.write("Usage: %s <in16> <in32seg> <in32flat>"
                         " <out16> <out32seg> <out32flat>\n" % (sys.argv[0],))
        sys.exit(1)
    in16, in32seg, in32flat, out16, out32seg, out32flat = args

    # Read in the objdump information
    # infoX = (sections, symbols)
    info16 = _readObjDump(in16, '16')
    info32seg = _readObjDump(in32seg, '32seg')
    info32flat = _readObjDump(in32flat, '32flat')

    # Figure out which sections to keep.
    sections = gc(info16, info32seg, info32flat)

    # Separate 32bit flat into runtime and init parts
    findInit(sections)

    # Determine the final memory locations of each kept section.
    doLayout(sections)

    # Write out linker script files.
    entrysym = info16[1].get('entry_elf')
    if entrysym is None or entrysym.section is None:
        sys.stderr.write("Error: symbol 'entry_elf' not found in %s\n"
                         % (in16,))
        sys.exit(1)
    genreloc = '_reloc_abs_start' in info32flat[1]
    writeLinkerScripts(sections, entrysym, genreloc, out16, out32seg, out32flat)

# Only run the layout when this file is executed as a script, not when
# it is imported as a module.
if __name__ == '__main__':
    main()
