Standard section names sampling script

This tool computes the list of section names to be considered standard, based on a set of reference cleanware samples. It also ensures that the basic section names from the reference documentation of the executable formats are included. NB: The reference set must contain samples of a single executable format; mixed formats are not supported.

#!/usr/bin/python3
# -*- coding: UTF-8 -*-
import lief
from tinyscript import *

# entries of /proc/self/fd/ (open file descriptors) at the time this module is loaded
FILE_DESCRIPTORS = set(os.listdir("/proc/self/fd/"))
# baseline section names per executable format, as listed in the referenced documents
STD_SECTIONS = {
    # https://www.cs.cmu.edu/afs/cs/academic/class/15213-f00/docs/elf.pdf
    'ELF': [
        ".bss", ".comment", ".conflict", ".data", ".data1", ".debug", ".dynamic", ".dynstr",
        ".fini", ".fini_array", ".gnu_debuglink", ".got", ".gptab", ".hash", ".init", ".init_array",
        ".interp", ".liblist", ".line", ".lit4", ".lit8", ".note", ".plt", ".reginfo",
        ".rodata", ".rodata1", ".sbss", ".sdata", ".shstrtab", ".strtab", ".symtab", ".tdesc",
        ".text",
    ],
    # https://github.com/roussieau/masterthesis/blob/master/src/detector/tools/pefeats/pefeats.cpp
    # NOTE(review): ".drective" is kept exactly as found here; linkers appear to emit ".drectve" - confirm
    #  against the PE specification before changing it
    'PE': [
        ".bss", ".cormeta", ".data", ".debug", ".debug$F", ".debug$P", ".debug$S", ".debug$T",
        ".drective", ".edata", ".idata", ".idlsym", ".pdata", ".rdata", ".reloc", ".rsrc",
        ".sbss", ".sdata", ".srdata", ".sxdata", ".text", ".tls", ".tls$", ".vsdata",
        ".xdata",
    ],
}
# possible relocation sections for the ELF format: each base name gets a ".rel" and a ".rela" variant ; the
#  comprehension is fully built from the base names before the list is extended in place
STD_SECTIONS['ELF'] += [prefix + name for name in STD_SECTIONS['ELF'] for prefix in (".rel", ".rela")]

def open_exe(path):
    """Parse the binary at the given path with LIEF while discarding what is written to stderr.

    File descriptor 2 is pointed at os.devnull for the duration of the parsing and is then restored. Both
    temporary descriptors are closed on every path, including when the parsing raises, so that repeated calls
    do not leak file descriptors and stderr is never left silenced.

    :param path: path of the file to be parsed (converted with str() before being handed to LIEF)
    :return:     the result of lief.parse for this path
    """
    # keep a duplicate of the current stderr so that it can be put back afterwards
    saved_fd = os.dup(2)
    try:
        null_fd = os.open(os.devnull, os.O_RDWR)
        try:
            os.dup2(null_fd, 2)
            return lief.parse(str(path))
        finally:
            # descriptor 2 holds its own duplicate of the null device, so this one is no longer needed
            os.close(null_fd)
    finally:
        os.dup2(saved_fd, 2)  # restore stderr
        os.close(saved_fd)    # the duplicate would otherwise leak at each call

if __name__ == '__main__':
    # command-line interface: the folder of samples is mandatory, the output text file is optional
    parser.add_argument("folder", type=ts.folder_exists, help="folder of reference cleanware samples")
    parser.add_argument("-o", "--output", type=ts.file_does_not_exist, help="output the sections to a raw text file")
    initialize()
    names, etype = set(), None
    for sample in ts.Path(args.folder).listdir():
        # only regular files that LIEF manages to parse are taken into account
        if not sample.is_file():
            continue
        binary = open_exe(sample)
        if binary is None:
            continue
        logger.debug(sample)
        # the first parsed sample sets the reference format ; any later mismatch aborts the run
        fmt = binary.format.name
        if etype is None:
            etype = fmt
        elif fmt != etype:
            raise ValueError("Found a %s (while reference format is: %s) ; this tool does not support mixed formats"
                             % (fmt, etype))
        names.update(section.name for section in binary.sections)
        # close the descriptors that were not listed in /proc/self/fd/ when the module was loaded
        leftover_fds = set(os.listdir("/proc/self/fd/")) - FILE_DESCRIPTORS
        for fd in leftover_fds:
            try:
                os.fdopen(int(fd)).close()
            except OSError:
                pass
    # add the baseline names of the detected format (none when no sample could be parsed)
    names.update(STD_SECTIONS.get(etype, []))
    ordered = sorted(names)
    for name in ordered:
        print(name)
    if args.output:
        with ts.Path(args.output).open("w") as out:
            out.write("\n".join(ordered))

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Standard section names sampling script

Clone this wiki locally