#!/usr/bin/env python
"""
libpoppler fuzzer using the "pdftotext" command-line program.
"""

# Mangler selection: "auto" imports AutoMangle, "incr" imports IncrMangle,
# and any other value falls back to the plain MangleFile (see the
# conditional import below).
MANGLE = "auto"
# When true, setupProject() adds a ProcessTimeWatch to the project.
USE_TIME = False
# When true, a WatchStdout agent is attached to the process; when false,
# the 'stdout' option passed to MangleProcess is set to 'null' and no
# stdout watcher is created.
USE_STDOUT = True

from fusil.application import Application
from fusil.process.time_watch import ProcessTimeWatch
from fusil.process.mangle import MangleProcess
from fusil.process.watch import WatchProcess
from fusil.process.stdout import WatchStdout
if MANGLE == "auto":
    from fusil.auto_mangle import AutoMangle as MangleFile
elif MANGLE == "incr":
    from fusil.incr_mangle import IncrMangle as MangleFile
else:
    from fusil.mangle import MangleFile
import re

class Fuzzer(Application):
    """Fusil application that runs "pdftotext" on mangled copies of a PDF.

    The single positional argument is the path of the original PDF
    document (see USAGE / NB_ARGUMENTS).
    """
    NAME = "poppler"
    USAGE = "%prog [options] document.pdf"
    NB_ARGUMENTS = 1

    def setupProject(self):
        """Create the agents of the fuzzing project.

        In order: an optional ProcessTimeWatch (USE_TIME), the file
        mangler chosen by MANGLE, the "pdftotext" MangleProcess, a
        WatchProcess on it and, when USE_STDOUT is set, a WatchStdout
        whose lines are stripped of their "Error: " prefix.
        """
        fuzz_project = self.project

        if USE_TIME:
            ProcessTimeWatch(
                fuzz_project,
                too_slow=3.0, too_slow_score=0.10,
                too_fast=0.100, too_fast_score=-0.80,
            )

        # Mangler working on the document given on the command line;
        # the tuning attributes depend on which mangler class was imported.
        source_pdf = self.arguments[0]
        mangler = MangleFile(fuzz_project, source_pdf)
        if MANGLE == "incr":
            mangler.operation_per_version = 100
            mangler.max_version = 50
        elif MANGLE == "auto":
            mangler.hard_max_op = 1000
        else:
            mangler.config.max_op = 1000

        # '<pdf>' is passed both in the command line and as the extra
        # MangleProcess argument.
        process_kwargs = dict(timeout=5.0)
        if not USE_STDOUT:
            process_kwargs.update(stdout='null')
        pdftotext_cmd = ['pdftotext', '<pdf>', 'output.txt']
        process = MangleProcess(
            fuzz_project, pdftotext_cmd, '<pdf>', **process_kwargs)
        WatchProcess(process, exitcode_score=-0.10)

        if not USE_STDOUT:
            return

        stdout = WatchStdout(process)

        # Matches "Error: <message>" and "Error (<number>): <message>".
        error_prefix = re.compile(r"Error(?: \([0-9]+\))?: (.*)")

        def cleanupLine(line):
            """Return the message part of an "Error ...: " line, else the line."""
            found = error_prefix.match(line)
            return found.group(1) if found else line

        stdout.cleanup_func = cleanupLine

        # Drop the 'error' and 'unknown' entries from the watched words.
        for word in ('error', 'unknown'):
            del stdout.words[word]

        # Disabled output filters, kept for reference:
        # stdout.show_not_matching = True
        # stdout.ignoreRegex(r"Unknown operator 'allocate'$")
        # stdout.ignoreRegex(r" operator is wrong type \(error\)$")
        # stdout.ignoreRegex(r'^No current point in lineto$')
        # stdout.ignoreRegex(r'^No current point in lineto')
        # stdout.ignoreRegex(r'^Unknown operator ')
        # stdout.ignoreRegex(r"^Couldn't open 'nameToUnicode' file ")
        # stdout.ignoreRegex(r"^Illegal character ")
        # stdout.ignoreRegex(r"^No font in show$")
        # stdout.ignoreRegex(r"^Element of show/space array must be number or string$")
        # stdout.ignoreRegex(r"^No current point in curveto$")
        # stdout.ignoreRegex(r"^Badly formatted number$")
        # stdout.ignoreRegex(r"^Dictionary key must be a name object$")
        # stdout.ignoreRegex(r"^End of file inside array$")
        # stdout.ignoreRegex(r"^Too few \([0-9]+\) args to .* operator$")
        # stdout.ignoreRegex(r"Too many args in content stream")

        # NOTE(review): presumably (line count threshold, score) -- confirm
        # against WatchStdout.
        stdout.max_nb_line = (100, 0.20)

if __name__ == "__main__":
    # Script entry point: build the fuzzer application and run it.
    application = Fuzzer()
    application.main()

