#!/usr/bin/python3

import base64
import csv
import html
import os
import os.path
import re
import sys
from functools import reduce

import naturalsort
import rpy2.robjects as robjects

# Module-wide report settings. analyzefile() merges caller-supplied overrides
# into this dict, so the values below act as defaults.
options = {
    'separator': ':',
    'ctree_mincriterion': .999,
    'title': 'Benchmark analysis',
    'num_multiple_trials': 1,
    'decisiontree_height': 800,
    'decisiontree_width': 800,
    'boxplot_height': 100,
    'boxplot_width': 800,
    'sort_keys': True,
}


def getDataURL(pngFilename):
    """Return the contents of `pngFilename` as a base64 `data:image/png` URL.

    The file is read in binary mode and closed before returning, even if the
    read fails.
    """
    with open(pngFilename, 'rb') as pngFile:
        data = pngFile.read()
    # Standard (not URL-safe) base64 alphabet, as used in the data: URL.
    dataBytes = base64.b64encode(data)
    return 'data:image/png;base64,%s' % dataBytes.decode('ascii')

def makeSafeFilename(str):
    """Return `str` reduced to characters that are safe in a temp file name.

    Only word characters, spaces, dots and hyphens are kept. Besides path
    separators ('/' and '\\'), this also drops quotes and other punctuation,
    because callers splice the result into double-quoted R source strings and
    into file paths.
    """
    # NOTE: the parameter name shadows the builtin `str`; it is kept so that
    # existing keyword callers continue to work.
    return re.sub(r'[^\w .-]', '', str)


def minMaxAvg(ratios):
    """Return `(minimum, maximum, mean)` of the numbers in `ratios`.

    The mean is always a float. Uses the builtins rather than fixed sentinel
    start values, so values of any magnitude are handled correctly.

    Raises:
        ValueError: if `ratios` is empty.
    """
    if len(ratios) == 0:
        raise ValueError('minMaxAvg() requires at least one value')
    return (min(ratios), max(ratios), float(sum(ratios)) / len(ratios))

def median(ratios):
    """Return the median of the list `ratios` without modifying it.

    For an even number of values the two middle values are averaged.
    """
    ordered = ratios[:]
    ordered.sort()
    half, isOdd = divmod(len(ordered), 2)
    if isOdd:
        return ordered[half]
    return (ordered[half - 1] + ordered[half]) / 2
    
def CommonPrefixString2( cStr1, cStr2 ):
    """Return the longest prefix shared by both strings ('' if there is none)."""
    shared = []
    # zip() stops at the shorter string, which bounds the comparison.
    for left, right in zip( cStr1, cStr2 ):
        if left != right:
            break
        shared.append( left )
    return "".join( shared )

def CommonPrefixString( *cStrs ):
    """Return the prefix common to every string passed in.

    A single argument is returned unchanged.
    """
    if not cStrs:
        # Delegate so a call without arguments fails exactly as reduce() does.
        return reduce( CommonPrefixString2, cStrs )
    prefix = cStrs[0]
    for candidate in cStrs[1:]:
        prefix = CommonPrefixString2( prefix, candidate )
    return prefix

def classifyNames(rowNames):
    """Split row names into positional features and encode each row numerically.

    Each row name is split on options['separator'] and the pieces are stripped.
    For every position the distinct values seen are collected, and each row is
    encoded as a list of indices into those per-position value lists.

    Positions with only one distinct value are dropped. Each remaining position
    is labelled with the common prefix of its values (trailing digits removed)
    when that prefix is non-empty and not already used as a label; otherwise
    the label is 'Feature N'. When options['sort_keys'] is set, the values of
    each position are put in natural sort order and the row encodings are
    remapped to match.

    Returns:
        (keys, features, labels): the value lists per position, the index
        lists per row, and one label per position.
    """
    keys = []
    features = []
    for rowName in rowNames:
        # TODO - this should do something awesome
        parts = [piece.strip() for piece in rowName.split(options['separator'])]
        encoded = []
        for position, value in enumerate(parts):
            if position >= len(keys):
                # First time any row reaches this position.
                keys.append([value])
                encoded.append(0)
                continue
            known = keys[position]
            if value not in known:
                known.append(value)
            encoded.append(known.index(value))
        features.append(encoded)

    # Drop every position that only ever takes a single value.
    constantPositions = [position for position, values in enumerate(keys) if len(values) <= 1]
    for alreadyRemoved, position in enumerate(constantPositions):
        # Earlier removals shift the remaining positions left.
        target = position - alreadyRemoved
        if target == 0:
            keys = keys[1:]
            features = [encoded[1:] for encoded in features]
        elif target == len(keys) - 1:
            keys = keys[:-1]
            features = [encoded[:-1] for encoded in features]
        else:
            del keys[target]
            for encoded in features:
                del encoded[target]

    labels = ['Feature %d' % number for number in range(len(keys))]
    for position, values in enumerate(keys):
        # The prefix must not end in a digit, so trailing digits are cut off.
        prefix = CommonPrefixString(*values).strip().rstrip('0123456789')
        if prefix != '' and prefix not in labels:
            labels[position] = prefix

    # Sort the values of each position if requested.
    if options['sort_keys']:
        for position, values in enumerate(keys):
            unsorted = list(values)
            # Old indices in natural order; the index itself breaks ties.
            order = sorted(
                range(len(unsorted)),
                key=lambda old, names=unsorted: (naturalsort.natural_key(names[old]), old))
            newIndexOf = [0] * len(order)
            for new, old in enumerate(order):
                newIndexOf[old] = new
                values[new] = unsorted[old]
            # Point every row at the new location of its value.
            for encoded in features:
                encoded[position] = newIndexOf[encoded[position]]
    return keys, features, labels

def stripLeading(target, label, index):
    """Return `target` with its first len(label) characters removed.

    When `label` is the placeholder 'Feature <index>', `target` is returned
    unchanged.
    """
    placeholder = 'Feature %d' % index
    if label != placeholder:
        return target[len(label):]
    return target

def makeBaselineSelect(column, dataKeys, datas):
    """Return the HTML `<select>` used to choose the baseline of `column`.

    Args:
        column: key into `datas` of the column the control belongs to.
        dataKeys: column keys to offer as options, in display order; `column`
            itself is skipped.
        datas: mapping of column key to a dict holding 'index' and, optionally,
            'isBaseline' and 'baseline'.

    The "None" option (value -1) is preselected when the column is flagged as
    a baseline or has no 'baseline' entry; otherwise the option of its current
    baseline is preselected. Column names are HTML-escaped because they come
    from the input file.
    """
    thisData = datas[column]
    if thisData.get('isBaseline'):
        baseline = None
    else:
        baseline = thisData.get('baseline')
    selectedAttr = 'selected="selected"'
    parts = [
        '<select name="baseline%d" id="baseline%d">' % (thisData['index'], thisData['index']),
        '<option value="-1" %s>None</option>' % (selectedAttr if baseline is None else ''),
    ]
    for key in dataKeys:
        if key == column:
            continue
        parts.append('<option value="%d" %s>%s</option>' % (
            datas[key]['index'],
            selectedAttr if baseline == key else '',
            html.escape(str(key))))
    parts.append('</select>')
    return ''.join(parts)

def _esc(value):
    """Return `value` as text that is safe inside HTML content and attributes."""
    return html.escape(str(value), quote=True)


def _readBenchmarkCsv(filecontents):
    """Parse CSV lines into row names, trial columns and per-column data.

    Rows are treated as header rows until one with a non-empty cell is seen.
    Every non-empty header cell becomes a column in `datas`; the first one is
    the global baseline and the rest are trials. Within a run of adjacent
    non-empty header cells, the first cell is the baseline of the others.
    Later rows whose first cell is non-blank contribute one float per column.

    Returns:
        (rowNames, trials, datas, baseline)
    """
    rowNames = []
    trials = []
    datas = {}
    baseline = None
    for row in csv.reader(filecontents):
        if baseline is None:
            # No column header has been seen yet: treat this row as headers.
            lastContiguousHeader = None
            for i, h in enumerate(x.strip() for x in row):
                if h == '':
                    # An empty cell ends the current group of columns.
                    lastContiguousHeader = None
                    continue
                datas[h] = {'index': i, 'data': []}
                if baseline is None:
                    baseline = h
                    datas[h]['isBaseline'] = True
                else:
                    trials.append(h)
                if lastContiguousHeader is None:
                    lastContiguousHeader = h
                    datas[h]['baseline'] = baseline
                else:
                    datas[h]['baseline'] = lastContiguousHeader
                    datas[lastContiguousHeader]['isBaseline'] = True
        elif len(row) > 0 and row[0].strip() != '':
            rowNames.append(row[0].strip())
            for h in datas:
                datas[h]['data'].append(float(row[datas[h]['index']]))
    return rowNames, trials, datas, baseline


def _applyBaselineOverrides(datas):
    """Apply the 'baseline<index>' entries of `options` to `datas` in place.

    A value of -1 marks the column as a baseline itself; any other value is
    the index of the column to compare against. Values that are not integers
    or that match no column are ignored.
    """
    for key in datas:
        optionToCheck = 'baseline%d' % datas[key]['index']
        if optionToCheck not in options:
            continue
        try:
            newBaseline = int(options[optionToCheck])
        except ValueError:
            # bad baseline, skip it
            continue
        if newBaseline == -1:
            datas[key]['isBaseline'] = True
            if 'baseline' in datas[key]:
                del datas[key]['baseline']
            continue
        # Find what should be its baseline
        baselineKey = None
        for bKey in datas:
            if datas[bKey]['index'] == newBaseline:
                baselineKey = bKey
        if baselineKey is not None:
            if 'isBaseline' in datas[key]:
                del datas[key]['isBaseline']
            datas[key]['baseline'] = baselineKey


def _computeRatios(datas, baseline):
    """Return {column: [baseline*100/value per row]} for non-baseline columns."""
    allRatios = {}
    for h in datas:
        if 'isBaseline' in datas[h]:
            continue
        thisBaseline = datas[h].get('baseline', baseline)
        # FFV - is this formula right?
        allRatios[h] = [base*100/new for (base, new) in zip(datas[thisBaseline]['data'], datas[h]['data'])]
    return allRatios


def _printOptionsForm(filecontents, keys, labels, datas):
    """Print the page head, title and the collapsible "Change options" form."""
    title = _esc(options['title'])
    print('<!DOCTYPE html><html lang="en"><head>')
    print('<meta charset="utf-8">')
    print('<script src="https://ajax.googleapis.com/ajax/libs/jquery/1.3.2/jquery.min.js" type="text/javascript"></script>')
    print('<script type="text/javascript">')
    print(' function changeToggleName() { var link = $("#control" + this.id); if (link.text() == "+") { link.text("-"); } else { link.text("+"); } }')
    print('</script>')
    print('<style type="text/css">')
    print(' a.controllink { font-family: monospace; }')
    print('</style>')
    print('<title>%s</title></head><body>' % title)
    print('<h1>%s</h1>' % title)
    print('<p><a href="javascript:void(0)" class="controllink" id="controlchangeoptions">%s</a> Change options</p>' % ('-' if 'firstTime' in options else '+'))
    print('<form style="%s" id="changeoptions" method="post" enctype="multipart/form-data" action="doanalysis.cgi">' % ('' if 'firstTime' in options else 'display: none;'))
    print('<p><label for="num_multiple_trials">Number of trials per data point (for decision tree):</label> <input type="text" name="num_multiple_trials" id="num_multiple_trials" size="5" value="%d"></p>' % options['num_multiple_trials'])
    print('<p><label for="title">Report title:</label> <input type="text" name="title" id="title" size="80" value="%s"></p>' % title)
    # The input is carried along base64-encoded in a hidden field so the form
    # can resubmit it.
    filecontentsstr = base64.b64encode('\n'.join(filecontents).encode('utf-8')).decode('ascii')
    print('<p>Feature labels:</p>')
    print('<ul>')
    for labelindex, label in enumerate(labels):
        print('<li><input type="text" name="label%d" id="label%d" value="%s"> - label for values %s</li>' % (labelindex, labelindex, _esc(label), _esc(','.join(keys[labelindex]))))
    print('</ul>')
    print('<p>Baseline comparisons:</p>')
    print('<ul>')
    dataKeys = sorted(datas, key=lambda x: datas[x]['index'])
    for column in dataKeys:
        print('<li><label for="baseline%d">%s:</label> %s</li>' % (datas[column]['index'], _esc(column), makeBaselineSelect(column, dataKeys, datas)))
    print('</ul>')
    print('<p><label for="ctree_mincriterion">Min p-value for significance in decision tree (from 0-1):</label> <input type="text" name="ctree_mincriterion" id="ctree_mincriterion" size="5" value="%s"></p>' % _esc(options['ctree_mincriterion']))
    print('<p><label for="sort_keys">Sort keys?</label><input type="checkbox" name="sort_keys" id="sort_keys" %s></p>' % ('checked="checked"' if options['sort_keys'] else ''))
    print('<p><label for="boxplot_width">Boxplot size:</label><input type="text" name="boxplot_width" id="boxplot_width" value="%d" size="3">x<input type="text" name="boxplot_height" id="boxplot_height" value="%d" size="3"></p>' % (options['boxplot_width'], options['boxplot_height']))
    print('<p><label for="decisiontree_width">Decision tree size:</label><input type="text" name="decisiontree_width" id="decisiontree_width" value="%d" size="3">x<input type="text" name="decisiontree_height" id="decisiontree_height" value="%d" size="3"></p>' % (options['decisiontree_width'], options['decisiontree_height']))
    print('<p><label for="separator">Separator for different features:</label> <input type="text" name="separator" id="separator" size="3" value="%s"></p>' % _esc(options['separator']))
    print('<input type="hidden" name="filecontents" id="filecontents" value="%s">' % filecontentsstr)
    print('<p><input type="submit" value="Reanalyze"></p>')
    print('</form>')


def _boxplotDataURL(filename, values, lower, upper):
    """Draw a horizontal R boxplot of `values` and return it as a data URL.

    The image is written to tempimages/<filename> at the size given by the
    'boxplot_width' and 'boxplot_height' options; `lower` and `upper` set the
    plotted value range.
    """
    robjects.r('png("tempimages/%s", width=%d, height=%d)' % (filename, options['boxplot_width'], options['boxplot_height']))
    robjects.r('par(mai = c(.5,0,0,0), bty="n")')
    robjects.r('x <- c(%s)' % ','.join([str(v) for v in values]))
    robjects.r('boxplot(x, horizontal=TRUE, ylim=c(%f, %f))' % (lower, upper))
    robjects.r('dev.off()')
    return getDataURL('tempimages/' + filename)


def _printTrialReport(hindex, h, ratios, thisBaseline, keys, features, labels, originalLabels):
    """Print the report entry for trial column `h`.

    The entry holds the summary line, an overall boxplot, the decision tree
    image and one boxplot per value of every feature.
    """
    avg = minMaxAvg(ratios)[2]
    print('<li><a href="javascript:void(0)" class="controllink" id="controltopresults%d">+</a> %s: average <b>%f%%</b> median <b>%f%%</b> (against %s)' % (hindex, _esc(h), avg, median(ratios), _esc(thisBaseline)))
    # Group the ratios by the value each row takes for every feature.
    keyData = []
    for i, keyValues in enumerate(keys):
        buckets = [[] for _ in keyValues]
        for j, f in enumerate(features):
            buckets[f[i]].append(ratios[j])
        keyData.append(buckets)
    # All plots of this trial share one axis range. It is taken from the data
    # directly so it is also defined when there are no features.
    globalMin = min(ratios)
    globalMax = max(ratios)
    filename = makeSafeFilename(h + 'total.png')
    print('<br><img src="%s">' % _boxplotDataURL(filename, ratios, globalMin, globalMax))

    print('<ul id="topresults%d" style="display: none;">' % hindex)
    pngfilename = makeSafeFilename(h + 'ctree.png')
    robjects.r('png("tempimages/%s", width=%d, height=%d)' % (pngfilename, options['decisiontree_width'], options['decisiontree_height']))
    # Create the data table file
    datatablefilename = makeSafeFilename(h + 'datatable.txt')
    with open('tempimages/%s' % datatablefilename, 'w') as tableFile:
        tableFile.write('\t'.join(labels))
        tableFile.write('\ttime')
        tableFile.write('\n')
        for findex, f in enumerate(features):
            towrite = [stripLeading(keys[index][feature], originalLabels[index], index) for index, feature in enumerate(f)]
            towrite = [x if x != '' else '(none)' for x in towrite]
            towrite.append(ratios[findex])
            line = '\t'.join([str(x) for x in towrite])
            # Each row is written num_multiple_trials times.
            for _ in range(options['num_multiple_trials']):
                tableFile.write(line)
                tableFile.write('\n')
    robjects.r('benchmarkdata <- read.table("tempimages/%s", header=TRUE, sep="\\t")' % datatablefilename)
    robjects.r('testplot <- ctree(time ~ ., data=benchmarkdata, controls=ctree_control(mincriterion=%f))' % options['ctree_mincriterion'])
    robjects.r('plot(testplot, tp_args=list(id=FALSE), ip_args=list(id=FALSE))')
    robjects.r('dev.off()')
    print('<p>Decision tree of significance:</p><br><img src="%s">' % getDataURL('tempimages/' + pngfilename))
    for i, keyValues in enumerate(keys):
        print('<li><a href="javascript:void(0)" class="controllink" id="controltopresults%dfeature%d">-</a> %s</li>' % (hindex, i, _esc(labels[i])))
        print('<ul id="topresults%dfeature%d">' % (hindex, i))
        for j, k in enumerate(keyValues):
            values = keyData[i][j]
            print('<li>%s: average <b>%f%%</b> median <b>%f%%</b>' % (_esc(k), sum(values)/float(len(values)), median(values)))
            filename = makeSafeFilename(h + str(i) + k + '.png')
            print('<br><img src="%s">' % _boxplotDataURL(filename, values, globalMin, globalMax))
            print('</li>')
        print('</ul>')
    print('</ul>')
    print('</li>')


def analyzefile(filecontents, newOptions):
    """Analyze benchmark CSV data and print an HTML report to standard output.

    Args:
        filecontents: sequence of CSV text lines. The header row names the
            data columns; each later row starts with a row name whose parts,
            separated by options['separator'], are treated as features.
        newOptions: dict merged into the module-level `options`. Besides the
            default keys it may hold 'label<N>' (feature label overrides),
            'baseline<index>' (baseline overrides) and 'firstTime' (show the
            options form expanded).

    Side effects: updates `options`, creates and empties the 'tempimages'
    directory, writes plot and data files there, and runs R code through rpy2
    (the R package 'party' is loaded).
    """
    options.update(newOptions)
    # Read the data from the file
    rowNames, trials, datas, baseline = _readBenchmarkCsv(filecontents)
    # Classify the row names.
    keys, features, labels = classifyNames(rowNames)
    # The data table strips the automatically derived label from each value,
    # so keep a copy before user overrides are applied.
    originalLabels = list(labels)
    for i in range(len(labels)):
        if 'label%d' % i in options:
            labels[i] = options['label%d' % i]
    # Fix up baseline comparisons.
    _applyBaselineOverrides(datas)
    # Now, consolidate the data into vectors.
    allRatios = _computeRatios(datas, baseline)

    _printOptionsForm(filecontents, keys, labels, datas)
    print('<p>Benchmark results:</p>')
    print('<ul>')
    if not os.path.exists('tempimages'):
        os.mkdir('tempimages')
    for name in os.listdir('tempimages'):
        os.remove(os.path.join('tempimages', name))
    robjects.r('suppressMessages(library(party))')
    for hindex, h in enumerate(trials):
        if h in allRatios:
            _printTrialReport(hindex, h, allRatios[h], datas[h]['baseline'], keys, features, labels, originalLabels)
    print('</ul>')
    print('<script type="text/javascript">')
    print(' $(document).ready(function() {')
    print('    $(".controllink").click(function() { $(\'#\' + this.id.substring(7)).toggle(\'fast\', changeToggleName); });')
    print(' });')
    print('</script>')
    print('</body></html>')


def main(filename):
    """Read the CSV file `filename` and print its analysis with default options.

    The file is closed as soon as its lines are read, even if reading fails.
    """
    with open(filename) as f:
        contents = f.readlines()
    analyzefile(contents, {})


if __name__ == '__main__':
    # Analyze the file named by the first command-line argument, or a default
    # file name when none is given.
    # (Earlier default kept for reference: 'colorhistogrambenchmarks.csv')
    name = sys.argv[1] if len(sys.argv) > 1 else 'PM multicore benchmarks.csv'
    main(name)