#!/bin/sh
# ----------------------------------------------------------------------
#  RPTIMES
#
#  Scans through a series of run.xml files and puts their runtime info
#  into a SQLite database.  This database can be used to predict
#  future runtimes for runs with similar parameters.
#
#    USAGE:
#    % rptimes <run.xml> ?<run.xml>...?
#    % rptimes -v <sqliteDB>
#
#  Exits with status 0 if successful, and non-zero if any run.xml
#  files cannot be processed.
#
#  Creates an SQLite DB for each unique tool referenced by a run.xml
#  file.  Each DB file contains a table of parameters and a table of
#  "jobs" info.  The jobs table includes CPU time and Wall time
#  measurements.
#
# ======================================================================
#  AUTHOR:  Michael McLennan, Purdue University
#  Copyright (c) 2004-2013  HUBzero Foundation, LLC
#
#  See the file "license.terms" for information on usage and
#  redistribution of this file, and for a DISCLAIMER OF ALL WARRANTIES.
# ======================================================================
# \
exec tclsh "$0" ${1+"$@"}
# tclsh executes the rest...

package require Rappture
package require sqlite3
package require md5
package require base64

if {[llength $argv] < 1} {
    puts stderr "USAGE: rptimes run.xml ?run.xml...?"
    puts stderr "USAGE: rptimes -v tool.sql3"
    exit 1
}

set op "ingest"
while {[llength $argv] > 0} {
    set flag [lindex $argv 0]
    if {![string match -* $flag]} {
        break
    }
    switch -- $flag {
        -v {
            set op "view"
            if {[llength $argv] < 2} {
                puts stderr "USAGE: rptimes -v tool.sql3"
                exit 1
            }
            set dbfile [lindex $argv 1]
            set argv [lrange $argv 2 end]
        }
        default {
            puts stderr "bad flag \"$flag\": should be -v"
            exit 1
        }
    }
}

# ----------------------------------------------------------------------
# USAGE: normval <libObj> <rappturePath> ?-default?
#
# Returns information about the normalized value of a particular input
# in the Rappture run <libObj> at the specified <rappturePath> for the
# input.  The optional -default switch causes the processing to apply
# to the default value instead of the current value.
#
# Returns info as a list of values in the form:
#
#   <rappturePath> <type> <normValue>
#   <rappturePath2> <type2> <normValue2>
#   ...
#
# Most inputs return just 3 values: path/type/value.  But some inputs
# split into multiple parts and return several triples.  Images, for
# example, return a hash for the image itself, but also width/height
# parameters.  The width/height may actually be the better predictors
# of CPU time required.
# ----------------------------------------------------------------------
proc normval {libobj path {what "-current"}} {
    set rlist ""

    # get the default or the current raw value
    set raw ""
    switch -- $what {
        -default {
            set raw [$libobj get $path.default]
        }
        -current {
            if {[$libobj element $path.current] ne ""} {
                set raw [$libobj get $path.current]
            } else {
                set raw [$libobj get $path.default]
            }
        }
        default {
            error "bad option \"$what\": should be -current or -default"
        }
    }

    # normalize the value depending on the type
    switch -- [$libobj element -as type $path] {
        integer {
            lappend rlist $path "INTEGER" $raw
        }
        number {
            set norm ""
            if {$raw ne ""} {
                # then normalize to default units
                set units [$libobj get $path.units]
                if {$units ne ""} {
                    set norm [Rappture::Units::convert $raw \
                        -context $units -to $units -units off]
                }
            }
            lappend rlist $path "REAL" $norm
        }
        boolean - choice - loader - periodicelement {
            lappend rlist $path "TEXT" $raw
        }
        image {
            # convert long string inputs into a unique (short) hash
            set norm [base64::encode [md5::md5 $raw]]
            if {[catch {package require Img; wm withdraw .} result]} {
                error "can't analyze <image> types: $result"
            }
            if {[catch {image create photo -data $raw} result]} {
                error "can't analyze <image> data: $result"
            }
            set width [image width $result]
            set height [image height $result]

            lappend rlist $path "TEXT" $norm
            lappend rlist $path-WIDTH "INTEGER" $width
            lappend rlist $path-HEIGHT "INTEGER" $height
        }
        string {
            # convert long string inputs into a unique (short) hash
            set norm [base64::encode [md5::md5 $raw]]
            lappend rlist $path "TEXT" $norm
        }
        structure {
            # oops! structure doesn't have a clear .current value
            # use the XML dump of the data as its "value"
            if {$what eq "-current"} {
                if {[catch {$libobj xml $path.current} raw]} {
                    if {[catch {$libobj xml $path.default} raw]} {
                        set raw ""
                    }
                }
            } elseif {[catch {$libobj xml $path.default} raw]} {
                set raw ""
            }
            set norm [base64::encode [md5::md5 $raw]]
            lappend rlist $path "TEXT" $norm
        }
        default {
            # for anything else, punt and use the raw value
            lappend rlist $path "TEXT" $raw
        }
    }
    return $rlist
}

# ----------------------------------------------------------------------
# USAGE: escapeQuotes <string>
#
# Escapes single quotes in the given <string> by escaping the quote.
# In SQLite, this is done by doubling the quote.  This makes it
# possible to embed the string in another quoted string like
# 'hello ''world'''.  Returns a string with escaped quotes.
# ----------------------------------------------------------------------
proc escapeQuotes {str} {
    regsub -all {([^\'])\'} $str {\1''} str
    return $str
}

#
# Handle "view" operation
#
if {$op eq "view"} {
    sqlite3 db $dbfile

    puts "PARAMETERS:"
    db eval {SELECT nickName,rappturePath,type FROM parameters;} data {
        puts " - $data(nickName) ($data(type)) = $data(rappturePath)"
    }

    puts "\nJOBS:"
    set n 0
    db eval {SELECT runToken,date,cpuTime,wallTime FROM jobs;} data {
        puts [format " [incr n]) [clock format $data(date)]:  %6.2f CPU, %6.2f Wall" $data(cpuTime) $data(wallTime)]
    }

    catch {db close}
    exit 0
}

#
# Run through each run.xml file and load params and execution time
#
set status 0
foreach fname $argv {
    set db ""
    set err ""
    if {[catch {Rappture::library $fname} libobj]} {
        set err "failed: $fname ($libobj)"
    }

    set app [$libobj get tool.id]
    set rev [$libobj get tool.version.application.revision]
    if {$err eq ""} {
        if {$app eq "" || $rev eq ""} {
            set err "failed: $fname (missing app info -- is tool deployed?)"
        } else {
            set dbfile "${app}_r$rev.sql3"
            if {![file exists $dbfile]} {
                sqlite3 db $dbfile
                db eval {CREATE TABLE parameters(nickName TEXT PRIMARY KEY, rappturePath TEXT, defValue TEXT, type TEXT);}
                db eval {CREATE TABLE jobs(runToken TEXT, date TEXT, cpuTime REAL, wallTime REAL, nCpus INTEGER, venue TEXT);}
            } else {
                sqlite3 db $dbfile
            }
        }
    }

    if {$err eq "" && [set cput [$libobj get output.cputime]] eq ""} {
        set err "failed: $fname (missing cpu time)"
    }
    if {$err eq "" && [set wallt [$libobj get output.walltime]] eq ""} {
        set err "failed: $fname (missing wall time)"
    }

    set date "?"
    if {$err eq "" && [$libobj get output.time] ne "" && [catch {clock scan [$libobj get output.time]} d] == 0} {
        set date $d
    }

    if {$err eq "" && [$libobj get output.venue.name] ne ""} {
        set venue [$libobj get output.venue.name]
    } else {
        set venue ""
    }

    if {$err eq "" && [$libobj get output.venue.ncpus] ne ""} {
        set ncpus [$libobj get output.venue.ncpus]
    } else {
        set ncpus 1
    }

    if {$err eq ""} {
        set runtoken [base64::encode [md5::md5 [$libobj xml]]]

        set cols ""
        set vals ""
        foreach path [Rappture::entities $libobj input] {
            # get the nickname (column name) for this paraemter
            set id [db eval "SELECT nickName from parameters where rappturePath='$path'"]
            if {$id eq ""} {
                # haven't seen this parameter before -- add it
                foreach {rp type def} [normval $libobj $path -default] {
                    set num [db eval "SELECT COUNT(nickName) from parameters;"]
                    set id [format "x%03d" [incr num]]

                    db eval "INSERT INTO parameters values('$id','$rp','[escapeQuotes $def]','$type')"
                    db eval "ALTER TABLE jobs ADD COLUMN $id $type;"
                }
            }

            # add the current value onto the values we're building up for ALTER
            foreach {raw norm} [Rappture::LibraryObj::value $libobj $path] break

            foreach {rp type val} [normval $libobj $path] {
                set num [db eval "SELECT COUNT(nickName) from parameters;"]
                set id [db eval "SELECT nickName FROM parameters WHERE rappturePath='$rp'"]
                if {$id eq ""} {
                    set err "INTERNAL ERROR: couldn't find nickName for existing parameter $rp"
                    break
                }
                lappend cols $id
                if {$type eq "TEXT"} {
                    lappend vals '[escapeQuotes $val]'
                } elseif {$val ne ""} {
                    lappend vals $val
                } else {
                    lappend vals ''
                }
            }
        }

        if {$err eq ""} {
            # add the info for this job
            db eval "DELETE from jobs WHERE runToken='$runtoken';"
            db eval "INSERT INTO jobs (runToken,date,cpuTime,wallTime,nCpus,venue,[join $cols ,]) values ('$runtoken','$date',$cput,$wallt,$ncpus,'$venue',[join $vals ,]);"
        }
    }

    if {$err ne ""} {
        puts stderr $err
        set status 1
    }
    catch {db close}
}

exit $status
