#!/usr/bin/env python
#
# Copyright (c) 2004-2010 Purdue University All rights reserved.
# 
# Developed by: HUBzero Technology Group, Purdue University
#               http://hubzero.org
# 
# HUBzero is free software: you can redistribute it and/or modify it under the terms of the
# GNU Lesser General Public License as published by the Free Software Foundation, either
# version 3 of the License, or (at your option) any later version.
# 
# HUBzero is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
# without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
# See the GNU Lesser General Public License for more details.  You should have received a
# copy of the GNU Lesser General Public License along with HUBzero.
# If not, see <http://www.gnu.org/licenses/>.
# 
# GNU LESSER GENERAL PUBLIC LICENSE
# Version 3, 29 June 2007
# Copyright (C) 2007 Free Software Foundation, Inc. <http://fsf.org/>
#
# ----------------------------------------------------------------------
#  monitorPBS.py
#
#  script which monitors the PBS queue and reports changes in job status
#
import sys
import os
import os.path
import select
import time
import popen2
import re
import signal

# ---- site configuration ----------------------------------------------
# Label placed in log messages, history records and update messages.
siteDesignator = "pbsHost"
# Directory holding the history file and the log directory.
monitorRoot = os.path.join(os.sep, "home", "pbsUser", "Submit", "pbsHost")
# Command executed to list the queue.
qstatCommand = "/usr/pbs/bin/qstat -u pbsUser"
# Log directory (joined onto monitorRoot) and log file name; the special
# name "stdout" leaves logging on standard output.
monitorLogLocation = "logs"
monitorLogFileName = "monitorPBS.log"
# History file name (joined onto monitorRoot).
historyFileName = "monitorPBS.history"

# ---- run-time state --------------------------------------------------
# Destination of log(); replaced by openLog().
logFile = sys.stdout
# Open history file, or None; set by openHistory().
historyFile = None
# job id -> (status, stage) for every job still being tracked.
activeJobs = {}
# State changes recorded by recordHistory() and not yet reported.
updates = []


def cleanup():
   """Close the history file when one is currently set."""
   if not historyFile:
      return   # nothing was opened

   historyFile.close()


def sigGEN_handler(signal, frame):
   """Shared shutdown path used by the per-signal handlers.

   Runs cleanup(), logs that the monitor stopped and exits the process
   with status 1.  The two arguments are accepted for the signal handler
   calling convention and are not inspected.
   """
   cleanup()
   stopMessage = "%s monitor stopped" % (siteDesignator)
   log(stopMessage)
   sys.exit(1)

def sigINT_handler(signal, frame):
   """SIGINT handler: log the signal, then run the common shutdown."""
   notice = "Received SIGINT!"
   log(notice)
   sigGEN_handler(signal, frame)

def sigHUP_handler(signal, frame):
   """SIGHUP handler: log the signal, then run the common shutdown."""
   notice = "Received SIGHUP!"
   log(notice)
   sigGEN_handler(signal, frame)

def sigQUIT_handler(signal, frame):
   """SIGQUIT handler: log the signal, then run the common shutdown."""
   notice = "Received SIGQUIT!"
   log(notice)
   sigGEN_handler(signal, frame)

def sigABRT_handler(signal, frame):
   """SIGABRT handler: log the signal, then run the common shutdown."""
   notice = "Received SIGABRT!"
   log(notice)
   sigGEN_handler(signal, frame)

def sigTERM_handler(signal, frame):
   """SIGTERM handler: log the signal, then run the common shutdown."""
   notice = "Received SIGTERM!"
   log(notice)
   sigGEN_handler(signal, frame)

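# NOTE: SIGSTOP and SIGKILL cannot be caught, blocked or ignored by a
# process, so the two handlers below remain commented out.
#def sigSTOP_handler(signal, frame):
#   log("Received SIGSTOP!")
#   sigGEN_handler(signal, frame)

#def sigKILL_handler(signal, frame):
#   log("Received SIGKILL!")
#   sigGEN_handler(signal, frame)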

# Install one handler per signal, in the order listed.
for _signalNumber,_signalHandler in ((signal.SIGINT,  sigINT_handler),
                                     (signal.SIGHUP,  sigHUP_handler),
                                     (signal.SIGQUIT, sigQUIT_handler),
                                     (signal.SIGABRT, sigABRT_handler),
                                     (signal.SIGTERM, sigTERM_handler)):
   signal.signal(_signalNumber,_signalHandler)
# the loop variables are only scaffolding; keep the module namespace clean
del _signalNumber,_signalHandler
# SIGSTOP and SIGKILL cannot be caught, so no handlers are installed for them.
#signal.signal(signal.SIGSTOP, sigSTOP_handler)
#signal.signal(signal.SIGKILL, sigKILL_handler)


def log(message):
   """Write one timestamped line to the current log destination.

   The line has the form "[<time.ctime()>] <message>" and is flushed
   immediately.  An empty message produces no output at all.
   """
   if message == "":
      return

   entry = "[%s] %s\n" % (time.ctime(),message)
   logFile.write(entry)
   logFile.flush()


def recordHistory(id):
   """Persist the current state of job *id* and queue it for reporting.

   Looks the job up in activeJobs, appends the record
   "<siteDesignator>:<id> <status> <stage>" to the history file (flushed
   at once) and appends "<id> <status> <stage>" to the updates list.
   """
   status = activeJobs[id][0]
   stage  = activeJobs[id][1]
   jobId  = str(id)

   historyFile.write("%s:%s %s %s\n" % (siteDesignator,jobId,status,stage))
   historyFile.flush()
   updates.append(" ".join((jobId,status,stage)))


def openLog(logName):
   """Point the logger at the file *logName*, opened for appending.

   When the file cannot be opened (missing directory, permissions, ...)
   logging falls back to standard output.  Only I/O failures trigger the
   fallback: the signal handlers of this script exit through sys.exit(),
   and a catch-all handler here would swallow that SystemExit (or a
   KeyboardInterrupt) if it arrived while the file was being opened.
   """
   global logFile

   try:
      logFile = open(logName,"a")
   except (IOError,OSError):
      logFile = sys.stdout


def openHistory(historyName,
                accessMode):
   """Open the history file and store it in the global historyFile.

   In read mode ("r") a path that is not an existing regular file yields
   historyFile = None instead of an error.  Every other access mode is
   passed straight to open().
   """
   global historyFile

   nothingToRead = accessMode == "r" and not os.path.isfile(historyName)
   if nothingToRead:
      historyFile = None
   else:
      historyFile = open(historyName,accessMode)


def getCommandOutput(command,
                     streamOutput=False):
   """Run a command and collect everything it writes.

   The command is started with popen2.Popen3, with stderr captured on a
   pipe of its own.  Both pipes are drained through select() so neither
   one can fill up while the other is being read.

   Parameters:
      command      -- command handed to popen2.Popen3
      streamOutput -- when true, every chunk is also echoed to this
                      process's stdout/stderr as soon as it arrives

   Returns the tuple (status, stdoutText, stderrText).  status is the
   value returned by Popen3.wait(): 0 on success, otherwise the encoded
   wait status (not the plain exit code).  A non-zero status is logged,
   followed by the captured stderr unless it was already streamed.
   """
   child = popen2.Popen3(command,1)
   child.tochild.close() # don't need to talk to child
   childout = child.fromchild
   childerr = child.childerr

   BUFSIZ = 4096

   outData = []
   errData = []
   # descriptors still open -> (chunks collected so far, stream to echo to)
   pending = {childout.fileno(): (outData,sys.stdout),
              childerr.fileno(): (errData,sys.stderr)}

   try:
      while pending:
         ready = select.select(list(pending),[],[])[0] # wait for input
         for fd in ready:
            chunk = os.read(fd,BUFSIZ)
            if not chunk:
               del pending[fd]   # EOF on this pipe, stop watching it
               continue
            collected,echo = pending[fd]
            collected.append(chunk)
            if streamOutput:
               echo.write(chunk)
               echo.flush()
   finally:
      # release the pipes even when reading was interrupted
      childout.close()
      childerr.close()

   err = child.wait()
   if err != 0:
      # wait() reports the raw wait status; decode it so the log shows
      # the real exit code instead of e.g. 256 for "exit 1"
      if os.WIFEXITED(err):
         log("%s failed w/ exit code %d" % (command,os.WEXITSTATUS(err)))
      else:
         log("%s failed w/ wait status %d" % (command,err))
      if not streamOutput:
         log("%s" % ("".join(errData)))

   return  err,"".join(outData),"".join(errData)


def parseHistoryRecords(records):
   """Rebuild the table of unfinished jobs from history file lines.

   A record looks like "<site>:<jobId> <status> ...", e.g.

      lPBS:6760 R

   Later records for a job replace earlier ones.  Jobs whose last
   recorded status is "D" are left out.  Lines without a "<site>:" prefix
   or without a status field (for instance a truncated last line) are
   ignored.  The stage of every restored job is set to "Simulation".

   Returns a dictionary mapping job id to a (status, stage) tuple.
   """
   lastKnown = {}
   for record in records:
      colon = record.find(":")
      if colon > 0:
         jobState = record[colon+1:].split()
         if len(jobState) < 2:
            continue   # incomplete record, nothing usable in it
         lastKnown[jobState[0]] = (jobState[1],"Simulation")

   unfinished = {}
   for jobId in lastKnown:
      if lastKnown[jobId][0] != "D":
         unfinished[jobId] = lastKnown[jobId]
   return unfinished


def parseQstatOutput(qstatOutput):
   """Extract the job states from the text printed by qstat.

   Job lines start with "<digits>." ; the job id is the part before the
   first dot and the status letter is the next to last column:

   # $ qstat
   #  Job id              Name             User            Time Use S Queue
   #  ------------------- ---------------- --------------- -------- - -----
   #  6759.vma111         Nanowire-51394L  biswajit025     00:00:00 R workq
   #  6769.vma111         Nanowire-51411L  mmclennan       00:00:00 R workq

   # $ qstat -u
   #                                                                    Req'd  Req'd   Elap
   # Job ID               Username Queue    Jobname    SessID NDS   TSK Memory Time  S Time
   # -------------------- -------- -------- ---------- ------ ----- --- ------ ----- - -----
   # 8901.vma111.punch.pu saumitra workq    Nanowire-5  12530     1  --    --  08:00 R 05:56

   Lines that do not start with a job number, or that have no status
   column, are skipped.

   Returns a dictionary mapping job id to (status, "Simulation").
   """
   jobs = {}
   for job in qstatOutput.splitlines():
      if re.match(r"[0-9]+\.",job):
         jobState = job.split()
         if len(jobState) < 2:
            continue   # job id only, no status column to read
         jobs[jobState[0].split('.')[0]] = (jobState[-2],"Simulation")
   return jobs


if __name__ == '__main__':

   if monitorLogFileName != "stdout":
      openLog(os.path.join(monitorRoot,monitorLogLocation,monitorLogFileName))

   log("%s monitor started" % (siteDesignator))

   sleepTime = 10
   pauseTime = 5.
   maximumConsectutiveEmptyQueues = 30*60/sleepTime

   # restore the jobs that were still unfinished when the monitor last ran
   openHistory(os.path.join(monitorRoot,historyFileName),"r")
   if historyFile:
      records = historyFile.readlines()
      historyFile.close()
      activeJobs.update(parseHistoryRecords(records))

   openHistory(os.path.join(monitorRoot,historyFileName),"a")
   consectutiveEmptyQueues = 0

   stdinFd = sys.stdin.fileno()
   toCheck = [stdinFd]
   while 1:
      updates       = []
      completedJobs = []

      # Wait between polls while accepting new job ids (one per line) on
      # stdin.
      # NOTE(review): "<=" gives up to three pauseTime waits (15 s) per
      # cycle rather than sleepTime (10 s) - confirm which was intended.
      # NOTE(review): readline() is buffered, so a second id delivered in
      # the same write may not be noticed by select() until more input
      # arrives.
      delayTime = 0
      while delayTime <= sleepTime:
         if os.getppid() == 1:
            # parent is gone (re-parented to init): shut down
            os.kill(os.getpid(),signal.SIGTERM)

         ready = select.select(toCheck,[],[],pauseTime) # wait for input
         if stdinFd in ready[0]:
            line = sys.stdin.readline()
            if not line:
               # EOF: a closed stdin is reported readable forever, which
               # would turn this wait into a busy loop.  Stop watching it;
               # select() then simply sleeps for pauseTime.
               toCheck = []
            else:
               newJob = line.strip()
               if newJob != "":
                  if not newJob in activeJobs:
                     activeJobs[newJob] = ("N","Job")
                     recordHistory(newJob)
                  consectutiveEmptyQueues = 0
         delayTime += pauseTime

      err,qstatOutput,qstatError = getCommandOutput(qstatCommand)
      if err == 0:
         currentJobs = parseQstatOutput(qstatOutput)

         if len(currentJobs) == 0:
            consectutiveEmptyQueues += 1
         else:
            consectutiveEmptyQueues = 0

         # tracked jobs that left the queue are done
         for activeJob in activeJobs:
            if not activeJob in currentJobs:
               activeJobs[activeJob] = ("D",activeJobs[activeJob][1])
               recordHistory(activeJob)
               completedJobs.append(activeJob)

         # new jobs and jobs whose state changed
         for currentJob in currentJobs:
            if activeJobs.get(currentJob) != currentJobs[currentJob]:
               activeJobs[currentJob] = currentJobs[currentJob]
               recordHistory(currentJob)
            if activeJobs[currentJob][0] == "D":
               completedJobs.append(currentJob)

         for completedJob in completedJobs:
            del activeJobs[completedJob]
      # else: qstat failed (already logged by getCommandOutput).  Its empty
      # output says nothing about the queue, so no job is declared done
      # and the empty-queue counter is left alone for this cycle.

      if len(updates) > 0:
         updateMessage = str(len(updates)) + " " + siteDesignator + ":" + ":".join(updates)
         sys.stdout.write("%s\n" % (updateMessage))
         sys.stdout.flush()

      if consectutiveEmptyQueues >= maximumConsectutiveEmptyQueues:
         cleanup()
         log("%s monitor stopped" % (siteDesignator))
         sys.exit(0)

