#!/usr/bin/env python
#
# @package      hubzero-submit-monitors
# @file         BatchMonitors/monitorBOINC.py
# @author       Steven Clark <clarks@purdue.edu>
# @copyright    Copyright (c) 2004-2015 HUBzero Foundation, LLC.
# @license      http://opensource.org/licenses/MIT MIT
#
# Copyright (c) 2004-2015 HUBzero Foundation, LLC.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
#
# HUBzero is a registered trademark of HUBzero Foundation, LLC.
#
# ----------------------------------------------------------------------
#  monitorBOINC.py
#
#  script which monitors the BOINC queue and reports changes in job status
#
import sys
import os
import select
import subprocess
import re
import signal
import socket
import json
import traceback
import xml.etree.ElementTree as elementTree

from LogMessage import openLog, log
from submit_api import *

# Site name this monitor answers for.  When left empty the name is taken
# from the first command line argument instead (see the __main__ section).
SITEDESIGNATOR     = ""
# Value of the USER environment variable (None when USER is not set).
USERNAME           = os.getenv("USER")
# Directory holding this script; the log and history paths are built from it.
MONITORROOT        = os.path.dirname(os.path.abspath(__file__))
# Project URL and authenticator copied into every submit_api request.
# Both are empty here -- presumably filled in per installation; TODO confirm.
BOINCPROJECTURL    = ''
BOINCAUTHENTICATOR = ''
# Log file location handed to openLog().
MONITORLOGLOCATION = os.path.join(MONITORROOT,'log')
MONITORLOGFILENAME = "monitorBOINC.log"
LOGPATH            = os.path.join(MONITORLOGLOCATION,MONITORLOGFILENAME)
# History file holding one "site:jobId status stage queue" record per line.
HISTORYFILENAME    = "monitorBOINC.history"
HISTORYFILEPATH    = os.path.join(MONITORROOT,HISTORYFILENAME)

# SLEEPTIME:       time spent polling stdin between two queue queries.
# PAUSETIME:       timeout of each individual select() call on stdin.
# MAXIMUMIDLETIME: divided by SLEEPTIME to get the number of consecutive
#                  empty queue queries after which the monitor exits.
# PAUSETIME is used directly as a select() timeout, i.e. seconds; the other
# two are accumulated/compared against it and so share that unit.
SLEEPTIME       = 60
PAUSETIME       = 5.
MAXIMUMIDLETIME = 30*60


class QueueMonitor:
   def __init__(self,
                siteDesignator,
                userName,
                boincProjectURL,
                boincAuthenticator,
                historyFilePath,
                sleepTime,
                pauseTime,
                maximumIdleTime):
      self.siteDesignator                = siteDesignator
      self.userName                      = userName
      self.boincProjectURL               = boincProjectURL
      self.boincAuthenticator            = boincAuthenticator
      self.historyFilePath               = historyFilePath
      self.sleepTime                     = sleepTime
      self.pauseTime                     = pauseTime
      self.maximumConsecutiveEmptyQueues = maximumIdleTime/sleepTime

      self.historyFile = None
      self.updates     = []
      self.activeJobs  = {}

      signal.signal(signal.SIGINT,self.sigINT_handler)
      signal.signal(signal.SIGHUP,self.sigHUP_handler)
      signal.signal(signal.SIGQUIT,self.sigQUIT_handler)
      signal.signal(signal.SIGABRT,self.sigABRT_handler)
      signal.signal(signal.SIGTERM,self.sigTERM_handler)


   def cleanup(self):
      if self.historyFile:
         self.historyFile.close()
         self.historyFile = None


   def sigGEN_handler(self,
                      signalNumber,
                      frame):
      """Shared shutdown path of all signal handlers.

      Closes the history file, logs that the monitor stopped and exits the
      process with status 1.  Neither argument is used.
      """
      self.cleanup()
      stopNotice = "%s signal monitor stopped" % (self.siteDesignator)
      log(stopNotice)
      sys.exit(1)


   def sigINT_handler(self,
                      signalNumber,
                      frame):
      """Log the arrival of SIGINT, then shut down through sigGEN_handler()."""
      receivedNotice = "Received SIGINT!"
      log(receivedNotice)
      self.sigGEN_handler(signalNumber,frame)


   def sigHUP_handler(self,
                      signalNumber,
                      frame):
      """Log the arrival of SIGHUP, then shut down through sigGEN_handler()."""
      receivedNotice = "Received SIGHUP!"
      log(receivedNotice)
      self.sigGEN_handler(signalNumber,frame)


   def sigQUIT_handler(self,
                       signalNumber,
                       frame):
      """Log the arrival of SIGQUIT, then shut down through sigGEN_handler()."""
      receivedNotice = "Received SIGQUIT!"
      log(receivedNotice)
      self.sigGEN_handler(signalNumber,frame)


   def sigABRT_handler(self,
                       signalNumber,
                       frame):
      """Log the arrival of SIGABRT, then shut down through sigGEN_handler()."""
      receivedNotice = "Received SIGABRT!"
      log(receivedNotice)
      self.sigGEN_handler(signalNumber,frame)


   def sigTERM_handler(self,
                       signalNumber,
                       frame):
      """Log the arrival of SIGTERM, then shut down through sigGEN_handler()."""
      receivedNotice = "Received SIGTERM!"
      log(receivedNotice)
      self.sigGEN_handler(signalNumber,frame)


   def openHistory(self,
                   accessMode):
      if accessMode == 'r':
         if os.path.isfile(self.historyFilePath):
            self.historyFile = open(self.historyFilePath,accessMode)
         else:
            self.historyFile = None
      else:
         self.historyFile = open(self.historyFilePath,accessMode)


   def recordHistory(self,
                     jobId):
      self.historyFile.write("%s:%s %s %s %s\n" % (self.siteDesignator,str(jobId),self.activeJobs[jobId]['status'], \
                                                                                  self.activeJobs[jobId]['stage'], \
                                                                                  self.activeJobs[jobId]['queue']))
      self.historyFile.flush()
      jobState = {'jobId':jobId,
                  'status':self.activeJobs[jobId]['status'],
                  'stage':self.activeJobs[jobId]['stage'],
                  'queue':self.activeJobs[jobId]['queue'],
                  'tailFiles':{}}
      if 'tailFiles' in self.activeJobs[jobId]:
         for tailFile in self.activeJobs[jobId]['tailFiles']:
            if self.activeJobs[jobId]['status'] == 'D':
               text = ""
            else:
               tailPath = self.activeJobs[jobId]['tailFiles'][tailFile]['path']
               if tailPath:
                  nLines          = self.activeJobs[jobId]['tailFiles'][tailFile]['nLines']
                  lastEndPosition = self.activeJobs[jobId]['tailFiles'][tailFile]['endPosition']
                  text,endPosition = self.__tailFile(tailPath,nLines,lastEndPosition)
                  self.activeJobs[jobId]['tailFiles'][tailFile]['endPosition'] = endPosition
               else:
                  text = ""
            jobState['tailFiles'][tailFile] = text

#     log(str(jobState))
      self.updates.append(jobState)


   def loadHistory(self):
      self.openHistory('r')
      if self.historyFile:
# boinc:344 X Simulation boincQ
         records = self.historyFile.readlines()
         for record in records:
            colon = record.find(':')
            if colon > 0:
               jobId,status,stage,queue = record[colon+1:].split()
               if   status == 'D':
                  if jobId in self.activeJobs:
                     del self.activeJobs[jobId]
               else:
                  self.activeJobs[jobId] = {'jobId':jobId,
                                            'status':status,
                                            'stage':stage,
                                            'queue':queue}
         self.historyFile.close()
         self.historyFile = None


   def saveHistory(self):
      self.openHistory('w')
      if self.historyFile:
         for activeJob in self.activeJobs:
            self.historyFile.write("%s:%s %s %s %s\n" % (self.siteDesignator,str(activeJob), \
                                                                             self.activeJobs[activeJob]['status'], \
                                                                             self.activeJobs[activeJob]['stage'], \
                                                                             self.activeJobs[activeJob]['queue']))

         self.historyFile.close()
         self.historyFile = None


   def executeQstatCommand(self):
      """Query the BOINC project for all batches and their jobs.

      Issues one query_batches() request and then one query_batch() request
      (with job details) per batch returned.

      Returns a tuple (err,outMessage,errMessage):
         err         0 on success, 1 when any response contains an <error>
         outMessage  one "id:status" line per record.  A batch whose name
                     ends in '_0' is treated as a sweep: it contributes a
                     "batchId:state" line followed by "batchId.N:status" for
                     its jobs, N counting from 1.  Any other batch
                     contributes "batchId:status" for each of its jobs.
         errMessage  text of the <error_msg> element, "" when there is none

      NOTE(review): the responses are assumed to be xml.etree.ElementTree
      elements as returned by submit_api -- confirm against that module.
      """
      def elementsNamed(root,tag):
         # Element.getiterator() was removed in Python 3.9; use iter() when
         # the element offers it and fall back for very old interpreters.
         walker = getattr(root,'iter',None)
         if walker is None:
            walker = root.getiterator
         return walker(tag)

      def errorText(errorElement):
         # a missing or empty <error_msg> must not raise while reporting an error
         detail = errorElement.find('error_msg')
         if detail is None or detail.text is None:
            return ""
         return detail.text

      queryBatchesRequest = REQUEST()
      queryBatchesRequest.project       = self.boincProjectURL
      queryBatchesRequest.authenticator = self.boincAuthenticator
      queryBatchesRequest.get_cpu_time  = False
      queryBatchesResponse = query_batches(queryBatchesRequest)

      err = 0
      errMessage = ""
      outRecords = []

      batchesError = queryBatchesResponse.find('error')
      if batchesError is not None:
         err = 1
         errMessage = errorText(batchesError)
      else:
         for batch in elementsNamed(queryBatchesResponse,'batch'):
            batchId = batch.find('id').text
            state   = batch.find('state').text
            name    = batch.find('name').text
            # an empty <name/> element has text None
            isSweep = name is not None and name.endswith('_0')

            queryBatchRequest = REQUEST()
            queryBatchRequest.project         = self.boincProjectURL
            queryBatchRequest.authenticator   = self.boincAuthenticator
            queryBatchRequest.get_cpu_time    = False
            queryBatchRequest.get_job_details = True
            queryBatchRequest.batch_id        = batchId
            queryBatchResponse = query_batch(queryBatchRequest)

            batchError = queryBatchResponse.find('error')
            if batchError is not None:
               err = 1
               errMessage = errorText(batchError)
               break

            if isSweep:
               outRecords.append("%s:%s\n" % (batchId,state))
            instance = 0
            for job in elementsNamed(queryBatchResponse,'job'):
               status = job.find('status').text
               if isSweep:
                  instance += 1
                  outRecords.append("%s.%d:%s\n" % (batchId,instance,status))
               else:
                  outRecords.append("%s:%s\n" % (batchId,status))

      return(err,''.join(outRecords),errMessage)


   def filterQstat(self,
                   qstatRecords):
      """Translate "jobId:status" records into job dictionaries.

      qstatRecords is an iterable of strings.  Only records starting with a
      non-empty job id made of digits and dots followed by ':' are used;
      everything else is ignored.  The text after the first ':' is mapped
      to a status code:

         unsent, queued  -> PD        0 (BATCH_STATE_INIT)        -> PD
         in_progress     -> R         1 (BATCH_STATE_IN_PROGRESS) -> R
         error, done     -> D         2 (BATCH_STATE_COMPLETE)    -> D
                                      3 (BATCH_STATE_ABORTED)     -> A
                                      4 (BATCH_STATE_RETIRED)     -> RT

      Anything else becomes '?'.

      Returns a list of dictionaries with the keys jobId, queue (always
      '?'), status and stage (always 'Simulation').
      """
      BATCH_STATE_INIT        = 0
      BATCH_STATE_IN_PROGRESS = 1
      BATCH_STATE_COMPLETE    = 2
      BATCH_STATE_ABORTED     = 3
      BATCH_STATE_RETIRED     = 4

      jobStatusCodes = {'unsent':'PD',
                        'queued':'PD',
                        'in_progress':'R',
                        'error':'D',
                        'done':'D'}
      batchStateCodes = {BATCH_STATE_INIT:'PD',
                         BATCH_STATE_IN_PROGRESS:'R',
                         BATCH_STATE_COMPLETE:'D',
                         BATCH_STATE_ABORTED:'A',
                         BATCH_STATE_RETIRED:'RT'}

      filteredRecords = []
      for qstatRecord in qstatRecords:
         # '+' rather than '*': a record that starts with ':' has no job id
         if not re.match(r"[0-9.]+:",qstatRecord):
            continue

         # split once so that a ':' inside the status text cannot break the unpack
         jobId,status = qstatRecord.split(':',1)
         if status in jobStatusCodes:
            status = jobStatusCodes[status]
         else:
            try:
               statusCode = int(status)
            except ValueError:
               status = '?'
            else:
               status = batchStateCodes.get(statusCode,'?')

         filteredRecords.append({'jobId':jobId,
                                 'queue':"?",
                                 'status':status,
                                 'stage':'Simulation'})

      return(filteredRecords)


   @staticmethod
   def __tailFile(tailPath,
                  nLines,
                  lastEndPosition=0):
      text        = ""
      nTextLines  = 0
      endPosition = lastEndPosition
      bufsize = 2048
      if os.path.exists(tailPath):
         fileSize = os.stat(tailPath).st_size
         if fileSize > lastEndPosition:
            try:
               fpTail = open(tailPath,'r')
               try:
                  iter = 0
                  while True:
                     iter += 1
                     location = max(fileSize-bufsize*iter,lastEndPosition)
                     fpTail.seek(location)
                     data = fpTail.readlines()
                     endPosition = fpTail.tell()
                     if len(data) > nLines or location == lastEndPosition:
                        text = ''.join(data[-nLines:])
                        nTextLines += min(len(data),nLines)
                        if lastEndPosition > 0 and location > lastEndPosition+1:
                           text = "...\n" + text
                        del data
                        break
               except (IOError,OSError):
                  log("%s could not be read" % (tailPath))
               finally:
                  fpTail.close()
            except (IOError,OSError):
               log("%s could not be opened" % (tailPath))

      if nTextLines > 0:
         log("tailed %d lines of %s" % (nTextLines,tailPath))

      return(text,endPosition)


   def monitorQ(self):
      """Run the monitor loop until stdin closes, the queue stays idle or a signal arrives.

      Each cycle
         1. polls stdin in pauseTime slices until more than sleepTime has
            accumulated, registering every 'newJobId' message read there as
            an active job,
         2. queries the BOINC project through executeQstatCommand(),
         3. records status changes, new tail file output and completed jobs
            with recordHistory(),
         4. writes the collected job states to stdout as one JSON
            'siteUpdate' line, and
         5. rewrites the history file from the jobs that are still active.

      The process exits with status 0 when stdin reaches end of file, or
      once maximumConsecutiveEmptyQueues successful queries in a row
      returned no jobs.  Messages from stdin that are not valid JSON are
      logged; valid JSON that is not a 'newJobId' message carrying a
      'remoteJobId' is ignored.
      """
      self.openHistory('a')
      consecutiveEmptyQueues = 0
      lastReportedActiveJobCount = -1

      toCheck = [sys.stdin.fileno()]
      while True:
         activeJobCount = len(self.activeJobs)
         if activeJobCount != lastReportedActiveJobCount:
            log("%d monitored jobs" % (activeJobCount))
         lastReportedActiveJobCount = activeJobCount

         self.updates  = []
         currentJobs   = {}
         completedJobs = []

         delayTime = 0
         while delayTime <= self.sleepTime:
            # parent pid 1 -- presumably the original parent is gone and the
            # process was re-parented to init; stop through the SIGTERM handler
            if os.getppid() == 1:
               os.kill(os.getpid(),signal.SIGTERM)

            ready = select.select(toCheck,[],[],self.pauseTime) # wait for input
            if sys.stdin.fileno() in ready[0]:
               newJob = ""
               message = sys.stdin.readline()

               if message != '':
                  try:
                     centralMessage = json.loads(message)
                  except Exception:
                     # not a bare except: SystemExit raised by a signal
                     # handler must not be swallowed here
                     log(traceback.format_exc())
                  else:
                     # valid JSON of an unexpected shape is ignored instead
                     # of raising KeyError/TypeError and killing the monitor
                     if isinstance(centralMessage,dict):
                        if centralMessage.get('messageType') == 'newJobId':
                           newJob = centralMessage.get('remoteJobId',"")
                  if newJob != "":
                     jobRecord = {'jobId':newJob,
                                  'status':'N',
                                  'stage':'Job',
                                  'queue':'?',
                                  'jobWorkDirectory':centralMessage.get('jobWorkDirectory','?'),
                                  'localJobId':centralMessage.get('localJobId','?'),
                                  'instanceId':centralMessage.get('instanceId','?'),
                                  'runName':centralMessage.get('runName','?'),
                                  'tailFiles':centralMessage.get('tailFiles',[])}
                     self.activeJobs[newJob] = jobRecord

                     jobWorkDirectory = os.path.expandvars(os.path.expanduser(jobRecord['jobWorkDirectory']))
                     tailFiles  = jobRecord['tailFiles']
                     runName    = jobRecord['runName']
                     instanceId = jobRecord['instanceId']
                     # the two placeholders stand for the job's own
                     # <runName>_<instanceId>.stdout/.stderr files
                     for placeholder,suffix in (("#STDOUT#","stdout"),("#STDERR#","stderr")):
                        if placeholder in tailFiles:
                           stdFile = "%s_%s.%s" % (runName,instanceId,suffix)
                           tailFiles[placeholder]['path'] = os.path.join(jobWorkDirectory,stdFile)
                           tailFiles[placeholder]['endPosition'] = 0
                     # every other tail file is a name inside the work directory
                     for tailFile in tailFiles:
                        if tailFile != "#STDOUT#" and tailFile != "#STDERR#":
                           tailFiles[tailFile]['path'] = os.path.join(jobWorkDirectory,tailFile)
                           tailFiles[tailFile]['endPosition'] = 0

                     self.recordHistory(newJob)
                     # 'n' marks a job added since the last query; it is
                     # not declared done when that query does not list it
                     jobRecord['status'] = 'n'
                     consecutiveEmptyQueues = 0
               else:
                  # end of file on stdin
                  self.cleanup()
                  log("%s empty job monitor stopped" % (self.siteDesignator))
                  sys.exit(0)
            delayTime += self.pauseTime

         errStatus,qstatOutput,qstatError = self.executeQstatCommand()
         if errStatus == 0:
            for job in self.filterQstat(qstatOutput.splitlines()):
               currentJobs[job['jobId']] = job

            if len(currentJobs) == 0:
               consecutiveEmptyQueues += 1
            else:
               consecutiveEmptyQueues = 0

            for activeJob in self.activeJobs:
               if self.activeJobs[activeJob]['status'] == 'n':
                  self.activeJobs[activeJob]['status'] = 'N'
                  self.activeJobs[activeJob]['stage']  = 'Job'
                  self.activeJobs[activeJob]['queue']  = '?'
               else:
                  # a known job the query no longer lists is taken to be done
                  if not activeJob in currentJobs:
                     self.activeJobs[activeJob]['status'] = 'D'
                     self.recordHistory(activeJob)
                     completedJobs.append(activeJob)

            for currentJob in currentJobs:
               if not currentJob in self.activeJobs:
                  log("recordHistory: currentJob not in activeJobs")
                  self.activeJobs[currentJob] = currentJobs[currentJob]
                  self.recordHistory(currentJob)
               else:
                  reportedJob = currentJobs[currentJob]
                  knownJob    = self.activeJobs[currentJob]
                  somethingChanged = any(reportedJob[key] != knownJob[key] for key in reportedJob)
                  if somethingChanged:
                     log("recordHistory: currentJob different than activeJob")
                     for key in reportedJob:
                        knownJob[key] = reportedJob[key]
                     self.recordHistory(currentJob)
                  elif 'tailFiles' in knownJob:
                     # unchanged status: report only when a tail file grew
                     for tailFile in knownJob['tailFiles']:
                        tailPath = knownJob['tailFiles'][tailFile]['path']
                        if os.path.exists(tailPath):
                           fileSize = os.stat(tailPath).st_size
                           if fileSize > knownJob['tailFiles'][tailFile]['endPosition']:
                              log("recordHistory: change in tailFiles")
                              self.recordHistory(currentJob)
                              break

               if self.activeJobs[currentJob]['status'] == 'D':
                  completedJobs.append(currentJob)

            for completedJob in completedJobs:
               del self.activeJobs[completedJob]

            if len(self.updates) > 0:
               siteMessage = {'messageType':'siteUpdate','siteDesignator':self.siteDesignator,
                              'nJobStates':len(self.updates),'jobStates':self.updates}
               try:
                  sys.stdout.write(json.dumps(siteMessage) + '\n')
               except Exception:
                  log("Site update failed")
                  log(traceback.format_exc())
               else:
                  sys.stdout.flush()

            # keep the attribute in place (instead of deleting it) so that
            # the instance always has an updates list
            self.updates = []

            # compact the history: replace the appended records by one
            # record per job that is still active, then append again
            if self.historyFile:
               self.historyFile.close()
               self.historyFile = None
               self.saveHistory()
               self.openHistory('a')

            # '>=' so that the limit also triggers if it is not a whole number
            if consecutiveEmptyQueues >= self.maximumConsecutiveEmptyQueues:
               self.cleanup()
               log("%s idle monitor stopped" % (self.siteDesignator))
               sys.exit(0)
         else:
            # NOTE(review): job states collected during this cycle are not
            # sent when the query fails; self.updates is reset at the top of
            # the next cycle.  The history file still holds them -- confirm
            # whether dropping the update message is intended.
            log("Error %d in query_batches command:\n%s" % (errStatus,qstatError))


if __name__ == '__main__':

   openLog(LOGPATH)

   # Without exactly one command line argument the built-in designator is
   # used.  With one, it must agree with a non-empty built-in designator.
   siteDesignator = SITEDESIGNATOR
   if len(sys.argv) == 2:
      requestedDesignator = sys.argv[1]
      if SITEDESIGNATOR in ("",requestedDesignator):
         siteDesignator = requestedDesignator
      else:
         log("Site designators do not match.\n   internal name = %s\n   external name = %s" % (SITEDESIGNATOR,requestedDesignator))
         siteDesignator = ""

   # no usable site name: nothing to monitor
   if not siteDesignator:
      sys.exit(2)

   log("%s monitor started on %s" % (siteDesignator,socket.gethostname()))

   __queueMonitor__ = QueueMonitor(siteDesignator,
                                   USERNAME,
                                   BOINCPROJECTURL,
                                   BOINCAUTHENTICATOR,
                                   HISTORYFILEPATH,
                                   SLEEPTIME,
                                   PAUSETIME,
                                   MAXIMUMIDLETIME)

   # reload the jobs known from an earlier run, compact the history file,
   # then monitor until told to stop
   __queueMonitor__.loadHistory()
   __queueMonitor__.saveHistory()
   __queueMonitor__.monitorQ()


