# @package      hubzero-submit-common
# @file         JobMonitor.py
# @author       Steven Clark <clarks@purdue.edu>
# @copyright    Copyright (c) 2012 HUBzero Foundation, LLC.
# @license      http://www.gnu.org/licenses/lgpl-3.0.html LGPLv3
#
# Copyright (c) 2012 HUBzero Foundation, LLC.
#
# This file is part of: The HUBzero(R) Platform for Scientific Collaboration
#
# The HUBzero(R) Platform for Scientific Collaboration (HUBzero) is free
# software: you can redistribute it and/or modify it under the terms of
# the GNU Lesser General Public License as published by the Free Software
# Foundation, either version 3 of the License, or (at your option) any
# later version.
#
# HUBzero is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
#
# HUBzero is a registered trademark of HUBzero Foundation, LLC.
#

import re
import shelve
import time
import traceback

from hubzero.submit.LogMessage  import logID as log
from hubzero.submit.MessageCore import MessageCore

class JobMonitor(MessageCore):
   def __init__(self,
                host,
                port,
                repeatDelay=5,
                fixedBufferSize=64,
                activeJobDBPath="monitorJobDB",
                activeJobDumpPath=""):
      """Initialise the MessageCore listener and empty job bookkeeping tables.

      host, port          bind address handed to MessageCore; an empty host is
                          shown as "localhost" in the bind label only
      repeatDelay         passed through to MessageCore unchanged
      fixedBufferSize     buffer size used for fixed length messages
      activeJobDBPath     shelve file opened by loadActiveJobs ("" disables it)
      activeJobDumpPath   text dump file read by loadActiveJobs ("" disables it)
      """
      labelHost = host or "localhost"
      MessageCore.__init__(self,
                           bindHost=host,
                           bindPort=port,
                           bindLabel="%s:%d" % (labelHost,port),
                           repeatDelay=repeatDelay)

      # configuration
      self.fixedBufferSize   = fixedBufferSize
      self.activeJobDBPath   = activeJobDBPath
      self.activeJobDumpPath = activeJobDumpPath

      # bookkeeping: globalJobId -> job record, site -> value, localJobId -> globalJobId
      self.activeJobs        = {}
      self.activeJobSites    = {}
      self.localToGlobalMap  = {}


   def loadActiveJobs(self):
      """Restore active job records from the shelve DB and/or the text dump file.

      When activeJobDBPath is set the shelve file becomes self.activeJobs.
      Legacy 3 and 4 field records found there are rewritten in the current
      6 field layout (jobStatus,jobStage,jobQueue,timeRecorded,hubUserId,
      localJobId); upgradeActiveJob stamps them with the current time.

      When activeJobDumpPath is set each non-blank line of that file is parsed
      as "globalJobId [jobStatus [jobStage [[jobQueue] timeRecorded
      [hubUserId [localJobId]]]]]" and added with addActiveJob.  Missing
      fields keep their placeholder values.  Lines with an unparsable
      timestamp are logged and skipped.

      Finally localToGlobalMap is rebuilt from every loaded record.
      """
      log("loading active jobs")
      if self.activeJobDBPath != "":
         try:
            self.activeJobs = shelve.open(self.activeJobDBPath)
            log("%d jobs loaded from DB file" % (len(self.activeJobs)))
            hubUserId  = '?'
            localJobId = '?'
            # Snapshot the items first: records are rewritten while walking them.
            for globalJobId,activeJob in list(self.activeJobs.items()):
               if   len(activeJob) == 3:
                  jobStatus,jobStage,timeRecorded = activeJob
                  self.upgradeActiveJob(globalJobId,jobStatus,jobStage,'?',hubUserId,localJobId)
               elif len(activeJob) == 4:
                  jobStatus,jobStage,jobQueue,timeRecorded = activeJob
                  self.upgradeActiveJob(globalJobId,jobStatus,jobStage,jobQueue,hubUserId,localJobId)
         except IOError:
            log(traceback.format_exc())

      if self.activeJobDumpPath != "":
         dumpedJobs = []
         try:
            with open(self.activeJobDumpPath,'r') as dumpFile:
               dumpedJobs = dumpFile.readlines()
         except Exception:
            # Reading the dump is best effort, the file may simply not exist yet.
            log("unable to read job dump file %s" % (self.activeJobDumpPath))

         nLoadedJobs = 0
         for dumpedJob in dumpedJobs:
            fields = dumpedJob.split()
            nFields = len(fields)
            if nFields == 0:
               continue

            timeRecorded = 0.
            jobStatus    = 'D'
            jobStage     = '?'
            jobQueue     = '?'
            hubUserId    = '?'
            localJobId   = '?'
            globalJobId  = fields[0]
            try:
               if nFields > 1:
                  jobStatus = fields[1]
               if nFields > 2:
                  jobStage = fields[2]
                  if   nFields > 4:
                     jobQueue = fields[3]
                     timeRecorded = float(fields[4])
                  elif nFields > 3:
                     # old layout without a queue column
                     timeRecorded = float(fields[3])
               if nFields > 5:
                  hubUserId = fields[5]
               # localJobId is the seventh column, a six column line has none
               if nFields > 6:
                  localJobId = fields[6]
            except ValueError:
               log("skipping malformed dump file entry: %s" % (dumpedJob.strip()))
               continue

            self.addActiveJob(globalJobId,jobStatus,jobStage,jobQueue,hubUserId,localJobId,timeRecorded)
            nLoadedJobs += 1

         log("%d jobs loaded from dump file" % (nLoadedJobs))

      for globalJobId,activeJob in self.activeJobs.items():
         jobStatus,jobStage,jobQueue,timeRecorded,hubUserId,localJobId = activeJob
         self.localToGlobalMap[localJobId] = globalJobId


   def dumpActiveJobs(self):
      """Write every active job record to the dump file, one job per line.

      Line layout:
         globalJobId jobStatus jobStage jobQueue timeRecorded hubUserId localJobId

      Nothing is written when no dump path is configured.  activeJobDumpPath
      defaults to "" and opening "" would raise IOError.
      """
      if self.activeJobDumpPath == "":
         return

      # the with block closes the file even if a write fails
      with open(self.activeJobDumpPath,'w') as dumpFile:
         for globalJobId,activeJob in self.activeJobs.items():
            jobStatus,jobStage,jobQueue,timeRecorded,hubUserId,localJobId = activeJob
            dumpFile.write("%s %s %s %s %f %s %s\n" % (globalJobId, \
                                                       jobStatus,jobStage,jobQueue,timeRecorded, \
                                                       hubUserId,localJobId))


   def close(self):
      """Close the job store and then the underlying MessageCore.

      self.activeJobs is a shelve object only after loadActiveJobs opened the
      DB successfully; otherwise it is the plain dict created in __init__,
      which has no close() method.  MessageCore.close is called even if
      closing the job store fails.
      """
      try:
         closeJobStore = getattr(self.activeJobs,'close',None)
         if closeJobStore is not None:
            closeJobStore()
      finally:
         MessageCore.close(self)


   def addActiveJobSite(self,
                        jobSite,
                        value):
      """Record jobSite as active and associate value with it.

      An existing entry for jobSite is overwritten.  processRequest reads the
      stored value back as the site's last report time.
      """
      self.activeJobSites.update({jobSite:value})


   def deleteActiveJobSite(self,
                           jobSite):
      """Forget jobSite; raises KeyError when the site is not registered."""
      self.activeJobSites.pop(jobSite)


   def isJobSiteActive(self,
                       jobSite):
      """Return True when jobSite has an entry in the active site table."""
      siteIsRegistered = jobSite in self.activeJobSites
      return siteIsRegistered


   def getActiveJobSiteQueues(self,
                              jobSite):
      """Return the distinct queue names of the active jobs at jobSite.

      Job keys have the form "site:remoteJobId".  Queues are listed in the
      order in which they are first met while walking the active jobs.
      """
      queuesSeen = []
      for jobKey,jobRecord in self.activeJobs.items():
         keySite,keyJobId = jobKey.split(':')
         if keySite != jobSite:
            continue
         status,stage,queue,recorded,userId,localId = jobRecord
         if queue in queuesSeen:
            continue
         queuesSeen.append(queue)

      return queuesSeen


   def addActiveJob(self,
                    globalJobId,
                    jobStatus,
                    jobStage,
                    jobQueue,
                    hubUserId,
                    localJobId,
                    timeRecorded=0.):
      """Store (or overwrite) the record for globalJobId and index it by localJobId.

      A positive timeRecorded is kept as the record's time stamp; any other
      value is replaced by the current time.
      """
      stamp = timeRecorded if timeRecorded > 0. else time.time()
      record = (jobStatus,jobStage,jobQueue,stamp,hubUserId,localJobId)
      self.activeJobs[globalJobId] = record
      self.localToGlobalMap[localJobId] = globalJobId


   def updateActiveJob(self,
                       globalJobId,
                       newJobStatus,
                       newJobStage,
                       newJobQueue):
      """Replace status, stage and queue of a known job and refresh its time stamp.

      hubUserId and localJobId are carried over from the existing record and
      the localJobId -> globalJobId index is refreshed.  Unknown job ids are
      ignored.  The index update has to stay inside the guard because
      localJobId is only known once the existing record has been read.
      """
      if globalJobId in self.activeJobs:
         jobStatus,jobStage,jobQueue,timeRecorded,hubUserId,localJobId = self.activeJobs[globalJobId]
         self.activeJobs[globalJobId] = (newJobStatus,newJobStage,newJobQueue,time.time(),hubUserId,localJobId)
         self.localToGlobalMap[localJobId] = globalJobId


   def upgradeActiveJob(self,
                        globalJobId,
                        jobStatus,
                        jobStage,
                        jobQueue,
                        hubUserId,
                        localJobId):
      """Rewrite the record of a known job in the 6 field layout.

      The record is stamped with the current time.  Unknown job ids are
      ignored and localToGlobalMap is not touched.
      """
      if globalJobId not in self.activeJobs:
         return

      upgradedRecord = (jobStatus,jobStage,jobQueue,time.time(),hubUserId,localJobId)
      self.activeJobs[globalJobId] = upgradedRecord


   def markNewActiveJobsAsDone(self,
                               jobSite):
      """Change every job at jobSite whose status is 'N' to status 'D'.

      Jobs belong to jobSite when their key starts with "jobSite:".  The site
      name is compared literally, as in getActiveJobSiteQueues, so characters
      such as '.' in a site name cannot match other sites.
      """
      sitePrefix = jobSite + ':'
      # Collect the keys first so the store is not walked while being updated.
      siteJobs = [globalJobId for globalJobId in self.activeJobs.keys() if globalJobId.startswith(sitePrefix)]
      for globalJobId in siteJobs:
         jobStatus,jobStage,jobQueue,timeRecorded,hubUserId,localJobId = self.activeJobs[globalJobId]
         if jobStatus == 'N':
            self.updateActiveJob(globalJobId,'D',jobStage,jobQueue)


   def purgeActiveJobs(self,
                       purgeJobStatus,
                       cutOffAge=0.):
      """Delete jobs in state purgeJobStatus recorded at least cutOffAge seconds ago.

      Matching jobs are removed from the active job store and from the
      localJobId index.  Returns the number of jobs removed.
      """
      cutOffTime = time.time()-cutOffAge

      # first pass: select, so the store is not modified while being walked
      expiredJobs = []
      for jobKey in self.activeJobs:
         status,stage,queue,recorded,userId,localId = self.activeJobs[jobKey]
         if status == purgeJobStatus and recorded <= cutOffTime:
            expiredJobs.append((jobKey,localId))

      # second pass: remove
      for jobKey,localId in expiredJobs:
         self.localToGlobalMap.pop(localId,None)
         del self.activeJobs[jobKey]

      return len(expiredJobs)


   def getActiveJobCount(self):
      """Return the number of records currently held in the active job store."""
      jobCount = len(self.activeJobs)
      return jobCount


   def processRequest(self,
                      channel):
      """Read one request from channel, answer it and update the job tables.

      Requests have the form "<type>:<arguments>":
         Q  job query             reply "<status> <stage>"; a 'D' job becomes 'Dr'
                                  once the reply has been sent
         W  workflow query        arguments "site remoteJobId nInstances"; reply is
                                  the message length followed by the status of the
                                  parent and of every known instance
         S  new job submission    registers the job (and its workflow instances),
                                  dumps the job table, reply "<status> <stage>"
         T  terminate job         reply current "<status> <stage>", then mark the
                                  job as done
         R  report active jobs    reply "<length> <maxLastReportTime>" followed by
                                  "site time @ queue : job status stage ..." with
                                  sites separated by " | "; site and/or job may be "*"
         U  list a user's jobs    argument is the hub user id; reply
                                  "<length> <now>" followed by the unfinished jobs
      Q, S, R and T take either a local job id, "site remoteJobId", or
      "site remoteJobId hubUserId localJobId destination" as arguments.

      An empty message closes the channel.

      Returns (channelClosed,newJobSite,newJobId).
      """
      channelClosed = False
      newJobSite    = ""
      newJobId      = ""

      message = self.receiveMessage(channel,self.fixedBufferSize)
      if message != "":
         # Placeholders for request forms that carry no hub user or local job
         # id.  Without them an 'S' request in "site remoteJobId" form raised
         # NameError when it registered a previously unknown job.
         hubUserId  = '?'
         localJobId = '?'
         if   re.match('[QSRT]:',message):
            nInstances = 0
            try:
               messageType,siteId = message.split(':')
               nWords = len(siteId.split())
               if nWords == 1:
                  localJobId = siteId
                  try:
                     globalJobId = self.localToGlobalMap[localJobId]
                     messageSite,remoteJobId = globalJobId.split(':')
                  except Exception:
                     log("Local Job ID %s is not registered" % (localJobId))
                     messageType = ''
               elif nWords == 2:
                  messageSite,remoteJobId = siteId.split()
               elif nWords == 5:
                  messageSite,remoteJobId,hubUserId,localJobId,destination = siteId.split()
                  if localJobId.startswith('WF;'):
                     # workflow submission: "WF;<localJobId>;<nInstances>"
                     nInstances = int(localJobId.split(';')[2])
                     localJobId = localJobId.split(';')[1]
               else:
                  log("Failed %s message request: %s" % (messageType,siteId))
                  messageType = ''
            except Exception:
               log("Failed QSRT message request: %s" % (message))
               messageType = ''
         elif re.match('[W]:',message):
            try:
               messageType,siteId = message.split(':')
               nWords = len(siteId.split())
               if nWords == 3:
                  messageSite,remoteJobId,nInstances = siteId.split()
                  nInstances = int(nInstances)
               else:
                  log("Failed %s message request: %s" % (messageType,siteId))
                  messageType = ''
            except Exception:
               log("Failed W message request: %s" % (message))
               messageType = ''
         elif re.match('[U]:',message):
            try:
               messageType,reportHubUserId = message.strip().split(':')
            except Exception:
               log("Failed U message request: %s" % (message))
               messageType = ''
         else:
            log("Failed message request: " + message)
            messageType = ''

         if   messageType == 'Q':                        # job Query
            globalJobId = messageSite + ':' + remoteJobId
            markReportedAsDone = False
            if globalJobId in self.activeJobs:
               jobStatus,jobStage,jobQueue,timeRecorded,hubUserId,localJobId = self.activeJobs[globalJobId]
               if   jobStatus == 'D':
                  markReportedAsDone = True
               elif jobStatus == 'Dr':
                  # already reported once, the requester still sees plain 'D'
                  jobStatus = 'D'
            else:
               jobStatus,jobStage,jobQueue = ('?','?','?')
            if self.sendMessage(channel,jobStatus + " " + jobStage,self.fixedBufferSize) > 0:
               newJobSite = messageSite
               if markReportedAsDone:
                  jobStatus = 'Dr'
                  self.updateActiveJob(globalJobId,jobStatus,jobStage,jobQueue)
         elif messageType == 'W':                        # workflow Query
            globalJobId = messageSite + ':' + remoteJobId
            markReportedAsDone = False
            if globalJobId in self.activeJobs:
               jobStatus,jobStage,jobQueue,timeRecorded,hubUserId,localJobId = self.activeJobs[globalJobId]
               if   jobStatus == 'D':
                  markReportedAsDone = True
               elif jobStatus == 'Dr':
                  jobStatus = 'D'
            else:
               jobStatus,jobStage,jobQueue = ('?','?','?')
            wfInstances = {}
            # instance jobs are keyed "site:<base>.<instance>"
            if '.' in remoteJobId:
               wfGlobalJobIdBase = messageSite + ':' + remoteJobId.split('.')[0] + '.'
            else:
               wfGlobalJobIdBase = globalJobId + '.'
            for instance in range(1,nInstances+1):
               wfGlobalJobId = wfGlobalJobIdBase + str(instance)
               wfMarkReportedAsDone = False
               if wfGlobalJobId in self.activeJobs:
                  wfJobStatus,wfJobStage,wfJobQueue,wfTimeRecorded,wfHubUserId,wfLocalJobId = self.activeJobs[wfGlobalJobId]
                  if   wfJobStatus == 'D':
                     wfMarkReportedAsDone = True
                  elif wfJobStatus == 'Dr':
                     wfJobStatus = 'D'
               else:
                  wfJobStatus,wfJobStage,wfJobQueue = ('?','?','?')
               wfInstances[instance] = {}
               wfInstances[instance]['globalJobId']        = wfGlobalJobId
               wfInstances[instance]['jobStatus']          = wfJobStatus
               wfInstances[instance]['jobStage']           = wfJobStage
               wfInstances[instance]['jobQueue']           = wfJobQueue
               wfInstances[instance]['markReportedAsDone'] = wfMarkReportedAsDone
            statusMessageDelimiter = ':'
            statusMessage = jobStatus + " " + jobStage
            for instance in range(1,nInstances+1):
               # unknown instances are left out of the reply
               if wfInstances[instance]['jobStatus'] != '?':
                  statusMessage += statusMessageDelimiter + str(instance) + " " + \
                                                            wfInstances[instance]['jobStatus'] + " " + \
                                                            wfInstances[instance]['jobStage']
            # the reply length goes first in a fixed size message, then the reply itself
            if self.sendMessage(channel,str(len(statusMessage)),self.fixedBufferSize) > 0:
               if self.sendMessage(channel,statusMessage) > 0:
                  newJobSite = messageSite
                  if markReportedAsDone:
                     jobStatus = 'Dr'
                     self.updateActiveJob(globalJobId,jobStatus,jobStage,jobQueue)
                  for instance in range(1,nInstances+1):
                     if wfInstances[instance]['markReportedAsDone']:
                        wfGlobalJobId = wfInstances[instance]['globalJobId']
                        wfJobStatus   = 'Dr'
                        wfJobStage    = wfInstances[instance]['jobStage']
                        wfJobQueue    = wfInstances[instance]['jobQueue']
                        self.updateActiveJob(wfGlobalJobId,wfJobStatus,wfJobStage,wfJobQueue)
            del wfInstances
         elif messageType == 'S':                        # new job Submission
            globalJobId = messageSite + ':' + remoteJobId
            if globalJobId in self.activeJobs:
               jobStatus,jobStage,jobQueue,timeRecorded,hubUserId,localJobId = self.activeJobs[globalJobId]
            else:
               if nInstances > 0:
                  jobStatus,jobStage,jobQueue = ('N','DAG','?')
               else:
                  jobStatus,jobStage,jobQueue = ('N','Job','?')
               self.addActiveJob(globalJobId,jobStatus,jobStage,jobQueue,hubUserId,localJobId)
            self.dumpActiveJobs()
            if self.sendMessage(channel,jobStatus + " " + jobStage,self.fixedBufferSize) > 0:
               newJobSite = messageSite
               newJobId   = remoteJobId
               if '.' in remoteJobId:
                  wfGlobalJobIdBase = messageSite + ':' + remoteJobId.split('.')[0] + '.'
               else:
                  wfGlobalJobIdBase = globalJobId + '.'
               wfJobStatus,wfJobStage,wfJobQueue = ('WF','Simulation','?')
               for instance in range(1,nInstances+1):
                  wfGlobalJobId = wfGlobalJobIdBase + str(instance)
                  if not wfGlobalJobId in self.activeJobs:
                     self.addActiveJob(wfGlobalJobId,wfJobStatus,wfJobStage,wfJobQueue,hubUserId,localJobId)
               self.dumpActiveJobs()
         elif messageType == 'T':                        # Terminate job
            globalJobId = messageSite + ':' + remoteJobId
            if globalJobId in self.activeJobs:
               jobStatus,jobStage,jobQueue,timeRecorded,hubUserId,localJobId = self.activeJobs[globalJobId]
            else:
               jobStatus,jobStage,jobQueue = ('?','?','?')
            if self.sendMessage(channel,jobStatus + " " + jobStage,self.fixedBufferSize) > 0:
               if globalJobId in self.activeJobs:
                  jobStatus,jobStage = ('D','Job')
                  self.updateActiveJob(globalJobId,jobStatus,jobStage,jobQueue)
         elif messageType == 'R':                        # Report active jobs
            report = ""
            messageSites = []
            if messageSite == "*":
               for globalJobId in self.activeJobs.keys():
                  globalJobMessageSite,globalJobLocalJobId = globalJobId.split(':')
                  if not globalJobMessageSite in messageSites:
                     messageSites.append(globalJobMessageSite)
            else:
               messageSites.append(messageSite)

            siteDelimiter = ''
            maxLastReportTime = 0
            for messageSite in messageSites:
               if messageSite in self.activeJobSites:
                  lastReportTime = self.activeJobSites[messageSite]
               else:
                  lastReportTime = 0
               maxLastReportTime = max(maxLastReportTime,lastReportTime)
               siteReport = messageSite + " " + str(lastReportTime)

               # Literal prefix test: a site name must not be read as a regular expression.
               sitePrefix = messageSite + ':'
               queueDelimiter = ' @ '
               activeJobSiteQueues = self.getActiveJobSiteQueues(messageSite)
               for activeJobSiteQueue in activeJobSiteQueues:
                  queueReport = activeJobSiteQueue

                  jobDelimiter = ' : '
                  if remoteJobId == "*":
                     for reportJob in self.activeJobs.keys():
                        if not reportJob.startswith(sitePrefix):
                           continue
                        jobStatus,jobStage,jobQueue,timeRecorded,hubUserId,localJobId = self.activeJobs[reportJob]
                        if jobQueue == activeJobSiteQueue:
                           queueReport += jobDelimiter + reportJob.split(':')[1] + " " + jobStatus + " " + jobStage
                  else:
                     globalJobId = messageSite + ':' + remoteJobId
                     if globalJobId in self.activeJobs:
                        jobStatus,jobStage,jobQueue,timeRecorded,hubUserId,localJobId = self.activeJobs[globalJobId]
                     else:
                        jobStatus,jobStage,jobQueue = ('?','?','?')
                     if jobQueue == activeJobSiteQueue:
                        queueReport += jobDelimiter + remoteJobId + " " + jobStatus + " " + jobStage

                  siteReport += queueDelimiter + queueReport

               report += siteDelimiter + siteReport
               siteDelimiter = ' | '

            reportLength = len(report)
            if self.sendMessage(channel,str(reportLength) + " " + str(maxLastReportTime),self.fixedBufferSize) > 0:
               if reportLength > 0:
                  self.sendMessage(channel,report)
            del messageSites
         elif messageType == 'U':                        # List user jobs
            queueReport  = ""
            jobDelimiter = ''
            for globalJobId,activeJob in self.activeJobs.items():
               site,remoteJobId = globalJobId.split(':')
               jobStatus,jobStage,jobQueue,timeRecorded,hubUserId,localJobId = activeJob
               if hubUserId == reportHubUserId:
                  # finished jobs are not listed
                  if jobStatus != 'Dr' and jobStatus != 'D':
                     queueReport += jobDelimiter + localJobId + " " + jobQueue + " " + site + " " + jobStatus + " " + jobStage
                     jobDelimiter = ' : '

            reportLength = len(queueReport)
            if self.sendMessage(channel,str(reportLength) + " " + str(time.time()),self.fixedBufferSize) > 0:
               if reportLength > 0:
                  self.sendMessage(channel,queueReport)
      else:
         # an empty message means there is nothing more to read from this channel
         try:
            channel.close()
            channelClosed = True
         except Exception:
            log("close channel failed")

      return(channelClosed,newJobSite,newJobId)


