#!/usr/bin/env python
#
# @package      hubzero-submit-distributor
# @file         executeRemoteBatch.py
# @copyright    Copyright (c) 2014-2020 The Regents of the University of California.
# @license      http://opensource.org/licenses/MIT MIT
#
# Copyright (c) 2004-2020 The Regents of the University of California.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
#
# HUBzero is a registered trademark of The Regents of the University of California.
#
"""execute a job on a remote cluster
   """
import os
import sys
import pwd
import re
import subprocess
import select
import time
import shutil
import logging
from logging.handlers import SocketHandler,SysLogHandler

from hubzero.submit.LogMessage            import getLogIDMessage as getLogMessage, logSetJobId
from hubzero.submit.CommandParser         import CommandParser
from hubzero.submit.ParameterTemplate     import ParameterTemplate
from hubzero.submit.TimeConversion        import minTohhmmss
from hubzero.submit.DaemonsInfo           import DaemonsInfo
from hubzero.submit.InfosInfo             import InfosInfo
from hubzero.submit.SitesInfo             import SitesInfo
from hubzero.submit.ManagersInfo          import ManagersInfo
from hubzero.submit.SubmissionScriptsInfo import SubmissionScriptsInfo
from hubzero.submit.RemoteJobMonitor      import RemoteJobMonitor
from hubzero.submit.RemoteIdentityManager import RemoteIdentityManager

# Root logger; openLogger() sets its level and attaches the handlers.
APPLICATIONLOGGER = logging.getLogger('')

# Directory holding the submit configuration files, followed by the names
# of the individual files that are looked up inside it.
CONFIGURATIONDIRECTORY       = os.path.join(os.sep,'etc','submit')
DISTRIBUTORCONFIGURATIONFILE = 'distributor.conf'
DAEMONSCONFIGURATIONFILE     = 'daemons.conf'
INFOSCONFIGURATIONFILE       = 'infos.conf'

def openLogger():
   """Attach a stream handler and a syslog handler to APPLICATIONLOGGER.

   The logger level is set to DEBUG.  Both handlers share one formatter and
   one filter; the filter discards records whose rendered message is the
   empty string.  The syslog handler writes to /dev/log with facility LOCAL6.
   """
   class EmptyFilter(logging.Filter):
      """Filter that rejects records with an empty message."""

      def filter(self,record):
         return(record.getMessage() != "")

   dropEmptyRecords = EmptyFilter()
   recordFormatter  = logging.Formatter('%(asctime)s %(message)s','%s [%a %b %d %H:%M:%S %Y]')

   def attach(handler):
      # identical wiring for every handler: filter, formatter, registration
      handler.addFilter(dropEmptyRecords)
      handler.setFormatter(recordFormatter)
      APPLICATIONLOGGER.addHandler(handler)

   APPLICATIONLOGGER.setLevel(logging.DEBUG)

   # The stream handler is registered before the syslog handler is created,
   # so it stays attached even if the syslog socket cannot be opened.
   attach(logging.StreamHandler())
   attach(SysLogHandler(address="/dev/log",facility=SysLogHandler.LOG_LOCAL6))


class ExecuteRemoteBatch():
   TIMESTAMPTRANSFERRED  = ".__timestamp_transferred"
   TIMESTAMPFINISH       = ".__timestamp_finish"
   TIMESTAMPSTART        = ".__timestamp_start"
   TIMERESULTS           = ".__time_results"
   SUBMITBATCHJOBCOMMAND = 'submitfromcondor.sh'

   def __init__(self,
                configurationDirectory,
                distributorConfigurationFile,
                daemonsConfigurationFile,
                infosConfigurationFile,
                siteDesignator,
                managerDesignator,
                sshIdentityPath,
                submitterClass,
                runName,
                localJobId,
                instanceId,
                clusterId,
                jobWorkPath,
                environment,
                stdinput,
                stdoutput,
                stderror,
                nCores,
                nNodes,
                ppn,
                wallTime,
                progressReport):
      """Record the job description and connect to the monitoring daemons.

      Besides storing its arguments, the constructor
        * reads the daemons configuration file to obtain the listen URIs of
          the 'jobMonitor' and 'identitiesManager' daemons,
        * parses sys.argv[1:] with CommandParser to obtain the executable
          and its arguments,
        * creates the RemoteJobMonitor and RemoteIdentityManager clients.

      The distributor configuration and the site/manager descriptions are
      not loaded here; see configure() and setupSite().

      Parameters (as used in this class):
         configurationDirectory       - directory containing the three
                                        configuration files named below
         distributorConfigurationFile - file read by configure()
         daemonsConfigurationFile     - file passed to DaemonsInfo
         infosConfigurationFile       - file read by setupSite()
         siteDesignator               - key used for all site lookups
         managerDesignator            - key used for the manager lookup
         sshIdentityPath              - passed to the venue command with -i
         submitterClass               - forwarded to job registration
         runName, localJobId,
         instanceId, clusterId        - job identifiers; instanceId may be
                                        replaced, see below
         jobWorkPath                  - passed to the remote helper scripts
         environment                  - whitespace separated entries, each
                                        exported in the submission script
         stdinput, stdoutput,
         stderror                     - std file names for the job
         nCores, nNodes, ppn          - resource request
         wallTime                     - converted with minTohhmmss()
                                        (presumably minutes - TODO confirm)
         progressReport               - forwarded to waitForRedundantJobs()
      """
      self.logger = logging.getLogger(__name__)

      self.configurationDirectory       = configurationDirectory
      self.distributorConfigurationFile = distributorConfigurationFile
      self.daemonsConfigurationFile     = daemonsConfigurationFile
      self.infosConfigurationFile       = infosConfigurationFile

      # filled in by configure()
      self.configData     = {}
      self.dataDirectory  = ""
      self.binDirectory   = ""
      self.pegasusVersion = ""

      # NOTE(review): daemonInfo (singular) is never assigned again in the
      # visible code; the object actually used below is self.daemonsInfo.
      self.daemonInfo            = None
      self.infosInfo             = None
      self.sitesInfo             = None
      self.managersInfo          = None
      self.submissionScriptsInfo = None

      # site attributes, filled in by setupSite()
      self.siteDesignator       = siteDesignator
      self.clusterHost          = ""
      self.clusterQueue         = ""
      self.clusterPartition     = ""
      self.clusterPartitionSize = ""
      self.clusterConstraints   = ""

      self.clusterUser          = ""
      self.hostAttributes       = ""
      self.clusterAccount       = ""
      self.clusterWorkDirectory = ""
      self.clusterBinDirectory  = ""

      # manager attributes, filled in by setupSite()
      self.managerDesignator    = managerDesignator
      self.managerCommand       = ""
      self.preManagerCommands   = ""
      self.postManagerCommands  = ""

      # identity of this process and of the user running it
      self.distributorPid       = os.getpid()
      self.hubUserId            = os.getuid()
      self.hubUserName          = pwd.getpwuid(self.hubUserId).pw_name
      self.sshIdentityPath      = sshIdentityPath

      configFilePath         = os.path.join(self.configurationDirectory,self.daemonsConfigurationFile)
      self.daemonsInfo       = DaemonsInfo(configFilePath)
      self.jobListenURI      = self.daemonsInfo.getDaemonListenURI('jobMonitor','tcp')
      self.identityListenURI = self.daemonsInfo.getDaemonListenURI('identitiesManager','tcp')

      self.submitterClass       = submitterClass
      self.runName              = runName
      self.localJobId           = localJobId
      self.instanceId           = instanceId
      self.clusterId            = clusterId
      self.jobWorkPath          = jobWorkPath
      self.environment          = environment
      self.stdinput             = stdinput
      self.stdoutput            = stdoutput
      self.stderror             = stderror
      self.nCores               = nCores
      self.nNodes               = nNodes
      self.ppn                  = ppn
      self.wallTime             = minTohhmmss(wallTime)
      self.progressReport       = progressReport

      # The command to run remotely is taken from this process's own
      # command line rather than from a constructor argument.
      self.commandParser = CommandParser()
      self.logger.log(logging.INFO,getLogMessage("Args are:" + str(sys.argv)))
      self.commandParser.parseArguments(sys.argv[1:])

      self.executable = self.commandParser.getEnteredExecutable()
      self.arguments  = self.commandParser.getEnteredCommandArguments()
      # drop the first entered argument (presumably the executable itself -
      # TODO confirm against CommandParser); raises IndexError if empty
      del self.arguments[0]

      # An instanceId of 0 is replaced by the text following the last '_'
      # in the extension-less basename of the final command argument.
      if int(self.instanceId) == 0:
         appScriptPath = self.arguments[-1]
         appScriptFile = os.path.basename(appScriptPath)
         localJobIdInstanceId = os.path.splitext(appScriptFile)[0]
         self.instanceId = localJobIdInstanceId.split('_')[-1]

      self.remoteJobMonitor      = RemoteJobMonitor(self.jobListenURI,
                                                    writeStdoutMessages=False)
      self.remoteIdentityManager = RemoteIdentityManager(self.identityListenURI)

      # set by setupSite() and executeSubmission() respectively
      self.submissionFile    = ""
      self.remoteJobIdNumber = None

      # read size for os.read() and Popen bufsize in executeCommand()
      self.bufferSize      = 1024
      # per-instance job state, keyed by int(instanceId); see registerJob()
      self.waitForJobsInfo = {}

      # abort state handed to RemoteJobMonitor.waitForRedundantJobs()
      self.abortAttempted = False
      self.abortGlobal    = {}
      self.abortGlobal['abortAttempted'] = self.abortAttempted
      self.abortGlobal['abortSignal']    = 0


   def __writeToStdout(self,
                       message):
      """Write message to sys.stdout and flush it immediately.

      A broken pipe (EPIPE) is ignored silently; any other I/O error is
      logged at ERROR level instead of being raised.
      """
      # local import: the module never imported EPIPE, so the original
      # handler raised NameError instead of filtering broken pipes
      import errno

      try:
         sys.stdout.write(message)
         sys.stdout.flush()
      except IOError as err:
         # err.errno works on Python 2 and 3; err[0] only on Python 2
         if err.errno != errno.EPIPE:
            self.logger.log(logging.ERROR,getLogMessage("Can't write to stdout: %s" % (message)))


   def __writeToStderr(self,
                       message):
      """Write message to sys.stderr and flush it immediately.

      A broken pipe (EPIPE) is ignored silently; any other I/O error is
      logged at ERROR level instead of being raised.
      """
      # local import: the module never imported EPIPE, so the original
      # handler raised NameError instead of filtering broken pipes
      import errno

      try:
         sys.stderr.write(message)
         sys.stderr.flush()
      except IOError as err:
         # err.errno works on Python 2 and 3; err[0] only on Python 2
         if err.errno != errno.EPIPE:
            self.logger.log(logging.ERROR,getLogMessage("Can't write to stderr: %s" % (message)))


   def configure(self):
      """Load the [distributor] section of the distributor configuration file.

      Each time a [distributor] section header is met, self.configData is
      reset to the built-in defaults; subsequent "key = value" records in
      that section override a default when the key is known.  The value is
      converted to the type of the default it replaces.  Unknown keys and
      values that cannot be converted are logged and skipped.

      Returns:
         True when the file was read, contained a [distributor] section and
         both the configured dataDirectory and binDirectory exist (these
         are then stored in self.dataDirectory / self.binDirectory);
         False otherwise.  Every failure is logged.
      """
      sectionPattern  = re.compile(r'(\s*\[)([^\s]*)(]\s*)')
      keyValuePattern = re.compile(r'( *)(\w*)( *= *)(.*[^\s$])( *)')
      commentPattern  = re.compile(r'\s*#.*')

      inDistributorSection   = False
      distributorSectionSeen = False
      configured             = False

      configFilePath = os.path.join(self.configurationDirectory,self.distributorConfigurationFile)
      try:
         fpConfig = open(configFilePath,'r')
      except (IOError,OSError):
         self.logger.log(logging.ERROR,getLogMessage("%s could not be opened" % (configFilePath)))
      else:
         try:
            for record in fpConfig:
               record = commentPattern.sub("",record)
               sectionMatch = sectionPattern.match(record)
               if sectionMatch:
                  inDistributorSection = (sectionMatch.group(2) == 'distributor')
                  if inDistributorSection:
                     distributorSectionSeen = True
                     self.configData = {'probeMonitoringInstalled':False,
                                        'maximumSelectedSites':1,
                                        'allowedVenueMechanisms':['local','ssh'],
                                        'dataDirectory':os.path.join(os.sep,'opt','submit'),
                                        'binDirectory':os.path.join(os.sep,'opt','submit','bin'),
                                        'condorRoot':'',
                                        'condorConfig':'',
                                        'pbsRoot':''
                                       }
               elif inDistributorSection:
                  keyValueMatch = keyValuePattern.match(record)
                  if keyValueMatch:
                     key,value = keyValueMatch.group(2,4)
                     if key in self.configData:
                        try:
                           self.configData[key] = self.__convertConfigValue(self.configData[key],value)
                        except ValueError:
                           # keep the default rather than abort on one bad record
                           self.logger.log(logging.ERROR,getLogMessage("Invalid value for %s: %s" % (key,value)))
                     else:
                        self.logger.log(logging.WARNING,getLogMessage("Undefined key = value pair %s = %s" % (key,value)))
            configured = True
         except (IOError,OSError):
            self.logger.log(logging.ERROR,getLogMessage("%s could not be read" % (configFilePath)))
         finally:
            fpConfig.close()

      if configured and not distributorSectionSeen:
         # without the section no defaults were installed, so the directory
         # lookups below would fail with KeyError
         self.logger.log(logging.ERROR,getLogMessage("%s has no [distributor] section" % (configFilePath)))
         configured = False

      if configured:
         if os.path.isdir(self.configData['dataDirectory']):
            self.dataDirectory = self.configData['dataDirectory']
         else:
            message = "Specified dataDirectory does not exist: %s" % (self.configData['dataDirectory'])
            self.logger.log(logging.ERROR,getLogMessage(message))
            configured = False

         if os.path.isdir(self.configData['binDirectory']):
            self.binDirectory = self.configData['binDirectory']
         else:
            message = "Specified binDirectory does not exist: %s" % (self.configData['binDirectory'])
            self.logger.log(logging.ERROR,getLogMessage(message))
            configured = False

      return(configured)


   def __convertConfigValue(self,
                            defaultValue,
                            value):
      """Convert the text value to the type of defaultValue.

      list  -> comma separated entries, each stripped
      bool  -> True only for 'true' (case-insensitive)
      float / int -> numeric conversion
      dict  -> comma separated key:value pairs; keys and values take the
               type of the first entry of defaultValue (plain strings when
               defaultValue is empty)
      other -> the text unchanged

      Raises:
         ValueError when a numeric conversion fails or a dict entry is not
         exactly one key:value pair.
      """
      # bool is tested before int everywhere: bool is a subclass of int
      if   isinstance(defaultValue,list):
         return([e.strip() for e in value.split(',')])
      elif isinstance(defaultValue,bool):
         return(value.lower() == 'true')
      elif isinstance(defaultValue,float):
         return(float(value))
      elif isinstance(defaultValue,int):
         return(int(value))
      elif isinstance(defaultValue,dict):
         try:
            sampleKey   = next(iter(defaultValue))
            sampleValue = defaultValue[sampleKey]
         except StopIteration:
            sampleKey   = "key"
            sampleValue = "value"
         converted = {}
         for e in value.split(','):
            dictKey,dictValue = e.split(':')
            if isinstance(sampleKey,int):
               dictKey = int(dictKey)
            if   isinstance(sampleValue,bool):
               dictValue = (dictValue.lower() == 'true')
            elif isinstance(sampleValue,int):
               dictValue = int(dictValue)
            elif isinstance(sampleValue,float):
               dictValue = float(dictValue)
            converted[dictKey] = dictValue
         return(converted)

      return(value)


   def setupSite(self):
      """Load site, manager and submission script descriptions.

      Reads the infos configuration file, then copies the attributes of
      self.siteDesignator and self.managerDesignator onto this object.
      For the PBS and SLURM batch systems the directive prefix and the
      submission file name are set as well; for any other batch system the
      prefix is the empty string, the submission file name is left
      untouched and a warning is logged.

      Requires configure() to have populated self.configData and
      self.dataDirectory.
      """
      configFilePath = os.path.join(self.configurationDirectory,self.infosConfigurationFile)
      self.infosInfo = InfosInfo(configFilePath)

      self.sitesInfo = SitesInfo(self.infosInfo.getInfoPath('sites'),
                                 restrictionUser=os.getenv("USER"),
                                 templateDirectory=self.dataDirectory,
                                 allowedVenueMechanisms=self.configData['allowedVenueMechanisms'],
                                 pegasusVersion=self.pegasusVersion)
      getSiteValue = self.sitesInfo.getSiteKeyValue
      site         = self.siteDesignator

      self.clusterBatchSystem = getSiteValue(site,'remoteBatchSystem')

      # Always define the prefix: buildSerialFile() reads it, and leaving it
      # unset for an unexpected batch system raised AttributeError there.
      self.submissionScriptCommandPrefix = ""
      if self.clusterBatchSystem in ('PBS','SLURM'):
         self.submissionFile = "%s_%s_%s.%s" % (self.localJobId,self.instanceId, \
                                                self.clusterId,self.clusterBatchSystem.lower())

      if   self.clusterBatchSystem == 'PBS':
         self.submissionScriptCommandPrefix = '#PBS'
         self.clusterQueue                  = getSiteValue(site,'remoteBatchQueue')
      elif self.clusterBatchSystem == 'SLURM':
         self.submissionScriptCommandPrefix = '#SBATCH'
         self.clusterPartition              = getSiteValue(site,'remoteBatchPartition')
         self.clusterPartitionSize          = getSiteValue(site,'remoteBatchPartitionSize')
         self.clusterConstraints            = getSiteValue(site,'remoteBatchConstraints')
      else:
         message = "Unrecognized remoteBatchSystem %s for site %s" % (self.clusterBatchSystem,site)
         self.logger.log(logging.WARNING,getLogMessage(message))

      # only the first venue is used as the cluster host
      self.clusterHost              = getSiteValue(site,'venues')[0]
      self.clusterUser              = getSiteValue(site,'remoteUser')
      self.hostAttributes           = getSiteValue(site,'remoteHostAttribute')
      self.clusterAccount           = getSiteValue(site,'remoteBatchAccount')
      self.clusterWorkDirectory     = getSiteValue(site,'remoteScratchDirectory')
      self.clusterBinDirectory      = getSiteValue(site,'remoteBinDirectory')
      self.siteMonitorDesignator    = getSiteValue(site,'siteMonitorDesignator')
      self.identityManagers         = getSiteValue(site,'identityManagers')
      self.submissionScriptCommands = getSiteValue(site,'submissionScriptCommands')
      self.venueMechanism           = getSiteValue(site,'venueMechanism')
      self.venuePort                = getSiteValue(site,'venuePort')
      self.sshOptions               = getSiteValue(site,'sshOptions')
      self.logUserRemotely          = getSiteValue(site,'logUserRemotely')

      self.managersInfo = ManagersInfo(self.infosInfo.getInfoPath('managers'))
      managerInfo = self.managersInfo.getManagerInfo(self.managerDesignator)
      self.managerCommand      = managerInfo['managerCommand']
      self.preManagerCommands  = managerInfo['preManagerCommands']
      self.postManagerCommands = managerInfo['postManagerCommands']

      self.submissionScriptsInfo = SubmissionScriptsInfo('Distributor',
                                                         submissionScriptRootPath=self.infosInfo.getInfoPath('submissionscripts'))


   def executeCommand(self,
                      command,
                      streamOutput=False):
      """Run command with the submission file on stdin and collect its output.

      Args:
         command      - argument list handed to subprocess.Popen
         streamOutput - when True every chunk read from the child is also
                        echoed to this process's stdout/stderr as it arrives

      Returns:
         (status, stdoutText, stderrText).  status is 0 on success, the
         exit code when the child exited normally, or the raw wait status
         when it was killed by a signal.  Failures are logged; the child's
         stderr is logged too unless it was already streamed.

      Raises:
         IOError/OSError when the submission file cannot be opened or the
         child cannot be started; select.error for any select failure other
         than an interrupted system call.
      """
      import codecs
      import errno

      # os.read() returns str on Python 2 but bytes on Python 3; decode
      # incrementally there so multi-byte characters split across two reads
      # are handled.
      if bytes is str:
         outDecoder = None
         errDecoder = None
      else:
         outDecoder = codecs.getincrementaldecoder('utf-8')(errors='replace')
         errDecoder = codecs.getincrementaldecoder('utf-8')(errors='replace')

      outData = []
      errData = []

      fpSubmissionFile = open(self.submissionFile,'r')
      try:
         child = subprocess.Popen(command,bufsize=self.bufferSize,
                                  stdin=fpSubmissionFile,
                                  stdout=subprocess.PIPE,
                                  stderr=subprocess.PIPE,
                                  close_fds=True)
         self.childPid = child.pid
         childoutFd    = child.stdout.fileno()
         childerrFd    = child.stderr.fileno()

         # fd -> (collected chunks, decoder, echo method); removed at EOF
         openStreams = {childoutFd:(outData,outDecoder,self.__writeToStdout),
                        childerrFd:(errData,errDecoder,self.__writeToStderr)}

         while openStreams:
            try:
               readable = select.select(list(openStreams),[],[])[0] # wait for input
            except select.error as selectError:
               # retry only when interrupted by a signal; looping on any
               # other error would spin forever
               if selectError.args[0] != errno.EINTR:
                  raise
               readable = []

            for fd in (childoutFd,childerrFd):        # stdout is handled first
               if fd in readable:
                  chunk = os.read(fd,self.bufferSize)
                  collected,decoder,echo = openStreams[fd]
                  atEOF = not chunk                   # true for '' and b''
                  if atEOF:
                     del openStreams[fd]
                  if decoder is not None:
                     chunk = decoder.decode(chunk,final=atEOF)
                  collected.append(chunk)
                  if streamOutput:
                     echo(chunk)
      finally:
         # closed even when Popen raises
         fpSubmissionFile.close()

      child.stdout.close()
      child.stderr.close()

      pid,err = os.waitpid(self.childPid,0)
      self.childPid = 0
      if err != 0:
         if   os.WIFSIGNALED(err):
            self.logger.log(logging.ERROR,getLogMessage("%s failed w/ signal %d" % (command,os.WTERMSIG(err))))
         else:
            if os.WIFEXITED(err):
               err = os.WEXITSTATUS(err)
            self.logger.log(logging.ERROR,getLogMessage("%s failed w/ exit code %d" % (command,err)))
         if not streamOutput:
            self.logger.log(logging.ERROR,getLogMessage("%s" % ("".join(errData))))

      return(err,"".join(outData),"".join(errData))


   def buildSerialFile(self):
      """Render the 'pegasusserial' batch submission script for this job.

      The raw script for self.clusterBatchSystem is fetched from
      self.submissionScriptsInfo and its placeholders are filled from the
      job and site attributes.  Queue, partition, constraint and account
      directives are emitted only when the matching site attribute is set;
      each one is prefixed with the batch system's directive prefix.

      Returns:
         The rendered script text, or "" when the substitution fails (the
         failure is logged at ERROR level).
      """
      rawSubmissionScript = self.submissionScriptsInfo.getSubmissionScript('Batch',self.clusterBatchSystem,'pegasusserial')

      # setupSite() may not have defined a prefix for an unexpected batch
      # system; fall back to no prefix instead of raising AttributeError
      commandPrefix = getattr(self,'submissionScriptCommandPrefix',"")

      queueCommand = ""
      if self.clusterQueue != "":
         queueCommand = commandPrefix + " -q " + self.clusterQueue

      partitionCommand = ""
      if self.clusterPartition != "":
         partitionCommand = commandPrefix + " --partition " + self.clusterPartition

      constraintsCommand = ""
      if self.clusterConstraints != "":
         constraintsCommand = commandPrefix + " --constraint " + self.clusterConstraints

      # one directive per line; with a prefix every line starts with it
      submissionCommands = ""
      if self.submissionScriptCommands:
         if commandPrefix:
            commandSeparator = "\n%s " % commandPrefix
            submissionCommands = commandPrefix + " " + commandSeparator.join(self.submissionScriptCommands)
         else:
            submissionCommands = "\n".join(self.submissionScriptCommands)

      # self.environment holds whitespace separated entries
      environmentExport = "".join(["export " + environmentVar + "\n"
                                   for environmentVar in self.environment.split()])

      jobTag = "%s_%s" % (self.localJobId,self.instanceId)

      substitutions = {}
      if self.stdinput == "":
         substitutions["STDIN"]                 = '/dev/null'
      else:
         substitutions["STDIN"]                 = self.stdinput
      substitutions["STDOUT"]                   = self.stdoutput
      substitutions["STDERR"]                   = self.stderror
      substitutions["WALLTIME"]                 = self.wallTime
      substitutions["QUEUE"]                    = queueCommand
      substitutions["PARTITION"]                = partitionCommand
      substitutions["CONSTRAINTS"]              = constraintsCommand
      substitutions["SUBMISSIONSCRIPTCOMMANDS"] = submissionCommands
      substitutions["ENVIRONMENT"]              = environmentExport
      substitutions["EXECUTABLE"]               = self.executable
      substitutions["ARGUMENTS"]                = ' '.join(self.arguments)
      substitutions["TS_START"]                 = "%s.%s" % (self.TIMESTAMPSTART,jobTag)
      substitutions["TS_FINISH"]                = "%s.%s" % (self.TIMESTAMPFINISH,jobTag)
      substitutions["TIME_RESULTS"]             = "%s.%s" % (self.TIMERESULTS,jobTag)
      substitutions["RUNNAME"]                  = self.runName
      substitutions["JOBID"]                    = self.localJobId
      substitutions["INSTANCEID"]               = self.instanceId
      substitutions["CLUSTERID"]                = self.clusterId
      # a configured partition size overrides the requested node count
      if self.clusterPartitionSize != "":
         substitutions["NNODES"]                = self.clusterPartitionSize
      else:
         substitutions["NNODES"]                = self.nNodes
      substitutions["ATTRIBUTES"]               = self.hostAttributes
      substitutions["PPN"]                      = self.ppn
      substitutions["NPROCESSORS"]              = str(int(self.nNodes)*int(self.ppn))
      if self.clusterAccount != "":
         substitutions["REMOTEBATCHACCOUNT"]    = commandPrefix + ' -A ' + self.clusterAccount
      else:
         substitutions["REMOTEBATCHACCOUNT"]    = ''
      substitutions["PREMANAGERCOMMANDS"]       = "\n".join(self.preManagerCommands)
      substitutions["POSTMANAGERCOMMANDS"]      = "\n".join(self.postManagerCommands)

      template = ParameterTemplate(rawSubmissionScript)
      try:
         submissionScript = template.substitute_recur(substitutions)
      except KeyError as err:
         submissionScript = ""
         # err.args[0] is the missing placeholder name
         self.logger.log(logging.ERROR,getLogMessage("Pattern substitution failed for @@%s\n" % (err.args[0])))
      except TypeError:
         submissionScript = ""
         self.logger.log(logging.ERROR,getLogMessage("Submission script substitution failed:\n%s\n" % (rawSubmissionScript)))

      return(submissionScript)


   def writeSubmissionScript(self):
      """Build the submission script and store it in self.submissionFile.

      Returns:
         True when the script text was written, False when the file could
         not be opened or written (the failure is logged).
      """
      scriptText  = self.buildSerialFile()
      wroteScript = False
      try:
         scriptHandle = open(self.submissionFile,'w')
         try:
            scriptHandle.write(scriptText)
            wroteScript = True
         except (IOError,OSError):
            self.logger.log(logging.ERROR,getLogMessage("%s could not be written" % (self.submissionFile)))
         finally:
            scriptHandle.close()
      except (IOError,OSError):
         self.logger.log(logging.ERROR,getLogMessage("%s could not be opened" % (self.submissionFile)))

      return(wroteScript)


   def registerJob(self):
      """Register the job with the job monitor and block until it is released.

      The job's entry in self.waitForJobsInfo (keyed by the integer
      instance id) is created with state 'held', the monitor is polled
      every five seconds, and the state becomes 'released' once the monitor
      reports the job as released.  Both transitions are reported on stderr
      and logged at DEBUG level.
      """
      pollInterval = 5
      jobInfo = {'isBatchJob':True}
      self.waitForJobsInfo[int(self.instanceId)] = jobInfo

      self.remoteJobMonitor.postJobRegistration(self.siteDesignator,
                                                self.runName,self.localJobId,self.clusterId,
                                                str(self.hubUserId),self.submitterClass,
                                                self.distributorPid)
      jobInfo['state'] = 'held'
      registeredMessage = "Run %d registered 1 job instance." % (int(self.localJobId))
      self.__writeToStderr(registeredMessage + "\n")
      self.logger.log(logging.DEBUG,getLogMessage(registeredMessage))

      # no timeout: this waits for as long as the monitor holds the job
      while not self.remoteJobMonitor.isJobReleased(self.localJobId,self.clusterId):
         time.sleep(pollInterval)

      jobInfo['state'] = 'released'
      releasedMessage = "Run %d instance %s released for submission." % (int(self.localJobId),self.instanceId)
      self.__writeToStderr(releasedMessage + "\n")
      self.logger.log(logging.DEBUG,getLogMessage(releasedMessage))


   def executeSubmission(self):
      """Submit the job through the venue mechanism and record its remote id.

      The remote helper SUBMITBATCHJOBCOMMAND is run on the cluster host
      with the local submission file on its stdin (see executeCommand()).
      The remote job id is taken from the helper's stdout: the text before
      the first '.' for PBS, the last whitespace separated word for SLURM,
      the whole stripped output otherwise.

      On success the id is stored in self.remoteJobIdNumber and the
      submission is posted to the job monitor.  When the command fails, or
      succeeds without yielding a job id, the job registration is deleted
      and the failure is logged.

      Returns:
         True when the job was submitted and a remote job id was obtained,
         False otherwise.
      """
      submissionExecuted = False

      sshCommandArgs = [self.venueMechanism,'-T','-x','-a']
      if self.venueMechanism == 'ssh':
         sshCommandArgs += ['-i',self.sshIdentityPath]
      if self.sshOptions:
         sshCommandArgs += self.sshOptions.split()
      sshCommandArgs += ['-p','%d' % (self.venuePort)]
      sshCommandArgs.append("%s@%s" % (self.clusterUser,self.clusterHost))

      timestampTransferred = "%s.%s_%s" % (self.TIMESTAMPTRANSFERRED,self.localJobId,self.instanceId)
      commandArgs = [os.path.join(self.clusterBinDirectory,self.SUBMITBATCHJOBCOMMAND),
                     self.jobWorkPath,
                     self.submissionFile,
                     timestampTransferred]
      if self.logUserRemotely:
         hubUserHash = self.remoteIdentityManager.queryUserHash(self.identityManagers,
                                                                self.hubUserName,
                                                                self.hubUserId)
         commandArgs.append(hubUserHash)
         commandArgs.append(str(self.hubUserId))
         commandArgs.append(self.localJobId.lstrip('0') + '_' + self.instanceId)
         commandArgs.append(self.executable)

      # the remote side receives one space separated command string
      sshCommandArgs.append(' '.join(commandArgs))
      self.logger.log(logging.INFO,getLogMessage("command:" + str(sshCommandArgs)))
      exitStatus,stdOutput,stdError = self.executeCommand(sshCommandArgs)

      remoteJobIdNumber = ""
      if not exitStatus:
         remoteJobId = stdOutput.strip()
         self.logger.log(logging.INFO,getLogMessage("remoteJobId = " + remoteJobId))
         # an empty reply must not be indexed: ''.split()[-1] raises IndexError
         if remoteJobId:
            if   self.clusterBatchSystem == 'PBS':
               remoteJobIdNumber = remoteJobId.split('.')[0]
            elif self.clusterBatchSystem == 'SLURM':
               remoteJobIdNumber = remoteJobId.split()[-1]
            else:
               remoteJobIdNumber = remoteJobId
         if not remoteJobIdNumber:
            self.logger.log(logging.ERROR,getLogMessage("%s submission returned no job id" % (self.clusterBatchSystem)))

      if remoteJobIdNumber:
         self.remoteJobIdNumber = remoteJobIdNumber
         self.logger.log(logging.INFO,getLogMessage("remoteJobIdNumber = " + self.remoteJobIdNumber))
         submissionExecuted = True

         enteredCommand = self.commandParser.getEnteredCommand()
         tailFiles      = []
         self.remoteJobMonitor.postJobSubmission(self.siteDesignator,
                                                 self.identityManagers,
                                                 self.siteMonitorDesignator,
                                                 self.remoteJobIdNumber,str(self.hubUserId),
                                                 tailFiles,enteredCommand,
                                                 self.jobWorkPath,
                                                 self.localJobId,self.instanceId,self.clusterHost,
                                                 self.runName,self.nCores,self.distributorPid)
      else:
         # without a remote id the job cannot be monitored
         self.remoteJobMonitor.deleteJobRegistration(self.localJobId,self.clusterId)
         self.logger.log(logging.ERROR,getLogMessage("%s submission failed:\n%s\n%s" % (self.clusterBatchSystem, \
                                                                                        stdOutput,stdError)))

      return(submissionExecuted)


   def waitForBatchJob(self):
      """Hand the submitted job to the job monitor and wait on it.

      The job's entry in self.waitForJobsInfo is completed with the site
      monitor designator, cluster host, remote job id and an initial status
      of '?', then RemoteJobMonitor.waitForRedundantJobs() is called.  Its
      return value is not used.
      """
      jobInfo = self.waitForJobsInfo[int(self.instanceId)]
      jobInfo['siteMonitorDesignator'] = self.siteMonitorDesignator
      jobInfo['knownSite']             = self.clusterHost
      jobInfo['remoteJobId']           = self.remoteJobIdNumber
      jobInfo['recentJobStatus']       = '?'

      self.remoteJobMonitor.waitForRedundantJobs(self.waitForJobsInfo,
                                                 self.progressReport,
                                                 self.abortGlobal)


   def executeFetch(self):
      """Fetch the job's std files from the cluster and replay them locally.

      Runs stdtocondor.sh on the cluster host through the venue mechanism
      and pipes its stdout into a local "tar xzmf -" that strips directory
      components (presumably the helper emits a gzipped tar stream - TODO
      confirm against the remote script).  Afterwards the basenames of
      self.stdoutput and self.stderror, when present in the current
      directory, are copied to this process's stdout/stderr and removed.

      Returns:
         True when tar exited with status 0, False otherwise.  Failures of
         either child process are logged.
      """
      import tempfile

      fetchExecuted = False

      sshCommandArgs = [self.venueMechanism,'-T','-x','-a']
      if self.venueMechanism == 'ssh':
         sshCommandArgs += ['-i',self.sshIdentityPath]
      if self.sshOptions:
         sshCommandArgs += self.sshOptions.split()
      sshCommandArgs += ['-p','%d' % (self.venuePort)]
      sshCommandArgs.append("%s@%s" % (self.clusterUser,self.clusterHost))
      command = ' '.join([os.path.join(self.clusterBinDirectory,'stdtocondor.sh'),
                          self.jobWorkPath,
                          self.stdoutput,
                          self.stderror])
      sshCommandArgs.append(command)
      tarCommandArgs = ['tar','xzmf','-','--transform','s:.*/::']
      self.logger.log(logging.INFO,getLogMessage("command:" + str(sshCommandArgs)))
      self.logger.log(logging.INFO,getLogMessage("command:" + str(tarCommandArgs)))

      # ssh's stderr goes to a temporary file: an unread pipe could fill up
      # and stall ssh while this process is waiting on tar
      sshStdErrorFile = tempfile.TemporaryFile(mode='w+')
      try:
         sshChild = subprocess.Popen(sshCommandArgs,
                                     stdin=None,
                                     stdout=subprocess.PIPE,
                                     stderr=sshStdErrorFile,
                                     close_fds=True)
         try:
            tarChild = subprocess.Popen(tarCommandArgs,
                                        stdin=sshChild.stdout,
                                        stdout=subprocess.PIPE,
                                        stderr=subprocess.PIPE,
                                        universal_newlines=True,
                                        close_fds=True)
         except OSError:
            # tar could not be started: stop and reap ssh before reporting
            sshChild.stdout.close()
            try:
               sshChild.terminate()
            except OSError:
               pass
            sshChild.wait()
            raise

         # drop the parent's copy of the pipe so ssh sees a broken pipe if
         # tar exits early
         sshChild.stdout.close()
         tarStdOutput,tarStdError = tarChild.communicate()
         tarExitStatus = tarChild.returncode
         sshExitStatus = sshChild.wait()

         if sshExitStatus != 0:
            sshStdErrorFile.seek(0)
            sshStdError = sshStdErrorFile.read()
            self.logger.log(logging.ERROR,getLogMessage("%s failed w/ exit code %d" % (sshCommandArgs,sshExitStatus)))
            if sshStdError:
               self.logger.log(logging.ERROR,getLogMessage(sshStdError))

         if tarExitStatus != 0:
            self.logger.log(logging.ERROR,getLogMessage("Failed to write application std files"))
            if tarStdOutput:
               self.logger.log(logging.ERROR,getLogMessage(tarStdOutput))
            if tarStdError:
               self.logger.log(logging.ERROR,getLogMessage(tarStdError))
         else:
            fetchExecuted = True
      except OSError as err:
         self.logger.log(logging.ERROR,getLogMessage("Failed to fetch application std files"))
         self.logger.log(logging.ERROR,getLogMessage(str(err)))
      finally:
         sshStdErrorFile.close()

      if fetchExecuted:
         self.logger.log(logging.DEBUG,getLogMessage("application std files fetched"))

      # replay whatever was extracted, even after a partial failure
      self.__replayStdFile(self.stdoutput,sys.stdout)
      self.__replayStdFile(self.stderror,sys.stderr)

      return(fetchExecuted)


   def __replayStdFile(self,
                       stdFilePath,
                       destination):
      """Copy the local file named like stdFilePath to destination, then remove it.

      Only the basename of stdFilePath is used, relative to the current
      directory.  A missing file is ignored; open and copy failures are
      logged; removal failures are ignored.
      """
      stdFile = os.path.basename(stdFilePath)
      if os.path.exists(stdFile):
         try:
            fpStd = open(stdFile,'r')
            try:
               shutil.copyfileobj(fpStd,destination)
            except (IOError,OSError):
               self.logger.log(logging.ERROR,getLogMessage("%s could not be copied" % (stdFile)))
            finally:
               fpStd.close()
         except (IOError,OSError):
            self.logger.log(logging.ERROR,getLogMessage("%s could not be opened" % (stdFile)))
         finally:
            try:
               os.remove(stdFile)
            except OSError:
               pass


if __name__ == '__main__':
   # Script entry point: collect the job description from DISTRIBUTE_*
   # environment variables, build an ExecuteRemoteBatch instance and drive it
   # through configure -> setup -> script generation -> submission -> wait ->
   # fetch.  Each stage only runs if the preceding gating stage reported success.
   openLogger()

   siteDesignator       = os.getenv("DISTRIBUTE_SITE_DESIGNATOR")
   managerDesignator    = ""
   sshIdentityPath      = os.getenv("DISTRIBUTE_SSH_IDENTITY_PATH")
   environment          = ""
   jobStdin             = ""
   nNodes               = os.getenv("DISTRIBUTE_NODES",'1')
   ppn                  = os.getenv("DISTRIBUTE_PPN",'1')
   # total core count is derived from nodes * processors-per-node
   nCores               = int(nNodes)*int(ppn)
   wallTime             = os.getenv("DISTRIBUTE_WALLTIME",'60')
   progressReport       = 'silent'

   jobWorkPath          = os.getenv("DISTRIBUTE_JOB_PATH")
   # example: DISTRIBUTE_JOB_STDOUT=/scratch/lustreA/n/nano0/nanoHUBjobs/Test/Jobs/scratch/preprocess_ID0000001.stdout
   jobStdout            = os.getenv("DISTRIBUTE_JOB_STDOUT")
   # example: DISTRIBUTE_JOB_STDERR=/scratch/lustreA/n/nano0/nanoHUBjobs/Test/Jobs/scratch/preprocess_ID0000001.stderr
   jobStderr            = os.getenv("DISTRIBUTE_JOB_STDERR")

   # DISTRIBUTE_REMOTE_ENVIRONMENT is a comma separated list of key=value pairs.
   # An unset variable is treated as an empty list so that the missing-key
   # check below reports the problem instead of an AttributeError on None.
   remoteEnvironment = os.getenv("DISTRIBUTE_REMOTE_ENVIRONMENT") or ""
   remoteEnvironmentVariables = {}
   for keyValue in remoteEnvironment.split(','):
      if not keyValue:
         # empty entry (unset variable, trailing or doubled comma)
         continue
      # split on the first '=' only so values may themselves contain '='
      key,separator,value = keyValue.partition('=')
      if not separator:
         sys.stderr.write("Ignoring malformed DISTRIBUTE_REMOTE_ENVIRONMENT entry: %s\n" % (keyValue))
         continue
      remoteEnvironmentVariables[key] = value

   requiredVariables = ("DISTRIBUTE_RUNNAME",
                        "DISTRIBUTE_LOCALJOBID",
                        "DISTRIBUTE_INSTANCEID",
                        "DISTRIBUTE_CLUSTERID",
                        "DISTRIBUTE_SUBMITTER_CLASS")
   missingVariables = [name for name in requiredVariables if name not in remoteEnvironmentVariables]
   if missingVariables:
      # previously surfaced as an uncaught KeyError; report it explicitly
      sys.stderr.write("DISTRIBUTE_REMOTE_ENVIRONMENT is missing: %s\n" % (','.join(missingVariables)))
      sys.exit(1)

   runName        = remoteEnvironmentVariables["DISTRIBUTE_RUNNAME"]
   localJobId     = remoteEnvironmentVariables["DISTRIBUTE_LOCALJOBID"]
   instanceId     = remoteEnvironmentVariables["DISTRIBUTE_INSTANCEID"]
   clusterId      = remoteEnvironmentVariables["DISTRIBUTE_CLUSTERID"]
   submitterClass = remoteEnvironmentVariables["DISTRIBUTE_SUBMITTER_CLASS"]

   try:
      jobId = int(localJobId)
   except (TypeError,ValueError):
      # only a non-numeric job id is reported here; KeyboardInterrupt and
      # SystemExit are no longer swallowed as they were by a bare except
      sys.stderr.write("Could not determine jobId: %s\n" % (localJobId))
   else:
      # tag subsequent log messages with the numeric job id
      logSetJobId(jobId)

      executeRemoteBatch = ExecuteRemoteBatch(CONFIGURATIONDIRECTORY,
                                              DISTRIBUTORCONFIGURATIONFILE,
                                              DAEMONSCONFIGURATIONFILE,
                                              INFOSCONFIGURATIONFILE,
                                              siteDesignator,
                                              managerDesignator,
                                              sshIdentityPath,
                                              submitterClass,
                                              runName,
                                              localJobId,
                                              instanceId,
                                              clusterId,
                                              jobWorkPath,
                                              environment,
                                              jobStdin,
                                              jobStdout,
                                              jobStderr,
                                              nCores,
                                              nNodes,
                                              ppn,
                                              wallTime,
                                              progressReport)

      if executeRemoteBatch.configure():
         executeRemoteBatch.setupSite()
         if executeRemoteBatch.writeSubmissionScript():
            executeRemoteBatch.registerJob()
            if executeRemoteBatch.executeSubmission():
               executeRemoteBatch.waitForBatchJob()
               executeRemoteBatch.executeFetch()


