#!/usr/bin/env python

"""
Pegasus utility for creating directories for a set of protocols

Usage: pegasus-create-dir [options]

"""

##
#  Copyright 2007-2011 University Of Southern California
#
#  Licensed under the Apache License, Version 2.0 (the "License");
#  you may not use this file except in compliance with the License.
#  You may obtain a copy of the License at
#
#  http://www.apache.org/licenses/LICENSE-2.0
#
#  Unless required by applicable law or agreed to in writing,
#  software distributed under the License is distributed on an "AS IS" BASIS,
#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#  See the License for the specific language governing permissions and
#  limitations under the License.
##

import os
import re
import sys
import errno
import logging
import optparse
import tempfile
import subprocess
import signal
import string
import stat
import time
from collections import deque
import ConfigParser

try:
    # Python 3.0 and later
    from urllib.parse import unquote
except ImportError:
    # Fall back to Python 2's urllib
    from urllib import unquote


__author__ = "Mats Rynge <rynge@isi.edu>"

# --- regular expressions -------------------------------------------------------------

re_parse_url = re.compile(r'([\w]+)://([\w\.\-:@]*)(/[\S]*)')

# --- classes -------------------------------------------------------------------------

class URL:
    """
    A directory URL split into protocol, host and path, together with the
    name of the site it belongs to.

    The site name is only used by the protocol handlers to look up
    site-specific environment variables (for example X509_USER_PROXY_<site>).
    """

    site       = None
    proto      = ""
    host       = ""
    path       = ""

    def set_site(self, site):
        """
        stores the site name; None is stored as an empty string
        """
        if site is None:
            site = ""
        # the site name is used to match against shell variables, so we
        # have to replace dashes with underscores (as we do in the planner)
        self.site = site.replace("-", "_")

    def set_url(self, url):
        """
        parses url and stores the resulting proto, host and path
        """
        self.proto, self.host, self.path = self.parse_url(url)

    def parse_url(self, url):
        """
        splits url into a (proto, host, path) tuple

        A url without any ':' is treated as a file:// url. For file urls
        the host is always returned empty and environment variable
        references in the path are expanded. Raises RuntimeError if a
        non-file url does not match the expected proto://host/path layout.
        """
        # the url should be URL decoded to work with our shell callouts
        url = unquote(url)

        # default protocol is file://
        if ":" not in url:
            logger.debug("URL without protocol (" + url + ") - assuming file://")
            url = "file://" + url

        # file url is a special case as it can contain relative paths and env vars
        if url.startswith("file:"):
            # file urls can either start with file://[\w]*/ or file: (no //)
            path = re.sub(r"^file:(//[\w\.\-:@]*)?", "", url)
            path = expand_env_vars(path)
            return "file", "", path

        # other than file urls
        r = re_parse_url.search(url)
        if not r:
            raise RuntimeError("Unable to parse URL: %s" % (url))

        proto = r.group(1)
        host = r.group(2)
        # no double slashes in urls
        path = re.sub('//+', '/', r.group(3))

        return proto, host, path

    def url(self):
        """
        returns the url as a proto://host/path string
        """
        return "%s://%s%s" % (self.proto, self.host, self.path)

    def url_dirname(self):
        """
        returns the url string of the directory containing this path
        """
        dn = os.path.dirname(self.path)
        return "%s://%s%s" % (self.proto, self.host, dn)

    def parent_url(self):
        """
        returns a new URL object for the parent directory of this url
        """
        parent = URL()
        # carry the site along so that handlers which build environment
        # variable names from url.site also work on the parent
        parent.site = self.site
        parent.proto = self.proto
        parent.host = self.host
        parent.path = os.path.dirname(self.path)
        return parent


class Alarm(Exception):
    """
    raised from the SIGALRM handler so that a blocking call can be
    interrupted when its timeout expires
    """


# --- global variables ----------------------------------------------------------------

# name of this program (last path component of argv[0])
prog_base = os.path.basename(sys.argv[0])

logger = logging.getLogger("my_logger")


# maps the protocol of a URL to the name of the handler that
# create_dir() dispatches to for that protocol
tool_map = {
    'file':   'mkdir',
    'ftp':    'gsiftp',
    'gsiftp': 'gsiftp',
    'sshftp': 'sshftp',
    'irods':  'irods',
    's3':     's3',
    's3s':    's3',
    'gs':     'gs',
    'scp':    'scp',
    'srm':    'srm',
}

# filled in by check_tool(): executable name -> path/version details
tool_info = {}


# --- functions -----------------------------------------------------------------------


def setup_logger(level_str):
    """
    attaches a console handler to the module logger and sets the log level

    level_str is compared case-insensitively against "debug", "warning"
    and "error"; any other value leaves logger and handler at INFO.
    """
    named_levels = {
        "debug": logging.DEBUG,
        "warning": logging.WARNING,
        "error": logging.ERROR,
    }

    # log to the console, and keep logger/console levels in step
    handler = logging.StreamHandler()
    targets = (logger, handler)
    for target in targets:
        target.setLevel(logging.INFO)

    # level - from the command line
    requested = named_levels.get(level_str.lower())
    if requested is not None:
        for target in targets:
            target.setLevel(requested)

    handler.setFormatter(logging.Formatter("%(asctime)s %(levelname)7s:  %(message)s"))
    logger.addHandler(handler)
    logger.debug("Logger has been configured")

def prog_sigint_handler(signum, frame):
    """
    signal handler: logs the signal number and exits with status 1
    """
    # Logger.warn is a deprecated alias - warning() is the supported name
    logger.warning("Exiting due to signal %d" % (signum))
    myexit(1)

def alarm_handler(signum, frame):
    """
    signal handler: turns the signal into an Alarm exception
    """
    raise Alarm()


def expand_env_vars(s):
    """
    returns s with every $NAME / ${NAME} reference replaced by whatever
    get_env_var() returns for that match
    """
    env_ref = re.compile(r'\${?([a-zA-Z0-9_]+)}?')
    return env_ref.sub(get_env_var, s)


def get_env_var(match):
    """
    regex substitution callback: returns the value of the environment
    variable named by group 1 of match, or "" if it is not set
    """
    var_name = match.group(1)
    logger.debug("Looking up " + var_name)
    return os.environ.get(var_name, "")


def myexec(cmd_line, timeout_secs, should_log):
    """
    executes shell commands with the ability to time out if the command hangs

    cmd_line is run through the shell with stderr redirected onto stdout;
    the output is not captured. The command line is logged at info level
    when should_log is true or debug logging is enabled.

    Raises RuntimeError if the command does not finish within
    timeout_secs seconds or exits with a non-zero code.
    """
    if should_log or logger.isEnabledFor(logging.DEBUG):
        logger.info(cmd_line)
    sys.stdout.flush()

    # set up signal handler for timeout
    signal.signal(signal.SIGALRM, alarm_handler)
    signal.alarm(timeout_secs)

    try:
        p = subprocess.Popen(cmd_line + " 2>&1", shell=True)
        try:
            p.communicate()
        except Alarm:
            # Popen.terminate() only exists in Python 2.6 and later
            if sys.version_info >= (2, 6):
                p.terminate()
            raise RuntimeError("Command '%s' timed out after %s seconds" % (cmd_line, timeout_secs))
    finally:
        # always cancel the pending alarm - otherwise it would fire later,
        # outside of this function, once the command has already finished
        signal.alarm(0)

    rc = p.returncode
    if rc != 0:
        raise RuntimeError("Command '%s' failed with error code %s" % (cmd_line, rc))


def backticks(cmd_line):
    """
    runs cmd_line through the shell and returns what it wrote to stdout
    (the perl-style backtick operator)
    """
    proc = subprocess.Popen(cmd_line, shell=True, stdout=subprocess.PIPE)
    captured, _ = proc.communicate()
    return captured


def check_tool(executable, version_arg, version_regex):
    """
    looks up an executable and records what was found in tool_info

    tool_info[executable] is (re)initialized with full_path, version,
    version_major, version_minor and version_patch all set to None. If
    the executable is found with "which", full_path is filled in. If
    version_regex is not None, the executable is run with version_arg and
    the first group of version_regex is taken as the version (the whole
    output, newlines removed, if the regex does not match).
    """
    # initialize the global tool info for this executable
    info = {
        'full_path': None,
        'version': None,
        'version_major': None,
        'version_minor': None,
        'version_patch': None,
    }
    tool_info[executable] = info

    # figure out the full path to the executable
    full_path = backticks("which " + executable + " 2>/dev/null")
    full_path = full_path.rstrip('\n')
    if full_path == "":
        logger.info("Command '%s' not found in the current environment" %(executable))
        return
    info['full_path'] = full_path

    # version
    if version_regex is None:
        # only used for the log line below - the stored version stays None
        version = "N/A"
    else:
        version = backticks(executable + " " + version_arg + " 2>&1")
        version = version.replace('\n', "")
        result = re.search(version_regex, version)
        if result:
            version = result.group(1)
        info['version'] = version

    # if possible, break up version into major, minor, patch
    result = re.search(r"([0-9]+)\.([0-9]+)(\.([0-9]+)){0,1}", version)
    if result:
        info['version_major'] = int(result.group(1))
        info['version_minor'] = int(result.group(2))
        # the patch component is optional - group 4 is None without it
        patch = result.group(4)
        if patch:
            info['version_patch'] = int(patch)

    logger.info("  %-18s Version: %-7s Path: %s" % (executable, version, full_path))


def _prepend_entry(entries, entry):
    """
    puts entry at the front of the list, removing its first existing
    occurrence (if any) beforehand
    """
    if entry in entries:
        entries.remove(entry)
    entries.insert(0, entry)


def check_env_and_tools():
    """
    sets up the environment used by the shell callouts

    Rewrites PATH, LD_LIBRARY_PATH and DYLD_LIBRARY_PATH in os.environ
    (PEGASUS_HOME/bin and GLOBUS_LOCATION/bin|lib go first when those
    variables are set) and points irodsAuthFileName at .irodsA in the
    current working directory.
    """

    # PATH setup
    path_entries = os.environ.get("PATH", "/usr/bin:/bin").split(':')

    # is /usr/bin in the path?
    if "/usr/bin" not in path_entries:
        path_entries.append("/usr/bin")
        path_entries.append("/bin")

    # fink on macos x
    if os.path.exists("/sw/bin") and "/sw/bin" not in path_entries:
        path_entries.append("/sw/bin")

    # add our own path (basic way to get to other Pegasus tools)
    my_bin_dir = os.path.normpath(os.path.join(os.path.dirname(sys.argv[0])))
    if my_bin_dir not in path_entries:
        path_entries.append(my_bin_dir)

    # need LD_LIBRARY_PATH for Globus tools
    ld_library_path = os.environ.get("LD_LIBRARY_PATH", "")
    # "".split(':') would give [''] - an empty entry which ends up as a
    # trailing ':' (the current directory) once something is prepended
    if ld_library_path:
        ld_library_path_entries = ld_library_path.split(':')
    else:
        ld_library_path_entries = []

    # if PEGASUS_HOME is set, prepend it to the PATH (we want it early to override other cruft)
    if "PEGASUS_HOME" in os.environ:
        _prepend_entry(path_entries, os.environ['PEGASUS_HOME'] + "/bin")

    # if GLOBUS_LOCATION is set, prepend it to the PATH and LD_LIBRARY_PATH
    # (we want it early to override other cruft)
    if "GLOBUS_LOCATION" in os.environ:
        _prepend_entry(path_entries, os.environ['GLOBUS_LOCATION'] + "/bin")
        _prepend_entry(ld_library_path_entries, os.environ['GLOBUS_LOCATION'] + "/lib")

    os.environ['PATH'] = ":".join(path_entries)
    os.environ['LD_LIBRARY_PATH'] = ":".join(ld_library_path_entries)
    os.environ['DYLD_LIBRARY_PATH'] = ":".join(ld_library_path_entries)
    logger.info("PATH=" + os.environ['PATH'])
    logger.info("LD_LIBRARY_PATH=" + os.environ['LD_LIBRARY_PATH'])

    # irods requires a password hash file
    os.environ['irodsAuthFileName'] = os.getcwd() + "/.irodsA"
    


def mkdir(url):
    """
    creates a directory on a mounted file system

    Does nothing if url.path already is a directory. Missing parent
    directories are created as well. Raises RuntimeError if the directory
    can not be created - including the case where the path already exists
    but is not a directory.
    """
    path = url.path
    if os.path.isdir(path):
        return

    logger.debug("Creating local directory " + path)
    try:
        os.makedirs(path, 0o755)
    except OSError as err:
        # somebody else may have created the directory in the meantime -
        # only treat this as a failure if there still is no directory
        if not os.path.isdir(path):
            raise RuntimeError(err)


def scp(url):
    """
    creates a directory by running /bin/mkdir -p on the remote host over ssh

    A SSH_PRIVATE_KEY_<site> environment variable, if set, is copied to
    SSH_PRIVATE_KEY, which in turn is passed to ssh with -i.
    """
    site_key = "SSH_PRIVATE_KEY_" + url.site
    if site_key in os.environ:
        os.environ["SSH_PRIVATE_KEY"] = os.environ[site_key]

    # split up the host into host/port components
    host = re.sub(':.*', '', url.host)
    port_match = re.search(':([0-9]+)', url.host)
    if port_match:
        port = port_match.group(1)
    else:
        port = "22"

    pieces = ["/usr/bin/ssh"]
    if "SSH_PRIVATE_KEY" in os.environ:
        pieces.append("-i " + os.environ['SSH_PRIVATE_KEY'])
    pieces.append("-o PasswordAuthentication=no")
    pieces.append("-o UserKnownHostsFile=/dev/null")
    pieces.append("-o StrictHostKeyChecking=no")
    pieces.append("-p " + port)
    pieces.append(host)
    pieces.append("'/bin/mkdir -p " + url.path + "'")

    myexec(" ".join(pieces), 60, True)


def gsiftp(url):
    """
    create directories on gridftp servers

    Tries pegasus-gridftp first and falls back to globus-url-copy if that
    command fails. A X509_USER_PROXY_<site> environment variable, if set,
    is copied to X509_USER_PROXY. Raises RuntimeError if a required tool
    was not found by check_tool() or if the fallback fails too.
    """

    if tool_info['pegasus-gridftp']['full_path'] is None:
        raise RuntimeError("Unable to do gsiftp mkdir because pegasus-gridftp could not be found")

    key = "X509_USER_PROXY_" + url.site
    if key in os.environ:
        os.environ["X509_USER_PROXY"] = os.environ[key]

    # build command line for pegasus-gridftp
    cmd = tool_info['pegasus-gridftp']['full_path']
    cmd += " mkdir"

    # make output match our current log level
    if logger.isEnabledFor(logging.DEBUG):
        cmd += " -v"

    cmd += " -f"
    cmd += " -p"
    cmd += " " + url.url()

    try:
        myexec(cmd, 60, True)
    except Exception:
        # not a bare except: the SystemExit raised when the user hits
        # Ctrl+C must end the program instead of triggering the fallback

        # pegasus-gridftp has a new stricter hostcert check - if it
        # fails, try globus-url-copy
        if tool_info['globus-url-copy']['full_path'] is None:
            raise RuntimeError("Unable to do gsiftp mkdir because globus-url-copy could not be found")
        cmd = tool_info['globus-url-copy']['full_path']
        cmd += " -create-dest"
        cmd += " file:///dev/null"
        cmd += " " + url.url() + "/.create-dir"
        myexec(cmd, 60, True)


def sshftp(url):
    """
    create directories on sshftp servers using globus-url-copy

    The directory is created as a side effect of copying /dev/null to a
    .create-dir file inside it with -create-dest. A SSH_PRIVATE_KEY_<site>
    environment variable, if set, is copied to SSH_PRIVATE_KEY. Raises
    RuntimeError if globus-url-copy was not found by check_tool() or if
    the command fails.
    """

    if tool_info['globus-url-copy']['full_path'] is None:
        raise RuntimeError("Unable to do sshftp mkdir because globus-url-copy could not be found")

    # build command line for globus-url-copy
    cmd = tool_info['globus-url-copy']['full_path']
    # make output match our current log level
    if logger.isEnabledFor(logging.DEBUG):
        cmd += " -dbg"

    key = "SSH_PRIVATE_KEY_" + url.site
    if key in os.environ:
        os.environ["SSH_PRIVATE_KEY"] = os.environ[key]

    cmd += " -create-dest"
    cmd += " file:///dev/null"
    cmd += " " + url.url() + "/.create-dir"

    try:
        myexec(cmd, 60, True)
    except Exception:
        # not a bare except: the SystemExit raised when the user hits
        # Ctrl+C must not be converted into a RuntimeError
        raise RuntimeError("globus-url-copy command failed")


def irods_login(site=None):
    """
    log in to irods by using the iinit command - if the file already exists,
    we are already logged in

    site is the (underscore-normalized) site name used to look up an
    irodsEnvFile_<site> override. When it is not given, the site of the
    module-level url object is used, if there is one.

    Raises RuntimeError if irodsEnvFile is not set, if the env file has
    no irodsPassword entry, or if iinit fails.
    """
    f = os.environ['irodsAuthFileName']
    if os.path.exists(f):
        return

    if site is None:
        # backwards compatibility: callers which pass nothing get the
        # site of the global url, which is what this function used to read
        site = getattr(globals().get("url"), "site", None)

    if site is not None:
        key = "irodsEnvFile_" + site
        if key in os.environ:
            os.environ["irodsEnvFile"] = os.environ[key]

    # read password from env file
    if "irodsEnvFile" not in os.environ:
        raise RuntimeError("Missing irodsEnvFile - unable to do irods transfers")
    password = None
    with open(os.environ['irodsEnvFile'], 'r') as h:
        for line in h:
            items = line.split(" ", 2)
            # a key without any value has nothing to pick up
            if len(items) > 1 and items[0].lower() == "irodspassword":
                password = items[1].strip(" \t'\"\r\n")
    if password is None:
        raise RuntimeError("No irodsPassword specified in irods env file")

    # the file holds the clear text password: request owner-only
    # permissions, and remove it even if iinit fails
    fd = os.open(".irodsAc", os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600)
    try:
        with os.fdopen(fd, "w") as h:
            h.write(password + "\n")

        cmd = "cat .irodsAc | iinit"
        myexec(cmd, 60*60, True)
    finally:
        os.unlink(".irodsAc")


def irods(url):
    """
    irods - use the icommands to interact with irods

    Logs in (see irods_login) and runs imkdir -p on url.path. Raises
    RuntimeError if imkdir was not found by check_tool(), if the login
    fails, or if imkdir fails.
    """
    if tool_info['imkdir']['full_path'] is None:
        raise RuntimeError("Unable to do irods create dir because imkdir could not be found in the current path")

    # log in to irods
    try:
        irods_login(url.site)
    except Exception as login_err:
        logger.error(login_err)
        raise RuntimeError("Unable to log into irods")

    cmd = "imkdir -p " + url.path
    myexec(cmd, 60, True)

            
def srm(url):
    """
    We support two tools: lcg-utils and bestman srm. lcg-utils is preferred.

    A X509_USER_PROXY_<site> environment variable, if set, is copied to
    X509_USER_PROXY before the tool is picked. Raises RuntimeError if
    neither lcg-cp nor srm-mkdir was found by check_tool().
    """

    proxy_key = "X509_USER_PROXY_" + url.site
    if proxy_key in os.environ:
        os.environ["X509_USER_PROXY"] = os.environ[proxy_key]

    have_lcg_cp = tool_info['lcg-cp']['full_path'] is not None
    if have_lcg_cp:
        srm_lcgcp(url)
        return

    have_srm_mkdir = tool_info['srm-mkdir']['full_path'] is not None
    if have_srm_mkdir:
        srm_srmmkdir(url)
        return

    raise RuntimeError("Unable to do srm mkdir becuase lcg-cp/srm-mkdir could not be found")


def srm_lcgcp(url):
    """
    implements mkdir using lcg-cp (there is no mkdir in the lcg utils, but cp creates dirs)

    Copies /bin/true to a .empty.<timestamp> file inside the target
    directory.
    """
    logger.info("Using lcg-cp as the LCG tools do not have a good mkdir")
    # the current time goes into the file name of the copied file
    copy_cmd = "lcg-cp -b -D srmv2 file:///bin/true %s/.empty.%f" %(url.url(), time.time())
    myexec(copy_cmd, 60, True)


def srm_srmmkdir(url):
    """
    implements recursive mkdir as srm-mkdir can not handle it

    If srm-ls succeeds on the url nothing is done. Otherwise the parent
    directories are created first (recursively, stopping at "/") and then
    srm-mkdir is run for the url itself. Raises RuntimeError if srm-mkdir
    fails.
    """

    # if the directory exists, just return
    cmd = "srm-ls %s >/dev/null" %(url.url())
    try:
        myexec(cmd, 60, True)
        return
    except Exception:
        # any failure of srm-ls is taken to mean "not there yet"
        logger.info("Directory %s does not exist yet" % (url.path))

    # back down to a directory which exists
    one_up = url.parent_url()
    if one_up.path != "/":
        srm_srmmkdir(one_up)

    cmd = "srm-mkdir %s" %(url.url())
    myexec(cmd, 60, True)
    

def s3(url):
    """
    s3 - uses pegasus-s3 to interact with Amazon S3

    Only the bucket part of the url is created; a failing pegasus-s3
    mkdir is logged as a warning and otherwise ignored. A S3CFG_<site>
    environment variable, if set, is copied to S3CFG. Raises RuntimeError
    if pegasus-s3 was not found by check_tool() or if no bucket can be
    extracted from the url.
    """

    if tool_info['pegasus-s3']['full_path'] is None:
        raise RuntimeError("Unable to do S3 mkdir becuase pegasus-s3 could not be found")

    key = "S3CFG_" + url.site
    if key in os.environ:
        os.environ["S3CFG"] = os.environ[key]

    # extract the bucket part
    re_bucket = re.compile(r'(s3(s){0,1}://\w+@\w+/+[\w\-]+)')
    bucket = url.url()
    r = re_bucket.search(bucket)
    if r:
        bucket = r.group(1)
    else:
        raise RuntimeError("Unable to parse bucket: %s" % (bucket))

    # first ensure that the bucket exists
    cmd = "pegasus-s3 mkdir %s" %(bucket)
    try:
        myexec(cmd, 60, True)
    except Exception:
        # best effort: the command also fails when the bucket is already there
        logger.warning("mkdir failed - possibly due to the bucket already existing, so continuing...")


def gs(url):
    """
    gs - uses gsutil to interact with Google Storage

    Only the bucket part of the url is created; a failing gsutil mb is
    logged as a warning and otherwise ignored. BOTO_CONFIG_<site> and
    GOOGLE_PKCS12_<site> environment variables, if set, are copied to
    BOTO_CONFIG and GOOGLE_PKCS12. gsutil is run against a temporary copy
    of the boto config which has gs_service_key_file set to GOOGLE_PKCS12.

    Raises RuntimeError if gsutil was not found by check_tool(), if the
    temporary boto config can not be produced, or if no bucket can be
    extracted from the url.
    """

    if tool_info['gsutil']['full_path'] is None:
        raise RuntimeError("Unable to do Google Storage mkdir because gsutil could not be found")

    key = "BOTO_CONFIG_" + url.site
    if key in os.environ:
        os.environ["BOTO_CONFIG"] = os.environ[key]

    key = "GOOGLE_PKCS12_" + url.site
    if key in os.environ:
        os.environ["GOOGLE_PKCS12"] = os.environ[key]

    # we need to update the boto config file to specify the full
    # path to the PKCS12 file
    try:
        tmp_fd, tmp_name = tempfile.mkstemp(prefix="pegasus-transfer-", suffix=".lst")
    except Exception:
        raise RuntimeError("Unable to create tmp file for gs boto file")

    # from here on the temporary file exists - the finally clause makes
    # sure it is removed no matter how this function is left
    try:
        try:
            tmp_file = os.fdopen(tmp_fd, "w+b")
        except Exception:
            os.close(tmp_fd)
            raise RuntimeError("Unable to create tmp file for gs boto file")

        try:
            try:
                conf = ConfigParser.SafeConfigParser()
                conf.read(os.environ["BOTO_CONFIG"])
                conf.set("Credentials", "gs_service_key_file", os.environ["GOOGLE_PKCS12"])
                conf.write(tmp_file)
            finally:
                tmp_file.close()
        except Exception as err:
            logger.error(err)
            raise RuntimeError("Unable to convert boto config file")

        # extract the bucket part
        re_bucket = re.compile(r'(gs://[\w-]+/)[/\w-]*')
        bucket = url.url()
        r = re_bucket.search(bucket)
        if r:
            bucket = r.group(1)
        else:
            raise RuntimeError("Unable to parse bucket: %s" % (bucket))

        # first ensure that the bucket exists
        cmd = "BOTO_CONFIG=%s gsutil mb %s" %(tmp_name, bucket)
        try:
            myexec(cmd, 60, True)
        except Exception:
            # best effort: the command also fails when the bucket is already there
            logger.warning("mkdir failed - possibly due to the bucket already existing, so continuing...")
    finally:
        os.unlink(tmp_name)


def create_dir(url):
    """
    handles the creation of a directory

    Looks up the handler for url.proto in tool_map, checks for the
    external tools that handler needs and then calls it. A protocol
    without a mapping, or a RuntimeError raised by the handler, is logged
    as critical and ends the program with exit code 1.
    """
    try:
        if url.proto in tool_map:
            tool = tool_map[url.proto]
            if tool == "mkdir":
                mkdir(url)
            elif tool == "scp":
                scp(url)
            elif tool == "gsiftp":
                check_tool("pegasus-gridftp", "", None)
                check_tool("globus-url-copy", "-version", r"globus-url-copy: ([\.0-9a-zA-Z]+)")
                gsiftp(url)
            elif tool == "sshftp":
                check_tool("globus-url-copy", "-version", r"globus-url-copy: ([\.0-9a-zA-Z]+)")
                sshftp(url)
            elif tool == "irods":
                check_tool("imkdir", "-h", r"Version[ \t]+([\.0-9a-zA-Z]+)")
                irods(url)
            elif tool == "srm":
                check_tool("lcg-cp", "--version", r"lcg_util-([\.0-9a-zA-Z]+)")
                check_tool("srm-mkdir", "-version", r"srm-mkdir[ \t]+([\.0-9a-zA-Z]+)")
                srm(url)
            elif tool == "s3":
                check_tool("pegasus-s3", "help", None)
                s3(url)
            elif tool == "gs":
                check_tool("gsutil", "version", r"gsutil version: ([\.0-9a-zA-Z]+)")
                gs(url)
            else:
                # tool_map names a handler which is not wired up above
                logger.critical("Error: No mapping for the tool '%s'" %(tool))
                myexit(1)
        else:
            logger.critical("Error: This tool does not know how to create a directory for %s://" % (url.proto))
            myexit(1)

    except RuntimeError as err:
        logger.critical(err)
        myexit(1)


def myexit(rc):
    """
    ends the program with exit code rc by raising SystemExit
    """
    sys.exit(rc)



# --- main ----------------------------------------------------------------------------

# everything written to stderr from here on goes to stdout
sys.stderr = sys.stdout

# Configure command line option parser
prog_usage = "usage: %s [options]" % (prog_base)
parser = optparse.OptionParser(usage=prog_usage)
parser.add_option("-l", "--loglevel", action = "store", dest = "log_level",
                  help = "Log level. Valid levels are: debug,info,warning,error, Default is info.")
parser.add_option("-s", "--site", action = "store", dest = "site",
                  help = "Site name for the target site")
parser.add_option("-u", "--url", action = "store", dest = "url",
                  help = "URL for the directory to create")

# Parse command line options
(options, args) = parser.parse_args()
if options.log_level is None:
    options.log_level = "info"
setup_logger(options.log_level)
if options.url is None:
    logger.critical("Please specify the URL for the directory to create")
    myexit(1)

# Die nicely when asked to (Ctrl+C, system shutdown)
signal.signal(signal.SIGINT, prog_sigint_handler)

# check environment and tools
try:
    check_env_and_tools()
except Exception as err:
    logger.critical(err)
    myexit(1)

# NOTE: url has to stay a module-level name - irods_login() looks up
# the global url to find out which site it is working for
url = URL()
url.set_site(options.site)
url.set_url(options.url)

try:
    create_dir(url)
except Exception as err:
    logger.critical(err)
    logger.critical("Directory not created!")
    myexit(1)

logger.info("Directory created")

myexit(0)


