# @package      hubzero-mw2-common
# @file         container.py
# @author       Pascal Meunier <pmeunier@purdue.edu>
# @copyright    Copyright (c) 2016-2017 HUBzero Foundation, LLC.
# @license      http://opensource.org/licenses/MIT MIT
#
# Based on previous work by Richard L. Kennell and Nicholas Kisseberth
#
# Copyright (c) 2016-2017 HUBzero Foundation, LLC.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
#
# HUBzero is a registered trademark of HUBzero Foundation, LLC.
#

"""
container.py: class to create, manipulate, and stop containers, and processes within
Designed for simfs support by OpenVZ
Used on execution hosts by the middleware service.  This class should encapsulate OpenVZ idiosyncrasies
and in theory could be replaced by another container.py file to handle other virtualization mechanisms
Some OpenVZ synonyms:
veid = containerID = ctid 
The display number in the database is the ctid
"""
import subprocess
import socket
import os
import stat
import sys
import grp
import time
import threading

from errors import MaxwellError
from constants import CONTAINER_K, VERBOSE, RUN_DIR
from log import log, log_exc
from user_account import User_account
DEBUG = False

def log_subprocess(p, info=None):
  """Communicate with a started subprocess and log whatever it printed.

  p: a subprocess.Popen instance (anything with communicate() and returncode works)
  info: optional data handed to p.communicate() as the standard input of the process

  Standard error is logged with the prefix "success:" when the return code is 0 and
  "error:" otherwise.  Standard output is logged with the prefix "output:".
  communicate() returns None for a stream that was not redirected to a pipe; such a
  stream is treated as empty instead of failing on len(None).
  Returns None.
  """
  (stdout, stderr) = p.communicate(info)
  # truthiness covers both the empty string and None (stream not captured)
  if stderr:
    if p.returncode == 0:
      log("success:" + stderr)
    else:
      log("error:" + stderr)
  if stdout:
    log("output:" + stdout)

class Container():
  """A container: Virtual Private Server (VPS) uniquely identified by a veid.
  Note that new documentation uses "CTID" instead of veid: ConTainer's IDentifier (CTID)
  According to the OpenVZ Users Guide, CTIDs 0-100 are reserved and should not be used.
  OpenVZ only currently uses CTID 0 but recommends reserving CTID's 0-100 for non-use

  Paths used by OpenVZ  (see OpenVZ User's Guide Version 2.7.0-8 by SWsoft):

  ct_private  (e.g., "/vz/private/veid")
  This is a path to the Container private area where Virtuozzo Containers 4.0 keeps its private data.

  ct_root (e.g., "/vz/root/veid")
  This is a path to the Container root folder where the Container private area is mounted.

  vz_root
  This is a path to the Virtuozzo folder where Virtuozzo Containers program files are located.

  depends on script INTERNAL_PATH +"mergeauth"

"""

  def __init__(self, disp, machine_number, overrides={}):
    """Record the identifiers, OpenVZ paths and IP address of one container.

    disp: display number; must be between 1 and MAX_CONTAINERS (inclusive)
    machine_number: number mixed into the IP address so that addresses differ between
      execution hosts; 0 means "derive it from the last byte of this host's IP address"
    overrides: settings merged into the container configuration dictionary (only read here)

    Raises MaxwellError when disp is out of range or no machine number can be determined.
    """
    # NOTE(review): self.k is the module-level CONTAINER_K itself, not a copy, so the
    # overrides are written into that shared dictionary.
    self.k = CONTAINER_K
    self.k.update(overrides)
    self.disp = disp
    self.vncpass = None

    # 255*255 = 65025
    if disp < 1:
      raise MaxwellError("Container ID must be at least 1")
    max_containers = int(self.k["MAX_CONTAINERS"])
    if disp > max_containers:
      raise MaxwellError("Container ID must be less than %d" % max_containers)

    # A check that the resulting CTID is above the reserved range 0-100 used to live here
    # and is disabled.
    self.veid = disp + self.k["VZOFFSET"]
    vz_path = self.k["VZ_PATH"]
    self.vz_private_path = "%s/private/%d" % (vz_path, self.veid)
    self.vz_root_path = "%s/root/%d" % (vz_path, self.veid)

    # To avoid the Invalid MIT-MAGIC-COOKIE error, the IP address of containers must be
    # unique over an entire hub (not just over an execution host).  xauth list shows the
    # cookies a user has; a cookie in use can get overwritten by a new one if there is an
    # IP address collision.  Hence a "machine number" is added to the IP address.
    #
    # disp is used instead of self.veid to calculate the IP address, to avoid losing
    # VZOFFSET IP addresses.
    if disp >= 1024:
      log("container ID %d too high, xauth collisions possible" % disp)

    if machine_number == 0:
      try:
        host_ip = socket.gethostbyname(socket.getfqdn())
        machine_number = int(host_ip.split('.')[3])
      except StandardError:
        raise MaxwellError("machine_number not set and unable to derive one from IP address.")
      if machine_number == 0:
        raise MaxwellError("unable to set machine_number")

    third_byte = disp // 255 + (machine_number % 64) * 4
    # use %255 for last byte to avoid broadcast address
    self.veaddr = self.k["PRIVATE_NET"] % (third_byte % 256, disp % 255)
    # previously the address was "10.<machine_number>.<veid/100>.<veid % 100>"



  def __printvzstats(self, err):
    """Write CPU times and resource counters of this container to err.

    err: writable file-like object receiving the statistics

    Reads /proc/vz/vestat and writes the "user" and "sys" times of the line whose first
    column equals self.veid (columns 1 and 3 divided by 1000.0).  Then reads
    /proc/user_beancounters and copies the block of resource lines belonging to this
    container, preceded by a column header.

    Both files are closed when done, also on error.  The beancounters block is looked up
    by container ID (self.veid), like the vestat line, instead of by display number; the
    two differ whenever VZOFFSET is not 0.  An identifier that is not a number is logged
    and skipped.
    """
    with open("/proc/vz/vestat") as vestat:
      for line in vestat:
        arr = line.split()
        if len(arr) < 5:
          continue
        if arr[0] == "VEID":
          # column header
          continue
        if DEBUG:
          log("Checking /proc/vz/vestat veid = %s\n" % arr[0])
        try:
          if int(arr[0]) == self.veid:
            # Since VEs are pre-created, the elapsed time found here is NOT the real time.
            # Let the middleware host compute this time.
            err.write("user %f\n" % (int(arr[1])/1000.0))
            err.write("sys %f\n" % (int(arr[3])/1000.0))
            break
        except ValueError:
          log("can't convert to integer, continuing")

    flag_print = False
    with open("/proc/user_beancounters") as beancounters:
      for line in beancounters:
        if line.find(":") > 0:
          # first line of a container block: "<id>: <first resource line>"
          parts = line.split(":")
          ctid_field = parts[0].strip()
          if ctid_field == "Version":
            continue
          if flag_print:
            # the block of the next container starts here, we are done
            break
          try:
            ctid = int(ctid_field)
          except ValueError:
            log("can't convert to integer, continuing")
            continue
          if ctid == self.veid:
            flag_print = True
            err.write("resource                     held              maxheld              barrier                limit              failcnt\n")
            err.write(parts[1].strip() + "\n")
        elif flag_print:
          # continuation line of the block being printed
          err.write(line.strip() + "\n")


  def __etc_passwd(self, user, account, groups, err):
    """Add user info to /etc/passwd, /etc/shadow and /etc/group inside the container.

    user: login name of the session owner
    account: account object of that user; passwd_entry() and shadow_entry() are used
    groups: names of the groups the user belongs to
    err: writable file-like object used as session log

    When 'apps' is one of the groups, the apps account is added as well and a sudoers
    line allowing "su - apps" is appended.

    Every entry is written followed by exactly one newline, to the file it belongs to
    (the separators used to go to the wrong stream, which could glue two shadow entries
    together).  The sudoers file is created with mode 0440 when it does not exist: the
    third argument of open() is a buffer size, not a permission.  All files are closed
    before returning.  A group that grp.getgrnam() does not know is reported in err and
    skipped; write errors on /etc/group are ignored as before.
    """
    def one_line(entry):
      """Return entry ending with exactly one newline, whether or not it had one."""
      return entry.rstrip("\n") + "\n"

    etc_passwd = etc_shadow = etc_group = None
    try:
      etc_passwd = open(self.vz_root_path + "/etc/passwd", "a")
      etc_shadow = open(self.vz_root_path + "/etc/shadow", "a")
      etc_group = open(self.vz_root_path + "/etc/group", "a")

      if 'apps' in groups:
        # add the apps user and edit the sudoers file to allow su to apps
        apps_user = User_account('apps')
        err.write(one_line(apps_user.passwd_entry()))
        etc_passwd.write(one_line(apps_user.passwd_entry()))
        etc_shadow.write(one_line(apps_user.shadow_entry()))
        sudoers_fd = os.open(self.vz_root_path + "/etc/sudoers",
          os.O_WRONLY | os.O_APPEND | os.O_CREAT, 0o440)
        sudoers = os.fdopen(sudoers_fd, 'a')
        try:
          sudoers.write("%apps           ALL=NOPASSWD:/bin/su - apps\n")
        finally:
          sudoers.close()
        apps_groups = apps_user.groups()
      else:
        apps_groups = []

      # write the /etc/passwd entry of the user last so we can look that up in firewall_readd.py
      err.write(one_line(account.passwd_entry()))
      etc_passwd.write(one_line(account.passwd_entry()))
      etc_shadow.write(one_line(account.shadow_entry()))

      # Add CMS groups to the /etc/group file in the container/VEID...
      # normally CMS gids are above 500
      # gids below 500 could be system groups and should be left alone, except for fuse,
      # which is handled by the caller
      for g in groups:
        try:
          groupinfo = grp.getgrnam(g)
          gname = groupinfo[0]
          gid = groupinfo[2]
          if gid > 500:
            # copy all group info as is if gid > 500
            if g in apps_groups:
              #  support su to apps.  Create all the groups that user apps belongs to
              etc_group.write("%s:x:%d:%s,%s\n" % (gname, gid, user, 'apps'))
            else:
              etc_group.write("%s:x:%d:%s\n" % (gname, gid, user))
        except KeyError:
          # getgrnam signals an unknown group with KeyError
          err.write("group '%s' is unknown on this host, not added to /etc/group\n" % g)
        except EnvironmentError:
          pass
    finally:
      for handle in (etc_group, etc_shadow, etc_passwd):
        if handle is not None:
          handle.close()


  def update_resources(self, session):
    """Add contents to resources file.  Why couldn't it have been put there when it was first
    created?"""
    homedir = self.k["HOME_DIR"]+"/"+'anonymous'
    rpath = "%s/%s/data/sessions/%s/resources" % (self.vz_root_path, homedir, session)
    try:
      rfile = open(rpath,"a+")
      # Read data from command line and write to file, until an empty string is found
      while 1:
        line = sys.stdin.readline()
        if line == "":
          break
        rfile.write(line)

      rfile.close()
    except OSError:
      raise MaxwellError("Unable to append to resource file.")

  def create_anonymous(self, session, params):
    """Add an "anonymous" user in the container and prepare its session directory.

    session: session identifier, used as directory name
    params: URL-quoted, UTF-8 encoded tool parameters; None or "" means no parameters

    Runs adduser inside the container (uid 1234), creates <home>/data/sessions/<session>
    owned by uid/gid 1234, writes the "resources" file and, when params is given, the
    unquoted parameters to "parameters.hz".

    The parameters file is written through io.open with an explicit UTF-8 encoding:
    writing the decoded (unicode) text to a file from the built-in open() makes Python 2
    encode it as ASCII, which fails for any non-ASCII character.  Both files are closed
    explicitly.
    Raises subprocess.CalledProcessError when adduser fails; OS errors are not caught.
    """
    homedir = self.k["HOME_DIR"]+"/"+'anonymous'
    args = ['/usr/sbin/vzctl', 'exec2', str(self.veid), 'adduser', '--uid', '1234']
    args += ['--disabled-password', '--home', homedir, '--gecos', '"anonymous user"', 'anonymous']
    subprocess.check_call(args)

    rdir = "%s%s/data/sessions/%s" % (self.vz_root_path, homedir, session)
    if VERBOSE:
      log("creating " + rdir)
    os.makedirs(rdir)
    # makedirs created the intermediate directories as the current user; hand them over
    os.chown(rdir,  1234, 1234)
    os.chown("%s%s/data/sessions" % (self.vz_root_path, homedir),  1234, 1234)
    os.chown("%s%s/data/" % (self.vz_root_path, homedir),  1234, 1234)
    rfile = open(rdir+"/resources", "w")
    try:
      rfile.write("sessionid %s\n" % session)
      rfile.write("results_directory %s/data/results/%s\n" % (homedir, session))
      os.fchown(rfile.fileno(), 1234, 1234)
    finally:
      rfile.close()
    if VERBOSE:
      log("setup anonymous session directory and resources in session %s" % (session))
    if params is not None and params != "":
      import io
      import urllib2
      params_path = rdir + "/parameters.hz"
      text = urllib2.unquote(params).decode("utf8")
      pfile = io.open(params_path, "w", encoding="utf8")
      try:
        pfile.write(text)
      finally:
        pfile.close()


  def __child_unix_command(self, user, session_id, timeout, command, params, sesslog_path):
    """Run a tool command inside the container.  Meant to run in a forked child.

      1. Setup the environment inside the container: permissions, password, group files,
      2. Setup the firewall rules on the host
      3. Setup X server authentication.  Call xauth so we're allowed to connect to the X server
      4. Invoke the command within the container
      5. Calculate time stats
      6. Restore the firewall rules

    user: login name the command runs as ("anonymous" is handled specially)
    session_id, timeout, params: passed to account.env() to build the environment
    command: command line executed inside the container
    sesslog_path: path prefix of the session logs; ".err" and ".out" are appended

    Progress and the final "Exit_Status:" line are written to the ".err" log.
    Does not return when it reaches the end: the process ends with os._exit(0), or with
    os._exit(2) when writing the password files fails.
    NOTE(review): a MaxwellError raised before the firewall rules are added (unsupported
    anonymous session, mount failures) propagates to the caller instead of ending the
    process -- confirm the caller copes with that in the forked child.

    The "mount point does not exist" error formats its message instead of passing the
    mount point as a second argument, the SSHFS "could not create" error names the
    directory that failed (it referred to a variable of another section, possibly
    undefined), and the indentation uses spaces only.
    """

    err = open(sesslog_path + ".err", 'a', 0)
    # the .out log is opened (and thereby created) but nothing below writes to it
    out = open(sesslog_path + ".out", 'a', 0)
    err.write("Starting command '%s' for '%s' with timeout '%s'\n" % (command, user, timeout))

    if user == "anonymous":
      if self.k["ANONYMOUS"]:
        # do not mount /home.
        # do not make any LDAP calls.
        account = User_account(user, self.k)
        groups = []
      else:
        raise MaxwellError("Anonymous user session not supported")
    else:
      if self.k["ANONYMOUS"]:
        # mount script already mounted this if we don't support anonymous sessions
        self.__root_mount("/home", "rw,nodev,nosuid,noatime")
      # Setup the environment inside the container: permissions, password, group files
      err.write("Setup the environment inside the container: permissions, password, group files\n")
      account = User_account(user)
      err.write("got user account info\n")

      # write group entries
      # Get a list of the supplementary groups...
      groups = account.groups()
      err.write("got groups\n")
      try:
        self.__etc_passwd(user, account, groups, err)
      except StandardError as exc:
        # nothing to clean up yet: the firewall rules have not been added at this point
        err.write("writing /etc/passwd failed due to exception:'%s'\n" % exc)
        err.write("Exit_Status: 2\n")
        err.close()
        os._exit(2)

      err.write("wrote /etc/passwd\n")

    # groups like "fuse" may pre-exist inside containers with a different gid than on the hub
    # just appending a new line to /etc/group could create conflicting definitions
    # example: vhub.org.def:@define MW_CONTAINER_GROUPS '"fuse", "public"'
    # Assumes a Debian Linux container, will fail with RedHat because the command is named useradd instead
    err.write("calling adduser\n")
    for defgroup in self.k["DEFAULT_GROUPS"]:
      p = subprocess.Popen(['/usr/sbin/vzctl', 'exec2', str(self.veid), 'adduser', user, defgroup], stdout = err, stderr = err)
      p.communicate()

    # 1b. /apps bind mount conditional on apps group membership
    # NOTE: the mount script in /etc/vz/conf takes care of mounting apps when
    # APPS_READONLY is false!  Don't try to mount it here again, it will fail
    # This requires xvnc to not be in /apps, but in /usr/sbin
    if self.k["APPS_READONLY"]:
      err.write("apps readonly\n")
      if 'apps' in groups:
        mount_opt = 'rw,acl,noatime'
      else:
        mount_opt = 'ro,acl,noatime'
        log("mounting apps READONLY")
      self.__root_mount("/apps", mount_opt)

    # User-based mounts
    # uses bind mounts from an already mounted filesystem
    if self.k["USER_MOUNT"]:
      for mount_pt in self.k["USER_MOUNT_POINTS"]:
        log("mounting %s" % (mount_pt))
        # mount_pt must already exist
        if not os.path.exists(mount_pt):
          raise MaxwellError("Mount point '%s' does not exist" % mount_pt)
        source_mount = mount_pt + user
        # check if source exists
        if not os.path.exists(source_mount):
          # create it as the user, not root
          args = ['/bin/su', user, '-c', "mkdir -m 0700 " + source_mount]
          p = subprocess.Popen(args, stderr = err, stdout = err)
          p.communicate()
          if p.returncode != 0:
            raise MaxwellError("Could not create '%s'" % (source_mount))
        if not os.path.exists(self.vz_root_path + source_mount):
          args = ['/bin/mkdir', '-m', '0700', '-p', self.vz_root_path + source_mount]
          p = subprocess.Popen(args, stderr = err, stdout = err)
          p.communicate()
          if p.returncode != 0:
            raise MaxwellError("Could not create '%s'" % (self.vz_root_path + source_mount))
        self.__root_mount(source_mount, 'rw,noatime')

    # Project mounts: one bind mount per group named "pr-<project>" whose directory exists
    if self.k["PROJECT_MOUNT"]:
      for g in groups:
        if g[0:3] == "pr-":
          source_mount = self.k["PROJECT_PATH"] + g[3:]
          if not os.path.exists(source_mount):
            continue
          if not os.path.exists(self.vz_root_path + source_mount):
            args = ['/bin/mkdir', '-m', '0700', '-p', self.vz_root_path + source_mount]
            p = subprocess.Popen(args, stderr = err, stdout = err)
            p.communicate()
            if p.returncode != 0:
              raise MaxwellError("Could not create '%s'" % (self.vz_root_path + source_mount))
          self.__root_mount(source_mount, 'rw,noatime')

    # Deprecated -- SSHFS-based user mounts -- Deprecated
    if self.k["SSHFS_MOUNT"]:
      # create an SSH connection for each container
      for mount_pair in self.k["SSHFS_MOUNT_POINTS"]:
        # array of remote, local info
        remote_path = mount_pair[0] + user
        container_path = self.vz_root_path + mount_pair[1] + user
        manage_path = mount_pair[1] + user
        log("mounting %s at %s" % (remote_path, container_path))
        if not os.path.exists(manage_path):
          # create it as the user, not root
          args = ['/bin/su', user, '-c', "mkdir -m 0700 " + manage_path]
          p = subprocess.Popen(args, stderr = err, stdout = err)
          p.communicate()
          if p.returncode != 0:
            # possible race condition if user starts two sessions quickly for the first time
            raise MaxwellError("Could not create '%s'" % (manage_path))
        if not os.path.exists(container_path):
          args = ['/bin/mkdir', '-m', '0700', '-p', container_path]
          p = subprocess.Popen(args, stderr = err, stdout = err)
          p.communicate()
          if p.returncode != 0:
            raise MaxwellError("Could not create '%s'" % (container_path))
        args = ['/usr/bin/ssh', '-o', 'intr', '-o', 'sync_read', '-o', 'IdentityFile=%s' % self.k["SSHFS_MOUNT_KEY"], '-o', 'allow_other', remote_path, container_path]
        p = subprocess.Popen(args, stderr = err, stdout = err)
        p.communicate()

    # 2. Setup the firewall rules on the host
    err.write("firewall rules\n")
    self.firewall_by_group(groups, 'add')

    # Wrap the following in a "try" to reverse the iptables state in case of an exception
    try:
      count = 0
      # 3. Setup X server authentication
      # Use the mergeauth script wrapper to xauth.  Calling xauth directly can lead to
      # "Massive race conditions with the .Xauthority file on multiple systems shared with NFS."
      # cmd = "xauth -v source /Xvnc/authlist*"
      # args = ['/usr/sbin/vzctl', 'exec2', str(self.veid), 'su', user, '-s', '/bin/sh', '-c',
      #  '\"cd; %s %s\"' % (env_cmd, cmd)]
      args = ['/usr/sbin/vzctl', 'exec2', str(self.veid), 'su', user, '-s', '/bin/dash', '-c',
        self.k["INTERNAL_PATH"] + 'mergeauth']
      if VERBOSE:
        log("command is %s\n" % " ".join(args))
      # retry every half second until mergeauth succeeds or XAUTH_RETRIES is exceeded
      while True:
        p = subprocess.Popen(args, stderr = err, stdout = err)
        p.communicate()
        if p.returncode == 0:
          break
        time.sleep(0.5)
        count = count+1
        if count > self.k["XAUTH_RETRIES"]:
          err.write("Unable to extract xauth cookie for %d\n" % self.veid)
          raise MaxwellError() # cleanup rules

      # 4. Invoke the command within the container
      # Actual application start!
      # In the command below, use "time" to get the runtime of the command.
      # The user and sys cputimes will be inaccurate, but will be overridden
      # by printvzstats().  We use it to get the clock time of the command.
      #
      # Note: time is a built-in shell command, there is no separate binary installed
      # so the whole thing has to be passed to a shell for interpretation...
      # when shell=True, args needs to be a string for "time" to be interpreted as the built-in
      #
      # Also, the environment needs to be passed inside the container.  Setting env= in the
      # subprocess call only sets the environment for the vzctl command.
      #
      # In addition, the whole "su..." command has to be passed inside quotes, otherwise it fails.
      #  Perhaps some of the switches get interpreted by an earlier command than intended? time?
      env_cmd = " ".join(account.env(session_id, timeout, params) + ["DISPLAY=\"%s:0.0\"" % (self.veaddr)])
      try:
        env_cmd += " " + self.k["EXTRA_ENV_CMD"]
      except KeyError:
        # EXTRA_ENV_CMD is optional
        pass
      args = ['/usr/sbin/vzctl', 'exec2', str(self.veid), 'su', user, '-s', '/bin/dash', '-c',
         '\"cd; %s %s\"' % (env_cmd, command)]
      if VERBOSE:
        log("command is %s\n" % " ".join(args))
      start_time = time.time()
      # subprocess.call(args)
      # Python docs: "The data read is buffered in memory, so do not use this method if the data size is large or unlimited."
      # Problem:  If the tool is misbehaving and has GBs of output, then root processes start
      # consuming GBs of memory!  Todo: find alternative to calling subprocess.communicate while capturing output
      p = subprocess.Popen(args, stderr = err, stdout = err)
      p.communicate()
      end_time = time.time()

      # 5. Calculate time stats
      if VERBOSE:
        err.write("Processing stats")
      err.write("real\t%f\n" % (end_time - start_time))
      self.__printvzstats(err)
      # everything went OK
      err.write("Exit_Status: 0\n")
      err.close()
    except StandardError as exc:
      # fall through to the firewall cleanup below
      err.write("tool session failed due to exception:'%s'\n" % exc)
      err.write("Exit_Status: 2\n")
      err.close()

    # 6. Restore the firewall
    self.firewall_by_group(groups, 'delete')
    # self.firewall(False, False, False, False)

    os._exit(0)


  def invoke_unix_command(self, user, session_id, timeout, command, params, sesslog_path):
    """Start a tool in the container by forking.

    The child invokes the command; the parent waits for the child and returns, after
    which other things happen (notify).
    When we are called, the log file has been closed and we're a dissociated process.
    Stdout and stderr have been redirected to files, so we use that for logging.

    user: string
    session_id: string (int+letter)
    timeout: int
    command: string
    params, sesslog_path: handed to the child unchanged

    Returns 0 in the parent.  Exits the process with status 1 when fork fails.
    """
    try:
      child_pid = os.fork()
    except OSError as fork_error:
      log("unable to fork: '%s', exiting" % fork_error)
      sys.exit(1)

    if child_pid == 0:
      # child
      self.__child_unix_command(user, session_id, timeout, command, params, sesslog_path)

    # parent: wait for the child, a failing wait is not an error
    try:
      log("Waiting for %d" % child_pid)
      os.waitpid(child_pid, 0)
    except OSError:
      pass
    return 0


  def screenshot(self, user, sessionid):
    """Support display of session screenshots for app UI.  On error, do not produce an exception.

    user: login name owning the session
    sessionid: session identifier

    Runs /usr/bin/screenshot inside the container as the user, writing to
    <homedir>/data/sessions/<sessionid>/screenshot.png.  Does nothing when the session
    directory does not exist.  A failure to start the command (OSError/IOError) is
    logged instead of raised, as the first line promises.
    NOTE(review): exceptions from User_account() or account.env() are not handled here --
    confirm whether they can occur.
    """
    account = User_account(user)
    session_dir = "%s/data/sessions/%s" % (account.homedir, sessionid)
    if not os.path.isdir(session_dir):
      return
    destination = "%s/data/sessions/%s/screenshot.png" % (account.homedir, sessionid)
    vz_env = account.env(sessionid, 8000, False)
    env_cmd = " ".join(vz_env + ["DISPLAY=\"%s:0.0\"" % (self.veaddr)])
    command = "/usr/bin/screenshot %s" % destination
    args = ['/usr/sbin/vzctl', 'exec2', str(self.veid), 'su', user, '-s', '/bin/dash', '-c',
      '\"cd; %s %s\"' % (env_cmd, command)]
    try:
      p = subprocess.Popen(args, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
      log_subprocess(p)
    except EnvironmentError as exc:
      log("unable to take screenshot: '%s'" % exc)


  def __root_mount(self, point, perm):
    """Bind-mount a host directory below the root of the container.

    By mounting under root, containers can't modify the original.

    point: absolute path on the host; mounted at the same path inside the container root
    perm: mount options, passed to mount with -o

    Raises MaxwellError when the target directory exists and cannot be removed, or when
    mount fails.
    """
    target = self.vz_root_path + point
    if os.path.isdir(target):
      # an empty leftover directory can be removed, a mounted or filled one can not
      try:
        os.rmdir(target)
      except OSError as exc:
        log("exception:'%s'\n" % exc)
        raise MaxwellError("'%s' already exists and is probably already mounted.  Giving up."
          % target)
    os.mkdir(target)
    if VERBOSE:
      log("Created %s" % target)

    # -n: Mount without writing in /etc/mtab.
    mount_cmd = ["/bin/mount", "-n", "--bind", point, '-o', perm, target]
    p = subprocess.Popen(mount_cmd, stderr=subprocess.PIPE, stdout=subprocess.PIPE)
    log_subprocess(p)
    if p.returncode == 0:
      return

    # mount failed: remove the directory created above, best effort
    try:
      os.rmdir(target)
    except OSError:
      pass
    raise MaxwellError("Could not mount %s in '%d'" % (point, self.veid))


  def firewall_by_group(self, groups, operation='add'):
    """groups is an array of groups the user belongs to
    FW_GROUP_MAP is an array of ["group_name", net_cidr, portmin, portmax]
    where net_cidr is something like 128.46.19.160/32
    """
    rule_start = [self.k["FW_CHAIN"], '-i', 'venet0', '-s', self.veaddr]
    for g in self.k["FW_GROUP_MAP"]:
      if g[0] in groups:
        (net_cidr, portmin, portmax) = g[1:]
        if net_cidr == "":
          # block access to hubzero networks due to firewall rule exceptions
          hostpart = ['!', '-d', self.k["MW_PROTECTED_NETWORK"]]
        else:
          # hostpart = ['-d', socket.gethostbyname(host)]
          hostpart = ['-d', net_cidr]
        if portmin == 0:
          # all ports
          portpart = []
        else:
          if portmax == 0:
            # single port
            portpart = ['-p', 'tcp', '--dport', '%s' % portmin]
          else:
            portpart = ['-p', 'tcp', '--dport', '%s:%s' % (portmin, portmax)]
        fwd_rule = rule_start + hostpart + portpart + ['-j', 'ACCEPT']
        if DEBUG:
          log(fwd_rule)
        if operation == 'add':
          subprocess.check_call(['/sbin/iptables', '-A'] + fwd_rule)
        else:
          subprocess.check_call(['/sbin/iptables', '-D'] + fwd_rule)

  def set_ipaddress(self):
    """Assign self.veaddr to the container with "vzctl set --ipadd".

    Raises MaxwellError when the command returns a non-zero status.
    """
    target = (self.veid, self.veaddr)
    status = os.system("vzctl set %d --ipadd %s" % target)
    if status == 0:
      return
    raise MaxwellError("Bad status for vzctl set %d --ipadd %s: %d" % (target + (status,)))


  def umount(self):
    """unmount container directories"""
    self.__openVZ_umount(self.vz_root_path)
    self.__openVZ_umount(self.vz_private_path)


  def __openVZ_umount(self, fs_path):
    """If given path exists, call ctid.umount for that container
    The ctid.umount script is part of the shutdown process of a container.  This indicates
    an unclean shutdown.
    """
    if os.path.exists(fs_path):
      # tell openVZ to unmount that container's file system
      # internally, that umount script doesn't use abolute paths so we need
      # to set the PATH
      v_env = {"VEID" : str(self.veid), "PATH": "/bin:/usr/bin"}
      args = ["%s/%s" % (self.k["VZ_CONF_PATH"], self.k["OVZ_SESSION_UMOUNT"])]
      p = subprocess.Popen(args, stderr=subprocess.PIPE, stdout=subprocess.PIPE, env = v_env)
      log_subprocess(p)
      if p.returncode != 0:
        raise MaxwellError("Could not unmount container '%d'" % (self.veid))
      if self.k["APPS_READONLY"]:
        # we mount /apps rw or ro depending on group membership of user
        try:
          os.rmdir(fs_path + '/apps')
        except OSError:
          pass
      try:
        os.rmdir(fs_path)
      except OSError:
        pass
      if os.path.exists(fs_path):
        raise MaxwellError("'%s' still exists.  Giving up." % fs_path)

  def create_xstartup(self):
    """Make sure RUN_DIR/xstartup exists and is safe to use.

    xstartup is a file we create to make VNC happy.  RUN_DIR is something like
    '/usr/lib/mw'.  VNC is started inside containers, and /usr is mounted inside.

    A missing file is created with mode 0700 and the single line "#!/bin/bash".
    Raises MaxwellError when the file cannot be created, when others may write to it, or
    when it is not owned by the effective user of this process.
    """
    x_path = RUN_DIR + "/xstartup"
    try:
      info = os.lstat(x_path)
    except OSError:
      # does not exist yet: create it, refusing to follow a symbolic link
      try:
        fd = os.open(x_path, os.O_CREAT | os.O_WRONLY | os.O_NOFOLLOW, 0o700)
        os.write(fd, "#!/bin/bash\n")
        os.close(fd)
        info = os.lstat(x_path)
      except OSError:
        raise MaxwellError("Unable to create '%s'." % x_path)

    # others must not be able to write to the file
    if info.st_mode & stat.S_IWOTH:
      raise MaxwellError("'%s' has unsafe permissions.  Remove write permissions for others"
        % x_path)

    # and we must be its owner
    owner = info.st_uid
    if owner != os.geteuid():
      raise MaxwellError("'%s' has incorrect owner: %s" % (x_path, owner))


  def read_passwd(self):
    """VNC password is 8 bytes, we want the version encrypted for VNC, not the
    encoded version for web use
    """
    self.vncpass = sys.stdin.read(8)
    return

  def stunnel(self):
    """We handle tunnels and forwards here, to make the inside of containers visible to the outside.
    Containers are mapped to port ranges using the dispnum (a.k.a. CTID a.k.a. veid).
    Nick says:
      socat starts fewer processes than stunnel per connection,
      doesn't require the netstat check, and doesn't seem to have the ssl session
      corruption problem that my vncproxy module seems to be triggering in stunnel.

    The listening port is veid + STUNNEL_PORTS and connections are forwarded to
    veaddr:PORTBASE.  TUNNEL_MODE selects the program:
      - a value starting with 'socat': socat with an OPENSSL-LISTEN address; any value
        longer than 'socat' adds three -d (debug) switches
      - 'stunnel4': /usr/bin/stunnel, with its configuration written through a pipe
      - anything else: stunnel with version 3 style command line switches

    Raises MaxwellError when the stunnel4 child reports a non-zero wait status, and
    subprocess.CalledProcessError when the stunnel 3 command fails.  A socat failure is
    only logged.
    """
    in_port = self.veid + self.k["STUNNEL_PORTS"] # e.g., 4000 + display
    remote = '%s:%d' % (self.veaddr, self.k["PORTBASE"])
    # kill anything listening on the stunnel port; should have been killed when stopping container
    # to avoid race condition with TCP_WAIT state.
    p = subprocess.Popen(['fuser', '-n', 'tcp', '%d' % in_port, '-k', '-9'], stderr=subprocess.PIPE, stdout=subprocess.PIPE)
    # retrieve but ignore error message which is generated if there was no process to kill
    (stdout, stderr) = p.communicate()
    if p.returncode == 0:
      # a process was killed, wait a bit for TCP_WAIT state to clear
      time.sleep(1)
    if self.k["TUNNEL_MODE"][:5] == 'socat':
      args = ["socat"]
      args.append("OPENSSL-LISTEN:%d,cert=%s,fork,verify=0" % (in_port, self.k["PEM_PATH"]))
      args.append("tcp4:%s" % remote)
      if len(self.k["TUNNEL_MODE"]) > 5:
        # TUNNEL_MODE has a suffix after 'socat': ask socat for debug output
        args += ["-d", "-d", "-d"]
      log(" ".join(args))
      process = subprocess.Popen(
        args,
        stdout = subprocess.PIPE,
        stderr = subprocess.PIPE
      )
      # NOTE(review): communicate() waits for socat to terminate -- confirm the caller
      # expects this call to block for as long as socat runs
      (stdout, stderr) = process.communicate()
      if process.returncode != 0:
        log("Can't start ssl socat (it's probably already running): %s" % stderr)

      if VERBOSE:
        log("forwarder started: %s" % stdout)

    else:
      if self.k["TUNNEL_MODE"] == 'stunnel4':
        # stunnel4 will read configuration from a pipe.
        # reading from stdin causes the first connection to fail
        # we're going to create a pipe and fork because Popen is too limited
        # file descriptors r, w for reading and writing
        r, w = os.pipe()
        processid = os.fork()
        if processid:
          # This is the parent process
          # note: the parent can't exec otherwise the webserver will think the container is done starting
          os.close(r)
          w = os.fdopen(w, 'w')
          w.write("cert = %s\n" % self.k["PEM_PATH"])
          w.write("accept = %d\n" % in_port)
          w.write("connect = %s\n" % remote)
          w.write("debug = 3\n")
          # RedHat has FIPS version, this errors out on Debian.
          w.write("fips=no\n")
          w.write("output=/var/log/stunnel\n")
          w.write("[stunnel3]\n")
          # closing the write end lets the child see the end of the configuration
          w.close()
          # os.wait() returns for whichever child process terminates first
          (pid, status) = os.wait()
          if status == 0:
            log("stunnel4 os.wait pid=%d, status=%d." %(pid, status))
          else:
            raise MaxwellError("stunnel error! pid=%d, status=%d." %(pid, status))
        else:
          # This is the child process, will read from r
          os.close(w)
          # -fd: stunnel reads its configuration from the given file descriptor
          os.execl("/usr/bin/stunnel", "/usr/bin/stunnel", "-fd", "%d" % r)
          # only reached if execl returns without replacing the process
          raise MaxwellError("unable to execute /usr/bin/stunnel")
      else:
        # stunnel3 for Debian
        # status = os.system("stunnel -d %d -r %s:%d -p %s" %
        #         (4000+self.disp, self.veaddr, 5000,  self.k["PEM_PATH"]))
        # -d: daemon mode
        # -r [host:]port    connect to remote service
        # , '-D', '7' to increase debug level to 7
        args = ["stunnel", '-D', '4', '-d', str(in_port), '-r', remote, '-p',  self.k["PEM_PATH"]]
        log(" ".join(args))
        subprocess.check_call(args)

      if VERBOSE:
        log("stunnel started for %d" % self.veid)

    # Start a forwarder to make the display look external.
    # This is only for backward-compatibility.
    #os.system("socat tcp4-listen:%d,fork,reuseaddr,linger=0 tcp4:%s:5000 > /dev/null 2>&1 < /dev/null &" % (5000+ self.veid, self.veaddr))

  def delete_confs(self):
    """Remove the per-container OpenVZ links and drop the disk quota of the container.

    Deletes <VZ_CONF_PATH>/<veid>.conf, .mount and .umount if they exist, then runs
    "vzquota off" and "vzquota drop" for the container with their error output
    discarded.  The exit status of vzquota is ignored on purpose.

    /dev/null is opened once and closed afterwards; it used to be opened for each
    command and never closed.
    """
    for ext in ['conf', 'mount', 'umount']:
      conf_path = "%s/%d.%s" % (self.k["VZ_CONF_PATH"], self.veid, ext)
      try:
        os.unlink(conf_path)
      except EnvironmentError:
        if DEBUG:
          log("File %s was already deleted or missing" % conf_path)

    with open(os.devnull, 'w') as devnull:
      # stop quotas if they are already running
      # vzquota off 194
      # vzquota drop 194
      # this operation can fail if the operations are still ongoing; that's fine.
      subprocess.call(['/usr/sbin/vzquota', 'off', '%d' % self.veid], stderr=devnull)
      # drop safely removes the quota file -- this file can cause problems,
      # e.g., when container template is changed
      subprocess.call(['/usr/sbin/vzquota', 'drop', '%d' % self.veid], stderr=devnull)


  def create_confs(self):
    """Create the per-container symlinks in the OpenVZ configuration directory.

    Three links named <veid>.conf, <veid>.mount and <veid>.umount are made in
    VZ_CONF_PATH.  They point at the shared session configuration file, mount
    script and umount script (OVZ_SESSION_CONF, OVZ_SESSION_MOUNT and
    OVZ_SESSION_UMOUNT), which live in the same directory.
    The mount script is called when starting the VE (container).

    Raises MaxwellError if any of the links cannot be created.
    """
    conf_dir = self.k["VZ_CONF_PATH"]
    # (settings key of the link target, name pattern of the link), in creation order
    links = (
      ("OVZ_SESSION_CONF", "%s/%d.conf"),
      ("OVZ_SESSION_MOUNT", "%s/%d.mount"),
      ("OVZ_SESSION_UMOUNT", "%s/%d.umount"),
    )
    try:
      for target_key, link_pattern in links:
        target = "%s/%s" % (conf_dir, self.k[target_key])
        os.symlink(target, link_pattern % (conf_dir, self.veid))
    except EnvironmentError:
      raise MaxwellError("Unable to create OpenVZ symlinks")

  def start_filexfer(self):
    """Launch a background socat forwarder for filexfer.

    The forwarder listens on port veid + FILEXFER_PORTS and relays to the same
    port on the container address.  It is never killed by this class; if it
    cannot be started, that means there's already one running.
    The shell's exit status is not examined.
    """
    port = self.veid + self.k["FILEXFER_PORTS"]
    command = "socat tcp4-listen:%d,fork,reuseaddr,linger=0 tcp4:%s:%d > /dev/null 2>&1 &" % (
      port, self.veaddr, port)
    os.system(command)

  def start(self, geom):
    """Start the container, bind-mount its /usr, and launch the internal Xvnc server.

    Have /usr be symlink at the beginning (from setup_template), then remove it to put a mount
    Setup a lock directory that will be erased by the start process when it's done
    this functionality appears to be duplicated by the .mount scripts in /etc/vz/conf.
    We wait for the lock to be removed, to indicate that the mount script has finished.  This is
    not an access lock.

    geom: passed unchanged as the last argument of the in-container 'startxvnc'
      script (presumably a "WIDTHxHEIGHT" string, as resize() expects -- TODO confirm).

    Raises MaxwellError if 'vzctl start' fails, if the lock directory is still
    present 60 seconds after this method began, if the bind mount fails, or if
    startxvnc exits with a non-zero status.
    """
    lock_dir = "%s/lock/mount.%d.lock" % (self.k["VZ_PATH"], self.veid)
    if not os.path.exists(lock_dir):
      os.mkdir(lock_dir)
      if VERBOSE:
        log("Created %s" % lock_dir)
    else:
      if VERBOSE:
        log("Already existed: %s" % lock_dir)
    # the 60 second timeout below is measured from here, so it includes 'vzctl start' itself
    start_time = time.time()
    # extremely verbose: args = ["vzctl", "--verbose", "start", str(self.veid)]
    args = ["vzctl", "start", str(self.veid)]
    p = subprocess.Popen(args, stderr=subprocess.PIPE, stdout=subprocess.PIPE)
    # NOTE(review): log_subprocess is defined elsewhere; it presumably waits for the
    # process and logs its output, since returncode is read right after -- confirm.
    log_subprocess(p)
    if p.returncode != 0:
      raise MaxwellError("Can't start container '%d'" % (self.veid))
    # now sleep until the lock is gone
    while os.path.exists(lock_dir):
      time.sleep(1)
      if time.time() - start_time > 60:
        raise MaxwellError("Timed out waiting for container to start")
    end_time = time.time()
    # log how long we waited
    log ("vzctl start time: %f seconds" % (end_time - start_time))

    # replace the symlink with a mount
    os.unlink(self.vz_private_path + "/usr")

    # If vz/root mount is done after the call to start, without a symlink in place, we get:
    # bash: line 318: awk: command not found
    # ERROR: Can't change file /etc/hosts

    # If vz/root mount is done before the call to start, we get:
    # error 32
    # mount: special device /vz/root/257/.root/usr does not exist
    #
    # if we try private instead of root, we get:
    # mount: special device /vz/private/261/.root/usr does not exist
    #
    # symlink can't be left alone due to bug in gcc;  mounting is needed.
    #
    # check mount point exists or create it
    usr_mnt = self.vz_root_path + "/usr"
    if not os.path.exists(usr_mnt):
      os.mkdir(usr_mnt)

    # mount --bind olddir newdir
    # -n: Mount without writing in /etc/mtab.
    args = ["/bin/mount", "-n", "--bind", self.vz_root_path + "/.root/usr", usr_mnt]
    p = subprocess.Popen(args, stderr=subprocess.PIPE, stdout=subprocess.PIPE)
    log_subprocess(p)
    if p.returncode != 0:
      raise MaxwellError("Can't bind mount .root/usr in '%d'" % (self.veid))

    if VERBOSE:
      log("vzctl exec2 %d %s %s 0 %s" %\
          (self.veid, self.k["INTERNAL_PATH"]+'startxvnc', self.veaddr, geom))
    args = ["vzctl", "exec2", str(self.veid)]
    args += [self.k["INTERNAL_PATH"] + 'startxvnc', self.veaddr, '0', geom]
    process = subprocess.Popen(
      args,
      stdin = subprocess.PIPE,
      stdout = subprocess.PIPE,
      stderr = subprocess.PIPE
    )
    # self.vncpass is written to startxvnc's standard input rather than put on the command line
    (stdout, stderr) = process.communicate(self.vncpass)
    if process.returncode != 0:
      raise MaxwellError("Unable to start internal Xvnc server: %s%s" %(stdout, stderr))
    elif VERBOSE:
      log(stdout)
      end_time3 = time.time()
      # log how long we waited; measured from the end of the lock wait, so this
      # also covers the unlink and the bind mount above
      log ("startxvnc call took: %f seconds" % (end_time3 - end_time))

  def resize(self, geometry):
    """Resize the running Xvnc display of this container.

    geometry: string of the form "WIDTHxHEIGHT"; it must contain exactly one 'x'.

    Runs the in-container 'hzvncresize' helper through 'vzctl exec2'.
    Raises MaxwellError, including the helper's output, if it exits non-zero.
    """
    width, height = geometry.split('x')
    command = [
      "vzctl", "exec2", str(self.veid),
      self.k["INTERNAL_PATH"] + 'hzvncresize',
      '-a', '/var/run/Xvnc/passwd-%s:0' % self.veaddr,
      width, height,
    ]
    child = subprocess.Popen(
      command,
      stdin=subprocess.PIPE,
      stdout=subprocess.PIPE,
      stderr=subprocess.PIPE)
    out, err = child.communicate()
    if child.returncode != 0:
      raise MaxwellError("Unable to change Xvnc geometry: %s%s" %(out, err))
    if VERBOSE:
      log(out)

  def setup_template(self):
    """Build the directory skeleton of the container's private area.

    Creates the root and private directories, then in the private directory:
      - symlinks bin, lib, sbin, emul, lib32, lib64, usr and opt to .root/<name>
        (emul and lib32 are there to support 32-bit binaries);
      - creates the directories .root, home, mnt, proc and sys.
    usr is problematic: some versions of gcc don't work with a symlink, so even
    though nanohub worked with a symlink, usr is turned into a mount point
    later on.  Treating usr as a mount point can generate these errors:
      # bash: line 318: awk: command not found
      #  ERROR: Can't change file /etc/hosts
    A link that already exists is logged and left untouched.
    """
    for directory in (self.vz_root_path, self.vz_private_path):
      os.makedirs(directory, mode=0o755)
      if VERBOSE:
        log("created directory " + directory)

    # linking ['bin', 'lib', 'sbin', 'lib64', 'usr'] is equivalent to the "template copy -a" method
    linked = ['bin', 'lib', 'sbin', 'emul', 'lib32', 'lib64', 'usr', 'opt']
    for name in linked:
      link_path = self.vz_private_path + "/" + name
      if not os.path.lexists(link_path):
        os.symlink(".root/%s" % name, link_path)
      else:
        log("%s already exists!" % link_path)

    for name in ['.root', 'home', 'mnt', 'proc', 'sys']:
      os.mkdir(self.vz_private_path + "/" + name)
    if VERBOSE:
      log("template setup")

  def __delete_root(self):
    """Remove the container root directory when it is present.

    Nothing happens if the path is not a directory.  os.rmdir only removes
    empty directories, so a non-empty root raises OSError.
    """
    if not os.path.isdir(self.vz_root_path):
      return
    os.rmdir(self.vz_root_path)

  def __log_status(self):
    """Write the container status to the log; does nothing unless VERBOSE is set."""
    if not VERBOSE:
      return
    log(self.get_status())

  def get_status(self):
    """Return the standard output of 'vzctl status <veid>' as a string.

    Raises MaxwellError, including vzctl's error output, on a non-zero exit code.
    """
    child = subprocess.Popen(
      ['vzctl', 'status', str(self.veid)],
      stdout=subprocess.PIPE,
      stderr=subprocess.PIPE)
    out, err = child.communicate()
    if child.returncode == 0:
      return str(out)
    raise MaxwellError("Can't get status of container '%d': %s" % (self.veid, err))

  def __halt(self):
    """Hard halt for all processes in the container.  Does not wait for anything.

    Runs 'halt -nf' inside the container through 'vzctl exec'.  A failure is
    only logged, except for exit code 14 (container configuration file not
    found): in that case the configuration links are recreated and the command
    is run once more.

    Raises MaxwellError if the links cannot be recreated, and
    subprocess.CalledProcessError if the second attempt fails.
    """
    args = ['vzctl', 'exec', str(self.veid), 'halt', '-nf']
    process = subprocess.Popen(
      args,
      stdout = subprocess.PIPE,
      stderr = subprocess.PIPE
    )
    (stdout, stderr) = process.communicate()
    if process.returncode == 14:
      # Container configuration file vps.conf(5) not found
      # try to fix it otherwise VE can't be shut down!
      self.create_confs()
      subprocess.check_call(args)
      # The retry succeeded (check_call raises otherwise), so the exit code of
      # the first attempt must not be reported as a failure below.
      return
    if process.returncode != 0:
      log("Unable to halt VE: %s%s" %(stdout, stderr))

  def wait_unlock(self):
    """Block until OpenVZ is done starting or stopping this container.

    Polls for /vz/lock/<veid> every 10 seconds and returns as soon as it no
    longer exists.  Raises MaxwellError once the lock has been seen 51 times.
    """
    lock_path = "/vz/lock/%d" % self.veid
    polls = 0
    while True:
      if not os.path.exists(lock_path):
        return
      time.sleep(10)
      polls += 1
      if polls > 50:
        raise MaxwellError("Unable to get lock on VE: %d" %(self.veid))

  def stop(self):
    """Stop and unmount the container with 'vzctl stop <veid> --fast'.

    On success vzctl's output is logged and the container root directory is
    removed.  On exit code 14 (container configuration file not found) the
    configuration links are recreated and the command is run a second time;
    the method then returns without removing the root directory.
    Any other non-zero exit code raises MaxwellError.
    """
    command = ['vzctl', 'stop', str(self.veid), '--fast']
    first_try = subprocess.Popen(
      command,
      stdout=subprocess.PIPE,
      stderr=subprocess.PIPE)
    out, err = first_try.communicate()
    status = first_try.returncode

    if status == 14:
      # Container configuration file vps.conf(5) not found
      # try to fix it otherwise VE can't be shut down!
      self.create_confs()
      second_try = subprocess.Popen(command, stderr=subprocess.PIPE, stdout=subprocess.PIPE)
      log_subprocess(second_try)
      return

    if status != 0:
      raise MaxwellError("'vzctl stop' output: %s%s exit code %d"
        % (out, err, status))

    log(out)
    # The root directory could not be deleted earlier because "vzctl stop" expects it.
    # Finding this directory later shows that the code didn't get to this point
    # and that cleanup may be needed.
    os.rmdir(self.vz_root_path)

  def __vzproccount(self):
    """Return the number of processes in this container, read from /proc/vz/veinfo.

    Returns 0 when the file cannot be opened (the container is possibly
    stopped), when it has no line for this container, or when the count field
    is not an integer.
    """
    try:
      f = open("/proc/vz/veinfo")
    except EnvironmentError:
      log("vzproccount: can't open veinfo.")
      # possibly stopped
      return 0
    wanted = str(self.veid)
    # 'with' closes the file on every exit path, including the early returns
    with f:
      for line in f:
        arr = line.split()
        # expecting something like "         29     0     2      10.26.0.29"
        if len(arr) != 4:
          continue
        if arr[0] == wanted:
          try:
            return int(arr[2])
          except ValueError:
            return 0
    # end of file: no entry for this container
    return 0

  def stop_submit_local(self):
    """Kill the stunnel listener, then ask any running 'submit --local' to exit.

    Whatever holds TCP port veid + STUNNEL_PORTS is killed with signal 9 through
    fuser.  If pgrep finds a 'submit --local' process inside the container, it
    is sent signal 15 (SIGTERM) and polled every 2 seconds, at most 10 times.
    A process that is still found after the last poll is only reported in the
    log; nothing is raised.
    """
    # kill stunnel and log any errors as it should still be running
    in_port = self.veid + self.k["STUNNEL_PORTS"] # e.g., 4000 + display
    p = subprocess.Popen(['fuser', '-n', 'tcp', '%d' % in_port, '-k', '-9'], stderr=subprocess.PIPE, stdout=subprocess.PIPE)
    log_subprocess(p)
    # the pattern carries its own double quotes so that it stays one argument
    # in the command line that vzctl executes inside the container
    check_submit_args = ['vzctl', 'exec', str(self.veid), '/usr/bin/pgrep', '-f', '\"submit --local\"']
    rc = subprocess.call(check_submit_args)
    if rc != 0:
      # no submit --local found, nothing to wait for
      return
    log("Telling submit --local to exit")
    args = ['vzctl', 'exec', str(self.veid), 'pkill', '-15', '-f', '\"submit --local\"']
    subprocess.call(args)
    attempt = 0
    rc = subprocess.call(check_submit_args)
    while rc == 0 and (attempt < 10):
      attempt += 1
      time.sleep(2) # give time for submit to exit
      rc = subprocess.call(check_submit_args)
    # Decide on the result of the last check rather than on the attempt counter,
    # so that a process that exited during the final wait is not reported.
    if rc == 0:
      log("submit --local didn't exit!")

  def killall(self):
    """Kill every process in the container except init, escalating the signal.

    Signals 1, 2, 15 and 9 are tried in that order, starting with the milder
    ones.  After each signal the process count is polled once per second for as
    long as it keeps going down, up to 60 polls.  The next signal is sent only
    if more than one process is left.  If processes remain after the last
    signal and up to 5 more seconds, a warning is logged; nothing is raised.
    """
    for sig in [1, 2, 15, 9]:
      pcount = self.__vzproccount()
      if pcount <= 1:
        break
      log("Killing %d processes in veid %d with signal %d" % (pcount, self.veid, sig))
      # -1 indicates all processes except the kill process itself and init.
      # The command is a single argument; vzctl exec runs it inside the container.
      args = ['vzctl', 'exec', str(self.veid), 'kill -%d -1' % sig]
      p = subprocess.Popen(args, stderr=subprocess.PIPE, stdout=subprocess.PIPE)
      log_subprocess(p)
      # Wait as long as the number of processes keeps going down
      tries = 0
      while tries < 60:
        time.sleep(1) # give time for processes to exit, otherwise we try again too early
        tries += 1
        ccount = self.__vzproccount()
        if ccount <= 1 or ccount >= pcount:
          # either everything is gone or this signal has no further effect
          break
        pcount = ccount
      else:
        # reached only when all 60 polls showed a decreasing count
        log("timeout waiting for processes to exit from signal %d" % (sig))
    attempt = 0
    while self.__vzproccount() > 1 and (attempt < 5):
      attempt += 1
      time.sleep(1) # don't want failed message unnecessarily
    if attempt > 4:
      log("Warning killall: processes still running")

