#!/bin/sh
# @package      hubzero-hubgraph
# @file         hubgraph-monitor
# @author       Nicholas J. Kisseberth <nkissebe@purdue.edu>
# @copyright    Copyright (c) 2014 HUBzero Foundation, LLC.
# @license      http://www.gnu.org/licenses/lgpl-3.0.html LGPLv3
#
# Copyright (c) 2014 HUBzero Foundation, LLC.
#
# This file is part of: The HUBzero(R) Platform for Scientific Collaboration
#
# The HUBzero(R) Platform for Scientific Collaboration (HUBzero) is free
# software: you can redistribute it and/or modify it under the terms of
# the GNU Lesser General Public License as published by the Free Software
# Foundation, either version 3 of the License, or (at your option) any
# later version.
#
# HUBzero is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
#
# HUBzero is a registered trademark of HUBzero Foundation, LLC.
#

# --- Monitor (this script) ---------------------------------------------------
# Name used in log lines, as the pid-file base name and as the pattern that
# find_process matches against /proc/<pid>/cmdline.
MONITOR_NAME=hubgraph-monitor
MONITOR_CMD=$MONITOR_NAME
MONITOR_PIDFILE=/var/run/hubzero-hubgraph/$MONITOR_NAME.pid
# NOTE(review): MONITOR_LOG is not referenced in the visible part of this script.
MONITOR_LOG=/var/log/hubzero-hubgraph/$MONITOR_NAME.log

# --- Supervised server -------------------------------------------------------
SERVER_NAME=hubgraph-server
SERVER_CMD=$SERVER_NAME
SERVER_PIDFILE=/var/run/hubzero-hubgraph/$SERVER_NAME.pid
SERVER_LOG=/var/log/hubzero-hubgraph/$SERVER_NAME.log

# Command (with arguments) run before every server launch.
SERVER_INIT="/usr/bin/hzhubgraph-cfg init"

# --- Logging -----------------------------------------------------------------
hostname=$(hostname --fqdn)     # host name printed in every log line
datefmt="%FT%T.%6N%:z"          # date(1) format for the log timestamp
debug=0                         # 1 = also print "debug" priority messages

# --- Restart policy ----------------------------------------------------------
min_valid_run_time=30           # runs shorter than this (seconds) trigger back-off
min_retry_delay=1               # first back-off delay, in seconds
max_retry_delay=3600            # upper bound for the doubling back-off delay

# NOTE(review): force_monitor is not referenced in the visible part of this script.
force_monitor=0

# HUBGRAPH_DEBUG=1 in the environment turns debug logging on.
case "$HUBGRAPH_DEBUG" in
	1) debug=1 ;;
esac

##############################################################################
# Nothing below this line should need to be modified when re-using this script
##############################################################################

# Identity of this monitor process; written to the monitor pid file and
# printed in every log line.
mypid=$$

# Back-off delay (seconds) slept before relaunching after a too-short run.
retry_delay=$min_retry_delay

# Number of server launches attempted/adopted so far (0 = none yet).
server_starts=0

# Epoch-second bookkeeping for the most recent server run.
start_time=0
end_time=0
run_time=0

# interrupt_monitor - handler registered for SIGINT.
#
# Logs the event at debug priority, removes the monitor's pid file and ends
# the script.  `exit` is called without an explicit status.
#
# Globals:   MONITOR_PIDFILE (read)
# Arguments: any arguments are only echoed into the debug log
interrupt_monitor ()
{
	log debug "interrupt_monitor $*"
	log debug "interrupt_monitor removing $MONITOR_PIDFILE"
	# Quoted (and protected with --) so that a path containing whitespace,
	# glob characters or a leading dash is removed as exactly one file.
	rm -f -- "$MONITOR_PIDFILE"
	exit
}

# terminate_monitor - handler registered for SIGTERM.
#
# Logs the event at debug priority, removes the monitor's pid file and ends
# the script.  `exit` is called without an explicit status.
#
# Globals:   MONITOR_PIDFILE (read)
# Arguments: any arguments are only echoed into the debug log
terminate_monitor ()
{
	log debug "terminate_monitor $*"
	log debug "terminate_monitor $* removing $MONITOR_PIDFILE"
	# Quoted (and protected with --) so that a path containing whitespace,
	# glob characters or a leading dash is removed as exactly one file.
	rm -f -- "$MONITOR_PIDFILE"
	exit
}

# Signal handling for the monitor.
#
# HUP is caught and turned into a no-op.  The action is `:` because a bare
# `{}` is not an empty block: the shell looks it up as a command name and
# reports "not found" every time the signal arrives.  The signal is caught
# rather than ignored (trap '' HUP) so that processes launched by this script
# still start with the default HUP disposition.
trap ':' HUP
# INT and TERM remove the monitor pid file and exit.
trap interrupt_monitor INT
trap terminate_monitor TERM

# log PRIORITY MESSAGE [ARG...]
#
# Writes one line to stdout in the form
#     <timestamp> <hostname> <MONITOR_NAME>[<pid>] [<PRIORITY>] <MESSAGE>
# Messages with priority "debug" are printed only when $debug is "1".
#
# With no ARGs, MESSAGE is printed literally, so a '%' or backslash inside it
# (for example from user-supplied arguments) cannot corrupt the output.  With
# ARGs, MESSAGE is used as a printf format and the ARGs are its arguments.
#
# Globals read:    debug, datefmt, hostname, mypid, MONITOR_NAME
# Globals written: PRIORITY, MSG
log ()
{
	PRIORITY=$1
	MSG=$2

	# Drop the two named arguments; anything left is a printf argument.
	# Guarded because shifting past the end of the list is an error.
	[ $# -gt 0 ] && shift
	[ $# -gt 0 ] && shift

	if [ "$PRIORITY" = "debug" ] && [ "$debug" != "1" ]
	then
		return 0
	fi

	# Every field is passed as its own quoted argument so that an empty
	# value (e.g. hostname lookup failed) cannot shift the later fields.
	printf '%s %s %s[%d] [%s] ' "$(date +"$datefmt")" "$hostname" "$MONITOR_NAME" "$mypid" "$PRIORITY"

	if [ $# -gt 0 ]
	then
		# Caller explicitly supplied format arguments: MESSAGE is a format.
		printf "$MSG\n" "$@"
	else
		printf '%s\n' "$MSG"
	fi
}

# kill_process PID PIDFILE
#
# Stops process PID and its direct children.  A process is considered alive
# while /proc/PID/environ exists.  Up to four rounds of SIGTERM are followed by
# up to three rounds of SIGKILL, with a one second pause after every round.
# Once the process is gone, PIDFILE is removed if it exists.
#
# Globals written: PID, PIDFILE, tries
# Returns: 0  process is gone (PIDFILE removed)
#          1  PID is empty, not a decimal number, or less than 2
#          3  process still present after all attempts
kill_process ()
{
	log debug "kill_process $*"

	PID=$1
	PIDFILE=$2

	tries=0

	# An empty or non-numeric PID must be rejected here: it would make the
	# /proc test below fail and be mistaken for "process ended".
	case $PID in
	    ''|*[!0-9]*)
		log debug "kill_process $* invalid process-id"
		return 1
		;;
	esac

	if [ "$PID" -lt 2 ]
	then
		log debug "kill_process $* invalid process-id"
		return 1
	fi

	while [ -e "/proc/$PID/environ" ]
	do
		if [ "$tries" -gt 6 ]
		then
			break
		elif [ "$tries" -lt 4 ]
		then
			log debug "kill_process $* attempting to terminate $PID" 
			kill -TERM "$PID"
			log debug "kill_process $* attempting to terminate $PID child processes" 
			pkill -P "$PID"
		else
			log debug "kill_process $* attempting to kill  $PID child processes" 
			pkill -KILL -P "$PID"
			log debug "kill_process $* attempting to kill $PID" 
			kill -KILL "$PID"
		fi

		tries=$((tries+1))

		sleep 1

	done

	if [ ! -e "/proc/$PID/environ" ]
	then
		log debug "kill_process $* process $PID ended" 

		if [ -e "$PIDFILE" ]
		then
			log debug "kill_process $* removing $PIDFILE"

			rm -f -- "$PIDFILE"
		fi

		return 0
	fi

	log info "kill_process $* failed to stop process $PID"

	return 3
}

# find_process PIDFILE|PID [PATTERN]
#
# Resolves a process id and optionally checks what it is running.
#   $1  a decimal PID, or otherwise the path of a pid file to read the PID from
#   $2  optional extended regular expression matched against
#       /proc/PID/cmdline; when omitted the PID is accepted without any check
#
# Globals written: PID     - the process id on success, 0 on failure
#                  PIDFILE - the pid file path, or empty when $1 was a PID
# Side effect: a pid file that is empty or contains 0 is removed.
# Returns: 0 found, 1 not found
find_process ()
{
	log debug "find_process $*"

	# Start from a clean slate so that a pid file remembered from an
	# earlier call can never be removed on behalf of a numeric argument.
	PIDFILE=

	case $1 in

	    ''|*[!0-9]*) 
		PIDFILE=$1
		# A missing or unreadable pid file simply yields an empty PID.
		PID=$(cat -- "$PIDFILE" 2>/dev/null)
		;;

	    *) 
		PID=$1 
		;;

	esac

	if [ -z "$PID" ] || [ "$PID" = "0" ]
	then
		log debug "find_process $* not found"

		if [ -n "$PIDFILE" ] && [ -e "$PIDFILE" ]
		then
			log debug "find_process $* removing $PIDFILE"

			rm -f -- "$PIDFILE"
		fi
		PID=0
		return 1
	fi

	# Pid file content that is not a plain decimal number cannot name a
	# process and must not be spliced into a /proc path.
	case $PID in
	    *[!0-9]*)
		log debug "find_process $* failed to find $PID"
		PID=0
		return 1
		;;
	esac

	# The process counts as found unless a pattern was given and does not
	# match; a vanished /proc entry makes grep fail, i.e. "not found".
	if [ $# -gt 1 ] && ! grep -E -q -e "$2" "/proc/$PID/cmdline" 2>/dev/null
	then
		log debug "find_process $* failed to find $PID" 

		PID=0
		return 1
	fi

	log debug "find_process $* found $PID"

	return 0
}

# start - run the supervision loop that keeps the server running.
#
# Returns 0 immediately when find_process reports that the process recorded in
# $MONITOR_PIDFILE matches $MONITOR_CMD (a monitor is already running).
# Otherwise this process is recorded in $MONITOR_PIDFILE, a server already
# recorded in $SERVER_PIDFILE is adopted if find_process accepts it, and the
# function loops forever:
#   1. wait until the current server (if any) has ended;
#   2. if a server had been started and ran for less than $min_valid_run_time
#      seconds, sleep $retry_delay and double it (capped at $max_retry_delay);
#      a long enough run resets the delay to $min_retry_delay;
#   3. run $SERVER_INIT and, if it exits 0, launch $SERVER_CMD in the
#      background with output appended to $SERVER_LOG.
#
# Globals read:    MONITOR_*, SERVER_*, mypid, min_valid_run_time,
#                  min_retry_delay, max_retry_delay
# Globals written: SERVER_PID, RETVAL, server_starts, start_time, end_time,
#                  run_time, retry_delay (and PID/PIDFILE via find_process)
# Returns: 0 when a monitor is already running; otherwise it does not return.
start()
{
	log debug "$MONITOR_NAME start server"

	if find_process "$MONITOR_PIDFILE" "$MONITOR_CMD"
	then
		return 0
	fi

	log debug "$MONITOR_NAME start creating $MONITOR_PIDFILE"

	echo "$mypid" > "$MONITOR_PIDFILE"

	# Best effort: copy the timestamps of this process's /proc entry onto
	# the pid file.
	touch -r "/proc/$mypid" "$MONITOR_PIDFILE" 2>/dev/null

	find_process "$SERVER_PIDFILE" "$SERVER_CMD"

	SERVER_PID=$PID

	if [ "$SERVER_PID" -gt 1 ]
	then
		# Adopt the server that is already running.
		server_starts=1
		start_time=$(stat -c "%Z" "/proc/$SERVER_PID" 2>/dev/null)
	else
		server_starts=0
		start_time=0
	fi

	# stat prints nothing if the /proc entry disappeared in the meantime.
	if [ -z "$start_time" ]
	then
		start_time=0
	fi

	while true
	do
		# Re-create the monitor pid file should something have removed it.
		if [ ! -f "$MONITOR_PIDFILE" ]
		then
			echo "$mypid" > "$MONITOR_PIDFILE"

			touch -r "/proc/$mypid" "$MONITOR_PIDFILE" 2>/dev/null
		fi

		if [ "$SERVER_PID" -gt 1 ]
		then

			log debug "$MONITOR_NAME start  waiting on process $SERVER_PID"

			# wait returns at once for a process that is not a child of
			# this shell (an adopted server), hence the /proc poll below.
			wait "$SERVER_PID" 2>/dev/null

			while [ -e "/proc/$SERVER_PID/environ" ]
			do
				log debug "$MONITOR_NAME start sleeping for 60 seconds before checking /proc/$SERVER_PID again" 
				sleep 60
			done

			log debug "$MONITOR_NAME start process $SERVER_PID ended"
		fi

		end_time=$(date +"%s")

		run_time=$((end_time - start_time))

		log debug "$MONITOR_NAME start removing pid file $SERVER_PIDFILE"

		rm -f -- "$SERVER_PIDFILE"

		if [ "$server_starts" -gt 0 ]
		then
			log info "$MONITOR_NAME start runtime was $run_time seconds, min runtime = $min_valid_run_time"

			if [ "$run_time" -lt "$min_valid_run_time" ]
			then
				log info "$MONITOR_NAME start waiting $retry_delay seconds before retry"

				sleep "$retry_delay"

				# Exponential back-off, capped at max_retry_delay.
				retry_delay=$((retry_delay * 2))

				if [ "$retry_delay" -gt "$max_retry_delay" ]
				then
					retry_delay=$max_retry_delay
				fi
			else
				retry_delay=$min_retry_delay
			fi
		fi

		log debug "$MONITOR_NAME starting initialization process"

		start_time=$(date +"%s")

		# Intentionally unquoted: SERVER_INIT is a command plus arguments.
		$SERVER_INIT

		RETVAL=$?
		SERVER_PID=0

		log info "$MONITOR_NAME starting $SERVER_CMD"

		if [ "$RETVAL" -eq 0 ]
		then
			# Intentionally unquoted so SERVER_CMD may carry arguments.
			$SERVER_CMD >> "$SERVER_LOG" 2>&1 &

			SERVER_PID=$!

			log debug "$MONITOR_NAME started process $SERVER_PID"

			echo "$SERVER_PID" > "$SERVER_PIDFILE"

			log debug "$MONITOR_NAME created $SERVER_PIDFILE"

			touch -r "/proc/$SERVER_PID" "$SERVER_PIDFILE" 2>/dev/null
		elif [ "$RETVAL" -gt 100 ]
		then
			# Numeric comparison.  Inside [ ] a bare '>' is an output
			# redirection: it would create a file named "100" and make
			# this branch true for every failure.
			# NOTE(review): per the message below, init exit codes above
			# 100 are taken to mean a MySQL connection failure - confirm
			# against hzhubgraph-cfg.
			log info "$MONITOR_NAME start mysql connection failed ($RETVAL)"
		else
			log info "$MONITOR_NAME start $SERVER_NAME initialization failed ($RETVAL)"
		fi

		server_starts=$((server_starts+1))

	done
}

# stop - stop the monitor and then the server.
#
# The monitor recorded in $MONITOR_PIDFILE is stopped first (the loop in
# start() relaunches the server whenever it ends), then the server recorded in
# $SERVER_PIDFILE.  Each is only killed when find_process confirms it.
#
# Returns the status of a final find_process on the server:
#   0      the server is STILL present (the stop failed)
#   non-0  no server was found
stop()
{
	log info "$MONITOR_NAME stopping hubgraph-server"

	# Arguments are quoted so each path/pattern arrives as a single word.
	if find_process "$MONITOR_PIDFILE" "$MONITOR_CMD"
	then
		kill_process "$PID" "$MONITOR_PIDFILE"
	fi

	if find_process "$SERVER_PIDFILE" "$SERVER_CMD"
	then
		kill_process "$PID" "$SERVER_PIDFILE"
	fi

	find_process "$SERVER_PIDFILE" "$SERVER_CMD"

	return $?
}


# Command dispatch on the first script argument.
#   start (or no argument)  run the monitor loop; exits with start's status
#   stop                    stop monitor and server; exit 1 if the server is
#                           still found afterwards, otherwise exit 0
#   status                  exit with find_process's status for the monitor
#   anything else           log the invalid command and exit 1
case "$1" in

  start|"")
	log info "$MONITOR_NAME start"

	# start only returns 0 when a monitor is already running.
	start

	RETVAL=$?

	if [ "$RETVAL" -eq 0 ]
	then
		log info "$MONITOR_NAME processes already running"
	else
		log info "$MONITOR_NAME processes started but terminated unexpectedly"
	fi

	exit "$RETVAL"
  ;;

  stop)
	log info "$MONITOR_NAME stop"

	# stop returns 0 when the server is still found after the kill
	# attempts, so success of the function means failure of the command.
	if stop
	then
		log info "$MONITOR_NAME processes failed to stop"
		exit 1
	fi

	exit 0
  ;;

  status)
	log info "$MONITOR_NAME status"

	# Quoted so the pid-file path and pattern each stay a single argument.
	find_process "$MONITOR_PIDFILE" "$MONITOR_CMD"

	exit $?
  ;;

  *)
	log info "$MONITOR_NAME invalid command [$*]"

	exit 1
  ;;

esac
