/*
 * Copyright (C) 1994-2021 Altair Engineering, Inc.
 * For more information, contact Altair at www.altair.com.
 *
 * This file is part of both the OpenPBS software ("OpenPBS")
 * and the PBS Professional ("PBS Pro") software.
 *
 * Open Source License Information:
 *
 * OpenPBS is free software. You can redistribute it and/or modify it under
 * the terms of the GNU Affero General Public License as published by the
 * Free Software Foundation, either version 3 of the License, or (at your
 * option) any later version.
 *
 * OpenPBS is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Affero General Public
 * License for more details.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 *
 * Commercial License Information:
 *
 * PBS Pro is commercially licensed software that shares a common core with
 * the OpenPBS software.  For a copy of the commercial license terms and
 * conditions, go to: (http://www.pbspro.com/agreement.html) or contact the
 * Altair Legal Department.
 *
 * Altair's dual-license business model allows companies, individuals, and
 * organizations to create proprietary derivative works of OpenPBS and
 * distribute them - whether embedded or bundled with other software -
 * under a commercial license agreement.
 *
 * Use of Altair's trademarks, including but not limited to "PBS™",
 * "OpenPBS®", "PBS Professional®", and "PBS Pro™" and Altair's logos is
 * subject to Altair's trademark licensing policies.
 */

/**
 * @brief
 * 		The entry point function for pbs_daemon.
 */

#include <pbs_config.h> /* the master config generated by configure */

#include <sys/types.h>
#include <sys/stat.h>
#include <sys/param.h>
#include <netinet/in.h>
#include <sys/wait.h>
#include <netdb.h>
#include <unistd.h>
#include <signal.h>
#ifdef _POSIX_MEMLOCK
#include <sys/mman.h>
#endif /* _POSIX_MEMLOCK */

#include "pbs_ifl.h"
#include <assert.h>
#include <ctype.h>
#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>

#include "ticket.h"
#ifdef linux
#include <sys/prctl.h>
#endif

#include "list_link.h"
#include "work_task.h"
#include "log.h"
#include "server_limits.h"
#include "attribute.h"
#include "resource.h"
#include "job.h"
#include "reservation.h"
#include "queue.h"
#include "server.h"
#include "net_connect.h"
#include "libpbs.h"
#include "credential.h"
#include "batch_request.h"
#include "pbs_idx.h"
#include "pbs_nodes.h"
#include "svrfunc.h"
#include <libutil.h>
#include "tracking.h"
#include "acct.h"
#include "sched_cmds.h"
#include "tpp.h"
#include "dis.h"
#include "libsec.h"
#include "pbs_version.h"
#include "pbs_license.h"
#include "hook.h"
#include "pbs_ecl.h"
#include "provision.h"
#include "pbs_db.h"
#include "pbs_sched.h"
#include "pbs_share.h"
#include <pbs_python.h> /* for python interpreter */
#include "auth.h"

#include "pbs_v1_module_common.i"

/* External functions called */

extern int pbsd_init(int);
extern void shutdown_ack();
/* main() treats a return value of 1 as "Secondary was contacted and is gone" */
extern int takeover_from_secondary(void);
/* main() keeps calling this for as long as it returns 1 (stay inactive) */
extern int be_secondary(time_t sec);
extern void set_srv_prov_attributes();
/* called from main() with the file-scope `background` flag as argument */
extern int connect_to_db(int);
/* called on main()'s error/exit paths once connect_to_db() has succeeded */
extern void stop_db();
#ifdef NAS /* localmod 005 */
extern int chk_and_update_db_svrhost();
#endif /* localmod 005 */

/* External data items */
extern pbs_list_head svr_requests;
/* message text logged by build_path() when malloc() fails */
extern char *msg_err_malloc;
/* main() uses this to pick pbs_secondary over pbs_primary as the TPP host */
extern int pbs_failover_active;

/* Local Private Functions */

static int get_port(char *, unsigned int *, pbs_net_t *);
static time_t next_task();
static int start_hot_jobs();
/* called with lockfds and either F_WRLCK or F_UNLCK */
static void lock_out(int, int);
/* NOTE(review): units and usage are outside this view -- presumably seconds; confirm */
#define HOT_START_PING_RATE 15

/* Global Data Items */

int stalone = 0; /* is program running not as a service ? (main: -N sets 1, -C sets 2) */
char *acct_file = NULL; /* argument of the -A option, if given */
char daemonname[PBS_MAXHOSTNAME + 8]; /* "Server@" + server_host, cut at the first '.' */
int used_unix_licenses = 0;
int used_linix_licenses = 0;
char *log_file = NULL; /* argument of the -L option, if given */
/* The path_* pointers assigned in main() are allocated by build_path() */
char *path_acct;
char *path_usedlicenses;
char path_log[MAXPATHLEN + 1]; /* "<pbs_home_path>/<PBS_LOGFILES>" */
char *path_priv;
char *path_jobs;
char *path_hooks_tracking;
char *path_users;
char *path_hooks_rescdef;
char *path_spool;
char *path_track;
char *path_svrlive;
extern char *path_prov_track;
char *path_secondaryact;
char *pbs_o_host = "PBS_O_HOST";
pbs_net_t pbs_mom_addr; /* defaults to pbs_server_addr; -M may override */
unsigned int pbs_mom_port;
unsigned int pbs_rm_port;
pbs_net_t pbs_server_addr;
unsigned int pbs_server_port_dis;
int reap_child_flag = 0; /* cleared by reap_child() once no exited child is left to collect */
time_t secondary_delay = 30; /* overridden by the -F option */
pbs_sched *dflt_scheduler = NULL; /* the default scheduler */
int shutdown_who;		  /* see req_shutdown() */
char *mom_host = server_host; /* replaced by the -M host name when one is given */
long new_log_event_mask = 0; /* value of the -e option */
int server_init_type = RECOV_WARM; /* recovery type; changed by the -t option */
pbs_list_head svr_deferred_req;
pbs_list_head svr_newjobs; /* list of incoming new jobs        */
pbs_list_head svr_allscheds;
extern pbs_list_head svr_creds_cache; /* all credentials available to send */
struct batch_request *saved_takeover_req;
int svr_unsent_qrun_req = 0; /* Set to 1 for scheduling unsent qrun requests */

void *jobs_idx;
void *queues_idx;
void *resvs_idx;

sigset_t allsigs; /* main() fills this with SIGHUP, SIGINT, SIGTERM and SIGCHLD */

/* private data */
static char *suffix_slash = "/"; /* suffix passed to build_path() for directory paths */
static int brought_up_alt_sched = 0;
void stop_db();
extern void mark_nodes_unknown(int);

/*
 * Used only by the TPP layer, to ping nodes only if the connection to the
 * local router to the server is up.
 * Holds 1 while the connection is considered up and 0 otherwise:
 * net_restore_handler() sets it to 1 and net_down_handler() resets it to 0.
 * NOTE(review): this comment previously stated that the flag starts out "up"
 * so that the first ping happens by default, but the initializer is 0 --
 * confirm which of the two is intended.
 */
int tpp_network_up = 0;

/**
 * @brief
 * 		Handler invoked by the TPP layer once the connection to the local
 * 		router has been restored. Logs the event and flags the network
 * 		as up.
 *
 * @param[in]	data	- Any associated data passed from TPP layer (not used here)
 *
 * @return	void
 */
void
net_restore_handler(void *data)
{
	(void) data; /* this handler does not consult the TPP payload */

	log_event(PBSEVENT_ERROR | PBSEVENT_FORCE, PBS_EVENTCLASS_SERVER, LOG_ALERT, __func__, "net restore handler called");

	/* from now on net_down_handler() will react to a loss of connection */
	tpp_network_up = 1;
}

/**
 * @brief
 * 		Handler invoked by the TPP layer when the connection to the local
 * 		router is lost. Acts only when the network was flagged as up:
 * 		clears the flag and marks all nodes unknown.
 *
 * @param[in]	data	- Any associated data passed from TPP layer (not used here)
 *
 * @return	void
 */
void
net_down_handler(void *data)
{
	(void) data; /* this handler does not consult the TPP payload */

	/* nothing to do unless we are going from "up" to "down" */
	if (tpp_network_up != 1)
		return;

	tpp_network_up = 0;

	/* now set all nodes to unknown */
	log_event(PBSEVENT_ERROR | PBSEVENT_FORCE, PBS_EVENTCLASS_SERVER, LOG_ALERT, __func__, "marking all nodes unknown");
	mark_nodes_unknown(1);
}

/* descriptor of the server lock file; opened in main(), passed to lock_out() */
static int lockfds = -1;
/* set to 1 by go_to_background() after a successful fork + setsid */
static int already_forked = 0; /* we check this variable even in non-debug mode, so dont condition compile it */
/* set to 1 in main() and handed to connect_to_db() when the server is not
 * being created (RECOV_CREATE), is not standalone and has not forked yet */
static int background = 0;

#ifndef DEBUG
/**
 * @brief
 *		Detach from the invoking process: fork, let the parent exit, and
 *		carry on in the child as the leader of a new session. This
 *		function is available only on Non-Windows platforms and in
 *		non-debug builds.
 *
 * @par Side Effects:
 *		lock_out() is called with F_UNLCK on lockfds before the fork and
 *		with F_WRLCK in the child; the standard streams are redirected via
 *		pbs_close_stdfiles() and already_forked is set to 1 on success.
 *
 * @return	pid_t	- sid of the child process (result of setsid)
 * @retval       >0	- sid of the child process.
 * @retval       -1	- Fork or setsid failed.
 */
pid_t
go_to_background()
{
	pid_t session = -1;
	int forked;

	/* presumably releases our hold on the lock file so the child can take it */
	lock_out(lockfds, F_UNLCK);

	forked = fork();
	if (forked == -1) {
		/* fork failed */
		log_err(errno, msg_daemonname, "fork failed");
		return ((pid_t) -1);
	} else if (forked > 0) {
		/* parent goes away, allowing booting to continue */
		exit(0);
	}

	/* only the child gets here */
	lock_out(lockfds, F_WRLCK);

	session = setsid();
	if (session == -1) {
		log_err(errno, msg_daemonname, "setsid failed");
		return ((pid_t) -1);
	}

	pbs_close_stdfiles();
	already_forked = 1;

	return session;
}
#endif /* DEBUG is defined */

/**
 * @brief
 * 		Consume one message from a TPP stream. The only message kind
 * 		handled is an Inter Server request (IS_PROTOCOL); anything else,
 * 		or a failed read, is handed to stream_eof().
 *
 * @param[in]	stream	- TPP stream from which message is read.
 *
 * @return	void
 */
void
do_tpp(int stream)
{
	int ret;
	int proto;
	int version;
	void is_request(int, int);
	void stream_eof(int, int, char *);

	DIS_tpp_funcs();

	/* the protocol identifier comes first */
	proto = disrsi(stream, &ret);
	if (ret != DIS_SUCCESS) {
		DBPRT(("tpp read failure: ret: %d, proto: %d\n", ret, proto));
		stream_eof(stream, ret, NULL);
		return;
	}

	/* followed by the protocol version */
	version = disrsi(stream, &ret);
	if (ret != DIS_SUCCESS) {
		DBPRT(("%s: no protocol version number %s\n",
		       __func__, dis_emsg[ret]))
		stream_eof(stream, ret, NULL);
		return;
	}

	if (proto == IS_PROTOCOL) {
		DBPRT(("%s: got an inter-server request\n", __func__))
		is_request(stream, version);
	} else {
		/* any other protocol value is rejected */
		DBPRT(("%s: unknown request %d\n", __func__, proto))
		stream_eof(stream, ret, NULL);
	}
}

/**
 * @brief
 * 		Read the TPP stream using tpp_poll and invoke do_tpp using that stream.
 *
 * @param[in]	fd	- not used.
 *
 * @return	void
 */
void
tpp_request(int fd)
{
	int iloop;
	int rpp_max_pkt_check = RPP_MAX_PKT_CHECK_DEFAULT;

	/*
	 * Interleave TPP processing with batch request processing.
	 * Certain things like hook/short-job propagation can generate a
	 * huge amount of TPP traffic that can make batch processing
	 * appear sluggish if not interleaved.
	 *
	 */
	if (is_sattr_set(SVR_ATR_rpp_max_pkt_check))
		rpp_max_pkt_check = get_sattr_long(SVR_ATR_rpp_max_pkt_check);

	for (iloop = 0; iloop < rpp_max_pkt_check; iloop++) {
		int stream;

		if ((stream = tpp_poll()) == -1) {
			log_err(errno, __func__, "tpp_poll");
			break;
		}
		if (stream == -2)
			break;
		do_tpp(stream);
	}
	return;
}

/**
 * @brief
 * 		build_path - build the pathname for a PBS directory
 *
 * 		The result is "<parent>/<name><sufix>". The '/' between parent and
 * 		name is added only when parent does not already end in one; an
 * 		empty parent is treated as not ending in '/', so it also gets the
 * 		separator (previously an empty parent caused a read before the
 * 		start of the string).
 *
 * @param[in]	parent	- parent directory name (dirname), must not be NULL
 * @param[in]	name	- sub directory name, must not be NULL
 * @param[in]	sufix	- suffix string to append, may be NULL for none
 *
 * @return	newly malloc'ed path string; the caller owns it.
 * 		Never returns NULL: on allocation failure the error is logged,
 * 		the log is closed and the process exits with status 3.
 */

char *
build_path(char *parent, char *name, char *sufix)
{
	size_t parent_len = strlen(parent);
	size_t name_len = strlen(name);
	size_t sufix_len = (sufix != NULL) ? strlen(sufix) : 0;
	int prefixslash;
	char *ppath;
	char *cursor;

	/* check the length first so that parent[-1] is never inspected */
	prefixslash = (parent_len == 0) || (parent[parent_len - 1] != '/');

	/*
	 * allocate space for the names + maybe a slash between + the suffix
	 * + the terminating NUL
	 */
	ppath = malloc(parent_len + (size_t) prefixslash + name_len + sufix_len + 1);
	if (ppath == NULL) {
		log_err(errno, "build_path", msg_err_malloc);
		log_close(1);
		exit(3);
	}

	/* assemble the pieces using the lengths computed above */
	cursor = ppath;
	memcpy(cursor, parent, parent_len);
	cursor += parent_len;
	if (prefixslash)
		*cursor++ = '/';
	memcpy(cursor, name, name_len);
	cursor += name_len;
	if (sufix_len != 0) {
		memcpy(cursor, sufix, sufix_len);
		cursor += sufix_len;
	}
	*cursor = '\0';

	return (ppath);
}

#ifndef DEBUG
/**
 * @brief
 * 		pbs_close_stdfiles - point stdin, stdout and stderr at the null
 *		device. The work is done on the first call only; later calls
 *		return immediately.
 *		Not compiled in when built with debug.
 *
 * @par MT-safe: No
 */
void
pbs_close_stdfiles(void)
{
	static int already_done = 0;
	FILE *dummyfile;

	if (already_done)
		return;

	(void) fclose(stdin);
	(void) fclose(stdout);
	(void) fclose(stderr);

	/*
	 * Reopen in the same order as closed; the asserts check that each
	 * new stream landed on descriptor 0, 1 and 2 respectively.
	 */
	dummyfile = fopen(NULL_DEVICE, "r");
	assert((dummyfile != 0) && (fileno(dummyfile) == 0));

	dummyfile = fopen(NULL_DEVICE, "w");
	assert((dummyfile != 0) && (fileno(dummyfile) == 1));

	dummyfile = fopen(NULL_DEVICE, "w");
	assert((dummyfile != 0) && (fileno(dummyfile) == 2));

	already_done = 1;
}
#endif /* DEBUG */

/**
 * @brief
 * 		clear_exec_vnode - clear the exec_vnode attribute
 *		This is done when the server is coming out of HOT start (first
 *		regular RUN cycle).  Jobs which were running when the Server was
 *		shut down may have their exec_vnode left to assist in HOT start.
 *		If left set, the job is trapped into requiring those nodes.
 *
 *		For every job on svr_alljobs that is not in the RUNNING, FINISHED,
 *		MOVED or EXITING state, has exec_vnode set and does not carry the
 *		JOB_SVFLG_CHKPT server flag, the exec_vnode, exec_host and
 *		exec_host2 attributes are freed.
 */
static void
clear_exec_vnode()
{
	job *pjob;

	for (pjob = (job *) GET_NEXT(svr_alljobs); pjob != NULL;
	     pjob = (job *) GET_NEXT(pjob->ji_alljobs)) {

		/* jobs in any of these states keep their placement */
		if (check_job_state(pjob, JOB_STATE_LTR_RUNNING) ||
		    check_job_state(pjob, JOB_STATE_LTR_FINISHED) ||
		    check_job_state(pjob, JOB_STATE_LTR_MOVED) ||
		    check_job_state(pjob, JOB_STATE_LTR_EXITING))
			continue;

		/* nothing to clear */
		if (!is_jattr_set(pjob, JOB_ATR_exec_vnode))
			continue;

		/* jobs flagged JOB_SVFLG_CHKPT are left alone */
		if ((pjob->ji_qs.ji_svrflags & JOB_SVFLG_CHKPT) != 0)
			continue;

		free_jattr(pjob, JOB_ATR_exec_vnode);
		free_jattr(pjob, JOB_ATR_exec_host);
		free_jattr(pjob, JOB_ATR_exec_host2);
	}
}

/**
 * @brief
 * 		reap_child() - reap dead child processes
 *
 * 		Collect child status and add to work list entry for that child.
 * 		The list entry is marked as immediate to show the child is gone and
 * 		svr_delay_entry is incremented to indicate to next_task() to check for it.
 */

static void
reap_child(void)
{
	struct work_task *ptask;
	pid_t pid;
	int statloc;

	while (1) {
		if ((pid = waitpid((pid_t) -1, &statloc, WNOHANG)) == (pid_t) -1) {
			if (errno == ECHILD) {
				reap_child_flag = 0;
				return;
			} else if (errno == EINTR) {
				continue;
			} else {
				return;
			}
		} else if (pid == 0) {
			reap_child_flag = 0;
			return;
		}
		ptask = (struct work_task *) GET_NEXT(task_list_event);
		while (ptask) {
			if ((ptask->wt_type == WORK_Deferred_Child) &&
			    (ptask->wt_event == pid)) {
				ptask->wt_type = WORK_Deferred_Cmp;
				ptask->wt_aux = (int) statloc; /* exit status */
				svr_delay_entry++;	       /* see next_task() */
			}
			ptask = (struct work_task *) GET_NEXT(ptask->wt_linkevent);
		}
	}
}

/**
 * @brief
 *	Handle auth related data on a connection before process_request()
 *	gets to see it.
 *
 *	Two contexts are examined in order: the encryption context (only when
 *	the connection's auth config names an encrypt method) and then the
 *	authentication context. For each one:
 *	  - status AUTH_STATUS_UNKNOWN: return 1 right away;
 *	  - status below AUTH_STATUS_CTX_READY: call engage_server_auth(),
 *	    log any message it left in the error buffer (as an error when it
 *	    returned non-zero, as debug otherwise) and return its result;
 *	  - otherwise move on to the next context.
 *	A connection without an auth config returns 1 immediately.
 *
 * @param[in] conn - connection data
 *
 * @return	int
 * @retval	>0	data ready
 * @retval	0	no data ready
 * @retval	-1	error
 * @retval	-2	on EOF
 */
int
tcp_pre_process(conn_t *conn)
{
	char errbuf[LOG_BUF_SIZE];
	int pass;

	if (conn->cn_auth_config == NULL)
		return 1;

	DIS_tcp_funcs();

	/* pass 0: encryption context, pass 1: authentication context */
	for (pass = 0; pass < 2; pass++) {
		int purpose;
		int rc;

		if (pass == 0) {
			/* no encrypt method configured, go straight to auth */
			if (conn->cn_auth_config->encrypt_method[0] == '\0')
				continue;
			purpose = FOR_ENCRYPT;
		} else {
			purpose = FOR_AUTH;
		}

		rc = transport_chan_get_ctx_status(conn->cn_sock, purpose);
		if (rc == (int) AUTH_STATUS_UNKNOWN)
			return 1;

		/* context already established, nothing to do for this pass */
		if (rc >= (int) AUTH_STATUS_CTX_READY)
			continue;

		errbuf[0] = '\0';
		rc = engage_server_auth(conn->cn_sock, conn->cn_hostname, purpose, errbuf, sizeof(errbuf));
		if (errbuf[0] != '\0') {
			if (rc != 0)
				log_event(PBSEVENT_ERROR | PBSEVENT_FORCE, PBS_EVENTCLASS_SERVER, LOG_ERR, __func__, errbuf);
			else
				log_event(PBSEVENT_DEBUG | PBSEVENT_FORCE, PBS_EVENTCLASS_SERVER, LOG_DEBUG, __func__, errbuf);
		}
		return rc;
	}

	return 1;
}

/**
 * @brief
 * 		main - the initialization and main loop of pbs_daemon
 *
 * @param[in]	argc	- argument count.
 * @param[in]	argv	- argument values.
 *
 * @return	error code
 * @retval	0	- success
 * @retval	!=0	- failed
 *
 * @par MT-safe: No
 */
int
main(int argc, char **argv)
{
	char *nodename = NULL;
	int are_primary;
	int c, rc;
	int i;
	int tppfd; /* fd to receive is HELLO's */
	struct tpp_config tpp_conf;
	char lockfile[MAXPATHLEN + 1];
	char **origevp;
	char *pc;
	pbs_queue *pque;
	char *servicename;
	time_t svrlivetime;
	int sock;
	struct stat sb_sa;
	struct batch_request *periodic_req;
	char hook_msg[HOOK_MSG_SIZE];
	pbs_sched *psched;
	char *keep_daemon_name = NULL;
	pid_t sid = -1;
	long state;
	time_t waittime;
#ifdef _POSIX_MEMLOCK
	int do_mlockall = 0;
#endif /* _POSIX_MEMLOCK */
	extern char **environ;

	static struct {
		char *it_name;
		int it_type;
	} init_name_type[] = {
		{"hot", RECOV_HOT},
		{"warm", RECOV_WARM},
		{"cold", RECOV_COLD},
		{"create", RECOV_CREATE},
		{"updatedb", RECOV_UPDATEDB},
		{"", RECOV_Invalid}};
	static int first_run = 1;

	extern int optind;
	extern char *optarg;
	extern char *msg_svrdown;  /* log message */
	extern char *msg_startup1; /* log message */
	extern char *msg_startup2; /* log message */
	/* python externs */
	extern void pbs_python_svr_initialize_interpreter_data(struct python_interpreter_data * interp_data);
	extern void pbs_python_svr_destroy_interpreter_data(struct python_interpreter_data * interp_data);

	/* set python interp data */
	svr_interp_data.data_initialized = 0;
	svr_interp_data.init_interpreter_data = pbs_python_svr_initialize_interpreter_data;
	svr_interp_data.destroy_interpreter_data = pbs_python_svr_destroy_interpreter_data;
	/*the real deal or just pbs_version and exit*/

	PRINT_VERSION_AND_EXIT(argc, argv);

	/* As a security measure and to make sure all file descriptors	*/
	/* are available to us,  close all above stderr			*/
	i = sysconf(_SC_OPEN_MAX);
	while (--i > 2)
		(void) close(i); /* close any file desc left open by parent */

	/* If we are not run with real and effective uid of 0, forget it */
	if ((getuid() != 0) || (geteuid() != 0)) {
		fprintf(stderr, "%s: Must be run by root\n", argv[0]);
		return (1);
	}

	/* set standard umask */
	umask(022);

	/* set single threaded mode */
	pbs_client_thread_set_single_threaded_mode();
	/* disable attribute verification */
	set_no_attribute_verification();

	/* initialize the thread context */
	if (pbs_client_thread_init_thread_context() != 0) {
		log_err(-1, __func__,
			"Unable to initialize thread context");
		return (1);
	}

	if (pbs_loadconf(0) == 0)
		return (1);

	set_log_conf(pbs_conf.pbs_leaf_name, pbs_conf.pbs_mom_node_name,
		     pbs_conf.locallog, pbs_conf.syslogfac,
		     pbs_conf.syslogsvr, pbs_conf.pbs_log_highres_timestamp);

	/* find out who we are (hostname) */
	server_host[0] = '\0';
	if (pbs_conf.pbs_leaf_name) {
		char *endp;
		snprintf(server_host, sizeof(server_host), "%s", pbs_conf.pbs_leaf_name);
		endp = strchr(server_host, ','); /* find first name */
		if (endp)
			*endp = '\0';
		endp = strchr(server_host, ':'); /* cut out port, if present */
		if (endp)
			*endp = '\0';
	} else if (gethostname(server_host, (sizeof(server_host) - 1)) == -1) {
		log_err(-1, __func__, "Host name too large");
		return (-1);
	}
	if ((server_host[0] == '\0') ||
	    (get_fullhostname(server_host, server_host, (sizeof(server_host) - 1)) == -1)) {
		log_err(-1, __func__, "Unable to get my host name");
		return (-1);
	}

	(void) strcpy(daemonname, "Server@");
	(void) strcat(daemonname, server_host);
	if ((pc = strchr(daemonname, (int) '.')) != NULL)
		*pc = '\0';

	if (set_msgdaemonname(daemonname)) {
		fprintf(stderr, "Out of memory\n");
		return 1;
	}

	/* initialize service port numbers for self, Scheduler, and MOM */

	pbs_server_port_dis = pbs_conf.batch_service_port;
	pbs_mom_port = pbs_conf.mom_service_port;
	pbs_rm_port = pbs_conf.manager_service_port;

	/* by default, server_name is what is set in /etc/pbs.conf */
	(void) strcpy(server_name, pbs_conf.pbs_server_name);

	pbs_server_name = pbs_default();
	if ((!pbs_server_name) || (*pbs_server_name == '\0')) {
		log_err(-1, __func__, "Unable to get server host name");
		return (-1);
	}

	pbs_server_addr = get_hostaddr(server_host);
	pbs_mom_addr = pbs_server_addr; /* assume on same host */

	/* parse the parameters from the command line */

	while ((c = getopt(argc, argv, "A:a:Cd:e:F:p:t:lL:M:NR:g:G:s:P:-:")) != -1) {
		switch (c) {
			case 'a':
				if (decode_b(get_sattr(SVR_ATR_scheduling), NULL,
					     NULL, optarg) != 0) {
					(void) fprintf(stderr, "%s: bad -a option\n", argv[0]);
					return (1);
				}
				break;
			case 'd':
				if (pbs_conf.pbs_home_path != NULL)
					free(pbs_conf.pbs_home_path);
				pbs_conf.pbs_home_path = optarg;
				break;
			case 'e':
				new_log_event_mask = strtol(optarg, NULL, 0);
				break;
			case 'p':
				servicename = optarg;
				if (strlen(server_name) + strlen(servicename) + 1 >
				    (size_t) PBS_MAXSERVERNAME) {
					(void) fprintf(stderr,
						       "%s: -p host:port too long\n", argv[0]);
					return (1);
				}
				(void) strcat(server_name, ":");
				(void) strcat(server_name, servicename);
				if ((pbs_server_port_dis = atoi(servicename)) == 0) {
					(void) fprintf(stderr,
						       "%s: -p host:port invalid\n", argv[0]);
					return (1);
				}
				break;
			case 't':
				for (i = RECOV_HOT; i < RECOV_Invalid; i++) {
					if (strcmp(optarg, init_name_type[i].it_name) == 0) {
						server_init_type = init_name_type[i].it_type;
						break;
					}
				}
				if (i == RECOV_Invalid) {
					(void) fprintf(stderr, "%s -t bad recovery type\n",
						       argv[0]);
					return (1);
				}
				break;
			case 'A':
				acct_file = optarg;
				break;
			case 'C':
				stalone = 2;
				break;
			case 'F':
				i = atoi(optarg);
				if (i < -1) {
					(void) fprintf(stderr, "%s -F invalid delay time\n",
						       argv[0]);
					return (1);
				}
				secondary_delay = (time_t) i;
				break;
			case 'l':
#ifdef _POSIX_MEMLOCK
				do_mlockall = 1;
#else
				fprintf(stderr, "-l option - mlockall not supported\n");
#endif /* _POSIX_MEMLOCK */
				break;
			case 'L':
				log_file = optarg;
				break;
			case 'M':
				if (get_port(optarg, &pbs_mom_port, &pbs_mom_addr)) {
					(void) fprintf(stderr, "%s: bad -M %s\n", argv[0], optarg);
					return (1);
				}
				if (isalpha((int) *optarg)) {
					if ((pc = strchr(optarg, (int) ':')) != NULL)
						*pc = '\0';
					mom_host = optarg;
				}
				break;
			case 'N':
				stalone = 1;
				break;
			case 'R':
				if ((pbs_rm_port = atoi(optarg)) == 0) {
					(void) fprintf(stderr, "%s: bad -R %s\n",
						       argv[0], optarg);
					return 1;
				}
				break;

			case '-':
				(void) fprintf(stderr, "%s: bad - mistyped or specified more than --version\n", argv[0]);
				return (1);

			default:
				(void) fprintf(stderr, "%s: unknown option: %c\n", argv[0], c);
				return (1);
		}
	}

	if (optind < argc) {
		(void) fprintf(stderr, "%s: invalid operand\n", argv[0]);
		return (1);
	}

	/* make sure no other server is running with this home directory */

	(void) sprintf(lockfile, "%s/%s/server.lock", pbs_conf.pbs_home_path,
		       PBS_SVR_PRIVATE);
	if ((are_primary = are_we_primary()) == FAILOVER_SECONDARY) {
		strcat(lockfile, ".secondary");
	} else if (are_primary == FAILOVER_CONFIG_ERROR) {
		log_err(-1, msg_daemonname, "neither primary or secondary server");
		return (3);
	}

#ifdef NAS /* localmod 104 */
	if ((lockfds = open(lockfile, O_CREAT | O_WRONLY, 0644)) < 0)
#else
	if ((lockfds = open(lockfile, O_CREAT | O_WRONLY, 0600)) < 0)
#endif /* localmod 104 */
	{
		(void) sprintf(log_buffer, "%s: unable to open lock file",
			       msg_daemonname);
		(void) fprintf(stderr, "%s\n", log_buffer);
		log_err(errno, msg_daemonname, log_buffer);
		return (2);
	}

	CLEAR_HEAD(svr_requests);
	CLEAR_HEAD(task_list_immed);
	CLEAR_HEAD(task_list_interleave);
	CLEAR_HEAD(task_list_timed);
	CLEAR_HEAD(task_list_event);
	CLEAR_HEAD(svr_queues);
	CLEAR_HEAD(svr_alljobs);
	CLEAR_HEAD(svr_newjobs);
	CLEAR_HEAD(svr_allresvs);
	CLEAR_HEAD(svr_deferred_req);
	CLEAR_HEAD(svr_allhooks);
	CLEAR_HEAD(svr_queuejob_hooks);
	CLEAR_HEAD(svr_postqueuejob_hooks);
	CLEAR_HEAD(svr_modifyjob_hooks);
	CLEAR_HEAD(svr_resvsub_hooks);
	CLEAR_HEAD(svr_modifyresv_hooks);
	CLEAR_HEAD(svr_movejob_hooks);
	CLEAR_HEAD(svr_runjob_hooks);
	CLEAR_HEAD(svr_jobobit_hooks);
	CLEAR_HEAD(svr_management_hooks);
	CLEAR_HEAD(svr_modifyvnode_hooks);
	CLEAR_HEAD(svr_periodic_hooks);
	CLEAR_HEAD(svr_provision_hooks);
	CLEAR_HEAD(svr_resv_confirm_hooks);
	CLEAR_HEAD(svr_resv_begin_hooks);
	CLEAR_HEAD(svr_resv_end_hooks);
	CLEAR_HEAD(svr_execjob_begin_hooks);
	CLEAR_HEAD(svr_execjob_prologue_hooks);
	CLEAR_HEAD(svr_execjob_epilogue_hooks);
	CLEAR_HEAD(svr_execjob_preterm_hooks);
	CLEAR_HEAD(svr_execjob_launch_hooks);
	CLEAR_HEAD(svr_execjob_end_hooks);
	CLEAR_HEAD(svr_exechost_periodic_hooks);
	CLEAR_HEAD(svr_exechost_startup_hooks);
	CLEAR_HEAD(svr_execjob_attach_hooks);
	CLEAR_HEAD(svr_execjob_resize_hooks);
	CLEAR_HEAD(svr_execjob_abort_hooks);
	CLEAR_HEAD(svr_execjob_postsuspend_hooks);
	CLEAR_HEAD(svr_execjob_preresume_hooks);
	CLEAR_HEAD(svr_allscheds);
	CLEAR_HEAD(svr_creds_cache);
	CLEAR_HEAD(unlicensed_nodes_list);

	/* initialize paths that we will need */
	path_priv = build_path(pbs_conf.pbs_home_path, PBS_SVR_PRIVATE,
			       suffix_slash);
	path_spool = build_path(pbs_conf.pbs_home_path, PBS_SPOOLDIR,
				suffix_slash);
	path_jobs = build_path(path_priv, PBS_JOBDIR, suffix_slash);
	path_users = build_path(path_priv, PBS_USERDIR, suffix_slash);
	path_rescdef = build_path(path_priv, PBS_RESCDEF, NULL);
	path_acct = build_path(path_priv, PBS_ACCT, suffix_slash);
	path_track = build_path(path_priv, PBS_TRACKING, NULL);
	path_prov_track = build_path(path_priv, PBS_PROV_TRACKING, NULL);
	path_usedlicenses = build_path(path_priv, "usedlic", NULL);
	path_secondaryact = build_path(path_priv, "secondary_active", NULL);
	path_hooks = build_path(path_priv, PBS_HOOKDIR, suffix_slash);
	path_hooks_workdir = build_path(path_priv, PBS_HOOK_WORKDIR,
					suffix_slash);
	path_hooks_tracking = build_path(path_priv, PBS_HOOK_TRACKING,
					 HOOK_TRACKING_SUFFIX);
	path_hooks_rescdef = build_path(path_hooks, PBS_RESCDEF, NULL);
	path_svrlive = build_path(path_priv, PBS_SVRLIVE, NULL);

	/* save original environment in case we re-exec */
	origevp = environ;

	/*
	 * Open the log file so we can start recording events
	 *
	 * set log_event_mask to point to the log_event attribute value so
	 * it controls which events are logged.
	 */
	set_sattr_l_slim(SVR_ATR_log_events, PBSEVENT_MASK, SET);
	*log_event_mask = get_sattr_long(SVR_ATR_log_events);
	(void) sprintf(path_log, "%s/%s", pbs_conf.pbs_home_path, PBS_LOGFILES);

	(void) log_open(log_file, path_log);
	(void) sprintf(log_buffer, msg_startup1, PBS_VERSION, server_init_type);
	log_event(PBSEVENT_SYSTEM | PBSEVENT_ADMIN | PBSEVENT_FORCE,
		  LOG_NOTICE,
		  PBS_EVENTCLASS_SERVER, msg_daemonname, log_buffer);

	/*Initialize security library's internal data structures*/
	if (load_auths(AUTH_SERVER)) {
		log_err(-1, __func__, "Failed to load auth lib");
		exit(3);
	}

	{
		int csret;

		/* let Libsec do logging if part of PBS daemon code */
		p_cslog = log_err;

		if ((csret = CS_server_init()) != CS_SUCCESS) {
			sprintf(log_buffer,
				"Problem initializing security library (%d)", csret);
			log_err(-1, __func__, log_buffer);
			exit(3);
		}
	}

	/* At this point we must decide if we are the primary or secondary */

	if (are_primary == FAILOVER_NONE) {
		lock_out(lockfds, F_WRLCK); /* no failover configured */
	} else if (are_primary == FAILOVER_PRIMARY) {
		char *takeovermsg = "Notifying Secondary Server that we are taking over";
		/* we believe we are the primary server */

		lock_out(lockfds, F_WRLCK);
		svrlivetime = 0;
		i = 0;

		/*
		 * try to connect to the Secondary Server to tell it to go away
		 * Keep trying untill we connect or see the svrlive time is
		 * not changing
		 */

		printf("%s\n", takeovermsg);
		log_event(PBSEVENT_SYSTEM | PBSEVENT_ADMIN | PBSEVENT_FORCE,
			  LOG_NOTICE, PBS_EVENTCLASS_SERVER, msg_daemonname,
			  takeovermsg);
		while (1) {
			if (takeover_from_secondary() == 1) {
				/* contacted Secondary, its gone */
				break;
			}
			/* could not connact Secondary */
			if (stat(path_secondaryact, &sb_sa) == -1)
				break; /* no file saying its active */
			if (stat(path_svrlive, &sb_sa) == -1)
				break; /* no svrlive file */
			if (sb_sa.st_mtime > svrlivetime) {
				/* time stamp is changing, at   */
				/* least once, loop for a retry */
				svrlivetime = sb_sa.st_mtime;
			} else if ((time_now = time(0)) > (svrlivetime + secondary_delay)) {
				/* has not changed during the delay time */
				break;
			}
			sleep(4);
			if ((++i % 15) == 3) {
				/* display and log this about once a minute */
				/* after a couple of tries */
				sprintf(log_buffer, "Unable to contact Secondary Server but it appears to be running; it may need to be shutdown manually.");
				log_event(PBSEVENT_SYSTEM | PBSEVENT_ADMIN |
						  PBSEVENT_FORCE,
					  LOG_NOTICE,
					  PBS_EVENTCLASS_SERVER, msg_daemonname,
					  log_buffer);
				printf("%s", log_buffer);
				printf("  Will continue to attempt to takeover\n");
			}
		}

		/* in case secondary didn't remove the file */
		/* also tells the secondary to go idle	    */
		(void) unlink(path_secondaryact);

	} else {
		/* we believe we are a secondary server */
#ifndef DEBUG
		/* go into the background and become own sess/process group */
		if (stalone == 0) {
			if ((sid = go_to_background()) == -1)
				return (2);
		}
#endif /* DEBUG */

		/* will not attempt to lock again if go_to_background was already called */
		if (already_forked == 0)
			lock_out(lockfds, F_WRLCK);

		/* Protect from being killed by kernel */
		daemon_protect(0, PBS_DAEMON_PROTECT_ON);

		do {
			c = be_secondary(secondary_delay);
		} while (c == 1); /* recycle and stay inactive */
	}

	/*
	 * At this point, we are the active Server ...
	 *
	 * Initialize the server objects and perform specified recovery
	 * will be left in the server's private directory
	 */

#ifdef linux
	/*
	 * Set floating-point emulation control bits to silently emulate
	 * fp operations accesses. This works on Linux IA64 only, so we do not
	 * check the return status. On non-IA64 linux machine, it silently fails.
	 *
	 */
	prctl(PR_SET_FPEMU, PR_FPEMU_NOPRINT, 0, 0, 0);
#endif

	/* Setup db connection here */
	if (server_init_type != RECOV_CREATE && !stalone && !already_forked)
		background = 1;
	if ((rc = connect_to_db(background)) != 0)
		return rc;

	/* database connection code end */

	if (stalone == 2) {
		log_event(PBSEVENT_SYSTEM | PBSEVENT_FORCE, LOG_NOTICE,
			  PBS_EVENTCLASS_SERVER, msg_daemonname, msg_svrdown);
		acct_close();
		stop_db();
		log_close(1);
		return (0);
	}

	/* initialize the network interface */

	if ((sock = init_network(pbs_server_port_dis)) < 0) {
		(void) sprintf(log_buffer,
			       "init_network failed using ports Server:%u MOM:%u RM:%u",
			       pbs_server_port_dis, pbs_mom_port, pbs_rm_port);
		log_event(PBSEVENT_SYSTEM | PBSEVENT_ADMIN, PBS_EVENTCLASS_SERVER,
			  LOG_ERR, msg_daemonname, log_buffer);
		fprintf(stderr, "%s\n", log_buffer);
		stop_db();
		return (4);
	}

	/* go into the background and become own sess/process group */

#ifndef DEBUG
	if (stalone == 0 && already_forked == 0) {
		if ((sid = go_to_background()) == -1) {
			stop_db();
			return (2);
		}
	}
	pbs_close_stdfiles();
#else  /* DEBUG is defined */
	sid = getpid();
	(void) setvbuf(stdout, NULL, _IOLBF, 0);
	(void) setvbuf(stderr, NULL, _IOLBF, 0);
#endif /* end the ifndef DEBUG */

	/* Protect from being killed by kernel */
	daemon_protect(0, PBS_DAEMON_PROTECT_ON);

#ifdef _POSIX_MEMLOCK
	if (do_mlockall == 1) {
		if (mlockall(MCL_CURRENT | MCL_FUTURE) == -1) {
			log_err(errno, msg_daemonname, "mlockall failed");
		}
	}
#endif /* _POSIX_MEMLOCK */

	sigemptyset(&allsigs);
	sigaddset(&allsigs, SIGHUP);  /* remember to block these */
	sigaddset(&allsigs, SIGINT);  /* during critical sections */
	sigaddset(&allsigs, SIGTERM); /* so we don't get confused */
	sigaddset(&allsigs, SIGCHLD);
	/* block signals while we do things */
	if (sigprocmask(SIG_BLOCK, &allsigs, NULL) == -1)
		log_err(errno, msg_daemonname, "sigprocmask(BLOCK)");

	/* initialize the network interface */
	if (init_network_add(sock, tcp_pre_process, process_request) != 0) {
		(void) sprintf(log_buffer, "add connection for init_network failed");
		log_event(PBSEVENT_SYSTEM | PBSEVENT_ADMIN, PBS_EVENTCLASS_SERVER,
			  LOG_ERR, msg_daemonname, log_buffer);
		stop_db();
		return (3);
	}

	sprintf(log_buffer, "Out of memory");
	if (pbs_conf.pbs_leaf_name) {
		char *p;
		nodename = strdup(pbs_conf.pbs_leaf_name);

		/* reset pbs_leaf_name to only the first leaf name with port */
		p = strchr(pbs_conf.pbs_leaf_name, ','); /* keep only the first leaf name */
		if (p)
			*p = '\0';
		p = strchr(pbs_conf.pbs_leaf_name, ':'); /* cut out the port */
		if (p)
			*p = '\0';
	} else {
		char *host = NULL;
		if (pbs_conf.pbs_primary)
			if (!pbs_failover_active)
				host = pbs_conf.pbs_primary;
			else
				host = pbs_conf.pbs_secondary;
		else if (pbs_conf.pbs_server_host_name)
			host = pbs_conf.pbs_server_host_name;
		else if (pbs_conf.pbs_server_name)
			host = pbs_conf.pbs_server_name;

		/* since pbs_leaf_name was not specified, determine all IPs */
		nodename = get_all_ips(host, log_buffer, sizeof(log_buffer) - 1);
	}

	if (!nodename) {
		log_err(-1, __func__, log_buffer);
		fprintf(stderr, "%s\n", "Unable to determine TPP node name");
		stop_db();
		return (1);
	}

	if (setup_env(pbs_conf.pbs_environment) == -1) {
		fprintf(stderr, "%s\n", "Setup environment failed");
		stop_db();
		return (3);
	}

	/* set tpp config */
	rc = set_tpp_config(&pbs_conf, &tpp_conf, nodename, pbs_server_port_dis, pbs_conf.pbs_leaf_routers);
	free(nodename);
	if (rc == -1) {
		(void) sprintf(log_buffer, "Error setting TPP config");
		fprintf(stderr, "%s", log_buffer);
		stop_db();
		return (3);
	}

	tpp_set_app_net_handler(net_down_handler, net_restore_handler);
	tpp_conf.node_type = TPP_LEAF_NODE_LISTEN; /* server needs to know about all CTL LEAVE messages */

	if ((tppfd = tpp_init(&tpp_conf)) == -1) {
		log_err(-1, msg_daemonname, "tpp_init failed");
		fprintf(stderr, "%s", log_buffer);
		stop_db();
		return (3);
	}

	(void) add_conn(tppfd, TppComm, (pbs_net_t) 0, 0, NULL, tpp_request);

	tfree2(&ipaddrs);
	tfree2(&streams);

	if (pbsd_init(server_init_type) != 0) {
		log_err(-1, msg_daemonname, "pbsd_init failed");
		stop_db();
		return (3);
	}

	/* record the fact that the Secondary is up and active (running) */

	if (pbs_failover_active) {
		sprintf(log_buffer, "Failover Secondary Server at %s has gone active", server_host);
		log_event(PBSEVENT_SYSTEM | PBSEVENT_FORCE, PBS_EVENTCLASS_SERVER,
			  LOG_CRIT, msg_daemonname, log_buffer);

		/* now go set up work task to do timestamp svrlive file */

		(void) set_task(WORK_Timed, time_now, secondary_handshake, NULL);

		svr_mailowner(0, 0, 1, log_buffer);
		if (get_sattr_long(SVR_ATR_scheduling)) {
			/* Bring up scheduler here */
			if (dflt_scheduler->sc_primary_conn == -1) {
				char **workenv;
				char schedcmd[MAXPATHLEN + 1];
				/* save the current, "safe", environment.
				 * reset the enviroment to that when first started
				 * this is to get PBS_CONF_FILE if specified.*/
				workenv = environ;
				environ = origevp;

				snprintf(schedcmd, sizeof(schedcmd), "%s/sbin/pbs_sched &", pbs_conf.pbs_exec_path);
				snprintf(log_buffer, sizeof(log_buffer), "starting scheduler: %s", schedcmd);
				if (system(schedcmd) == -1) 
					log_errf(-1, __func__, "system(%s) failed. ERR : %s",schedcmd, strerror(errno));

				log_event(PBSEVENT_SYSTEM | PBSEVENT_FORCE,
					  PBS_EVENTCLASS_SERVER, LOG_CRIT,
					  msg_daemonname, log_buffer);

				brought_up_alt_sched = 1;
				/* restore environment to "safe" one */
				environ = workenv;
			}
		}
	} else if (are_primary == FAILOVER_PRIMARY) {
		/* now go set up work task to do handshake with secondary */

		(void) set_task(WORK_Timed, time_now, primary_handshake, NULL);
	}

	log_eventf(PBSEVENT_SYSTEM | PBSEVENT_FORCE, PBS_EVENTCLASS_SERVER, LOG_INFO,
		   msg_daemonname, msg_startup2,
		   sid, pbs_server_port_dis, pbs_mom_port, pbs_rm_port);

	/*
	 * Now at last, we are ready to do some batch work, the
	 * following section constitutes the "main" loop of the server
	 */

	if (server_init_type == RECOV_HOT)
		set_sattr_l_slim(SVR_ATR_State, SV_STATE_HOT, SET);
	else
		set_sattr_l_slim(SVR_ATR_State, SV_STATE_RUN, SET);

	/* Can start the python interpreter this late, before the main loop,*/
	/* which is when requests are actually read and processed           */
	/* (in wait_request), and when python processing is needed.         */
	svr_interp_data.daemon_name = strdup(msg_daemonname);

	if (svr_interp_data.daemon_name == NULL) { /* should not happen */
		log_err(errno, msg_daemonname, "strdup failed!");
		stop_db();
		return (1);
	}

	/* save it so we can free it without needing the pointer inside svr_interp_data */
	keep_daemon_name = svr_interp_data.daemon_name;

	snprintf(svr_interp_data.local_host_name, sizeof(svr_interp_data.local_host_name),
		 "%s", server_host);
	if ((pc = strchr(svr_interp_data.local_host_name, '.')) != NULL)
		*pc = '\0';

	if (pbs_python_ext_start_interpreter(&svr_interp_data) != 0) {
		log_err(-1, msg_daemonname, "Failed to start Python interpreter");
		stop_db();
		free(keep_daemon_name);
		return (1);
	}

	/* check and enable the prov attributes */
	set_srv_prov_attributes();

	/* check and set power attribute */
	set_srv_pwr_prov_attribute();

	periodic_req = alloc_br(PBS_BATCH_HookPeriodic);
	if (periodic_req == NULL) {
		log_err(errno, msg_daemonname, "Out of memory!");
		stop_db();
		free(keep_daemon_name);
		return (1);
	}
	process_hooks(periodic_req, hook_msg, sizeof(hook_msg), pbs_python_set_interrupt);

	/*
	 * main loop of server
	 * stays in this loop until server's state is either
	 * 	_DOWN - time to complete shutdown and exit, or
	 *	_SECIDLE - time for Secondary Server in failover to go
	 *		back to an inactive state.
	 * If state includes SV_STATE_PRIMDLY, stay in loop; this will be
	 * cleared when Secondary Server responds to a request.
	 */
	while ((state = get_sattr_long(SVR_ATR_State)) != SV_STATE_DOWN && state != SV_STATE_SECIDLE) {

		/*
		 * double check, if we are an active Secondary Server, that
		 * the Primary has not come back alive; if it did it will
		 * remove the "secondary active" file.
		 */
		if (are_primary == FAILOVER_SECONDARY) {
			if (stat(path_secondaryact, &sb_sa) == -1) {
				if (errno == ENOENT) {
					/* file gone, restart to go idle */
					set_sattr_l_slim(SVR_ATR_State, SV_STATE_SECIDLE, SET);
					break;
				}
			}
		}

		/* first process any task whose time delay has expired */
		waittime = next_task();

		if ((state = get_sattr_long(SVR_ATR_State)) == SV_STATE_RUN) { /* In normal Run State */

			if (first_run) {

				/*
				 * clear exec_vnode for jobs that doesn't need
				 * it, otherwise job is locked into those nodes
				 */
				clear_exec_vnode();
				first_run = 0;
			}
			for (psched = (pbs_sched *) GET_NEXT(svr_allscheds); psched; psched = (pbs_sched *) GET_NEXT(psched->sc_link)) {

				/* schedule anything only if sched is connected */
				if (psched->sc_primary_conn == -1 || psched->sc_secondary_conn == -1)
					continue;

				/* if we have a high prio sched command, send it 1st */
				if (psched->svr_do_sched_high != SCH_SCHEDULE_NULL)
					schedule_high(psched);
				if (psched->svr_do_schedule == SCH_SCHEDULE_RESTART_CYCLE) {
					if (!send_sched_cmd(psched, psched->svr_do_schedule, NULL)) {
						log_eventf(PBSEVENT_DEBUG2, PBS_EVENTCLASS_SERVER, LOG_NOTICE, msg_daemonname,
							   "sent scheduler restart scheduling cycle request to %s", psched->sc_name);
					} else
						psched->svr_do_schedule = SCH_SCHEDULE_NULL;
				} else if (svr_unsent_qrun_req || (psched->svr_do_schedule != SCH_SCHEDULE_NULL && get_sched_attr_long(psched, SCHED_ATR_scheduling))) {
					/*
					 * If svr_unsent_qrun_req is set to one there are pending qrun
					 * request, then do schedule_jobs irrespective of the server scheduling
					 * state.
					 * If svr_unsent_qrun_req is not set then do the existing checking and do
					 * scheduling only if server scheduling is turned on.
					 */

					psched->sch_next_schedule = time_now + get_sched_attr_long(psched, SCHED_ATR_schediteration);
					if (schedule_jobs(psched) == 0 && svr_unsent_qrun_req)
						svr_unsent_qrun_req = 0;
				}
			}
		} else if (state == SV_STATE_HOT) {

			/* Are there HOT jobs to rerun */
			/* only try every _CYCLE seconds */

			if (time_now > server.sv_hotcycle + SVR_HOT_CYCLE) {
				server.sv_hotcycle = time_now + SVR_HOT_CYCLE;
				c = start_hot_jobs();
			}

			/* If more than _LIMIT seconds since start, stop */

			if ((c == 0) ||
			    (time_now > server.sv_started + SVR_HOT_LIMIT)) {
				server_init_type = RECOV_WARM;
				set_sattr_l_slim(SVR_ATR_State, SV_STATE_RUN, SET);
				state = SV_STATE_RUN;
			}
		}

		/* any jobs to route today */

		pque = (pbs_queue *) GET_NEXT(svr_queues);
		while (pque) {
			if (pque->qu_qs.qu_type == QTYPE_RoutePush)
				queue_route(pque);
			pque = (pbs_queue *) GET_NEXT(pque->qu_link);
		}

		if (reap_child_flag)
			reap_child();

		/* wait for a request and process it */
		if (wait_request(waittime, priority_context) != 0) {
			log_err(-1, msg_daemonname, "wait_requst failed");
		}

		if (reap_child_flag)  /* check again incase signal arrived */
			reap_child(); /* before they were blocked          */

		if ((state = get_sattr_long(SVR_ATR_State)) == SV_STATE_SHUTSIG)
			(void) svr_shutdown(SHUT_SIG); /* caught sig */

		/*
		 * if in process of shuting down and all running jobs
		 * and all children are done, change state to DOWN
		 */

		if ((state > SV_STATE_RUN) &&
		    (state < SV_STATE_SECIDLE) &&
		    (server.sv_jobstates[JOB_STATE_RUNNING] == 0) &&
		    (server.sv_jobstates[JOB_STATE_EXITING] == 0) &&
		    ((void *) GET_NEXT(task_list_event) == NULL)) {
			set_sattr_l_slim(SVR_ATR_State, SV_STATE_DOWN, SET);
			state = SV_STATE_DOWN;
		}
	}
	DBPRT(("Server out of main loop, state is %ld\n", state))

	/* set the current seq id to the last id before final save */
	server.sv_qs.sv_lastid = server.sv_qs.sv_jobidnumber;
	svr_save_db(&server); /* final recording of server */
	track_save(NULL);     /* save tracking data	     */

	/* if brought up the Secondary Scheduler, take it down */

	if (brought_up_alt_sched == 1)
		send_sched_cmd(dflt_scheduler, SCH_QUIT, NULL);

	/* if Moms are to go down as well, tell them */

	if (state != SV_STATE_SECIDLE && (shutdown_who & SHUT_WHO_MOM))
		shutdown_nodes();

	/* if brought up the DB, take it down */
	stop_db();

	if (are_primary == FAILOVER_SECONDARY) {
		/* we are the secondary server */
		(void) unlink(path_secondaryact); /* remove file */

		if (state == SV_STATE_SECIDLE && saved_takeover_req != NULL) {
			/*
			 * If we are the secondary server that is
			 * going inactive AND there is a batch request struct,
			 * send acknowledgement back to primary so primary
			 * server knows that the data have been written.
			 */
			DBPRT(("Failover: acknowledging FO(%d) request\n", saved_takeover_req->rq_ind.rq_failover))
			reply_send(saved_takeover_req);
			saved_takeover_req = NULL;
		}
	}

#if defined(DEBUG)
	/* for valgrind, clear some stuff up */
	{
		hook *phook = (hook *) GET_NEXT(svr_allhooks);
		while (phook) {
			hook *tmp;
			free(phook->hook_name);
			pbs_python_ext_free_python_script(phook->script);
			free(phook->script);
			tmp = phook;
			phook = (hook *) GET_NEXT(phook->hi_allhooks);
			free(tmp);
		}
	}
#endif

	/* Shut down interpreter now before closing network connections */
	pbs_python_ext_shutdown_interpreter(&svr_interp_data); /* stop python if started */

	shutdown_ack();
	net_close(-1); /* close all network connections */
	tpp_shutdown();

	/*
	 * SERVER is going to be shutdown, destroy indexes
	 */
	pbs_idx_destroy(jobs_idx);
	pbs_idx_destroy(queues_idx);
	pbs_idx_destroy(resvs_idx);

	{
		int csret;
		if ((csret = CS_close_app()) != CS_SUCCESS) {
			/*had some problem closing the security library*/

			sprintf(log_buffer, "problem closing security library (%d)", csret);
			log_err(-1, __func__, log_buffer);
		}
	}

	log_event(PBSEVENT_SYSTEM | PBSEVENT_FORCE, PBS_EVENTCLASS_SERVER,
		  LOG_NOTICE, msg_daemonname, msg_svrdown);
	acct_close();
	log_close(1);
	free(keep_daemon_name); /* logs closed, can free here */

	lock_out(lockfds, F_UNLCK); /* unlock  */
	(void) close(lockfds);
	(void) unlink(lockfile);
	unload_auths();

	if (state == SV_STATE_SECIDLE) {
		/*
		 * Secondary Server going inactive, or the Primary needs to
		 * recycle itself (found Secondary active);
		 * re-execv the Server, keeps things clean
		 */
		DBPRT(("Failover: reexecing %s as %s ", server_host, argv[0]))
		sprintf(log_buffer, "%s restarting as %s", server_host,
			are_primary == FAILOVER_PRIMARY ? "primary" : "secondary");
		if (*argv[0] == '/') {
			execve(argv[0], argv, origevp);
		} else {
			sprintf(log_buffer, "%s/sbin/pbs_server",
				pbs_conf.pbs_exec_path);
			execve(log_buffer, argv, origevp);
		}
		DBPRT(("Failover: execv failed\n"))
	}
	return (0);
}

/**
 * @brief
 * 		get_port - parse host:port for -M and -S option
 *		Returns into *port and *addr if and only if that part is specified;
 *		a part that is not specified keeps the value the caller stored
 *		there before the call.
 *		Both port and addr are returned in HOST byte order.
 *
 * @param[in]	arg	- "host", "port", ":port", or "host:port"
 * @param[out]	port	- RETURN: new port if one given
 * @param[out]	addr	- RETURN: daemon's address if host given
 *
 * @return	int
 * @retval	0	- ok
 * @retval	-1	- error: a bare port is out of range, the host part
 *			  could not be parsed, or *port or *addr is zero
 *			  once parsing is done
 */

static int
get_port(char *arg, unsigned int *port, pbs_net_t *addr)
{
	if (*arg == ':')
		++arg;

	/*
	 * Only an argument made up entirely of digits is a bare port.
	 * Testing just the first character would misread a dotted IP address
	 * or a host name that starts with a digit ("10.0.0.1:15001") as
	 * port 10 and never resolve the host.
	 */
	if (*arg != '\0' && arg[strspn(arg, "0123456789")] == '\0') {
		unsigned long val;

		/* strtoul reports overflow, atoi does not */
		errno = 0;
		val = strtoul(arg, NULL, 10);
		if (errno == ERANGE || val > 65535UL)
			return (-1);
		*port = (unsigned int) val;
	} else {
		char *name;

		name = parse_servername(arg, port);
		if (name == NULL)
			return (-1);
		*addr = get_hostaddr(name);
	}

	/*
	 * The part that was not given on the command line still holds the
	 * caller's value, so this also rejects a caller-supplied zero.
	 */
	if ((*port == 0) || (*addr == 0))
		return (-1);
	return 0;
}

/**
 * @brief
 * 		next_task - look for the next work task to perform:
 *		1. If svr_delay_entry is set, then a delayed task is ready so
 *	   		find and process it.
 *		2. All items on the immediate list, then
 *		3. All items on the timed task list which have expired times
 *
 * @return	amount of time till next task
 */

static time_t
next_task()
{

	time_t tilwhen;
	pbs_sched *psched;

	tilwhen = default_next_task();

	/* should the scheduler be run?  If so, adjust the delay time  */

	for (psched = (pbs_sched *) GET_NEXT(svr_allscheds); psched; psched = (pbs_sched *) GET_NEXT(psched->sc_link)) {
		time_t delay;
		if ((delay = psched->sch_next_schedule - time_now) <= 0)
			set_scheduler_flag(SCH_SCHEDULE_TIME, psched);
		else if (delay < tilwhen)
			tilwhen = delay;
	}

	next_sync_mom_hookfiles();

	return (tilwhen);
}

/**
 * @brief
 * 		start_hot_jobs - place any job which is state QUEUED and has the
 *		HOT start flag set into execution.
 *
 * @return	number of jobs to be hot started.
 */

static int
start_hot_jobs()
{
	int ct = 0;
	char *nodename;

	job *pjob;

	pjob = (job *) GET_NEXT(svr_alljobs);
	while (pjob) {
		if ((check_job_substate(pjob, JOB_SUBSTATE_QUEUED)) &&
		    (pjob->ji_qs.ji_svrflags & JOB_SVFLG_HOTSTART)) {
			if (is_jattr_set(pjob, JOB_ATR_exec_vnode)) {
				ct++;
				/* find Mother Superior node and see if she is up */
				nodename = parse_servername(get_jattr_str(pjob, JOB_ATR_exec_vnode), NULL);
				if (is_vnode_up(nodename)) {
					/* she is up so can send her the job */
					/* else we will try later            */
					log_event(PBSEVENT_SYSTEM, PBS_EVENTCLASS_JOB,
						  LOG_INFO,
						  pjob->ji_qs.ji_jobid,
						  "attempting to hot start job");
					(void) svr_startjob(pjob, 0);
				}
			} else {
				/* no vnode list, cannot hot start, clear flag */
				pjob->ji_qs.ji_svrflags &= ~JOB_SVFLG_HOTSTART;
			}
		}
		pjob = (job *) GET_NEXT(pjob->ji_alljobs);
	}
	return (ct);
}

/**
 * @brief
 * 		lock_out - lock out other daemons from this directory.
 *		And record (on write-lock), my pid into the file.
 *		The lock request is tried once when no secondary server is
 *		configured, otherwise up to 30 times, two seconds apart.
 *		If the request never succeeds, "another server running" is
 *		logged and printed to stderr and the process exits with status 1.
 *
 * @param[in]	fds	- file descriptor.
 * @param[in]	op	- F_WRLCK  or  F_UNLCK
 */

static void
lock_out(int fds, int op)
{
	int i;
	int tries;
	struct flock lk;
	char buf[100];

	if (pbs_conf.pbs_secondary == NULL)
		tries = 1; /* not fail over, try lock one time */
	else
		tries = 30; /* fail over, try for a minute */

	(void) lseek(fds, (off_t) 0, SEEK_SET);
	lk.l_type = op;
	lk.l_whence = SEEK_SET;
	lk.l_start = 0;
	lk.l_len = 0; /* zero length: the lock covers the whole file */
	for (i = 0; i < tries; i++) {
		if (fcntl(fds, F_SETLK, &lk) != -1) {
			if (op == F_WRLCK) {
				/* if write-lock, record pid in file */
				if (ftruncate(fds, (off_t) 0) == -1)
					log_errf(-1, __func__, "ftruncate failed. ERR : %s", strerror(errno));

				(void) snprintf(buf, sizeof(buf), "%d\n", (int) getpid());
				if (write(fds, buf, strlen(buf)) == -1)
					log_errf(-1, __func__, "write failed. ERR : %s", strerror(errno));
			}
			return;
		}
		/* pause only when another attempt follows; no point delaying the exit */
		if (i + 1 < tries)
			sleep(2);
	}

	(void) strcpy(log_buffer, "another server running");
	/* object class comes before severity, as in the other log_event calls */
	log_event(PBSEVENT_SYSTEM | PBSEVENT_ADMIN | PBSEVENT_FORCE,
		  PBS_EVENTCLASS_SERVER, LOG_NOTICE, msg_daemonname,
		  log_buffer);
	fprintf(stderr, "pbs_server: %s\n", log_buffer);
	exit(1);
}
