/*
 * Copyright (C) 1994-2021 Altair Engineering, Inc.
 * For more information, contact Altair at www.altair.com.
 *
 * This file is part of both the OpenPBS software ("OpenPBS")
 * and the PBS Professional ("PBS Pro") software.
 *
 * Open Source License Information:
 *
 * OpenPBS is free software. You can redistribute it and/or modify it under
 * the terms of the GNU Affero General Public License as published by the
 * Free Software Foundation, either version 3 of the License, or (at your
 * option) any later version.
 *
 * OpenPBS is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Affero General Public
 * License for more details.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 *
 * Commercial License Information:
 *
 * PBS Pro is commercially licensed software that shares a common core with
 * the OpenPBS software.  For a copy of the commercial license terms and
 * conditions, go to: (http://www.pbspro.com/agreement.html) or contact the
 * Altair Legal Department.
 *
 * Altair's dual-license business model allows companies, individuals, and
 * organizations to create proprietary derivative works of OpenPBS and
 * distribute them - whether embedded or bundled with other software -
 * under a commercial license agreement.
 *
 * Use of Altair's trademarks, including but not limited to "PBS™",
 * "OpenPBS®", "PBS Professional®", and "PBS Pro™" and Altair's logos is
 * subject to Altair's trademark licensing policies.
 */

#include <pbs_config.h> /* the master config generated by configure */

#include <ctype.h>
#include <errno.h>
#include <fcntl.h>
#include <limits.h>
#include <signal.h>
#include <grp.h>
#include <pwd.h>
#include <unistd.h>
#include <sys/wait.h>
#include <dirent.h>

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <time.h>
#include "dis.h"
#include "libpbs.h"
#include "pbs_error.h"
#include "server_limits.h"
#include "list_link.h"
#include "ticket.h"
#include "credential.h"
#include "attribute.h"
#include "resource.h"
#include "job.h"
#include "batch_request.h"
#include "pbs_nodes.h"
#include "svrfunc.h"
#include "mom_mach.h"
#include "mom_func.h"
#include "mom_server.h"
#include "net_connect.h"
#include "log.h"
#include "tpp.h"
#include "hook.h"
#include "pbs_python.h"
#include "mom_hook_func.h"
#include "work_task.h"
#include "placementsets.h"
#include "pbs_internal.h"
#include "portability.h"

#ifdef __SANITIZE_ADDRESS__
#include <sanitizer/common_interface_defs.h>
pid_t pid_sanitizer = -1;
#endif

#if defined(PBS_SECURITY) && (PBS_SECURITY == KRB5)
#include "renew_creds.h"
#include <krb5.h>
extern int decode_block_base64(unsigned char *ascii_data, ssize_t ascii_len, unsigned char *bin_data, ssize_t *p_bin_len, char *msg, size_t msg_len);
#endif

/**
 * @file	requests.c
 */
/* External Global Data Items */

extern unsigned int default_server_port;
extern int exiting_tasks;
extern pbs_list_head svr_alljobs;
extern char mom_host[];
#ifdef WIN32
extern char *mom_home;
#endif
extern char *msg_err_unlink;
extern char *msg_mom_reject_root_scripts;
extern int reject_root_scripts;
extern char *path_spool;
extern char *path_undeliv;
extern attribute_def job_attr_def[];
extern char *msg_jobmod;
extern char *msg_manager;
extern time_t time_now;
extern time_t time_resc_updated;
extern int resc_access_perm; /* see encode_resc() */
/* in attr_fn_resc.c */
extern int suspend_signal;
extern int resume_signal;
extern char *path_checkpoint;
extern int restart_background;
extern int reject_hook_scripts;
extern int restart_transmogrify;
extern char task_fmt[];
extern char *msg_noloopbackif;
extern char *msg_stageremote;
extern char *path_hooks;
extern char *msg_hookfile_open;
extern char *msg_hookfile_write;
extern unsigned long hooks_rescdef_checksum;
extern char *path_rescdef;
#if MOM_ALPS
extern int alps_confirm_empty_timeout;
extern int alps_confirm_switch_timeout;
#endif

extern long joinjob_alarm_time;
extern long job_launch_delay;
extern int update_joinjob_alarm_time;
extern int update_job_launch_delay;
extern pbs_list_head svr_allhooks;
/* External Functions */
extern int is_direct_write(job *, enum job_file, char *, int *);
extern unsigned char pbs_aes_key[][16];
extern unsigned char pbs_aes_iv[][16];

/* Local Data Items */
char rcperr[MAXPATHLEN] = {'\0'}; /* file to contain rcp error */
char *pbs_jobdir = NULL;	  /* path to staging and execution dir of current job */
char *cred_buf = NULL;
size_t cred_len = 0;
int cred_type = 0;
int cred_pipe = -1;
char *pwd_buf = NULL;

#ifndef WIN32
static void post_cpyfile(job *pjob, int ev);
#else
extern char *save_actual_homedir(struct passwd *, job *);
extern char *set_homedir_to_local_default(job *, char *);
#endif

#ifndef TRUE
#define TRUE 1
#define FALSE 0
#endif

#define STAGEOUT_FAILURE 65
#define SUSPEND 1
#define RESUME 0

#ifdef __SANITIZE_ADDRESS__
/**
 * @brief
 *	__lsan_is_turned_off() - leak sanitizer hook deciding whether leak
 *	checking is switched off for the running process.
 *
 *	The decision is driven by pid_sanitizer: a value of zero switches
 *	LSAN off, any other value leaves it on.  fork_to_user() stores the
 *	result of its fork there, so the value is zero in the forked child.
 *	That child changes its uid/gid, and the sanitizer cannot cope with
 *	functions like setuid() (it fails to connect to the thread).
 *
 * @return	int
 * @retval	1	disable LSAN
 * @retval	0	enable LSAN
 *
 */
int __attribute__((used))
__lsan_is_turned_off(void)
{
	return (pid_sanitizer == 0) ? 1 : 0;
}
#endif

/**
 * @brief
 *	is_file_same() - report whether two path names refer to one file
 *
 * @param[in] file1 - first path
 * @param[in] file2 - second path
 *
 * @return	int
 * @retval	1	both paths could be stat'ed and have the same device
 *			and inode numbers
 * @retval	0	the files differ, or it cannot be determined (a stat()
 *			failed, or the code was built for WIN32)
 *
 */

static int
is_file_same(char *file1, char *file2)
{
#ifndef WIN32
	struct stat first;
	struct stat second;

	/* if either path cannot be examined, the answer is "cannot tell" */
	if (stat(file1, &first) != 0)
		return 0;
	if (stat(file2, &second) != 0)
		return 0;

	/* identity of a file is the (device, inode) pair */
	if (first.st_dev != second.st_dev)
		return 0;
	return (first.st_ino == second.st_ino) ? 1 : 0;
#else
	return 0;
#endif
}

/**
 * @brief
 * 	fork_to_user - fork mom and go to user's home directory
 *	also sets up the global useruid and usergid in the child
 *
 *	WARNING: valid only if called when preq points to a cpyfiles structure
 *
 * @param[in] preq - pointer to batch_request structure
 * @param[in] pjob - pointer to job structure (can be null)
 *
 * @return 	HANDLE
 * @retval	!INVALID_HANDLE_VALUE - success
 * @retval	INVALID_HANDLE_VALUE - failure
 */
#ifdef WIN32
static HANDLE
fork_to_user(struct batch_request *preq, job *pjob)
{
	struct passwd *pwdp = NULL;
	struct rq_cpyfile *rqcpf;
	char lpath[MAXPATHLEN + 1];

	/*
	 * Locate the copy-file part of the request.  The "_Cred" request
	 * types wrap it together with a credential, which is remembered in
	 * the global cred_buf/cred_len for the logon below.
	 */
	if (preq->rq_type == PBS_BATCH_CopyFiles_Cred || preq->rq_type == PBS_BATCH_DelFiles_Cred) {
		rqcpf = &preq->rq_ind.rq_cpyfile_cred.rq_copyfile;
		cred_buf = preq->rq_ind.rq_cpyfile_cred.rq_pcred;
		cred_len = preq->rq_ind.rq_cpyfile_cred.rq_credlen;
	} else {
		rqcpf = &preq->rq_ind.rq_cpyfile;
		cred_buf = NULL;
		cred_len = 0;
	}

	if (pjob)
		pwdp = getpwnam(get_jattr_str(pjob, JOB_ATR_euser));

	/* we're trying to reuse old pw_userlogin since a mapped UNC */
	/* path maybe hanging off it. With pbs_mom running under     */
	/* SERVICE_ACCOUNT, we have to map drives under user session */
	/* Only the session that mapped the drive can unmap it.      */
	log_buffer[0] = '\0';
	/* the user name is read through rqcpf so that it comes from the */
	/* request variant selected above, as in the non-Windows version */
	if ((pwdp == NULL || pwdp->pw_userlogin == INVALID_HANDLE_VALUE) &&
	    (pwdp = logon_pw(rqcpf->rq_user, cred_buf, cred_len, pbs_decrypt_pwd, 0, log_buffer)) == NULL) {
		log_err(-1, __func__, log_buffer);
		return (INVALID_HANDLE_VALUE);
	}

	/* logon_pw() may have left a diagnostic even though it succeeded */
	if (strlen(log_buffer) > 0)
		log_event(PBSEVENT_DEBUG, PBS_EVENTCLASS_JOB, LOG_DEBUG, __func__, log_buffer);

	if (pwdp->pw_userlogin != INVALID_HANDLE_VALUE) {
		if (!impersonate_user(pwdp->pw_userlogin)) {
			log_err(-1, "fork_to_user2", "ImpersonateLoggedOnUser");
			return (INVALID_HANDLE_VALUE);
		}
	} else
		return (INVALID_HANDLE_VALUE);

	/* go to the user's home directory, falling back to the local default */
	pbs_strncpy(lpath, save_actual_homedir(pwdp, pjob), sizeof(lpath));
	CreateDirectory(lpath, 0); /* user homedir may not exist yet */
	if (chdir(lpath) == -1) {
		pbs_strncpy(lpath, set_homedir_to_local_default(pjob, rqcpf->rq_user), sizeof(lpath));
		CreateDirectory(lpath, 0); /* user homedir may not exist yet */
		(void) chdir(lpath);
	}

	/* create a PBS_EXEC env entry */
	setenv("PBS_EXEC", pbs_conf.pbs_exec_path, 1);

	return (pwdp->pw_userlogin);
}

#else

/**
 * @brief
 * 	frk_err - error function for "fork_to_user()".
 *	Call this if there is an error reply needed for the batch request
 *	in the child process.  The request is rejected with the given error
 *	(the error is returned to the Server) and the child process exits.
 *
 * @param[in] err - error number to reject the request with
 * @param[in] preq - pointer to batch_request structure
 *
 * @return 	Void (this function does not return; it calls exit())
 *
 */

static void
frk_err(int err, struct batch_request *preq)
{
	req_reject(err, 0, preq);
	/* exit status is 0 even on this error path; the reject carries the error */
	exit(0);
}

/**
 * @brief
 *	fork_to_user - fork mom and, in the child, change to the user's home
 *	directory and take on the user's identity (uid, gid and login gid).
 *
 *	In the child the identity is taken from the data cached in the job
 *	structure when available, otherwise it is looked up from the user and
 *	group names in the request.  For an AES credential the password is
 *	decrypted into the global pwd_buf and a pipe is created: the write end
 *	is kept in the global cred_pipe (close-on-exec) and the number of the
 *	read end is exported in the PBS_PWPIPE environment variable.
 *
 *	Any failure in the child rejects the request and exits the child
 *	(see frk_err()); this includes a credential type that is not known,
 *	because continuing would mean running without having switched to the
 *	user.
 *
 *	WARNING: valid only if called when preq points to a cpyfiles structure
 *		 or a cpyfiles_cred structure
 *
 * @param[in] preq - pointer to batch_request structure
 * @param[in] pjob - pointer to job structure (can be null)
 * @param[in] ticket - Kerberos ticket holder (only in KRB5 builds)
 *
 * @return	pid_t
 * @retval	>0 - in the parent: success, pid of the child
 * @retval	0  - in the child: now running as the user
 * @retval	<0 - in the parent: fork failed (-PBSE_SYSTEM)
 */

#if defined(PBS_SECURITY) && (PBS_SECURITY == KRB5)
static pid_t
fork_to_user(struct batch_request *preq, job *pjob, struct krb_holder *ticket)
#else
static pid_t
fork_to_user(struct batch_request *preq, job *pjob)
#endif
{
	struct group *grpp;
	pid_t pid;
	struct passwd *pwdp;
	uid_t useruid;
	gid_t usergid;
	gid_t user_rgid;
	int fds[2];
	struct rq_cpyfile *rqcpf;
	static char buf[MAXPATHLEN + 1];

	pid = fork_me(preq->rq_conn);

#ifdef __SANITIZE_ADDRESS__
	/* see the comment of __lsan_is_turned_off() */
	pid_sanitizer = pid;
#endif

	if (pid > 0) {
		if (preq->prot == PROT_TCP)
			free_br(preq); /* parent - note leave connection open   */
		return pid;
	} else if (pid < 0)
		return (-PBSE_SYSTEM);

	/* The Child */

	if (preq->rq_type == PBS_BATCH_CopyFiles_Cred || preq->rq_type == PBS_BATCH_DelFiles_Cred)
		rqcpf = &preq->rq_ind.rq_cpyfile_cred.rq_copyfile;
	else
		rqcpf = &preq->rq_ind.rq_cpyfile;

	/* create a PBS_EXEC env entry */
	setenv("PBS_EXEC", pbs_conf.pbs_exec_path, 1);

	if (pjob != NULL && pjob->ji_grpcache != NULL) {
		/* used the good stuff cached in the job structure */
		useruid = pjob->ji_qs.ji_un.ji_momt.ji_exuid;
		usergid = pjob->ji_qs.ji_un.ji_momt.ji_exgid;
		/* a failed chdir is only logged; the child carries on */
		if (chdir(pjob->ji_grpcache->gc_homedir) == -1)
			log_errf(-1, __func__, "chdir failed. ERR : %s", strerror(errno));
		user_rgid = pjob->ji_grpcache->gc_rgid;
		/* Account ID used to be set here for Cray via acctid(). */
	} else {
		/* Need to look up the uid, gid, and home directory */
		if ((pwdp = getpwnam(rqcpf->rq_user)) == NULL)
			frk_err(PBSE_BADUSER, preq); /* no return */
		useruid = pwdp->pw_uid;
		user_rgid = pwdp->pw_gid;

		if (rqcpf->rq_group[0] == '\0')
			usergid = pwdp->pw_gid; /* default to login group */
		else {
			if ((grpp = getgrnam(rqcpf->rq_group)) == NULL)
				frk_err(PBSE_BADUSER, preq); /* no return */
			usergid = grpp->gr_gid;
		}
		/* change to user's home directory; failure is only logged */
		if (chdir(pwdp->pw_dir) == -1)
			log_errf(-1, __func__, "chdir failed. ERR : %s", strerror(errno));
	}

#if defined(PBS_SECURITY) && (PBS_SECURITY == KRB5)
	/* singleshot ticket, without renewal */
	if (pjob != NULL)
		init_ticket_from_job(pjob, NULL, ticket, CRED_SINGLESHOT);
	else
		init_ticket_from_req(preq->rq_extend, rqcpf->rq_jobid, ticket, CRED_SINGLESHOT);

#if defined(HAVE_LIBKAFS) || defined(HAVE_LIBKOPENAFS)
	singleshot_afslog(ticket);
#endif
#endif

	if (preq->rq_type == PBS_BATCH_CopyFiles_Cred || preq->rq_type == PBS_BATCH_DelFiles_Cred) {

		cred_buf = preq->rq_ind.rq_cpyfile_cred.rq_pcred;
		cred_len = preq->rq_ind.rq_cpyfile_cred.rq_credlen;

		switch (preq->rq_ind.rq_cpyfile_cred.rq_credtype) {
			case PBS_CREDTYPE_NONE:
				if (becomeuser_args(rqcpf->rq_user, useruid, usergid, user_rgid) == -1) {
					log_err(errno, __func__, "set privilege as user");
					frk_err(PBSE_SYSTEM, preq); /* no return */
				}
				break;

			case PBS_CREDTYPE_AES:
				if (becomeuser_args(rqcpf->rq_user, useruid, usergid, user_rgid) == -1) {
					log_err(errno, __func__, "set privilege as user");
					frk_err(PBSE_SYSTEM, preq); /* no return */
				}
				if (pbs_decrypt_pwd(cred_buf, PBS_CREDTYPE_AES, cred_len, &pwd_buf, (const unsigned char *) pbs_aes_key, (const unsigned char *) pbs_aes_iv) != 0) {
					log_joberr(-1, __func__, "decrypt_pwd", rqcpf->rq_jobid);
					frk_err(PBSE_BADCRED, preq); /* no return */
				}
				if (pipe(fds) == -1) {
					log_err(errno, __func__, "pipe");
					frk_err(PBSE_SYSTEM, preq); /* no return */
				}
				cred_pipe = fds[1];

				/* tell whatever gets exec'ed which fd is the read end */
				snprintf(buf, sizeof(buf), "%d", fds[0]);
				setenv("PBS_PWPIPE", buf, 1);
				/* the write end must not leak into exec'ed programs */
				if (fcntl(cred_pipe, F_SETFD, FD_CLOEXEC) == -1)
					log_err(errno, __func__, "fcntl");

				break;

			default:
				/*
				 * becomeuser_args() has not been called on this
				 * path, so carrying on would leave the child with
				 * the identity it was forked with.  Refuse instead.
				 */
				log_err(-1, __func__, "unknown credential type");
				frk_err(PBSE_BADCRED, preq); /* no return */
				break;
		}
	} else { /* no cred */
		if (becomeuser_args(rqcpf->rq_user, useruid, usergid, user_rgid) == -1) {
			log_err(errno, __func__, "set privilege as user");
			frk_err(PBSE_SYSTEM, preq); /* no return */
		}
	}

	return pid;
}
#endif /* WIN32 */

#define RT_BLK_SZ 65536
/**
 * @brief
 * 	Called when a job is rerun (qrerun) to copy the job's standard out/error
 * 	files back to the Server until job is rescheduled.  Function opens
 * 	the StdOut or StdErr file for the job, reads it in blocks of RT_BLK_SZ
 * 	bytes and ships each block to the Server, waiting for the Server's
 * 	reply before sending the next one.
 * 	If the whole file is shipped back to the Server successfully and it is
 * 	not being kept in the job's working directory, it is then deleted.
 *
 * 	Nothing is sent (and success is returned) for interactive jobs, for
 * 	files that are written directly to their final destination, and when
 * 	the file cannot be opened.
 *
 * @param[in] pjob  - Accepts a job pointer.
 * @param[in] which - enum for standard job files
 * @param[in] sock  - socket descriptor value
 *
 * @return	int
 * @retval	 0  - success (or nothing to do).
 * @retval	-1  - failure: no request could be allocated, the file could
 *		      not be read, a block could not be encoded, or the Server
 *		      did not acknowledge a block.
 *
 */
static int
return_file(job *pjob, enum job_file which, int sock)
{
	int amt;
	char buf[RT_BLK_SZ];
	int fds;
	struct batch_request *prq;
	int rc = 0;
	int seq = 0;
	int direct_write_possible = 1;

	char path[MAXPATHLEN + 1]; /* needed by is_direct_write */

	if ((is_jattr_set(pjob, JOB_ATR_interactive)) && (get_jattr_long(pjob, JOB_ATR_interactive) > 0)) {
		return (0); /* interactive job, no file to copy */
	}

	/* Check for direct write of this file - direct write files are */
	/* not copied back to the server on rerun.                      */
	if (is_direct_write(pjob, which, path, &direct_write_possible)) {
		snprintf(log_buffer, sizeof(log_buffer),
			 "Skipping copy of directly written %s file of job %s",
			 (which == StdOut) ? "STDOUT" : "STDERR", pjob->ji_qs.ji_jobid);
		log_event(PBSEVENT_DEBUG4, PBS_EVENTCLASS_JOB, LOG_INFO,
			  pjob->ji_qs.ji_jobid, log_buffer);
		return (0); /* Direct write, no copy done */
	}

	fds = open_std_file(pjob, which, O_RDONLY,
			    pjob->ji_qs.ji_un.ji_momt.ji_exgid);
	if (fds < 0)
		return (0);

	/* Build a "request" to the Server which will contain */
	/* a block of the file and send it on its way         */

	prq = alloc_br(PBS_BATCH_MvJobFile);
	if (prq == NULL) {
		close(fds);
		return (-1);
	}

	(void) strcpy(prq->rq_host, mom_host);
	(void) strcpy(prq->rq_ind.rq_jobfile.rq_jobid, pjob->ji_qs.ji_jobid);

	while ((amt = read(fds, buf, RT_BLK_SZ)) > 0) {
		/* each block goes out as its own MvJobFile request */
		DIS_tcp_funcs();
		if ((rc = encode_DIS_ReqHdr(sock, PBS_BATCH_MvJobFile,
					    pbs_current_user)) ||
		    (rc = encode_DIS_JobFile(sock, seq++, buf, amt,
					     pjob->ji_qs.ji_jobid, which)) ||
		    (rc = encode_DIS_ReqExtend(sock, NULL))) {
			break;
		}

		dis_flush(sock);

		/* the block only counts once the Server has accepted it */
		if ((DIS_reply_read(sock, &prq->rq_reply, 0) != 0) ||
		    (prq->rq_reply.brp_code != 0)) {
			rc = -1;
			break;
		}
	}

	/*
	 * amt is negative only when the loop ended because read() failed.
	 * The file was then sent at most partially, so this must not be
	 * reported as success: success would remove the file below.
	 */
	if (amt < 0) {
		log_err(errno, __func__, "read");
		rc = -1;
	}

	free_br(prq);
	(void) close(fds);

	if (rc == 0) {
		int keeping;
		char *fname;

		/* get path of file and if "keeping" indicates file is in */
		/* the job's working directory, don't bother to delete it */
		/* it will be deleted when the sandbox is removed or left */
		/* in the user's home to be replaced when job rerun.      */
		fname = std_file_name(pjob, which, &keeping);
		if (keeping == 0)
			(void) unlink(fname);
		return (0);
	} else
		return (-1);
}

/**
 * Delete job request
 * @brief
 * 	Delete job request - wait for the sisters to finish, cleanup, and respond back to the
 *	the server
 *	In the case of Cray, the response to the server will be done
 *	at a lower level in del_job_hw() to allow the MOM cancel the
 *	ALPS reservation.  In Cray's case, the job will remain in the
 *	"E" state until the MOM responds to the deletejob request from
 *	the server.
 *
 * @param[in] batch_request structure for the job
 *
 * @return Void
 *
 */
void
req_deletejob(struct batch_request *preq)
{
	job *pjob;
	mom_hook_input_t *hook_input = NULL;
	mom_hook_output_t *hook_output = NULL;
	char *jobid = NULL;

	jobid = preq->rq_ind.rq_delete.rq_objname;
	pjob = find_job(jobid);

	if (!pjob) {
		req_reject(PBSE_UNKJOBID, 0, preq);
		return;
	}

	if (pjob->ji_hook_running_bg_on)
		/* This is a duplicate request just return from here. */
		return;

		/*
		* check to see is there any copy request pending
		* for this job ?
		*/
#ifdef WIN32
	if (get_copyinfo_from_list(jobid) != NULL)
#else
	if (pjob->ji_momsubt != 0 && pjob->ji_mompost == post_cpyfile)
#endif
	{
		/*
		 * we have copy request pending so we
		 * need to first process the post_cpyfile
		 * request before starting this one.
		 * Tell the server to try again later.
		 */
		req_reject(PBSE_TRYAGAIN, 0, preq);
		return;
	}
	/*
	 * mom_deljob_wait() sets substate to
	 * prevent sending more OBIT messages
	 */
	pjob->ji_preq = preq;
	if ((hook_input = (mom_hook_input_t *) malloc(
		     sizeof(mom_hook_input_t))) == NULL) {
		log_err(errno, __func__, MALLOC_ERR_MSG);
		return;
	}
	mom_hook_input_init(hook_input);
	hook_input->pjob = pjob;

	if ((hook_output = (mom_hook_output_t *) malloc(
		     sizeof(mom_hook_output_t))) == NULL) {
		log_err(errno, __func__, MALLOC_ERR_MSG);
		return;
	}
	mom_hook_output_init(hook_output);
	if ((hook_output->reject_errcode =
		     (int *) malloc(sizeof(int))) == NULL) {
		log_err(errno, __func__, MALLOC_ERR_MSG);
		free(hook_output);
		return;
	}
	*(hook_output->reject_errcode) = 0;

	if (mom_process_hooks(HOOK_EVENT_EXECJOB_END,
			      PBS_MOM_SERVICE_NAME, mom_host, hook_input,
			      hook_output, NULL, 0, 1) == HOOK_RUNNING_IN_BACKGROUND) {

		pjob->ji_hook_running_bg_on = BG_PBS_BATCH_DeleteJob;

		/*
		* save number of nodes in sisterhood in case
		* job is deleted in send_sisters_deljob()
		*/
		if (pjob->ji_numnodes > 1) {
			if (send_sisters_deljob_wait(pjob) == 0) {
				sprintf(log_buffer, "Unable to send delete job "
						    "request to one or more sisters");
				log_event(PBSEVENT_ERROR, PBS_EVENTCLASS_JOB,
					  LOG_ERR, pjob->ji_qs.ji_jobid, log_buffer);
				/*
				* no messages sent, but there are sisters
				* must be all down
				*/
				pjob->ji_hook_running_bg_on = BG_PBSE_SISCOMM;
			}
		}
		/*
		* Hook is running in background reply to the batch
		* request will be taken care of in mom_process_background_hooks
		* function
		*/
		return;
	}
	mom_deljob_wait2(pjob);
	free(hook_output->reject_errcode);
	free(hook_output);
	free(hook_input);
}

/**
 * @brief
 * 	req_holdjob - checkpoint and terminate job
 *
 *	The request is refused with PBSE_UNKJOBID when the job is not known
 *	and with PBSE_BADSTATE when a checkpoint or restart is already active
 *	or the job is neither running nor suspended.  Otherwise a checkpoint
 *	is started; the outcome is written to the job log in every case where
 *	the job was found.
 *
 * @param[in] preq - batch_request structure for the job
 *
 * @return Void
 *
 */

void
req_holdjob(struct batch_request *preq)
{
	job *pjob;
	const char *outcome;
	int refuse = 1;

	pjob = find_job(preq->rq_ind.rq_hold.rq_orig.rq_objname);
	if (pjob == NULL) {
		req_reject(PBSE_UNKJOBID, 0, preq);
		return;
	}

	/* work out what will happen, and the log text that goes with it */
	if (pjob->ji_flags & MOM_CHKPT_ACTIVE) {
		outcome = "req_holdjob failed: Checkpoint active.";
	} else if (pjob->ji_flags & MOM_RESTART_ACTIVE) {
		outcome = "req_holdjob failed: Restart active.";
	} else if (!check_job_substate(pjob, JOB_SUBSTATE_RUNNING) &&
		   !check_job_substate(pjob, JOB_SUBSTATE_SUSPEND)) {
		outcome = "req_holdjob failed: Job not running or suspended.";
	} else {
		outcome = "req_holdjob: Checkpoint initiated.";
		refuse = 0;
	}

	/* note, normally the reply to the server is in start_checkpoint() */
	if (refuse)
		req_reject(PBSE_BADSTATE, 0, preq);
	else
		start_checkpoint(pjob, 1, preq);

	/* the message is placed in log_buffer only after the action above */
	strcpy(log_buffer, outcome);
	log_event(PBSEVENT_JOB, PBS_EVENTCLASS_JOB, LOG_DEBUG,
		  pjob->ji_qs.ji_jobid, log_buffer);
}

/**
 * @brief
 *	Write text into a job's output file,
 *	Return a PBS error code.
 *
 *	A new-line is appended when the text does not already end in one
 *	(an empty text therefore results in a single new-line being written).
 *	When is_joined() reports that only one of the job's files is open,
 *	that file is used regardless of jft.
 *	On non-Windows builds the file is opened non-blocking; both the open
 *	and the write are tried up to three times, pausing 250 milliseconds
 *	after an EAGAIN/EWOULDBLOCK.
 *
 * @param[in] pjob - pointer to job structure
 * @param[in] jft  - enum for job_file
 * @param[in] text - message to be written into job's o/p file
 *
 * @return 	PBS error code
 * @retval	PBSE_NONE	No error, the whole text was written
 * @retval	PBSE_UNKJOBID	Unknown Job Identifier (pjob is NULL)
 * @retval	PBSE_MOMREJECT	not Mother Superior for the job, the file
 *				could not be opened, or the text could not be
 *				written completely
 * @retval	PBSE_INTERNAL	memory for the text could not be allocated
 *
 */
int
message_job(job *pjob, enum job_file jft, char *text)
{
	char *pstr = NULL;
	int len;
	int fds = -1;
	ssize_t bytes_written = 0;
	ssize_t total_bytes_written = 0;

	if (pjob == NULL)
		return PBSE_UNKJOBID;

	/* must be Mother Superior for this to make sense */
	if ((pjob->ji_qs.ji_svrflags & JOB_SVFLG_HERE) == 0)
		return PBSE_MOMREJECT;

	len = is_joined(pjob);
	if (len == -1)
		jft = StdErr; /* only have stderr open */
	else if (len == 1)
		jft = StdOut; /* only have stdout open */

#ifdef WIN32
	if ((fds = open_std_file(pjob, jft, O_WRONLY | O_APPEND,
				 pjob->ji_qs.ji_un.ji_momt.ji_exgid)) < 0)
		return PBSE_MOMREJECT;

	/* set to append mode */
	SetFilePointer((HANDLE) _get_osfhandle(fds), (LONG) NULL,
		       (PLONG) NULL, FILE_END);
#else
	int i;
	unsigned int usecs = 250 * 1000; /* 250 milliseconds */
	for (i = 0; i < 3; i++) {
		fds = open_std_file(pjob, jft, O_WRONLY | O_APPEND | O_NONBLOCK,
				    pjob->ji_qs.ji_un.ji_momt.ji_exgid);
		if (fds >= 0)
			break;
		/* only a "would block" failure is worth another attempt */
		if (errno != EAGAIN && errno != EWOULDBLOCK)
			return PBSE_MOMREJECT;
		usleep(usecs);
	}

	if (fds < 0)
		return PBSE_MOMREJECT;
#endif
	len = strlen(text);
	/* len is tested first so that an empty text never reads text[-1] */
	if (len == 0 || text[len - 1] != '\n') {
		if ((pstr = malloc(len + 2)) == NULL) {
			(void) close(fds); /* do not leak the open file */
			return PBSE_INTERNAL;
		}

		(void) strcpy(pstr, text);
		pstr[len++] = '\n'; /* append new-line */
		pstr[len] = '\0';
		text = pstr;
	}
#ifdef WIN32
	total_bytes_written = write(fds, text, len);
	(void) _commit(fds);
#else
	for (i = 0; i < 3; i++) {
		bytes_written = write(fds, text, len - total_bytes_written);
		if (bytes_written <= 0) {
			if (errno == EAGAIN || errno == EWOULDBLOCK)
				usleep(usecs);
			else {
				(void) close(fds);
				free(pstr);
				return PBSE_MOMREJECT;
			}
		} else {
			/* partial write: continue after what went out */
			text += bytes_written;
			total_bytes_written += bytes_written;
			if (total_bytes_written == len)
				break;
		}
	}
#endif
	(void) close(fds);
	free(pstr);

	if (total_bytes_written == len)
		return PBSE_NONE;
	else
		return PBSE_MOMREJECT;
}

/**
 * @brief
 * 	req_messagejob - Append message to job's output/error file
 *
 *	The text goes to the job's standard output when the request names the
 *	default file or has the output flag set, and to standard error when
 *	the error flag is set and nothing has failed so far.  The request is
 *	acknowledged when every write succeeded, otherwise it is rejected
 *	with the error code returned by message_job().
 *
 * @param[in] preq - pointer to batch_request structure
 *
 * @return 	Void
 *
 */

void
req_messagejob(struct batch_request *preq)
{
	job *pjob = find_job(preq->rq_ind.rq_message.rq_jid);
	char *msg = preq->rq_ind.rq_message.rq_text;
	int to_stdout;
	int to_stderr;
	int rc = 0;

	to_stdout = (preq->rq_ind.rq_message.rq_file == PBS_BATCH_FileOpt_Default) ||
		    ((preq->rq_ind.rq_message.rq_file & PBS_BATCH_FileOpt_OFlg) != 0);
	to_stderr = (preq->rq_ind.rq_message.rq_file & PBS_BATCH_FileOpt_EFlg) != 0;

	if (to_stdout)
		rc = message_job(pjob, StdOut, msg);

	/* stderr is only attempted while no error has been seen */
	if (to_stderr && rc == 0)
		rc = message_job(pjob, StdErr, msg);

	if (rc != PBSE_NONE) {
		req_reject(rc, 0, preq);
		return;
	}
	reply_ack(preq);
}

/**
 * @brief
 *	Spawn a Python process.
 *
 *	Runs PBS_EXEC/bin/pbs_python with the arguments and environment from
 *	the request as a new task of the job.  The path of pbs_python is built
 *	once and remembered in a static buffer.  When the task was started,
 *	the request is queued on the task's obit list (as a batch request
 *	event) instead of being answered here; every failure rejects the
 *	request.
 *
 * @param[in] preq - pointer to batch_request structure
 *
 * @return      Void
 *
 */
void
req_py_spawn(struct batch_request *preq)
{
	static char pypath[MAXPATHLEN + 1];
	char *allargs;
	int allarglen = 1;
	struct stat sbuf;
	int ret, i;
	job *pjob;
	pbs_task *ptask;
	char **argv;
	obitent *op;

	pjob = find_job(preq->rq_ind.rq_py_spawn.rq_jid);
	if (pjob == NULL) {
		req_reject(PBSE_UNKJOBID, 0, preq);
		return;
	}

	if (pypath[0] == '\0') { /* initialize pbs_python path */
		ret = snprintf(pypath, sizeof(pypath), "%s/bin/pbs_python",
			       pbs_conf.pbs_exec_path);
		if (ret < 0 || (size_t) ret >= sizeof(pypath)) {
			/* do not remember a truncated path for later calls */
			pypath[0] = '\0';
			snprintf(log_buffer, sizeof(log_buffer),
				 "%s: path of pbs_python is too long", __func__);
			log_event(PBSEVENT_ERROR, PBS_EVENTCLASS_JOB,
				  LOG_WARNING, pjob->ji_qs.ji_jobid, log_buffer);
			req_reject(PBSE_MOMREJECT, 0, preq);
			return;
		}
#ifdef WIN32
		strncat(pypath, ".exe", sizeof(pypath) - strlen(pypath) - 1);
#endif
	}

	/* return error if python is not found in PBS_EXEC/bin */
	if (stat(pypath, &sbuf) == -1) {
		if (errno == ENOENT) {
			snprintf(log_buffer, sizeof(log_buffer),
				 "%s: %s not installed", __func__, pypath);
			log_event(PBSEVENT_ERROR, PBS_EVENTCLASS_JOB,
				  LOG_WARNING, pjob->ji_qs.ji_jobid, log_buffer);
		} else {
			log_err(errno, __func__, pypath);
		}
		req_reject(PBSE_MOMREJECT, 0, preq);
		return;
	}

	/* check to see it is a regular file that is executable */
	/* (by user, group and other alike)                     */
	if (((sbuf.st_mode & S_IFMT) != S_IFREG) ||
	    (sbuf.st_mode & (S_IXUSR | S_IXGRP | S_IXOTH)) !=
		    (S_IXUSR | S_IXGRP | S_IXOTH)) {
		snprintf(log_buffer, sizeof(log_buffer),
			 "%s: %s not executable", __func__, pypath);
		log_event(PBSEVENT_ERROR, PBS_EVENTCLASS_JOB,
			  LOG_WARNING, pjob->ji_qs.ji_jobid, log_buffer);
		req_reject(PBSE_PERM, 0, preq);
		return;
	}

	/* count the number of args (2 for space and null) */
	for (i = 0; preq->rq_ind.rq_py_spawn.rq_argv[i] != NULL; i++)
		allarglen += strlen(preq->rq_ind.rq_py_spawn.rq_argv[i]) + 2;

	/* room for pbs_python itself, the i arguments and the NULL at the end */
	argv = (char **) calloc(i + 2, sizeof(char *));
	if (argv == NULL) {
		req_reject(PBSE_SYSTEM, 0, preq);
		return;
	}
	allargs = (char *) malloc(allarglen);
	if (allargs == NULL) {
		req_reject(PBSE_SYSTEM, 0, preq);
		free(argv);
		return;
	}
	op = (obitent *) malloc(sizeof(obitent));
	if (op == NULL) {
		req_reject(PBSE_SYSTEM, 0, preq);
		free(argv);
		free(allargs);
		return;
	}

	/* fill argv array and create arg string */
	/* allargs will have a trailing blank */
	argv[0] = pypath;
	allargs[0] = '\0';
	for (i = 0; preq->rq_ind.rq_py_spawn.rq_argv[i] != NULL; i++) {
		argv[i + 1] = preq->rq_ind.rq_py_spawn.rq_argv[i];
		strcat(allargs, preq->rq_ind.rq_py_spawn.rq_argv[i]);
		strcat(allargs, " ");
	}
	argv[i + 1] = NULL;

	ptask = momtask_create(pjob);
	if (ptask == NULL) {
		req_reject(PBSE_INTERNAL, 0, preq);
		free(argv);
		free(allargs);
		free(op);
		return;
	}

	strcpy(ptask->ti_qs.ti_parentjobid, preq->rq_ind.rq_py_spawn.rq_jid);
	ptask->ti_qs.ti_parentnode = TM_ERROR_NODE;
	ptask->ti_qs.ti_myvnode = TM_ERROR_NODE;
	ptask->ti_qs.ti_parenttask = TM_INIT_TASK;
	(void) task_save(ptask);

	/* start the task with no demux option */
	ret = start_process(ptask, argv, preq->rq_ind.rq_py_spawn.rq_envp, true);
	free(argv);
	if (ret != PBSE_NONE) {
		snprintf(log_buffer, sizeof(log_buffer),
			 "%s: FAILED %stask %8.8X err %d", __func__,
			 allargs, ptask->ti_qs.ti_task, ret);
		log_event(PBSEVENT_ERROR, PBS_EVENTCLASS_JOB, LOG_WARNING,
			  pjob->ji_qs.ji_jobid, log_buffer);

		req_reject(ret, 0, preq);
		free(allargs);
		free(op);
		return;
	}

	snprintf(log_buffer, sizeof(log_buffer), "%s: args %stask %8.8X", __func__,
		 allargs, ptask->ti_qs.ti_task);
	log_event(PBSEVENT_JOB, PBS_EVENTCLASS_JOB, LOG_DEBUG,
		  pjob->ji_qs.ji_jobid, log_buffer);
	free(allargs);

	/* park the request on the task; it is not answered in this function */
	CLEAR_LINK(op->oe_next);
	append_link(&ptask->ti_obits, &op->oe_next, op);
	op->oe_type = OBIT_TYPE_BREVENT;
	op->oe_u.oe_preq = preq;

	return;
}

/**
 * @brief
 * 	req_modifyjob - service the Modify Job Request
 *	This request modifies a job's attributes.
 *
 *	The new values are first decoded into a scratch copy of the attribute
 *	array; only if that succeeds are they moved into the job.  When all of
 *	exec_vnode, exec_host, exec_host2, SchedSelect and the resource list
 *	were part of the request, the job's node data and PBS_NODEFILE are
 *	rebuilt and the sister moms are told about the update.  Finally the
 *	job limits are re-applied and the job is saved.
 *
 *	An empty attribute list is acknowledged without doing anything.  Any
 *	failure is reported back in the reply; a failure to rebuild the node
 *	data or the node file is answered with the error text.
 *
 * @param[in] preq - pointer to batch_request structure
 *
 * @return 	Void
 *
 */

void
req_modifyjob(struct batch_request *preq)
{
	int bad = 0;
	int i;
	attribute newattr[(int) JOB_ATR_LAST];
	attribute *pattr;
	job *pjob;
	svrattrl *plist;
	int rc;
	int recreate_nodes = 0;
	char *new_peh = NULL;

	pjob = find_job(preq->rq_ind.rq_modify.rq_objname);
	if (pjob == NULL) {
		req_reject(PBSE_UNKJOBID, 0, preq);
		return;
	}

	plist = (svrattrl *) GET_NEXT(preq->rq_ind.rq_modify.rq_attr);
	if (plist == NULL) { /* nothing to do */
		reply_ack(preq);
		return;
	}

	/* modify the jobs attributes */

	bad = 0;
	pattr = pjob->ji_wattr;

	/* call attr_atomic_set to decode and set a copy of the attributes */

	rc = attr_atomic_set(plist, pattr, newattr, job_attr_idx, job_attr_def, JOB_ATR_LAST, -1, ATR_DFLAG_MGWR | ATR_DFLAG_MOM, &bad);
	if (rc) {
		/* leave old values, free the new ones */
		for (i = 0; i < JOB_ATR_LAST; i++)
			free_attr(job_attr_def, &newattr[i], i);
		req_reject(rc, 0, preq);
		return;
	}

	/* OK, now copy the new values into the job attribute array */

	for (i = 0; i < JOB_ATR_LAST; i++) {
		if (newattr[i].at_flags & ATR_VFLAG_MODIFY) {

			if (job_attr_def[i].at_action)
				(void) job_attr_def[i].at_action(&newattr[i],
								 pjob, ATR_ACTION_ALTER);
			free_attr(job_attr_def, &pattr[i], i);
			if ((newattr[i].at_type == ATR_TYPE_LIST) ||
			    (newattr[i].at_type == ATR_TYPE_RESC)) {
				list_move(&newattr[i].at_val.at_list,
					  &(pattr + i)->at_val.at_list);
			} else {
				*(pattr + i) = newattr[i];
			}
			(pattr + i)->at_flags = newattr[i].at_flags;
			if ((i == JOB_ATR_exec_vnode) ||
			    (i == JOB_ATR_exec_host) ||
			    (i == JOB_ATR_exec_host2) ||
			    (i == JOB_ATR_SchedSelect) ||
			    (i == JOB_ATR_resource)) {
				/* all of the above attributes must */
				/*  be received (recreate_nodes == 5) */
				/*  in order to trigger recreation of */
				/*  job nodes and PBS_NODEFILE */
				recreate_nodes++;
				/*
				 * NOTE(review): this break ends the copy loop,
				 * so any modified attribute with a higher index
				 * than the fifth one seen is not copied into
				 * the job - confirm that this is intended.
				 */
				if (recreate_nodes == 5)
					break;
			}
		}
	}

	if (get_jattr_str(pjob, JOB_ATR_exec_host2) != NULL) /* Mom got information from new server */
		new_peh = get_jattr_str(pjob, JOB_ATR_exec_host2);
	else
		new_peh = get_jattr_str(pjob, JOB_ATR_exec_host);

	if (recreate_nodes == 5) {

		/* Send IM_DELETE_JOB2 request to the sister moms not in */
		/* 'new_peh', to kill the job on that sister and */
		/* report resources_used info. */
		(void) send_sisters_inner(pjob, IM_DELETE_JOB2,
					  NULL, new_peh);

		if ((rc = job_nodes(pjob)) != 0) {
			snprintf(log_buffer, sizeof(log_buffer) - 1,
				 "failed updating internal nodes data (rc=%d)", rc);
			log_event(PBSEVENT_ERROR, PBS_EVENTCLASS_JOB,
				  LOG_NOTICE, pjob->ji_qs.ji_jobid,
				  log_buffer);
			reply_text(preq, rc, log_buffer);
			return;
		}

		if (generate_pbs_nodefile(pjob, NULL, 0,
					  log_buffer, LOG_BUF_SIZE - 1) != 0) {
			log_event(PBSEVENT_ERROR, PBS_EVENTCLASS_JOB,
				  LOG_NOTICE, pjob->ji_qs.ji_jobid, log_buffer);
			/*
			 * rc is still 0 here (job_nodes() succeeded), so it
			 * must not be used as the reply code: that would tell
			 * the Server the request worked.
			 */
			reply_text(preq, PBSE_SYSTEM, log_buffer);
			return;
		}
		send_sisters_job_update(pjob);
		pjob->ji_updated = 1;
	}
	/* note, the newattr[] attributes are on the stack, they go away automatically */

	if (rc == 0)
		rc = mom_set_limits(pjob, SET_LIMIT_ALTER);
	if (rc) {
		req_reject(rc, bad, preq);
		return;
	}

	(void) job_save(pjob);
	(void) snprintf(log_buffer, sizeof(log_buffer), msg_manager, msg_jobmod,
			preq->rq_user, preq->rq_host);
	log_event(PBSEVENT_JOB, PBS_EVENTCLASS_JOB, LOG_DEBUG,
		  pjob->ji_qs.ji_jobid, log_buffer);
	reply_ack(preq);
}

/**
 * @brief
 *	req_shutdown - service the Shutdown request.
 *	The request is not carried out: a reject reply with PBSE_NOSUP is
 *	created for it and sent, whatever the request contains.
 *
 * @param[in] preq - pointer to batch_request structure
 *
 * @return 	Void
 *
 */

void
req_shutdown(struct batch_request *preq)
{
	req_reject(PBSE_NOSUP, 0, preq);
}

/**
 * @brief
 * 	Tell whether the job still has an event of type event_com queued on
 *	any of its hosts.
 *
 * @param[in] pjob - pointer to job
 * @param[in] event_com - inter mom request (command) to look for
 *
 * @return 	int
 * @retval	1	at least one such event exists
 * @retval	0 	no such event is left
 *
 */

static int
eventleft(job *pjob, int event_com)
{
	int node;

	DBPRT(("eventleft: %s com %d\n", pjob->ji_qs.ji_jobid, event_com))

	/* walk the event list of every host; the first match settles it */
	for (node = 0; node < pjob->ji_numnodes; node++) {
		eventent *ev;

		for (ev = (eventent *) GET_NEXT(pjob->ji_hosts[node].hn_events);
		     ev != NULL;
		     ev = (eventent *) GET_NEXT(ev->ee_next)) {
			if (ev->ee_command == event_com)
				return 1;
		}
	}
	return 0;
}

/**
 * @brief
 *	Answer the event saved in the job (ji_postevent/ji_taskid) and
 *	then forget it.
 *
 *	When the job has a hosts array, an IM_ALL_OKAY (err == 0) or an
 *	IM_ERROR followed by the error value (err != 0) is written on the
 *	stream of the first host entry and flushed.  In every case where an
 *	event was saved, the saved event and task id are reset afterwards.
 *
 * @param[in] pjob - pointer to job
 * @param[in] err - exit value; 0 means success
 *
 * @return 	Void
 *
 */
void
post_reply(job *pjob, int err)
{
	int im_compose(int, char *, char *, int, tm_event_t, tm_task_id, int);
	int ms_stream;
	int reply_cmd;
	char *job_cookie;

	if (pjob->ji_postevent == TM_NULL_EVENT) /* nothing was saved */
		return;

	if (pjob->ji_hosts != NULL) {
		/*
		 **	I'm a sister and the reply needs to be sent back
		 **	to MS for this operation.
		 */
		ms_stream = pjob->ji_hosts[0].hn_stream; /* MS stream */
		job_cookie = get_jattr_str(pjob, JOB_ATR_Cookie);
		reply_cmd = (err == 0) ? IM_ALL_OKAY : IM_ERROR;

		(void) im_compose(ms_stream, pjob->ji_qs.ji_jobid, job_cookie,
				  reply_cmd, pjob->ji_postevent,
				  pjob->ji_taskid, IM_OLD_PROTOCOL_VER);
		if (err != 0)
			(void) diswsi(ms_stream, err); /* error value follows IM_ERROR */
		(void) dis_flush(ms_stream);
	}

	/* with or without a peer to talk to, the saved event is now consumed */
	pjob->ji_postevent = TM_NULL_EVENT;
	pjob->ji_taskid = TM_NULL_TASK;
}

/**
 * @brief
 * 	Shared bookkeeping for the post_xxx functions.
 *
 * 	A non-zero err marks the job with MOM_SISTER_ERR and rejects the
 * 	batch request saved in the job, if any.  On the node where
 * 	JOB_SVFLG_HERE is set, the function reports that the caller must
 * 	keep waiting while a child is running or events of type event_com
 * 	are still queued; otherwise the saved request is acknowledged.
 * 	Elsewhere the result is passed on through post_reply().
 *
 * @param[in] pjob - pointer to job
 * @param[in] event_com - inter mom request
 * @param[in] err - exit value
 *
 * @return 	int
 * @retval	1	if there are more things to wait for
 * @retval	0 	processing is complete, ji_mompost has been cleared
 *
 */

int
post_action(job *pjob, int event_com, int err)
{
	DBPRT(("post_action: %s com %d err %d\n", pjob->ji_qs.ji_jobid,
	       event_com, err))

	if (err != 0) {
		pjob->ji_flags |= MOM_SISTER_ERR;
		if (pjob->ji_preq != NULL) {
			req_reject(err, 0, pjob->ji_preq);
			pjob->ji_preq = NULL;
		}
	}

	if ((pjob->ji_qs.ji_svrflags & JOB_SVFLG_HERE) == 0) {
		/* not MS: hand the outcome back */
		post_reply(pjob, err);
	} else {
		/*
		 ** MS: a running child or outstanding events mean that
		 ** processing is still going on.
		 */
		if (pjob->ji_momsubt != 0 || eventleft(pjob, event_com))
			return 1;

		/*
		 ** Nothing left to wait for, so this is the final call
		 ** for the operation.
		 */
		if (pjob->ji_preq != NULL) {
			reply_ack(pjob->ji_preq);
			pjob->ji_preq = NULL;
		}
	}

	/* everything is done, now is the time to clear ji_mompost */
	pjob->ji_mompost = NULL;

	return 0;
}

/**
 * @brief
 * 	post_suspend - finish a suspend once post_action() reports that
 * 	nothing is left to wait for.
 *
 * 	If a sister error was flagged, the flag is simply cleared.
 * 	Otherwise walltime accounting is stopped, polling is disabled and,
 * 	provided the job has not reached the exiting substate, the job is
 * 	recorded as suspended and the execjob_postsuspend hooks are run.
 *
 * @param[in] pjob - pointer to job
 * @param[in] err - exit value
 *
 * @return	Void
 *
 */

void
post_suspend(job *pjob, int err)
{
	mom_hook_input_t hook_input;
	mom_hook_output_t hook_output;
	char hook_msg[HOOK_MSG_SIZE + 1];
	hook *last_phook = NULL;
	unsigned int hook_fail_action = 0;
	int reject_errcode = 0;

	DBPRT(("post_suspend: %s err %d\n", pjob->ji_qs.ji_jobid, err))

	if (post_action(pjob, IM_SUSPEND, err))
		return;

	if (pjob->ji_flags & MOM_SISTER_ERR) {
		/* the suspend failed somewhere; just reset the marker */
		pjob->ji_flags &= ~MOM_SISTER_ERR;
		return;
	}

	stop_walltime(pjob);
	pjob->ji_polltime = 0; /* don't check polling */

	if (get_job_substate(pjob) >= JOB_SUBSTATE_EXITING) {
		snprintf(log_buffer, sizeof(log_buffer),
			 "This job can't be suspended, since the job was in %ld substate", get_job_substate(pjob));
		log_event(PBSEVENT_JOB, PBS_EVENTCLASS_JOB, LOG_INFO, pjob->ji_qs.ji_jobid,
			  log_buffer);
		return;
	}

	/* record the suspended state before the hooks get to see the job */
	set_job_substate(pjob, JOB_SUBSTATE_SUSPEND);
	pjob->ji_qs.ji_svrflags |= JOB_SVFLG_Suspend;
	(void) job_save(pjob);

	mom_hook_input_init(&hook_input);
	hook_input.pjob = pjob;

	mom_hook_output_init(&hook_output);
	hook_output.reject_errcode = &reject_errcode;
	hook_output.last_phook = &last_phook;
	hook_output.fail_action = &hook_fail_action;

	/* a rejection is only logged; the job stays suspended */
	if (mom_process_hooks(HOOK_EVENT_EXECJOB_POSTSUSPEND,
			      PBS_MOM_SERVICE_NAME, mom_host, &hook_input,
			      &hook_output, hook_msg, sizeof(hook_msg), 1) == 0) {
		snprintf(log_buffer, sizeof(log_buffer),
			 "execjob_postsuspend hook rejected request: %s", hook_msg);
		log_event(PBSEVENT_JOB, PBS_EVENTCLASS_JOB, LOG_INFO, pjob->ji_qs.ji_jobid, log_buffer);
	}
}

/**
 * @brief
 * 	post_resume - finish a resume once post_action() reports that
 * 	nothing is left to wait for.
 *
 * 	If a sister error was flagged, the flag is simply cleared.
 * 	Otherwise the job is put back into the running substate, the
 * 	suspend flag is dropped and the job is saved.
 *
 * @param[in] pjob - pointer to job
 * @param[in] err - exit value
 *
 * @return      Void
 *
 */

void
post_resume(job *pjob, int err)
{
	DBPRT(("post_resume: %s err %d\n", pjob->ji_qs.ji_jobid, err))

	if (post_action(pjob, IM_RESUME, err))
		return;

	if (pjob->ji_flags & MOM_SISTER_ERR) {
		/* the resume failed somewhere; just reset the marker */
		pjob->ji_flags &= ~MOM_SISTER_ERR;
		return;
	}

	/* walltime only needs restarting if the job was marked suspended */
	if (pjob->ji_qs.ji_svrflags & JOB_SVFLG_Suspend)
		start_walltime(pjob);

	/* if I'm not MS, start to check for polling again */
	if (!(pjob->ji_qs.ji_svrflags & JOB_SVFLG_HERE))
		pjob->ji_polltime = time_now;

	set_job_substate(pjob, JOB_SUBSTATE_RUNNING);
	pjob->ji_qs.ji_svrflags &= ~JOB_SVFLG_Suspend;
	(void) job_save(pjob);
}

#if MOM_ALPS

/*
 * Try to minimize latency of suspend/resume. Wait half a second before
 * the first check, and then poll every tenth of a second.
 */

#define ALPS_SWITCH_SLEEP_USECS_LONG (500000)
#define ALPS_SWITCH_SLEEP_USECS_SHORT (100000)

/**
 * @brief
 *	On a Cray, make the requested ALPS switch, and confirm it.
 *
 *	Nothing is done (PBSE_NONE is returned) when the job has no ALPS
 *	reservation.  Otherwise a SWITCH request is issued ("out" for
 *	SUSPEND, "in" for anything else) and ALPS is then polled in this
 *	process until the switch is confirmed, a fatal error is reported,
 *	or the applicable timeout expires.  The first wait between polls is
 *	ALPS_SWITCH_SLEEP_USECS_LONG, later ones ALPS_SWITCH_SLEEP_USECS_SHORT.
 *
 * @param[in]	pjob	job of interest
 * @param[in]	which	SUSPEND/RESUME
 *
 * @return	int
 * @retval	PBSE_NONE	no error (including the "EMPTY" cases that are
 *				treated as success)
 * @retval	PBSE_ALPS_SWITCH_ERR	the SWITCH request failed, the status
 *				query failed, or confirmation timed out
 */
static int
do_cray_susres_conf(job *pjob, int which)
{
	/**
	 * On a Cray, we need to send an ALPS SWITCH request to move the jobs
	 * to a suspend or resume state.
	 */
	basil_switch_action_t action;
	int i;
	int rc = 0;
	time_t total_time = 0;
	time_t begin_time = 0;
	time_t end_time = 0;
	int timeout_val = alps_confirm_switch_timeout;
	int first_status;
	int first_status_was_empty = 0;
	int first_sleep;

	/* Check if there is an ALPS reservation to act on.
	 * If not, just return PBSE_NONE.
	 */
	if (pjob->ji_extended.ji_ext.ji_reservation <= 0) {
		return PBSE_NONE;
	}

	if (which == SUSPEND)
		action = basil_switch_action_out;
	else
		action = basil_switch_action_in;

	/* negative rc is logged as fatal, positive rc as transient; both fail */
	rc = alps_suspend_resume_reservation(pjob, action);
	if (rc < 0) {
		sprintf(log_buffer, "Fatal ALPS SWITCH request.");
		log_event(PBSEVENT_SYSTEM, PBS_EVENTCLASS_JOB, LOG_ERR,
			  pjob->ji_qs.ji_jobid, log_buffer);
		return (PBSE_ALPS_SWITCH_ERR);
	}
	if (rc > 0) {
		sprintf(log_buffer, "Transient ALPS SWITCH error, the "
				    "prior SWITCH method has not yet completed.");
		log_event(PBSEVENT_DEBUG, PBS_EVENTCLASS_JOB, LOG_ERR,
			  pjob->ji_qs.ji_jobid, log_buffer);
		return (PBSE_ALPS_SWITCH_ERR);
	}

	/* The call to ALPS SWITCH was successful
	 * Now we have to poll to confirm the SWITCH happens
	 * We will assume that the ALPS suspend happens "relatively quickly"
	 * as per the Cray ALPS folks, and we will poll for a successful
	 * suspend state here.  If the confirmation takes a while, then PBS
	 * may need to somehow poll for the confirmation without tying up
	 * the mom.
	 *
	 * Keep trying in this process (don't fork a child) until the SWITCH
	 * completes, or a hard error is returned, or
	 * alps_confirm_switch_timeout is reached.
	 * alps_confirm_switch_timeout is set by default to
	 * ALPS_CONF_SWITCH_TIMEOUT
	 * NOTE:  The MOM, server and scheduler are blocked while we poll
	 * ALPS and wait for the SWITCH.
	 */
	first_status = 1;
	first_sleep = 1;
	begin_time = time(NULL);
	end_time = begin_time;
	i = 0;
	do {
		i++; /* counts confirmation attempts for the log messages */
		/* rc <= 0 ends polling: 0 is confirmed, negative is a failed query */
		if ((rc = alps_confirm_suspend_resume(pjob, action)) <= 0)
			break;
		if (rc == 2) {
			/* we got a response of "EMPTY" */
			if (first_status) {
				/*
				 * Alps may report EMPTY reservation for the first
				 * time we query it. So for the first time we
				 * need to poll for alps_confirm_empty_timeout
				 * time in hopes of letting the Cray race
				 * condition work itself out.  Once we hit the
				 * timeout we assume that there are no ALPS
				 * claims (i.e. apruns) on the reservation and
				 * the suspend can proceed.
				 */
				timeout_val = alps_confirm_empty_timeout;
				first_status_was_empty = 1;
			}
			if (!first_status_was_empty) {
				/* The first status response wasn't EMPTY and
				 * the status is now EMPTY.  According to
				 * Cray this means PBS can proceed
				 * as if the switch request was successful.
				 */
				break;
			}
		} else {
			/* Reset the timeout_val if we get anything besides
			 * "EMPTY"
			 */
			timeout_val = alps_confirm_switch_timeout;
		}
		/* Getting a transient error, sleep then retry */
		if (first_sleep) {
			usleep(ALPS_SWITCH_SLEEP_USECS_LONG);
			first_sleep = 0;
		} else {
			usleep(ALPS_SWITCH_SLEEP_USECS_SHORT);
		}
		end_time = time(NULL);
		first_status = 0;
	} while ((total_time = end_time - begin_time) < timeout_val);
	/* classify how the loop ended from the last value of rc */
	if (rc == 1) {
		sprintf(log_buffer, "Timed out after %d attempts over %ld "
				    "seconds of attempting to confirm the ALPS "
				    "SWITCH completed.",
			i, total_time);
		log_event(PBSEVENT_DEBUG, PBS_EVENTCLASS_JOB, LOG_ERR,
			  pjob->ji_qs.ji_jobid, log_buffer);
		return (PBSE_ALPS_SWITCH_ERR);
	} else if ((rc == 2) && first_status_was_empty) {
		sprintf(log_buffer, "Timed out after %d attempts over "
				    "%ld seconds of waiting for a status of EMPTY "
				    "to change.  Proceeding as if the SWITCH "
				    "succeeded.",
			i, total_time);
		log_event(PBSEVENT_DEBUG2, PBS_EVENTCLASS_JOB,
			  LOG_DEBUG, pjob->ji_qs.ji_jobid, log_buffer);
		return (PBSE_NONE);
	} else if (rc < 0) {
		sprintf(log_buffer, "Fatal ALPS QUERY of status.");
		log_event(PBSEVENT_SYSTEM, PBS_EVENTCLASS_JOB, LOG_ERR,
			  pjob->ji_qs.ji_jobid, log_buffer);
		return (PBSE_ALPS_SWITCH_ERR);
	} else {
		/* the SWITCH has completed successfully */
		sprintf(log_buffer, "The SWITCH was confirmed after a total "
				    "of %d attempts, and %ld seconds.",
			i, total_time);
		log_event(PBSEVENT_DEBUG3, PBS_EVENTCLASS_JOB, LOG_DEBUG,
			  pjob->ji_qs.ji_jobid, log_buffer);
	}

	return PBSE_NONE;
}
#endif /* MOM_ALPS */

/**
 * @brief
 *	Suspend or resume every task of a job by signalling it.
 *
 *	Each task on the job's task list is sent suspend_signal (SUSPEND)
 *	or resume_signal (otherwise).  If signalling any task fails
 *	(kill_task() returns a negative value), the opposite signal is sent
 *	to all tasks to set things back, errno is restored to the value
 *	seen at the first failure, and PBSE_SYSTEM is returned.
 *
 *	In MOM_ALPS builds the ALPS reservation is switched out before the
 *	tasks are signalled on SUSPEND, and switched in after they have
 *	been signalled on RESUME.
 *
 * @param[in] pjob - pointer to job
 * @param[in] which - indication for whether SUSPEND/RESUME
 *
 * @return	int PBSE error number
 * @retval	PBSE_NONE	no error
 * @retval	PBSE_SYSTEM	pjob is NULL or a task could not be signalled
 * @retval	PBSE_ALPS_SWITCH_ERR	(MOM_ALPS only) the ALPS switch failed
 *
 */

int
do_susres(job *pjob, int which)
{
	pbs_task *ptask;
	int rc = 0;
	int failed = 0; /* set once any task could not be signalled */
	int err = 0;	/* errno captured at the first failure */

	if (pjob == NULL) {
		log_event(PBSEVENT_SYSTEM, PBS_EVENTCLASS_JOB,
			  LOG_ERR, "do_susres", "The job information is NULL");
		return (PBSE_SYSTEM);
	}

#if MOM_ALPS
	/* if we're trying to suspend, then ask ALPS to suspend, before
	 * we send the signal to the processes
	 */
	if (which == SUSPEND) {
		if ((rc = do_cray_susres_conf(pjob, which)) != PBSE_NONE) {
			/* We failed to do the suspend */
			return PBSE_ALPS_SWITCH_ERR;
		}
	}

	/* Continue on through the code.  Let the signal get sent to the job */

#endif /* MOM_ALPS */

	for (ptask = (pbs_task *) GET_NEXT(pjob->ji_tasks);
	     ptask != NULL;
	     ptask = (pbs_task *) GET_NEXT(ptask->ti_jobtask)) {

		rc = (which == SUSPEND) ? kill_task(ptask, suspend_signal, 1) : kill_task(ptask, resume_signal, 0);
		if (rc < 0 && !failed) {
			/*
			 * Latch the failure here: rc is overwritten by the
			 * next task, and errno may be changed by later calls.
			 */
			err = errno;
			failed = 1;
		}
		DBPRT(("%s: %s of task %8.8X rc %d\n", __func__,
		       (which == SUSPEND) ? "suspend" : "resume",
		       ptask->ti_qs.ti_task, rc))
	}
	if (failed) {
		/* error recovery, set things back */
		for (ptask = (pbs_task *) GET_NEXT(pjob->ji_tasks);
		     ptask != NULL;
		     ptask = (pbs_task *) GET_NEXT(ptask->ti_jobtask)) {
			if (which == SUSPEND)
				kill_task(ptask, resume_signal, 0);
			else
				kill_task(ptask, suspend_signal, 1);
		}
		errno = err; /* report the original cause to the caller */
		return PBSE_SYSTEM;
	}

#if MOM_ALPS
	/*
	 * We're trying to resume, we already sent the signal to the processes
	 * now we tell ALPS to resume the ALPS reservation
	 */
	if (which == RESUME) {
		if ((rc = do_cray_susres_conf(pjob, which)) != PBSE_NONE) {
			/* We failed to do the resume */
			return PBSE_ALPS_SWITCH_ERR;
		}
	}
#endif /* MOM_ALPS */

	return PBSE_NONE;
}

/**
 * @brief
 * 	Common suspend/resume handling for the tasks that are local.
 *
 * 	For a resume the execjob_preresume hooks are run first; a rejection
 * 	is logged, errno is set to the hook's reject code and the request
 * 	is refused.  The request is also refused when the job already has a
 * 	child running or a post function pending.  Otherwise the work is
 * 	delegated to do_susres().
 *
 * @param[in]	pjob	job of interest
 * @param[in]	which	SUSPEND/RESUME
 * @param[in]	preq	batch request (only used for the debug trace)
 *
 * @return 	int
 * @retval	0	no error
 * @retval	!0	PBS error code
 *
 */
int
local_supres(job *pjob, int which, struct batch_request *preq)
{
	DBPRT(("%s: %s %s %s request\n", __func__, pjob->ji_qs.ji_jobid,
	       which == SUSPEND ? "suspend" : "resume",
	       preq == NULL ? "no" : "with"))

	if (which == RESUME) {
		char hook_msg[HOOK_MSG_SIZE + 1];
		mom_hook_input_t hook_in;
		mom_hook_output_t hook_out;
		int reject_errcode = 0;
		unsigned int hook_fail_action = 0;
		hook *last_phook = NULL;

		mom_hook_input_init(&hook_in);
		hook_in.pjob = pjob;

		mom_hook_output_init(&hook_out);
		hook_out.reject_errcode = &reject_errcode;
		hook_out.last_phook = &last_phook;
		hook_out.fail_action = &hook_fail_action;

		if (mom_process_hooks(HOOK_EVENT_EXECJOB_PRERESUME,
				      PBS_MOM_SERVICE_NAME, mom_host, &hook_in,
				      &hook_out, hook_msg, sizeof(hook_msg), 1) == 0) {
			/* hook said no: log why and pass its code back via errno */
			snprintf(log_buffer, sizeof(log_buffer),
				 "execjob_preresume hook rejected request: %s", hook_msg);
			log_event(PBSEVENT_JOB, PBS_EVENTCLASS_JOB, LOG_INFO, pjob->ji_qs.ji_jobid, log_buffer);
			errno = reject_errcode;
			return (PBSE_MOMREJECT);
		}
	}

	/*
	 ** Refuse if something is already going on for this job.
	 */
	if (pjob->ji_momsubt != 0)
		return PBSE_MOMREJECT;
	if (pjob->ji_mompost != NULL)
		return PBSE_MOMREJECT;

	return do_susres(pjob, which);
}

/**
 * @brief
 * 	susp_resum - carry out a suspend or resume request for a job
 *
 * 	A job already suspended for keyboard activity only has its suspend
 * 	flag updated.  Otherwise the local tasks are handled through
 * 	local_supres() and, for a multi-node job, the matching inter mom
 * 	command is sent to the sisters; in that case the reply is deferred
 * 	to the post function.  A purely local operation is completed and
 * 	acknowledged right away.
 *
 * @param[in] pjob - pointer to job
 * @param[in] which - SUSPEND/RESUME
 * @param[in] preq - pointer to batch_request structure
 *
 * @return 	Void
 *
 */

static void
susp_resum(job *pjob, int which, struct batch_request *preq)
{
	int rc;

	DBPRT(("susp_resum: %s %s %s request\n", pjob->ji_qs.ji_jobid,
	       which == SUSPEND ? "suspend" : "resume",
	       preq == NULL ? "no" : "with"))

	/* if already suspended for keyboard activity, just set/clear flag */
	if (pjob->ji_qs.ji_svrflags & JOB_SVFLG_Actsuspd) {
		if (which == SUSPEND)
			pjob->ji_qs.ji_svrflags |= JOB_SVFLG_Suspend;
		else /* keep suspended for keyboard activity */
			pjob->ji_qs.ji_svrflags &= ~JOB_SVFLG_Suspend;
		(void) job_save(pjob);
		reply_ack(preq);
		return;
	}

	/* do suspend/resume of local tasks */
	rc = local_supres(pjob, which, preq);
	if (rc != PBSE_NONE) {
		req_reject(rc, errno, preq);
		return;
	}

	/*
	 ** If there is a sisterhood, send command.
	 */
	if (pjob->ji_numnodes > 1) {
		int im_cmd = (which == SUSPEND) ? IM_SUSPEND : IM_RESUME;
		int sent = send_sisters(pjob, im_cmd, NULL);

		/* at least one sister was contacted: expect replies later */
		if (sent > 0)
			pjob->ji_mompost = (which == SUSPEND) ? post_suspend : post_resume;

		/* not every sister could be reached */
		if (sent != (pjob->ji_numnodes - 1)) {
			pjob->ji_flags |= MOM_SISTER_ERR;
			req_reject(PBSE_SYSTEM, errno, preq);
			return;
		}
		pjob->ji_preq = preq;
	}

	if (pjob->ji_mompost != NULL) /* later action */
		return;

	/* local only: complete the operation now */
	if (which == SUSPEND)
		post_suspend(pjob, 0);
	else
		post_resume(pjob, 0);

	reply_ack(preq);
}

/**
 * @brief
 *	Post function for the terminate action of a job.
 *
 *	A zero exit value needs no further work.  On failure the problem is
 *	logged and the job is killed with SIGKILL; if kill_job() returns 0
 *	the job is forced into the exiting substate.
 *
 * @param[in] pjob - pointer to job
 * @param[in] err - exit value
 *
 * @return 	Void
 *
 */
void
post_terminate(job *pjob, int err)
{
	if (err == 0)
		return;

#ifdef WIN32
	log_event(PBSEVENT_ADMIN, PBS_EVENTCLASS_JOB, LOG_INFO,
		  pjob->ji_qs.ji_jobid,
		  (err == -1) ? "Terminate script failed to exit in allocated time"
			      : "Terminate script exited with non-zero status");
#else
	log_event(PBSEVENT_ADMIN, PBS_EVENTCLASS_JOB, LOG_INFO,
		  pjob->ji_qs.ji_jobid, "Terminate action failed");
#endif

	/* kill job */
	if (kill_job(pjob, SIGKILL) == 0) {
		/* no processes around, force into exiting */
		set_job_substate(pjob, JOB_SUBSTATE_EXITING);
		exiting_tasks = 1;
	}
}

/**
 * @brief
 * 	terminate_job - terminate a job
 *	If there is a site supplied script as given by "$action terminate"
 *	and it could be started, the job is placed in the special
 *	exiting/term state while the script runs.
 *	Note: This function is invoked on Mother Superior only.
 *
 *	If there is no script, or starting it did not return 1, the normal
 *	termination is done: kill_job() with SIGTERM (or SIGKILL when
 *	internal is -1).
 *
 *	The job is saved before returning in every case.
 *
 * @param[in] pjob - pointer to job
 * @param[in] internal - 1 if Mom terminating job for being overlimit
 *		         0 if Server terminating job
 *			 ( in both cases, SIGTERM is used )
 *		        -1 internal and use SIGKILL
 *
 * @return 	int
 * @retval	1	if script running (see do_mom_action_script())
 * @retval	-2 	no script was started; the job was signalled instead
 *
 */

int
terminate_job(job *pjob, int internal)
{
	int sig;

	if (internal == 1)
		pjob->ji_qs.ji_svrflags |= JOB_SVFLG_OVERLMT1;

	/* set overlimit time stamp by adding kill_delay and time_now. */
	pjob->ji_overlmt_timestamp = time_now + get_jattr_long(pjob, JOB_ATR_job_kill_delay);

	if (chk_mom_action(TerminateAction) == Script &&
	    do_mom_action_script(TerminateAction, pjob, NULL, NULL,
				 post_terminate) == 1) {
		/* the site script is running; post_terminate() follows up */
		set_job_state(pjob, JOB_STATE_LTR_EXITING);
		set_job_substate(pjob, JOB_SUBSTATE_TERM);
		(void) job_save(pjob);
		return 1;
	}

	if (internal == -1) {
		sig = SIGKILL;
	} else {
		extern int next_sample_time;
		extern int min_check_poll;

		/* The job is going to be terminated by TERM */
		sig = SIGTERM;
		/* set the TERMJOB flag */
		pjob->ji_qs.ji_svrflags |= JOB_SVFLG_TERMJOB;
		/* poll ASAP in case job ignores SIGTERM */
		next_sample_time = min_check_poll;
	}

	/* no processes around, time to exit */
	if (kill_job(pjob, sig) == 0)
		exiting_tasks = 1;

	(void) job_save(pjob);
	return -2;
}

/**
 * @brief
 *	Issue a specified signal to a job.
 *
 * @par Functionality:
 *	Server has requested that a real or pseudo (made up for PBS) signal
 *	be issued to a job. Real signals (see qsig command) may be specified
 *	by number (numeric string), or by name with or without the "SIG" prefix.
 *	Additional processing may be required depending on the signal:
 *	- a job in the OBIT substate gets its obit resent instead;
 *	- while the epilogue runs only the name "SIGKILL" is accepted;
 *	- the terminate/rerun pseudo signals go through terminate_job();
 *	- the suspend/resume pseudo signals go through susp_resum();
 *	- anything else is looked up and delivered with kill_job().
 *
 *	A reply (ack or reject) is sent for the request on every path,
 *	except where susp_resum() defers it.
 *
 * @param[in]	preq  - pointer to the batch request structure which contains
 *		jobid and signal.
 *
 * @return	none
 */

void
req_signaljob(struct batch_request *preq)
{
	job *pjob;
	pbs_task *ptask;
	int sig;
	char *sname;
	struct sig_tbl *psigt;
	extern struct sig_tbl sig_tbl[];
	mom_hook_input_t hook_input;
	mom_hook_output_t hook_output;
	char hook_msg[HOOK_MSG_SIZE + 1];
	hook *last_phook = NULL;
	unsigned int hook_fail_action = 0;
	int reject_errcode = 0;

	sname = preq->rq_ind.rq_signal.rq_signame;
	pjob = find_job(preq->rq_ind.rq_signal.rq_jid);

	if (pjob == NULL) {
		req_reject(PBSE_UNKJOBID, 0, preq);
		return;
	}

	sprintf(log_buffer, "signal job with %s", sname);
	log_event(PBSEVENT_ADMIN, PBS_EVENTCLASS_JOB, LOG_INFO,
		  pjob->ji_qs.ji_jobid, log_buffer);

	/**
	 *	Apparently the Server didn't receive or process an Obit sent earlier.
	 *	Just force a resend of the obit.
	 */
	if (check_job_substate(pjob, JOB_SUBSTATE_OBIT)) {
		send_obit(pjob, 0);
		if (strcmp(sname, SIG_RESUME) == 0)
			req_reject(PBSE_BADSTATE, 0, preq);
		else
			reply_ack(preq);
		return;
	} else if ((check_job_substate(pjob, JOB_SUBSTATE_RUNEPILOG)) &&
		   (strcmp(sname, "SIGKILL") != 0)) {
		/* If epilogue is running and signal is not SIGKILL, */
		/* disallow request;  note SIGKILL sent on qdel -w force */
		req_reject(PBSE_BADSTATE, 0, preq);
		return;
	}

	if ((strcmp(sname, SIG_TermJob) == 0) ||
	    (strcmp(sname, SIG_RERUN) == 0)) {

		if (strcmp(sname, SIG_TermJob) == 0) {
			/* give the execjob_preterm hooks a chance to veto */
			mom_hook_input_init(&hook_input);
			hook_input.pjob = pjob;

			mom_hook_output_init(&hook_output);
			hook_output.reject_errcode = &reject_errcode;
			hook_output.last_phook = &last_phook;
			hook_output.fail_action = &hook_fail_action;

			if (mom_process_hooks(HOOK_EVENT_EXECJOB_PRETERM,
					      PBS_MOM_SERVICE_NAME, mom_host,
					      &hook_input, &hook_output,
					      hook_msg, sizeof(hook_msg), 1) == 0) {
				reply_text(preq, PBSE_HOOK_REJECT, hook_msg);
				return;
			}
		}
		/**
		 *		PBS pseudo signal for either:
		 *		job termination, sent when a qdel is issued on a running job; or
		 *		rerunning (requeuing) a job, sent on qrerun.
		 */
		if (strcmp(sname, SIG_RERUN) == 0) {
			/* Set RERUN exit value */
			pjob->ji_qs.ji_un.ji_momt.ji_exitstat = JOB_EXEC_RERUN;
		}

		/**
		 *		For both terminate and rerun terminate the job either via
		 *		running the terminate action script or by sending  a
		 *		SIGTERM-delay-SIGKILL sequence
		 */
		if (terminate_job(pjob, 0) == 1) {
			/* let server know (via ..._TERM) that a site
			 * script is being run
			 *
			 * The reason for the req_reject(PBSE_NONE, ..) call
			 * was that req_reject (unlike reply_ack()) allows MOM
			 * to pass back JOB_SUBSTATE_TERM to the server.
			 */
			req_reject(PBSE_NONE, JOB_SUBSTATE_TERM, preq);
		} else {
			reply_ack(preq);
		}
		return;
	} else if (strcmp(sname, SIG_SUSPEND) == 0 || strcmp(sname, SIG_ADMIN_SUSPEND) == 0) {
		/**
		 *		PBS pseudo signal to suspend a running job.
		 *		Job must be actually running.
		 */
		if ((pjob->ji_qs.ji_svrflags & JOB_SVFLG_Suspend) != 0) {
			sprintf(log_buffer, "suspend failed: %s",
				"server indicates job is already suspended");
			log_event(PBSEVENT_JOB, PBS_EVENTCLASS_JOB, LOG_INFO,
				  pjob->ji_qs.ji_jobid, log_buffer);
			req_reject(PBSE_BADSTATE, 0, preq);
			return;
		}
		switch (get_job_substate(pjob)) {
			case JOB_SUBSTATE_RUNNING:
				break;
			default:
				sprintf(log_buffer, "suspend failed, job substate = %ld",
					get_job_substate(pjob));
				log_event(PBSEVENT_JOB, PBS_EVENTCLASS_JOB, LOG_INFO,
					  pjob->ji_qs.ji_jobid, log_buffer);
				req_reject(PBSE_BADSTATE, 0, preq);
				return;
		}
		susp_resum(pjob, 1, preq);
		return;
	} else if (strcmp(sname, SIG_RESUME) == 0 || strcmp(sname, SIG_ADMIN_RESUME) == 0) {
		/**
		 *		PBS pseudo signal to resume a suspended job.
		 */
		if ((pjob->ji_qs.ji_svrflags & JOB_SVFLG_Suspend) == 0) {
			sprintf(log_buffer, "resume failed: %s",
				"server indicates job is not suspended");
			log_event(PBSEVENT_JOB, PBS_EVENTCLASS_JOB, LOG_INFO,
				  pjob->ji_qs.ji_jobid, log_buffer);
			req_reject(PBSE_BADSTATE, 0, preq);
			return;
		}
		switch (get_job_substate(pjob)) {
			case JOB_SUBSTATE_SUSPEND:
			case JOB_SUBSTATE_SCHSUSP:
				break;
			default:
				sprintf(log_buffer, "resume failed, job substate = %ld",
					get_job_substate(pjob));
				log_event(PBSEVENT_JOB, PBS_EVENTCLASS_JOB, LOG_INFO,
					  pjob->ji_qs.ji_jobid, log_buffer);
				req_reject(PBSE_BADSTATE, 0, preq);
				return;
		}
		susp_resum(pjob, 0, preq);
		return;
	}

	/**
	 *	From here on, we are dealing with a "real" signal.  It is sent to all
	 *	processes in the job.
	 */
	/*
	 * The signal name comes from the request, so go through unsigned char:
	 * passing a negative char value to isdigit() is undefined behavior.
	 */
	if (isdigit((unsigned char) *sname))
		sig = atoi(sname);
	else {
		if (!strncmp("SIG", sname, 3))
			sname += 3;
		/*
		 * Walk the table up to its entry with a NULL name; when no name
		 * matches, that last entry's sig_val is what ends up in sig.
		 */
		psigt = sig_tbl;
		while (psigt->sig_name) {
			if (!strcmp(sname, psigt->sig_name))
				break;
			psigt++;
		}
		sig = psigt->sig_val;
	}
	if (sig < 0) {
		req_reject(PBSE_UNKSIG, 0, preq);
		return;
	}
#ifdef SIGKILL
	if ((sig != SIGKILL) &&
	    (!check_job_substate(pjob, JOB_SUBSTATE_RUNNING)))
#else
	if (!check_job_substate(pjob, JOB_SUBSTATE_RUNNING))
#endif
	{
		sprintf(log_buffer, "cannot signal job, job substate = %ld",
			get_job_substate(pjob));
		log_event(PBSEVENT_JOB, PBS_EVENTCLASS_JOB, LOG_INFO,
			  pjob->ji_qs.ji_jobid, log_buffer);
		req_reject(PBSE_BADSTATE, 0, preq);
		return;
	}
	/* Now, send signal to the MOM's child process */
	if (kill_job(pjob, sig) == 0) {
		if ((get_job_substate(pjob) <= JOB_SUBSTATE_EXITING) ||
		    (check_job_substate(pjob, JOB_SUBSTATE_TERM))) {
			/* No procs found, force job to exiting */
			/* force issue of (another) job obit */
			(void) sprintf(log_buffer,
				       "Job recycled into exiting on signal from substate %ld",
				       get_job_substate(pjob));
			log_event(PBSEVENT_ERROR, PBS_EVENTCLASS_JOB, LOG_INFO,
				  pjob->ji_qs.ji_jobid, log_buffer);
			set_job_substate(pjob, JOB_SUBSTATE_EXITING);
			ptask = GET_NEXT(pjob->ji_tasks);
			if (ptask)
				ptask->ti_qs.ti_status = TI_STATE_EXITED;
			exiting_tasks = 1;
		}
	}

	reply_ack(preq);
	return;
}

/**
 * @brief
 *	Remove a file (or tree) which is specified by path and owned by user.
 *
 *	Both path and prmt are normalized in place with fix_path().  When
 *	prmt turns out to be local and refers to the same file as path,
 *	nothing is deleted, so that a stage-out onto itself does not
 *	destroy the file.  A path that does not exist counts as success.
 *	On failure a message is logged and added to the bad file list.
 *
 * @param[in] path - path for file to be deleted
 * @param[in] user - user name, used in the failure message
 * @param[in] prmt - the other (source/destination) name of the file pair
 * @param[in] bad_list - pointer to bad file list
 *
 * @return	int
 * @retval	0	success
 * @retval	!0	failure: the errno left by remtree(), or remtree()'s
 *			own return value if errno was not set
 *
 */

static int
delete_file(char *path, char *user, char *prmt, char **bad_list)
{
	int rc;
	int saved_errno;

	DBPRT(("%s: path %s\n", __func__, path))
	fix_path(prmt, 3);
	fix_path(path, 3);
	if (local_or_remote(&prmt) == 0) {
		/* local file, is the source == destination? */
		/* if so, don't delete it		     */
		if (is_file_same(prmt, path) == 1) {
			DBPRT(("%s: path same as %s\n", __func__, prmt))
			return 0;
		}
	}

	rc = remtree(path);
	/* grab errno now; the logging calls below are free to change it */
	saved_errno = errno;
	if (rc == -1 && saved_errno == ENOENT)
		rc = 0;

	if (rc != 0) {
		/* path may be nearly as long as log_buffer, so bound the write */
		snprintf(log_buffer, sizeof(log_buffer),
			 "Unable to delete file %s for user %s, error = %d",
			 path, user, saved_errno);
		log_event(PBSEVENT_JOB, PBS_EVENTCLASS_REQUEST,
			  LOG_INFO, __func__, log_buffer);
		add_bad_list(bad_list, log_buffer, 2);
		/* never report a failed delete as 0 */
		if (saved_errno != 0)
			rc = saved_errno;
	} else {
		DBPRT(("%s: Deleted file %s\n", __func__, path))
	}
	return rc;
}

/**
 * @brief
 *	Append src to the NUL-terminated string held in dst, refusing to
 *	write past the end of dst.
 *
 * @param[in,out] dst - destination buffer holding a terminated string
 * @param[in] dstsz - total size of dst in bytes
 * @param[in] src - string to append
 *
 * @return int
 * @retval 0 - src was appended in full
 * @retval -1 - src does not fit; dst is left unchanged
 *
 */
static int
del_files_append(char *dst, size_t dstsz, const char *src)
{
	size_t used = strlen(dst);
	size_t need = strlen(src);

	if (used >= dstsz || need >= dstsz - used)
		return -1;
	memcpy(dst + used, src, need + 1);
	return 0;
}

/**
 * @brief
 *	delete the files in a copy files or delete files request
 *	WARNING: fork_to_user() must be called first so that useruid/gid is set up
 *
 *	For every file pair the local path is built (prefixed with the
 *	spool directory for standard job files unless sandbox is private),
 *	extended with the last segment of the remote name when it is a
 *	directory, and then removed through delete_file().  A last path
 *	component containing '*' or '?' is expanded against its parent
 *	directory; a request to wipe the whole directory ("*" or "./*")
 *	is skipped.  Names that would not fit in a MAXPATHLEN buffer are
 *	logged, skipped and reported as ENAMETOOLONG.
 *
 * @param[in] rqcpf - pointer to file list structure from request
 * @param[in] pjob - pointer to job structure (can be null)
 * @param[out] pbadfile - pointer to bad file list
 *
 * @return int
 * @retval 0 - success
 * @retval -1 - sandbox is private but no job was supplied
 * @retval errno - failure (the last non-zero result seen).
 *
 */
static int
del_files(struct rq_cpyfile *rqcpf, job *pjob, char **pbadfile)
{
	struct rqfpair *pair = NULL;
	int rc = 0;
	int ret = 0;
	char path[MAXPATHLEN + 1] = {'\0'};
	struct stat sb = {0};
	char dname[MAXPATHLEN + 1] = {'\0'};
	char matched[MAXPATHLEN + 1] = {'\0'};
	char rmt_file[MAXPATHLEN + 1] = {'\0'};
	char local_file[MAXPATHLEN + 1] = {'\0'};
	char *ps = NULL;
	char *pp = NULL;
	DIR *dirp = NULL;
	struct dirent *pdirent = NULL;
	char prmt[MAXPATHLEN + 1] = {'\0'};
	int sandbox_private = 0;

	DBPRT(("%s: entered\n", __func__))
	/*
	 * Should be running in the user's home directory.
	 * Build up path of file using local name only, then unlink it.
	 * The first set of files may have the STDJOBFILE
	 * flag set, which we need to unlink as root, the others as the user.
	 * This is changed from the past.  We no longer delete
	 * checkpoint files here.
	 */
	sandbox_private = (rqcpf->rq_dir & STAGE_JOBDIR) ? TRUE : FALSE;
	/* When sandbox=private, chdir to job directory */
	if (sandbox_private) {
		if (!pjob) {
			log_eventf(PBSEVENT_JOB, PBS_EVENTCLASS_REQUEST, LOG_INFO, __func__, "%s: no job information", rqcpf->rq_jobid);
			return -1;
		}
		if (pjob->ji_grpcache)
			pbs_jobdir = jobdirname(rqcpf->rq_jobid, pjob->ji_grpcache->gc_homedir);
		else
			pbs_jobdir = jobdirname(rqcpf->rq_jobid, NULL);
		/* a failed chdir is only logged; processing continues as before */
		if (chdir(pbs_jobdir) == -1)
			log_errf(-1, __func__, "chdir failed. ERR : %s", strerror(errno));
	}

	pair = (struct rqfpair *) GET_NEXT(rqcpf->rq_pair);
	for (; pair; pair = (struct rqfpair *) GET_NEXT(pair->fp_link)) {

		replace(pair->fp_rmt, "\\,", ",", rmt_file);
		if (*rmt_file != '\0')
			pbs_strncpy(prmt, rmt_file, sizeof(prmt));
		else
			pbs_strncpy(prmt, pair->fp_rmt, sizeof(prmt));
		path[0] = '\0';
		if (pair->fp_flag == STDJOBFILE) { /* standard out or error */
#ifndef NO_SPOOL_OUTPUT
			if (!sandbox_private) {
				DBPRT(("%s:, STDJOBFILE in %s\n", __func__, path_spool))
				pbs_strncpy(path, path_spool, sizeof(path));
			}
#endif /* NO_SPOOL_OUTPUT */
		}
		replace(pair->fp_local, "\\,", ",", local_file);
		/* the spool prefix plus the local name may not fit in path */
		if (del_files_append(path, sizeof(path),
				     (*local_file != '\0') ? local_file : pair->fp_local) != 0) {
			log_eventf(PBSEVENT_JOB, PBS_EVENTCLASS_REQUEST, LOG_INFO, __func__, "path too long, not deleting %s", pair->fp_local);
			ret = ENAMETOOLONG;
			continue;
		}
		DBPRT(("%s: path %s\n", __func__, path))

		/* will have to fix this for O_WORKDIR - or change O_WORKDIR behavior */
		/* O_WORKDIR behavior should match the behavior of HOME */
		/* and delete files one by one */
		if (sandbox_private) {
			if (is_child_path(pbs_jobdir, path) == 1) {
				/* file is under staging and execution dir, */
				/* so defer its removal until staging and */
				/* execution dir removal time */
				continue;
			}
		}

#ifdef WIN32
		if (stat_uncpath(path, &sb) == 0)
#else
		if (stat(path, &sb) == 0)
#endif
		{
			if (S_ISDIR(sb.st_mode)) {

				/* have a directory, must append last segment */
				/* of source name to it for  the unlink	      */

				pp = strrchr(prmt, '/');

				if (pp && *(pp + 1) == '\0') {
					/* reduce /dir/dir/  case to /dir/dir */
					*pp = '\0';
					pp = strrchr(prmt, '/');
				}
				if (pp)
					++pp;
				else if ((pp = strrchr(prmt, ':')) != NULL)
					++pp;
				else
					pp = prmt;

				if (del_files_append(path, sizeof(path), "/") != 0 ||
				    del_files_append(path, sizeof(path), pp) != 0) {
					log_eventf(PBSEVENT_JOB, PBS_EVENTCLASS_REQUEST, LOG_INFO, __func__, "path too long, not deleting %s", pair->fp_local);
					ret = ENAMETOOLONG;
					continue;
				}

				DBPRT(("%s: append segment to path %s\n", __func__, path))
			}
		} else {
			if (errno != ENOENT)
				log_eventf(PBSEVENT_JOB, PBS_EVENTCLASS_REQUEST, LOG_INFO, __func__, "cannot stat(%s): %s", path, strerror(errno));
		}

		/*
		 * If the wildcard "*" is given to delete every file
		 * in the homedir, don't do it.
		 */
		if (strcmp(path, "./*") == 0) {
			DBPRT(("%s: wildcard delete of all files skipped\n", __func__))
			continue;
		}

		ps = strrchr(path, (int) '/');
		if (ps) {
			/*
			 * Has prefix path: save the parent directory name
			 * INCLUDING its trailing '/', because matched names
			 * below are formed by plain concatenation (the same
			 * form as the "./" used when there is no prefix).
			 * path fits in MAXPATHLEN, so dlen does too.
			 */
			size_t dlen = (size_t) (ps - path) + 1;

			memcpy(dname, path, dlen);
			dname[dlen] = '\0';
			ps++;
		} else { /* no prefix path */
			/*
			 * If the wildcard "*" is given to delete every file
			 * in the homedir, don't do it.
			 */
			if (strcmp(path, "*") == 0) {
				DBPRT(("%s: wildcard delete of all files skipped\n", __func__))
				continue;
			}
			dname[0] = '.';
			dname[1] = '/';
			dname[2] = '\0';
			ps = path;
		}

		/* if there are no wildcards we don't need to search */
		if (strchr(ps, '*') == NULL && strchr(ps, '?') == NULL) {
			DBPRT(("%s: path has no wildcards\n", __func__))
			rc = delete_file(path, rqcpf->rq_user, prmt, pbadfile);
			if (rc != 0)
				ret = rc;
			continue;
		}

		dirp = opendir(dname);
		if (dirp == NULL) { /* dir cannot be opened, just call delete_file */
			DBPRT(("%s: cannot open dir %s\n", __func__, dname))
			rc = delete_file(path, rqcpf->rq_user, prmt, pbadfile);
			if (rc != 0)
				ret = rc;
			continue;
		}

		/* errno is cleared before each readdir() so that a NULL return */
		/* can be told apart: end of directory versus read error */
		while (errno = 0, (pdirent = readdir(dirp)) != NULL) {
			if (pdirent->d_name[0] == '.') {
				if (pdirent->d_name[1] == '\0' || (pdirent->d_name[1] == '.' && pdirent->d_name[2] == '\0'))
					continue;
			}
			if (pbs_glob(pdirent->d_name, ps) != 0) {
				/* name matches */
				int n = snprintf(matched, sizeof(matched), "%s%s", dname, pdirent->d_name);

				if (n < 0 || (size_t) n >= sizeof(matched)) {
					log_eventf(PBSEVENT_JOB, PBS_EVENTCLASS_REQUEST, LOG_INFO, __func__, "path too long, not deleting %s in %s", pdirent->d_name, dname);
					ret = ENAMETOOLONG;
					continue;
				}
				DBPRT(("%s: match %s\n", __func__, matched))
				rc = delete_file(matched, rqcpf->rq_user, prmt, pbadfile);
				if (rc != 0)
					ret = rc;
			}
		}
		if (errno != 0 && errno != ENOENT) { /* dir cannot be read, just call delete_file */
			DBPRT(("%s: cannot read dir %s\n", __func__, dname))
			rc = delete_file(path, rqcpf->rq_user, prmt, pbadfile);
			if (rc != 0)
				ret = rc;
		}

		(void) closedir(dirp);
	}
	return (ret);
}

/**
 * @brief
 * 	Work task callback that replies to a rerunjob request, for both
 * 	tcp and tpp requests.
 *
 * 	The batch request is taken from wt_parm1.  A zero wt_aux leads to
 * 	an acknowledgement; any other value is negated and used as the
 * 	reject code.  Nothing is done when no request is attached.
 *
 * @param[in]	ptask - Work task
 *
 * @return 	none
 *
 */

static void
post_rerunjob(struct work_task *ptask)
{
	struct batch_request *preq = ptask->wt_parm1;

	if (preq == NULL)
		return;

	if (ptask->wt_aux == 0) {
		reply_ack(preq);
		return;
	}

	req_reject(-ptask->wt_aux, 0, preq);
}

/**
 * @brief
 *	Service a request to rerun a job by returning the job's standard
 *	output and standard error files to the server.
 *
 * @par
 *	The job is looked up by the id carried in the request; an unknown id
 *	is rejected with PBSE_UNKJOBID.  The work is then handed to a child
 *	created with fork_me():
 *	  - parent: registers post_rerunjob() as a deferred-child work task
 *	    (which sends the reply later) and moves the job from substate
 *	    OBIT to EXITED so that no further obit is sent;
 *	  - fork failure other than ENOSYS: the request is rejected;
 *	  - child, or foreground when fork is unavailable (ENOSYS): connects
 *	    to the server and sends StdOut then StdErr with return_file().
 *	When running in the foreground the reply is sent from here; when
 *	running as the child the result is reported via the exit status.
 *
 * @param[in] preq - pointer to batch_request structure
 *
 * @return 	Void
 *
 */

void
req_rerunjob(struct batch_request *preq)
{
	job *pjob;
	unsigned int port;
	int rc;
	int sock;
	const char *at_server;
	char *svrport = NULL;
	struct work_task *wtask = NULL;
	pid_t child;

	pjob = find_job(preq->rq_ind.rq_rerun);
	if (pjob == NULL) {
		req_reject(PBSE_UNKJOBID, 0, preq);
		return;
	}

	/* try fork to send files back */

	if ((child = fork_me(preq->rq_conn)) > 0) {
		wtask = set_task(WORK_Deferred_Child, child, post_rerunjob, preq);
		if (!wtask) {
			log_err(errno, NULL, "Failed to create deferred work task, Out of memory");
			req_reject(PBSE_SYSTEM, 0, preq);
			return;
		}

		/* change substate so Mom doesn't send another obit     */
		/* do not record to disk, so Obit is resent on recovery */
		if (check_job_substate(pjob, JOB_SUBSTATE_OBIT))
			set_job_substate(pjob, JOB_SUBSTATE_EXITED);
		return;
	} else if ((child < 0) && (errno != ENOSYS)) {
		req_reject(-child, 0, preq);
		return;
	}

	/* Child process ...  if fork available else continue in foreground */
	/* send a Job Files request(s).                           */
	/* From here on: child == 0 in the forked child, non-zero in foreground */

	rc = 0;

	/* an explicit port may follow ':' in the job's at_server attribute */
	at_server = get_jattr_str(pjob, JOB_ATR_at_server);
	if (at_server != NULL)
		svrport = strchr(at_server, (int) ':');
	if (svrport)
		port = atoi(svrport + 1);
	else
		port = default_server_port;

	sock = client_to_svr(pjob->ji_qs.ji_un.ji_momt.ji_svraddr, port, B_RESERVED);

	if (pbs_errno == PBSE_NOLOOPBACKIF)
		log_err(PBSE_NOLOOPBACKIF, "client_to_svr", msg_noloopbackif);

	if (sock < 0) {
		log_event(PBSEVENT_ERROR, PBS_EVENTCLASS_REQUEST, LOG_WARNING,
			  "req_rerun", "no contact with the server");
		if (child) {
			/* TPP streams cannot be inherited.
			 * So, we need to reject the request here itself if in foreground.
			 */
			req_reject(PBSE_NOSERVER, 0, preq);
		}
		rc = 1;
	}

	if (rc == 0) {
		if (((rc = return_file(pjob, StdOut, sock)) != 0) ||
		    ((rc = return_file(pjob, StdErr, sock)) != 0)) {
			/* TPP streams cannot be inherited.
			 * So, we need to reject/ack the request here itself if in foreground.
			 */
			if (child)
				req_reject(rc, 0, preq);
			else
				rc = 1;
		} else if (child)
			reply_ack(preq);
	}

	/* only close a descriptor that was actually opened */
	if (sock >= 0)
		closesocket(sock);
	if (!child)
		exit(rc);
	return;
}

#ifdef WIN32 /* WIN32 ------------------------------------------------------ */
/**
 * @brief
 * 	Do post cpyfile processing and cleanup
 * 	<Windows version>
 *
 * @par
 *	Registered by req_cpyfile() as the deferred-child work task for the
 *	stage-file helper process.  Depending on the exit code found in
 *	pwt->wt_aux it replies to the original batch request, updates the
 *	job (when one is attached) and finally closes the pipe, unlinks the
 *	copy_info record from its list and frees it.
 *
 *	  - STAGEFILE_OK:         job substate reset to OBIT, request acked
 *	  - STAGEFILE_NOCOPYFILE: text read from the pipe is returned with
 *	                          PBSE_NOCOPYFILE; JOB_SVFLG_StgoFal is set
 *	                          on the job and the job is saved
 *	  - STAGEFILE_FATAL:      text read from the pipe is logged and
 *	                          returned with PBSE_NOCOPYFILE
 *	  - STAGEFILE_BADUSER:    request rejected with PBSE_BADUSER
 *	  - anything else:        error logged, obit resent for the job,
 *	                          PBSE_NOCOPYFILE returned
 *
 * @param[in]	pwt - work task; wt_parm1 is the copy_info record created
 *		      by req_cpyfile(), wt_aux is the exit code examined here
 *
 * @return 	none
 *
 */

void
post_cpyfile(struct work_task *pwt)
{
	struct batch_request *preq = NULL;
	pio_handles *pio = NULL;
	copy_info *cpyinfo = NULL;
	int ecode = -1;
	job *pjob = NULL;
	/* one spare byte so the text read from the pipe is always terminated */
	char buf[CPY_PIPE_BUFSIZE + 1] = {'\0'};
	int buflen = 0;
	char *jobid = NULL;

	if ((pwt == NULL) || (pwt->wt_parm1 == NULL))
		return;

	cpyinfo = pwt->wt_parm1;
	if (cpyinfo->preq == NULL || cpyinfo->jobid == NULL)
		return;
	preq = cpyinfo->preq;
	pio = &cpyinfo->pio;
	jobid = cpyinfo->jobid;
	pjob = cpyinfo->pjob;
	ecode = pwt->wt_aux;

	DBPRT(("%s: entered %s\n", __func__, jobid))
	log_eventf(PBSEVENT_JOB, PBS_EVENTCLASS_JOB, LOG_DEBUG, jobid, "%s: entered %s", __func__, jobid);

	switch (ecode) {
		case STAGEFILE_OK:
			if (pjob) {
				/*
				 * reset substate to OBIT,  if server doesn't move
				 * on to next step in End of Job processing quickly
				 * we will resend obit, see mom_main.c
				 */
				set_job_substate(pjob, JOB_SUBSTATE_OBIT);
				pjob->ji_sampletim = time(0);
			}
			reply_ack(preq);
			break;
		case STAGEFILE_NOCOPYFILE:
			/* never fill the last byte: buf was zeroed, so it stays terminated */
			(void) win_pread(pio, buf, sizeof(buf) - 1);
			buflen = strlen(buf);
			/* drop the final character (presumably a trailing newline - confirm) */
			if (buflen > 0)
				buf[buflen - 1] = '\0';
			(void) reply_text(preq, PBSE_NOCOPYFILE, buf);
			if (pjob) {
				pjob->ji_qs.ji_svrflags |= JOB_SVFLG_StgoFal;
				(void) job_save(pjob);
			}
			break;
		case STAGEFILE_FATAL:
			/* never fill the last byte: buf was zeroed, so it stays terminated */
			(void) win_pread(pio, buf, sizeof(buf) - 1);
			buflen = strlen(buf);
			/* drop the final character (presumably a trailing newline - confirm) */
			if (buflen > 0)
				buf[buflen - 1] = '\0';
			(void) snprintf(log_buffer, sizeof(log_buffer), "file copy failed for jobid %s with fatal error: %s", jobid, buf);
			log_err(PBSE_NOCOPYFILE, __func__, log_buffer);
			(void) reply_text(preq, PBSE_NOCOPYFILE, log_buffer);
			break;
		case STAGEFILE_BADUSER:
			(void) snprintf(log_buffer, sizeof(log_buffer), "file copy failed for jobid %s with baduser", jobid);
			log_err(PBSE_BADUSER, __func__, log_buffer);
			req_reject(PBSE_BADUSER, 0, preq);
			break;
		default:
			(void) snprintf(log_buffer, sizeof(log_buffer), "file copy failed for job %s with error %d", jobid, ecode);
			log_err(PBSE_NOCOPYFILE, __func__, log_buffer);

			if (pjob) {
				/*
				 * child that was doing file copies had major error
				 * was killed or crashed,  resend obit to restart
				 */
				send_obit(pjob, 0);
			}

			(void) reply_text(preq, PBSE_NOCOPYFILE, log_buffer);
			break;
	}

	win_pclose2(pio);
	DBPRT(("%s: done %s\n", __func__, jobid))
	log_eventf(PBSEVENT_JOB, PBS_EVENTCLASS_JOB, LOG_DEBUG, jobid, "%s: done %s", __func__, jobid);

	/* the copy request is finished: unlink and release its record */
	delete_link(&cpyinfo->al_link);
	free(cpyinfo->jobid);
	cpyinfo->jobid = NULL;
	free(cpyinfo);
	cpyinfo = NULL;
}

/**
 * @brief
 *	Look up the pending copy request recorded for <jobid> in the global
 *	list mom_copyreqs_list.
 *
 * @par
 *	Only the first strlen(jobid) characters of each stored job id are
 *	compared, so an entry whose id merely starts with <jobid> also
 *	matches.  The first matching entry in list order is returned.
 *
 * @param[in]	jobid - job id to search for
 *
 * @return	copy_info *
 * @retval	pointer to copy_info	if copy info found
 * @retval	NULL			if copy info not found, or if jobid is
 *					NULL or empty
 *
 */
copy_info *
get_copyinfo_from_list(char *jobid)
{
	copy_info *entry;
	size_t idlen;

	if (jobid == NULL || *jobid == '\0')
		return NULL;

	idlen = strlen(jobid);

	for (entry = GET_NEXT(mom_copyreqs_list);
	     entry != NULL;
	     entry = GET_NEXT(entry->al_link)) {
		if (strncmp(entry->jobid, jobid, idlen) == 0)
			return entry;
	}

	return NULL;
}

/**
 * @brief
 *	process the Copy Files request from the server to dispose
 *	of output from the job.  This is done by a child of MOM since it
 *	might take time.
 *	<Windows version>
 *
 * @par
 *	Outline:
 *	  1. select the rq_cpyfile data for plain or credentialed requests;
 *	  2. if the job is known, clear its checkpoint flags and move it from
 *	     substate OBIT to EXITED;
 *	  3. become the user (fork_to_user) to work out pbs_jobdir, then
 *	     revert and, for a sandbox stage-in, create the job directory;
 *	  4. start pbs_stage_file.exe through win_popen(), register
 *	     post_cpyfile() as the deferred-child work task, and queue the
 *	     copy_info record on mom_copyreqs_list;
 *	  5. write the configuration values and the request itself to the
 *	     helper's pipe, terminated by "quit".
 *	The reply to the request is sent by post_cpyfile() once the helper
 *	exits; every failure path in this function rejects the request
 *	itself and releases what it allocated.
 *
 * @param[in]	preq - pointer to batch request for copy file
 * @return	void
 *
 * NOTE:The supplied PBS means of moving the file is by "rcp". A site may wish to change this.
 *
 */

void
req_cpyfile(struct batch_request *preq)
{
	int dir = -1;
	job *pjob = NULL;
	int rc = -1;
	struct rq_cpyfile *rqcpf = NULL;
	struct passwd *pw = NULL;
	char actual_homedir[MAXPATHLEN + 1] = {'\0'};
	cpy_files stage_inout = {0};
	char cmdline[PBS_CMDLINE_LENGTH + 1] = {'\0'};
	char buf[CPY_PIPE_BUFSIZE + 1] = {'\0'};
	struct work_task *ptask = NULL;
	copy_info *cpyinfo = NULL;
	struct proc_ctrl proc_info;
	extern char *path_log;
	extern char *log_file;
	int is_network_drive = 0;
	char current_dir[MAX_PATH + 1] = {'\0'};
	int direct_write = 0;

	if (preq->rq_type == PBS_BATCH_CopyFiles_Cred)
		rqcpf = &preq->rq_ind.rq_cpyfile_cred.rq_copyfile;
	else
		rqcpf = &preq->rq_ind.rq_cpyfile;

	pjob = find_job(rqcpf->rq_jobid);
	if (pjob != NULL) {
		/*
		 * Once a job starts file processing, the checkpoint
		 * flags need to be turned off so a restart cannot
		 * send us back to the future.
		 */
		if (pjob->ji_qs.ji_svrflags &
		    (JOB_SVFLG_CHKPT | JOB_SVFLG_ChkptMig)) {
			pjob->ji_qs.ji_svrflags &=
				~(JOB_SVFLG_CHKPT | JOB_SVFLG_ChkptMig);
			(void) job_save(pjob);
		}
		/*
		 * change substate so Mom doesn't send another obit
		 * do not record to disk, so Obit is resent on recovery
		 */
		if (check_job_substate(pjob, JOB_SUBSTATE_OBIT))
			set_job_substate(pjob, JOB_SUBSTATE_EXITED);
	}

	dir = (rqcpf->rq_dir & STAGE_DIRECTION) ? STAGE_DIR_OUT : STAGE_DIR_IN;
	stage_inout.sandbox_private = (rqcpf->rq_dir & STAGE_JOBDIR) ? TRUE : FALSE;
	if (pjob != NULL && (dir == STAGE_DIR_OUT)) {
		direct_write = direct_write_requested(pjob);
	}

	/*
	 * In Windows, we need to be the user in order to call
	 * map_unc_path() therefore we will fork_to_user before
	 * calling getpwnam()
	 */
	if (fork_to_user(preq, pjob) == INVALID_HANDLE_VALUE) {
		req_reject(PBSE_BADUSER, 0, preq);
		return;
	}

	if (pjob == NULL) {
		/*
		 * no homedir can be cached in job's gc_homedir/altid
		 * attribute, so we call map_unc_path to get it now
		 */
		if ((pw = getpwnam(preq->rq_ind.rq_cpyfile.rq_user)) != NULL) {
			pbs_strncpy(actual_homedir,
				    map_unc_path(pw->pw_dir, pw), sizeof(actual_homedir));
			pbs_jobdir = jobdirname(rqcpf->rq_jobid, actual_homedir);
		} else {
			(void) snprintf(log_buffer, sizeof(log_buffer), "unable to find a password entry for %s", preq->rq_ind.rq_cpyfile.rq_user);
			log_err(errno, "req_cpyfile", log_buffer);
			req_reject(PBSE_BADUSER, 0, preq);
			return;
		}
	} else {
		/*
		 * stage out will have the pjob already set, and the
		 * home directory should have also been set.
		 * Find the pbs_jobdir based off the user home info
		 * stored in the pjob
		 */
		if (pjob->ji_grpcache)
			pbs_jobdir = jobdirname(pjob->ji_qs.ji_jobid, pjob->ji_grpcache->gc_homedir);
		else
			pbs_jobdir = jobdirname(pjob->ji_qs.ji_jobid, NULL);
	}

	/*
	 * revert to ADMIN to do stuff like
	 * create the job directory as PBS, so it has
	 * the same permissions as TMPDIR
	 */
	(void) revert_impersonated_user();

	if ((dir == STAGE_DIR_IN) && (stage_inout.sandbox_private)) {
		/* Create PBS_JOBDIR */
		rc = mkjobdir(rqcpf->rq_jobid, pbs_jobdir, preq->rq_ind.rq_cpyfile.rq_user, (pjob != NULL && pjob->ji_user != NULL) ? pjob->ji_user->pw_userlogin : INVALID_HANDLE_VALUE);
		if (rc != 0) {
			(void) snprintf(log_buffer, sizeof(log_buffer), "unable to create the job directory %s", pbs_jobdir);
			log_err(errno, "req_cpyfile", log_buffer);
			req_reject(PBSE_MOMREJECT, 0, preq);
			return;
		}
	}

	snprintf(cmdline, sizeof(cmdline), "%s/sbin/pbs_stage_file.exe", pbs_conf.pbs_exec_path);

	/* record handed to post_cpyfile() once the helper process exits */
	if ((cpyinfo = (copy_info *) malloc(sizeof(copy_info))) == NULL) {
		(void) snprintf(log_buffer, sizeof(log_buffer), "unable to allocate memory for copy_info for job %s", rqcpf->rq_jobid);
		log_event(PBSEVENT_DEBUG, PBS_EVENTCLASS_JOB, LOG_ERR, rqcpf->rq_jobid, log_buffer);
		req_reject(PBSE_MOMREJECT, 0, preq);
		return;
	}

	memset(cpyinfo, 0, sizeof(copy_info));
	CLEAR_LINK(cpyinfo->al_link);

	if ((cpyinfo->jobid = strdup(rqcpf->rq_jobid)) == NULL) {
		(void) snprintf(log_buffer, sizeof(log_buffer), "unable to allocate memory for copy_info->jobid for job %s", rqcpf->rq_jobid);
		log_event(PBSEVENT_DEBUG, PBS_EVENTCLASS_JOB, LOG_ERR, rqcpf->rq_jobid, log_buffer);
		free(cpyinfo);
		cpyinfo = NULL;
		req_reject(PBSE_MOMREJECT, 0, preq);
		return;
	}

	cpyinfo->pjob = pjob;
	cpyinfo->preq = preq;
	proc_info.bInheritHandle = TRUE;
	proc_info.bnowait = 0;
	proc_info.buse_cmd = TRUE;
	proc_info.need_ptree_termination = TRUE;
#ifndef DEBUG
	proc_info.flags = 0;
#else
	proc_info.flags = CREATE_NO_WINDOW | CREATE_BREAKAWAY_FROM_JOB;
#endif
	/* win_popen() doesn't launch process if current directory is a mapped path is user session */
	current_dir[0] = '\0';
	_getcwd(current_dir, MAX_PATH + 1);
	if ((pjob != NULL) && (pjob->ji_user != NULL) && impersonate_user(pjob->ji_user->pw_userlogin) == 0) {
		snprintf(log_buffer, sizeof(log_buffer) - 1, "req_cpyfile: failed to impersonate user %s error=%d",
			 pjob->ji_user->pw_name, GetLastError());
		log_event(PBSEVENT_DEBUG2, PBS_EVENTCLASS_JOB,
			  LOG_DEBUG, pjob->ji_qs.ji_jobid, log_buffer);
		/*
		 * nothing has taken ownership of cpyinfo yet and nobody else
		 * will answer the request, so release and reject here
		 */
		free(cpyinfo->jobid);
		cpyinfo->jobid = NULL;
		free(cpyinfo);
		cpyinfo = NULL;
		req_reject(PBSE_BADUSER, 0, preq);
		return;
	}
	is_network_drive = is_network_drive_path(current_dir);
	proc_info.is_current_path_network = is_network_drive;
	(void) revert_impersonated_user();

	if (win_popen(cmdline, "w", &cpyinfo->pio, &proc_info) == 0) {
		errno = GetLastError();
		pbs_errno = errno;
		(void) snprintf(log_buffer, sizeof(log_buffer) - 1, "executing %s for job %s failed errno=%d", cmdline, rqcpf->rq_jobid, errno);
		log_event(PBSEVENT_DEBUG, PBS_EVENTCLASS_JOB, LOG_ERR, rqcpf->rq_jobid, log_buffer);
		win_pclose(&cpyinfo->pio);
		free(cpyinfo->jobid);
		cpyinfo->jobid = NULL;
		free(cpyinfo);
		cpyinfo = NULL;
		req_reject(PBSE_MOMREJECT, 0, preq);
		return;
	}

	/* post_cpyfile() replies to the request when the helper exits */
	ptask = set_task(WORK_Deferred_Child, (long) cpyinfo->pio.pi.hProcess, post_cpyfile, cpyinfo);
	if (!ptask) {
		errno = ENOMEM;
		pbs_errno = errno;
		(void) snprintf(log_buffer, sizeof(log_buffer) - 1, "unable to set task for cpyreq for job %s", rqcpf->rq_jobid);
		log_event(PBSEVENT_DEBUG, PBS_EVENTCLASS_JOB, LOG_ERR, rqcpf->rq_jobid, log_buffer);
		win_pclose(&cpyinfo->pio);
		free(cpyinfo->jobid);
		cpyinfo->jobid = NULL;
		free(cpyinfo);
		cpyinfo = NULL;
		req_reject(PBSE_MOMREJECT, 0, preq);
		return;
	}

	cpyinfo->ptask = ptask;
	append_link(&mom_copyreqs_list, &cpyinfo->al_link, cpyinfo);

	addpid(cpyinfo->pio.pi.hProcess);

	/* hand the helper its settings, one "name=value" line at a time */
	snprintf(buf, sizeof(buf) - 1, "path_log=%s\n", path_log);
	check_err(__func__, buf, win_pwrite(&cpyinfo->pio, buf, strlen(buf)));

	snprintf(buf, sizeof(buf) - 1, "path_spool=%s\n", path_spool);
	check_err(__func__, buf, win_pwrite(&cpyinfo->pio, buf, strlen(buf)));

	snprintf(buf, sizeof(buf) - 1, "path_undeliv=%s\n", path_undeliv);
	check_err(__func__, buf, win_pwrite(&cpyinfo->pio, buf, strlen(buf)));

	snprintf(buf, sizeof(buf) - 1, "path_checkpoint=%s\n", path_checkpoint);
	check_err(__func__, buf, win_pwrite(&cpyinfo->pio, buf, strlen(buf)));

	snprintf(buf, sizeof(buf) - 1, "pbs_jobdir=%s\n", pbs_jobdir);
	check_err(__func__, buf, win_pwrite(&cpyinfo->pio, buf, strlen(buf)));

	snprintf(buf, sizeof(buf) - 1, "actual_homedir=%s\n",
		 (pjob ? (pjob->ji_grpcache ? pjob->ji_grpcache->gc_homedir : "") : actual_homedir));
	check_err(__func__, buf, win_pwrite(&cpyinfo->pio, buf, strlen(buf)));

	snprintf(buf, sizeof(buf) - 1, "mom_host=%s\n", mom_host);
	check_err(__func__, buf, win_pwrite(&cpyinfo->pio, buf, strlen(buf)));

	snprintf(buf, sizeof(buf) - 1, "log_file=%s\n", (log_file ? log_file : ""));
	check_err(__func__, buf, win_pwrite(&cpyinfo->pio, buf, strlen(buf)));

	snprintf(buf, sizeof(buf) - 1, "log_event_mask=%ld\n", *log_event_mask);
	check_err(__func__, buf, win_pwrite(&cpyinfo->pio, buf, strlen(buf)));

	snprintf(buf, sizeof(buf) - 1, "direct_write=%d\n", direct_write);
	check_err(__func__, buf, win_pwrite(&cpyinfo->pio, buf, strlen(buf)));

	send_pcphosts(&cpyinfo->pio, pcphosts);

	if (!send_rq_cpyfile_cred(&cpyinfo->pio, rqcpf)) {
		log_err(-1, __func__, "Failed to send data");
	}

	/* "quit" marks the end of the data written to the helper */
	snprintf(buf, sizeof(buf) - 1, "quit\n");
	check_err(__func__, buf, win_pwrite(&cpyinfo->pio, buf, strlen(buf)));

	chdir(mom_home);
}

/**
 * @brief
 *	delete the specified output/staged files
 *	<Windows version>
 *
 * @par
 *	When the job is known and a copy request for it is still queued
 *	(see get_copyinfo_from_list()), the request is rejected with
 *	PBSE_TRYAGAIN so the server retries after the copy has completed.
 *	Otherwise the job's checkpoint flags are cleared, the job is moved
 *	from substate OBIT to EXITED, and the files are removed by
 *	del_files() after fork_to_user() has switched to the user.
 *
 * @param[in]	preq - pointer to batch request for delete file
 *
 * @return	void
 *
 */

void
req_delfile(struct batch_request *preq)
{
	struct rq_cpyfile *rqcpf;
	job *pjob;
	HANDLE hUser;
	char *bad_list = NULL;
	int rc;

	/* plain and credentialed requests keep the file list in different members */
	rqcpf = (preq->rq_type == PBS_BATCH_DelFiles_Cred)
			? &preq->rq_ind.rq_cpyfile_cred.rq_copyfile
			: &preq->rq_ind.rq_cpyfile;

	pjob = find_job(rqcpf->rq_jobid);
	if (pjob != NULL) {
		/*
		 * A copy request still pending for this job has to go
		 * through post_cpyfile first; ask the server to retry.
		 */
		if (get_copyinfo_from_list(rqcpf->rq_jobid) != NULL) {
			req_reject(PBSE_TRYAGAIN, 0, preq);
			return;
		}

		/*
		 * File processing has begun: drop the checkpoint flags
		 * so a restart cannot take the job back to that state.
		 */
		if ((pjob->ji_qs.ji_svrflags & (JOB_SVFLG_CHKPT | JOB_SVFLG_ChkptMig)) != 0) {
			pjob->ji_qs.ji_svrflags &= ~(JOB_SVFLG_CHKPT | JOB_SVFLG_ChkptMig);
			(void) job_save(pjob);
		}

		/*
		 * Move to EXITED so Mom does not send another obit; this
		 * is not written to disk, so the Obit is resent on recovery.
		 */
		if (check_job_substate(pjob, JOB_SUBSTATE_OBIT))
			set_job_substate(pjob, JOB_SUBSTATE_EXITED);
	}

	hUser = fork_to_user(preq, pjob);
	if (hUser == INVALID_HANDLE_VALUE) {
		req_reject(PBSE_BADUSER, 0, preq);
		return;
	}

	/* now acting as the user: remove the files and report the outcome */
	rc = del_files(rqcpf, pjob, &bad_list);
	if (rc == 0) {
		reply_ack(preq);
	} else {
		reply_text(preq, rc, bad_list);
		free(bad_list);
		bad_list = NULL;
	}

	(void) revert_impersonated_user();
	chdir(mom_home);
}

#else /* UNIX---------------------------------------------------------------*/
/**
 * @brief
 * 	Work-task callback that replies to a copy-files request received
 * 	over a tpp connection when no job structure exists yet (stagein
 * 	before the job is available).
 *
 * @par
 *	req_cpyfile() registers this function through set_task() with the
 *	batch request as the task's first parameter.
 *
 * @param[in]	ptask - work task; wt_parm1 carries the batch request.
 *			A zero wt_aux acknowledges the request, any other
 *			value rejects it with PBSE_NOCOPYFILE.
 *
 * @return 	none
 *
 */

static void
post_cpyfile_nojob(struct work_task *ptask)
{
	struct batch_request *request = ptask->wt_parm1;

	/* nothing to answer if no request was attached to the task */
	if (request == NULL)
		return;

	if (ptask->wt_aux == 0) {
		reply_ack(request);
		return;
	}

	req_reject(PBSE_NOCOPYFILE, 0, request);
}

/**
 * @brief
 * 	Do post cpyfile processing and cleanup
 * @par
 * 	Installed by req_cpyfile() as the job's ji_mompost handler for the
 *	child that performs the copy.
 *
 *	On success (ev == 0) any request held in ji_preq is acknowledged and
 *	the substate is set back to OBIT.  On failure any held request is
 *	rejected with PBSE_NOCOPYFILE; then, if the job runs with
 *	sandbox=PRIVATE and the child exited with STAGEOUT_FAILURE, the
 *	stageout-failed flag is set on the job, otherwise the obit is resent.
 *
 * @param[in]	pjob - pointer to the job structure
 * @param[in]	ev - exit value of the child process
 *
 * @return 	none
 *
 */

static void
post_cpyfile(job *pjob, int ev)
{
	int private_stageout_failed;

	if (pjob == NULL)
		return;

	/* this handler runs once per child */
	pjob->ji_mompost = NULL;

	if (ev == 0) {
		if (pjob->ji_preq)
			reply_ack(pjob->ji_preq);
		pjob->ji_preq = NULL;

		/* back to OBIT: if the server does not quickly proceed  */
		/* with End of Job processing the obit is sent again,    */
		/* see mom_main.c                                        */
		set_job_substate(pjob, JOB_SUBSTATE_OBIT);
		pjob->ji_sampletim = time(0);
		return;
	}

	/* the copy failed: answer any request that was kept on the job */
	if (pjob->ji_preq)
		req_reject(PBSE_NOCOPYFILE, 0, pjob->ji_preq);
	pjob->ji_preq = NULL;

	private_stageout_failed =
		(is_jattr_set(pjob, JOB_ATR_sandbox)) &&
		(strcasecmp(get_jattr_str(pjob, JOB_ATR_sandbox), "PRIVATE") == 0) &&
		(ev == STAGEOUT_FAILURE);

	if (!private_stageout_failed) {
		/* the copying child hit a major error, was killed or */
		/* crashed: send the obit again to restart            */
		send_obit(pjob, 0);
		return;
	}

	/* sandbox=private and stageout failed: record it on the job */
	pjob->ji_qs.ji_svrflags |= JOB_SVFLG_StgoFal;
}

/**
 * @brief
 * 	req_cpyfile - process the Copy Files request from the server to dispose
 *	of output from the job.  This is done by a child of MOM since it
 *	might take time.
 *
 *	UNIX version
 *
 *	The supplied PBS means of moving the file is by "rcp".
 * 	A site may wish to change this.
 *
 * @param[in] preq - pointer to batch_request structure
 *
 * @return	Void
 *
 */

void
req_cpyfile(struct batch_request *preq)
{
	job *pjob;
	struct rq_cpyfile *rqcpf;
	time_t copy_start;
	time_t copy_stop;
	int num_copies = 0;
	int dir;
	struct passwd *pwdp;
	struct group *grpp;
	uid_t useruid = 0;
	gid_t usergid = 0;
	int rc;
	pid_t pid;
	struct rqfpair *pair;
	int rmtflag;
	cpy_files stage_inout;
	char *prmt;
	char dup_rqcpf_jobid[PBS_MAXSVRJOBID + 1];
	struct work_task *wtask = NULL;
	int tot_copies = 0;
	bool copy_failed = FALSE;

#if defined(PBS_SECURITY) && (PBS_SECURITY == KRB5)
	struct krb_holder *ticket = NULL;
	char *krbccname = NULL;
#endif

	if (mock_run) {
		/*
		 * in mock run we don't have any files to copy back,
		 * so just ack request to make server happy
		 * and return
		 */
		reply_ack(preq);
		return;
	}

	DBPRT(("%s: entered\n", __func__))

	if (preq->rq_type == PBS_BATCH_CopyFiles_Cred)
		rqcpf = &preq->rq_ind.rq_cpyfile_cred.rq_copyfile;
	else
		rqcpf = &preq->rq_ind.rq_cpyfile;

	stage_inout.stageout_failed = FALSE;
	stage_inout.bad_files = 0;
	stage_inout.file_num = 0;
	stage_inout.file_max = 0;
	stage_inout.file_list = NULL;
	stage_inout.bad_list = NULL;
	pjob = find_job(rqcpf->rq_jobid);
	if (pjob) {
		/*
		 ** Once a job starts file processing, the checkpoint
		 ** flags need to be turned off so a restart cannot
		 ** send us back to the future.
		 */
		if (pjob->ji_qs.ji_svrflags &
		    (JOB_SVFLG_CHKPT | JOB_SVFLG_ChkptMig)) {
			pjob->ji_qs.ji_svrflags &=
				~(JOB_SVFLG_CHKPT | JOB_SVFLG_ChkptMig);
			(void) job_save(pjob);
		}
		/* change substate so Mom doesn't send another obit     */
		/* do not record to disk, so Obit is resent on recovery */
		if (check_job_substate(pjob, JOB_SUBSTATE_OBIT))
			set_job_substate(pjob, JOB_SUBSTATE_EXITED);
	}

	dir = (rqcpf->rq_dir & STAGE_DIRECTION) ? STAGE_DIR_OUT : STAGE_DIR_IN;
	stage_inout.sandbox_private = (rqcpf->rq_dir & STAGE_JOBDIR) ? TRUE : FALSE;

	/* Call getpwnam for user info */
	pwdp = getpwnam(rqcpf->rq_user);
	if (pwdp != NULL) {
		pbs_jobdir = jobdirname(rqcpf->rq_jobid, pwdp->pw_dir);
	} else {
		sprintf(log_buffer, "unable to find a password entry");
		log_joberr(errno, __func__, log_buffer, rqcpf->rq_jobid);
		req_reject(PBSE_BADUSER, 0, preq);
		return;
	}

	if ((dir == STAGE_DIR_IN) && stage_inout.sandbox_private) {
		/* Need to look up the uid, gid */
		if (pwdp == NULL) {
			req_reject(PBSE_BADUSER, 0, preq);
			return;
		}
		useruid = pwdp->pw_uid;

		if (rqcpf->rq_group[0] == '\0') {
			usergid = pwdp->pw_gid; /* default to login group */
		} else {
			if ((grpp = getgrnam(rqcpf->rq_group)) == NULL) {
				req_reject(PBSE_BADUSER, 0, preq);
				return;
			}
			usergid = grpp->gr_gid;
		}
		/* Create PBS_JOBDIR  and no change of environment */
		rc = mkjobdir(rqcpf->rq_jobid, pbs_jobdir, useruid, usergid);

		if (rc != 0) {
			sprintf(log_buffer, "unable to create the job directory %s", pbs_jobdir);
			log_err(errno, __func__, log_buffer);
			req_reject(PBSE_MOMREJECT, 0, preq);
			return;
		}
	}

	if ((pjob != NULL) && (dir == STAGE_DIR_OUT) && direct_write_requested(pjob))
		stage_inout.direct_write = 1;
	else
		stage_inout.direct_write = 0;

		/* Become the user */
#if defined(PBS_SECURITY) && (PBS_SECURITY == KRB5)
	ticket = alloc_ticket();
	pid = fork_to_user(preq, pjob, ticket);
#else
	pid = fork_to_user(preq, pjob);
#endif
	rc = (int) pid;
	if (pid > 0) {
#if defined(PBS_SECURITY) && (PBS_SECURITY == KRB5)
		free_ticket(ticket, CRED_CLOSE);
#endif

		if (pjob) {
			/* change substate so Mom doesn't send another obit     */
			/* do not record to disk, so Obit is resent on recovery */
			if (check_job_substate(pjob, JOB_SUBSTATE_OBIT))
				set_job_substate(pjob, JOB_SUBSTATE_EXITED);
			pjob->ji_momsubt = pid;
			pjob->ji_mompost = post_cpyfile;
			if (preq->prot == PROT_TPP)
				pjob->ji_preq = preq; /* keep the batch request pointer */
		} else {
			if (preq->prot == PROT_TPP) {
				/* there is no job yet, so cant hang this post function to job
				 * but this is tpp based connection, so we cannot reply in child
				 * lets hang the preq in a work task
				 */
				wtask = set_task(WORK_Deferred_Child, pid, post_cpyfile_nojob, preq);
				if (!wtask) {
					log_err(errno, __func__, "Failed to create deferred work task, Out of memory");
					req_reject(PBSE_SYSTEM, 0, preq);
					return;
				}
			}
		}
		return; /* parent - continue with someother task */
	} else if (rc < 0) {
#if defined(PBS_SECURITY) && (PBS_SECURITY == KRB5)
		free_ticket(ticket, CRED_DESTROY);
#endif

		req_reject(-rc, 0, preq);
		return;
	}

	/* chdir to job pbs_jobdir directory if "sandbox=PRIVATE" mode is requested */
	if (stage_inout.sandbox_private) {
		if (chdir(pbs_jobdir) == -1) 
			log_errf(-1, __func__, "chdir failed. ERR : %s", strerror(errno));			
	}

#if defined(PBS_SECURITY) && (PBS_SECURITY == KRB5)
	krbccname = get_ticket_ccname(ticket);
	if (krbccname != NULL)
		setenv("KRB5CCNAME", krbccname, 1);
#endif

	/*
	 * Child process ...
	 * Now running in the user's home or job staging and execution directory as the user.
	 * Build up cp/rcp command(s), one per file pair
	 */

	copy_start = time(0);
	for (pair = (struct rqfpair *) GET_NEXT(rqcpf->rq_pair);
	     pair != 0;
	     pair = (struct rqfpair *) GET_NEXT(pair->fp_link), tot_copies++) {
		if (copy_failed)
			continue;
		DBPRT(("%s: local %s remote %s\n", __func__, pair->fp_local, pair->fp_rmt))

		stage_inout.from_spool = 0;
		prmt = pair->fp_rmt;

		if (local_or_remote(&prmt) == 0) {
			/* destination host is this host, use cp */
			rmtflag = 0;
		} else {
			/* destination host is another, use (pbs_)rcp */
			rmtflag = 1;
		}

		rc = stage_file(dir, rmtflag, rqcpf->rq_owner,
				pair, preq->rq_conn, &stage_inout, prmt, rqcpf->rq_jobid);
		/*
		 ** Here we break out of the the loop on error.
		 ** This will only happen on a stagein failure.
		 */
		if (rc != 0) {
			copy_failed = TRUE;
			continue;
		}
		num_copies++;
	}
	copy_stop = time(0);

	/* If there was a stage in failure, remove the job directory.
	 * There is no guarantee we'll run on this mom again,
	 * So we need to cleanup.
	 */
	if ((dir == STAGE_DIR_IN) && stage_inout.sandbox_private && stage_inout.bad_files) {
		/* cd to user's home to be out of   */
		/* the sandbox so it can be deleted */
		if (chdir(pwdp->pw_dir) == -1) 
			log_errf(-1, __func__, "chdir failed. ERR : %s", strerror(errno));			
		rmjobdir(rqcpf->rq_jobid, pbs_jobdir, useruid, usergid, 0);
	}

	pbs_strncpy(dup_rqcpf_jobid, rqcpf->rq_jobid, sizeof(dup_rqcpf_jobid));
	if (preq->prot == PROT_TCP) {
		if (stage_inout.bad_files) {
			reply_text(preq, PBSE_NOCOPYFILE, stage_inout.bad_list);
		} else {
			reply_ack(preq);
		}
	} else {
		if (stage_inout.bad_files) {
			char *token = NULL;
			char *rest = stage_inout.bad_list;
			char *save_ptr = NULL;
			log_event(PBSEVENT_DEBUG2, PBS_EVENTCLASS_JOB, LOG_DEBUG,
				  dup_rqcpf_jobid, "Job files not copied:---->>>>");
			token = strtok_r(rest, "\n", &save_ptr);
			while (token != NULL) {
				char *temp_buff = NULL;
				if ((pbs_asprintf(&temp_buff, "%s\n", token)) != -1) {
					log_event(PBSEVENT_DEBUG2, PBS_EVENTCLASS_JOB, LOG_DEBUG,
						  dup_rqcpf_jobid, temp_buff);
					free(temp_buff);
					temp_buff = NULL;
				} else
					break;
				token = strtok_r(NULL, "\n", &save_ptr);
			}
			log_event(PBSEVENT_DEBUG2, PBS_EVENTCLASS_JOB, LOG_DEBUG,
				  dup_rqcpf_jobid, "---->>>>");
		}
	}

	/* log the number of files/directories copied and the time it took */
	copy_stop = copy_stop - copy_start;

#ifdef NAS /* localmod 005 */
	sprintf(log_buffer, "Staged %d/%d items %s over %ld:%02ld:%02ld",
		num_copies, tot_copies, (dir == STAGE_DIR_OUT) ? "out" : "in",
		(long) copy_stop / 3600, ((long) copy_stop % 3600) / 60,
		(long) copy_stop % 60);
#else
	sprintf(log_buffer, "Staged %d/%d items %s over %d:%02d:%02d",
		num_copies, tot_copies, (dir == STAGE_DIR_OUT) ? "out" : "in",
		(int) copy_stop / 3600, ((int) copy_stop % 3600) / 60,
		(int) copy_stop % 60);
#endif /* localmod 005 */
	log_event(PBSEVENT_DEBUG2, PBS_EVENTCLASS_JOB, LOG_DEBUG,
		  dup_rqcpf_jobid, log_buffer);

#if defined(PBS_SECURITY) && (PBS_SECURITY == KRB5)
	free_ticket(ticket, CRED_DESTROY);
#endif

	if (preq->prot == PROT_TPP && stage_inout.bad_files)
		exit(STAGEOUT_FAILURE);

	if (stage_inout.sandbox_private && stage_inout.stageout_failed) {
		exit(STAGEOUT_FAILURE);
	}

	exit(0); /* remember, we are the child, exit not return */
}

/**
 * @brief
 * 	Do post delete file processing and cleanup.
 *
 * @par
 * 	Installed by req_delfile() as the job's ji_mompost handler for the
 * 	child that deletes the files.  Any request held in ji_preq is
 * 	acknowledged regardless of the exit value.  Afterwards the substate
 * 	is set back to OBIT when the child exited with 0; for any other exit
 * 	value the obit is resent to the server.
 *	This is only called in the UNIX code
 *
 * @param[in]	pjob - pointer to the job structure
 * @param[in]	ev   - exit value of the child process
 *
 * @return	none
 *
 */
static void
post_delfile(job *pjob, int ev)
{
	if (pjob == NULL)
		return;

	/* this handler runs once per child */
	pjob->ji_mompost = NULL;

	if (pjob->ji_preq != NULL)
		reply_ack(pjob->ji_preq);
	pjob->ji_preq = NULL;

	if (ev != 0) {
		/* the child had a major error, was killed or crashed: */
		/* send the obit again to restart                      */
		send_obit(pjob, 0);
		return;
	}

	/* back to OBIT: if the server does not quickly proceed  */
	/* with End of Job processing the obit is sent again,    */
	/* see mom_main.c                                        */
	set_job_substate(pjob, JOB_SUBSTATE_OBIT);
	pjob->ji_sampletim = time(0);
}

/**
 * @brief
 * 	req_delfile - delete the specified output/staged files
 *
 * UNIX version
 *
 * @par
 *	In a mock run the request is simply acknowledged.  If the job is
 *	known and a copy child with post_cpyfile pending is still recorded
 *	on it, the request is rejected with PBSE_TRYAGAIN.  Otherwise
 *	fork_to_user() creates a child that removes the files with
 *	del_files():
 *	  - parent: records the child on the job (ji_momsubt, ji_mompost =
 *	    post_delfile), sets the substate to EXITED and returns;
 *	  - fork failure: the request is rejected and the request pointer
 *	    kept on the job is cleared so it cannot be used after the reject;
 *	  - child: replies directly for tcp requests; for other requests
 *	    the files that could not be deleted are only logged.  The child
 *	    always exits with 0.
 *
 * @param[in] preq - pointer to batch_request structure
 *
 * @return	Void
 *
 */
void
req_delfile(struct batch_request *preq)
{
	int rc;
	pid_t pid;
	job *pjob;
	char *bad_list = NULL;
#if defined(PBS_SECURITY) && (PBS_SECURITY == KRB5)
	struct krb_holder *ticket = NULL;
	char *krbccname = NULL;
#endif

	if (mock_run) {
		/*
		 * in mock run we don't have any files to delete,
		 * so just ack request to make server happy
		 * and return
		 */
		reply_ack(preq);
		return;
	}

	pjob = find_job(preq->rq_ind.rq_cpyfile.rq_jobid);
	if (pjob) {
		/*
		 * Check to see if the post_cpyfile has already been
		 * processed.  If it has been processed the momsubt == 0
		 */
		if (pjob->ji_momsubt != 0 && pjob->ji_mompost == post_cpyfile) {
			/* Need to first process the post_cpyfile
			 * request before starting this one.
			 * Tell the server to try again later.
			 */
			req_reject(PBSE_TRYAGAIN, 0, preq);
			return;
		}

		pjob->ji_preq = NULL;
		if (preq->prot == PROT_TPP)
			pjob->ji_preq = preq; /* keep the batch request pointer */
	}

#if defined(PBS_SECURITY) && (PBS_SECURITY == KRB5)
	ticket = alloc_ticket();
	pid = fork_to_user(preq, pjob, ticket);
#else
	pid = fork_to_user(preq, pjob);
#endif
	if (pid > 0) {
		/* parent */
#if defined(PBS_SECURITY) && (PBS_SECURITY == KRB5)
		/* same ticket handling as the parent path of req_cpyfile() */
		free_ticket(ticket, CRED_CLOSE);
#endif
		if (pjob) {
			pjob->ji_momsubt = pid;
			pjob->ji_mompost = post_delfile;
			pjob->ji_sampletim = time(0);
			set_job_substate(pjob, JOB_SUBSTATE_EXITED);
		}
		return; /* parent - continue with someother task */
	} else if (pid < 0) {
#if defined(PBS_SECURITY) && (PBS_SECURITY == KRB5)
		free_ticket(ticket, CRED_DESTROY);
#endif
		/* the request is answered right here; drop the job's reference to it */
		if (pjob)
			pjob->ji_preq = NULL;
		req_reject(-(int) pid, 0, preq);
		return;
	}

#if defined(PBS_SECURITY) && (PBS_SECURITY == KRB5)
	krbccname = get_ticket_ccname(ticket);
	if (krbccname != NULL)
		setenv("KRB5CCNAME", krbccname, 1);
#endif

	/* Child process ... delete the files */

	rc = del_files(&(preq->rq_ind.rq_cpyfile), pjob, &bad_list);
	if (rc != 0) {
		if (preq->prot == PROT_TCP) {
			reply_text(preq, rc, bad_list);
		} else {
			/* no reply is sent from the child here; log the failed files line by line */
			char *token = NULL;
			char *rest = bad_list;
			char *save_ptr = NULL;
			log_event(PBSEVENT_DEBUG2, PBS_EVENTCLASS_JOB, LOG_DEBUG, preq->rq_ind.rq_cpyfile.rq_jobid, "Job files not deleted:---->>>>");
			token = strtok_r(rest, "\n", &save_ptr);
			while (token != NULL) {
				char *temp_buff = NULL;
				if ((pbs_asprintf(&temp_buff, "%s\n", token)) != -1) {
					log_event(PBSEVENT_DEBUG2, PBS_EVENTCLASS_JOB, LOG_DEBUG, preq->rq_ind.rq_cpyfile.rq_jobid, temp_buff);
					free(temp_buff);
					temp_buff = NULL;
				} else
					break;
				token = strtok_r(NULL, "\n", &save_ptr);
			}
			log_event(PBSEVENT_DEBUG2, PBS_EVENTCLASS_JOB, LOG_DEBUG, preq->rq_ind.rq_cpyfile.rq_jobid, "---->>>>");
		}
	} else {
		if (preq->prot == PROT_TCP)
			reply_ack(preq);
	}

#if defined(PBS_SECURITY) && (PBS_SECURITY == KRB5)
	free_ticket(ticket, CRED_DESTROY);
#endif

	exit(0); /* remember, we are the child, exit not return */
}
#endif /* WIN32/UNIX ------------------------------------------------------- */

/**
 * @brief
 * 	Checkpoint the job.
 *
 *	If abort is TRUE, kill it too.  Return a PBS error code.
 *	Done in a child of MOM.
 *
 * @param[in] pjob - job pointer
 * @param[in] abort - indication of whether the job is also to be killed: true or false
 *
 * @return	PBSerrorcode
 * @retval	0		no error
 * @retval	!0		error
 *
 */

int
mom_checkpoint_job(job *pjob, int abort)
{
	int hasold = 0;
	int ckerr = ENOENT;
	struct stat statbuf;
	char path[MAXPATHLEN + 1];
	char oldp[MAXPATHLEN + 1];
	char file[MAXPATHLEN + 1], *name;
	int filelen;
	pbs_task *ptask;
	char *cwdname = NULL;
	struct passwd *pwdp = NULL;

	assert(pjob != NULL);

	DBPRT(("mom_checkpoint_job: %s %s abort\n", pjob->ji_qs.ji_jobid,
	       abort ? "with" : "no"))

	pbs_strncpy(path, path_checkpoint, sizeof(path));
	if (*pjob->ji_qs.ji_fileprefix != '\0')
		strcat(path, pjob->ji_qs.ji_fileprefix);
	else
		strcat(path, pjob->ji_qs.ji_jobid);
	strcat(path, JOB_CKPT_SUFFIX);

	if (stat(path, &statbuf) == 0) {
		(void) strcpy(oldp, path); /* file already exists, rename it */
		(void) strcat(oldp, ".old");
		if (rename(path, oldp) < 0)
#ifdef WIN32
			return 73;
#else
			return errno;
#endif
		hasold = 1;
	}

	if (mkdir(path, 0755) == -1) {
		ckerr = errno;
		goto checkpoint_fail;
	}

	filelen = strlen(path);
	strcpy(file, path);
	name = &file[filelen];

	/* Change to user's home to pick up .cpr */
#ifdef WIN32
	if ((cwdname = getcwd(NULL, _MAX_PATH + 2)) != NULL) {
		if ((is_jattr_set(pjob, JOB_ATR_sandbox)) &&
		    (strcasecmp(get_jattr_str(pjob, JOB_ATR_sandbox), "PRIVATE") == 0)) {
			/* "sandbox=PRIVATE" mode is enabled, so restart job in PBS_JOBDIR */
			pwdp = getpwnam(get_jattr_str(pjob, JOB_ATR_euser));
			if (pwdp != NULL) {
				(void) chdir(jobdirname(pjob->ji_qs.ji_jobid,
							save_actual_homedir(pwdp, pjob)));
			}
		} else {
			pwdp = getpwnam(get_jattr_str(pjob, JOB_ATR_euser));
			if (pwdp != NULL)
				(void) chdir(save_actual_homedir(pwdp, pjob));
		}
	}
#else
	if ((cwdname = getcwd(NULL, 0)) != NULL) {
		if ((is_jattr_set(pjob, JOB_ATR_sandbox)) &&
		    (strcasecmp(get_jattr_str(pjob, JOB_ATR_sandbox), "PRIVATE") == 0)) {
			/* "sandbox=PRIVATE" mode is enabled, so restart job in PBS_JOBDIR */
			pwdp = getpwnam(get_jattr_str(pjob, JOB_ATR_euser));
			if (pwdp != NULL) {
				if (chdir(jobdirname(pjob->ji_qs.ji_jobid, pwdp->pw_dir)) == -1) 
					log_errf(-1, __func__, "chdir failed. ERR : %s", strerror(errno));					
			}
		} else {
			pwdp = getpwnam(get_jattr_str(pjob, JOB_ATR_euser));
			if (pwdp != NULL)
				if (chdir(pwdp->pw_dir) == -1) 
					log_errf(-1, __func__, "chdir failed. ERR : %s", strerror(errno));
		}
	}
#endif

	errno = 0;
	for (ptask = (pbs_task *) GET_NEXT(pjob->ji_tasks);
	     ptask != NULL;
	     ptask = (pbs_task *) GET_NEXT(ptask->ti_jobtask)) {
		int i;

		if (ptask->ti_qs.ti_status != TI_STATE_RUNNING)
			continue;
		sprintf(name, task_fmt, ptask->ti_qs.ti_task);

		/*
		 **	Try action script with no post function.
		 */
		i = do_mom_action_script(abort ? ChkptAbtAction : ChkptAction,
					 pjob, ptask, file, NULL);
		if (i != 0) { /* script didn't work */
			/* if there is no script, try native support */
			if (i == -2)
				i = mach_checkpoint(ptask, file, abort);
			if (i != 0) /* nothing worked */
				goto checkpoint_fail;
		}
		if (stat(file, &statbuf) == -1) { /* no file created */
			int fd;

			/*
			 ** create a zero len file to mark checkpoint
			 */
			fd = open(file, O_CREAT | O_TRUNC | O_WRONLY, 0600);
			if (fd == -1)
				goto errout;
			close(fd);
		}
	}

	/* Checkpoint successful */
	/* return to MOM's rightful lair */
	if (cwdname) {
		if (chdir(cwdname) == -1) 
			log_errf(-1, __func__, "chdir failed. ERR : %s", strerror(errno));		
		free(cwdname);
	}

	sprintf(log_buffer, "checkpointed to %s", path);
	log_event(PBSEVENT_JOB, PBS_EVENTCLASS_JOB, LOG_INFO,
		  pjob->ji_qs.ji_jobid, log_buffer);
	if (hasold)
		(void) remtree(oldp);

	return 0;

checkpoint_fail:
	switch (errno) {
#ifdef ERFLOCK
		case ERFLOCK:
#endif
#ifdef EQUSR
		case EQUSR:
#endif
#ifdef EQGRP
		case EQGRP:
#endif
#ifdef EQACT
		case EQACT:
#endif
#ifdef ENOSDS
		case ENOSDS:
#endif
		case EAGAIN:
		case ENOMEM:
		case ENOLCK:
		case ENOSPC:
		case ENFILE:
		case EDEADLK:
		case EBUSY:
			ckerr = EAGAIN;
			break;
	}

errout:
	/*
	 ** A checkpoint has failed.  Log and return error.
	 */
	sprintf(log_buffer, "checkpoint failed: errno=%d", errno);
	log_event(PBSEVENT_JOB, PBS_EVENTCLASS_JOB, LOG_INFO,
		  pjob->ji_qs.ji_jobid, log_buffer);

	/* return to MOM's rightful lair */
	if (cwdname) {
		if (chdir(cwdname) == -1) 
			log_errf(-1, __func__, "chdir failed. ERR : %s", strerror(errno));		
		free(cwdname);
	}

	/*
	 ** See if any checkpoints worked and abort is set.
	 ** If so, we need to restart these tasks so the whole job is
	 ** still running.  This has to wait until we reap the
	 ** aborted task(s).
	 */
	if (abort)
		return ckerr;

	/*
	 ** Clean up files.
	 */
	(void) remtree(path);
	if (hasold) {
		if (rename(oldp, path) == -1) {
			pjob->ji_qs.ji_svrflags &= ~JOB_SVFLG_CHKPT;
			(void) job_save(pjob);
		}
	}
	return ckerr;
}

/**
 * @brief
 * 	post processor for start_checkpoint()
 *
 * Called from scan_for_terminated() when found in ji_mompost;
 * This sets the "has checkpoint image" bit in the job.
 *
 * local_checkpoint() also calls this function directly when the
 * checkpoint was performed without a child process.
 *
 * On success the job is flagged JOB_SVFLG_CHKPT and saved.  On failure
 * the incomplete checkpoint directory is removed and a previous ".old"
 * directory is moved back, unless abort was requested and at least one
 * task was checkpointed, in which case MOM_CHKPT_POST is set instead.
 *
 * @param[in]	pjob	job pointer
 * @param[in]	ev	exit value of checkpoint process
 *
 * @return 	Void
 *
 */

void
post_chkpt(job *pjob, int ev)
{
	char path[MAXPATHLEN + 1];    /* checkpoint directory of this job */
	char oldname[MAXPATHLEN + 1]; /* path with ".old" appended */
	struct stat statbuf;
	DIR *dir;
	struct dirent *pdir;
	tm_task_id tid;
	pbs_task *ptask;
	int i;
	/* non-zero when MOM_CHKPT_ACTIVE is set in ji_flags; local_checkpoint()
	 * sets that flag only for a checkpoint with abort */
	int abort = pjob->ji_flags & MOM_CHKPT_ACTIVE;

	DBPRT(("%s: %s %s abort err %d\n", __func__, pjob->ji_qs.ji_jobid,
	       abort ? "with" : "no", ev))

	if (ev != 0) {
		/* checkpoint action exited with an error, set error flag */
		pjob->ji_flags |= MOM_SISTER_ERR;
		if (pjob->ji_preq) {
			/* as there is request waiting, reply with error */
			req_reject(PBSE_CKPSHORT, ev, pjob->ji_preq);
			/* and clear request pointer */
			pjob->ji_preq = NULL;
		}
	}

	if (pjob->ji_qs.ji_svrflags & JOB_SVFLG_HERE) {
		/*
		 **	If I'm MS, I need to check for checkpoint events
		 **	to see if non-local processing is still going on.
		 */

		if (pjob->ji_momsubt != 0) /* child running */
			return;

		/*
		 ** See if there are any checkpoint events left
		 ** to wait for.
		 */
		for (i = 0; i < pjob->ji_numnodes; i++) {
			hnodent *np;
			eventent *ep;

			np = &pjob->ji_hosts[i];
			ep = (eventent *) GET_NEXT(np->hn_events);
			/* stop at the first checkpoint-related event on this host */
			while (ep) {
				if (ep->ee_command == IM_CHECKPOINT)
					break;
				if (ep->ee_command == IM_CHECKPOINT_ABORT)
					break;
				ep = (eventent *) GET_NEXT(ep->ee_next);
			}
			if (ep != NULL)
				return; /* still waiting on this host */
		}
	} else
		post_reply(pjob, ev); /* JOB_SVFLG_HERE not set: pass the result on */

	/*
	 ** No more operations are waiting.
	 ** Now is the time to clear ji_mompost.
	 ** Wait to turn off MOM_CHKPT_ACTIVE until scan_for_exiting is done.
	 */
	pjob->ji_mompost = NULL;

	/*
	 ** Set the TI_FLAGS_CHKPT flag for each task that was checkpointed.
	 */
	pbs_strncpy(path, path_checkpoint, sizeof(path));
	if (*pjob->ji_qs.ji_fileprefix != '\0')
		strcat(path, pjob->ji_qs.ji_fileprefix);
	else
		strcat(path, pjob->ji_qs.ji_jobid);
	strcat(path, JOB_CKPT_SUFFIX);

	dir = opendir(path);
	if (dir != NULL) {
		/* errno is cleared before every readdir() so that a NULL return
		 * with errno still 0 can be told apart from a read error */
		while (errno = 0, (pdir = readdir(dir)) != NULL) {
			if (pdir->d_name[0] == '.')
				continue;
			/* entry names are parsed as hexadecimal task ids */
			tid = strtoul(pdir->d_name, NULL, 16);
			if (tid == 0)
				continue;
			ptask = task_find(pjob, tid);
			if (ptask == NULL)
				continue;
			ptask->ti_flags |= TI_FLAGS_CHKPT;
		}
		if (errno != 0 && errno != ENOENT) {
			sprintf(log_buffer, "readdir failed for directory :%s", path);
			log_joberr(errno, __func__, log_buffer, pjob->ji_qs.ji_jobid);
		}
		closedir(dir);
	}

	if ((pjob->ji_flags & MOM_SISTER_ERR) == 0) {
		/*
		 **	Everything worked.  The checkpoint process
		 **	is done and no IM_CHECKPOINT events are
		 **	outstanding.  Any resources owned by the
		 **	job should be cleaned up here.
		 **	If abort is set the job's tasks will be killed
		 **	and should be picked up in scan_for_exiting().
		 **	Any saved batch request will be acked after the
		 **	obit is sent.
		 */
		if (abort) {
			mom_hook_input_t *hook_input = NULL;

			hook_input = (mom_hook_input_t *) malloc(sizeof(mom_hook_input_t));
			if (hook_input) {
				mom_hook_input_init(hook_input);
				hook_input->pjob = pjob;
			}
			/* run the execjob_end hooks; if they run in the background only
			 * note that here, otherwise (including a failed malloc above)
			 * release hook_input and terminate the job now */
			if ((hook_input != NULL) && (mom_process_hooks(HOOK_EVENT_EXECJOB_END, PBS_MOM_SERVICE_NAME, mom_host, hook_input, NULL, NULL, 0, 1) == HOOK_RUNNING_IN_BACKGROUND)) {
				pjob->ji_hook_running_bg_on = BG_CHECKPOINT_ABORT;
			} else {
				free(hook_input);
				exiting_tasks = 1;
				term_job(pjob);
			}
		} else if (pjob->ji_preq) {
			/*
			 **	If abort is not set and there is a request
			 **	saved, do an ack for it.  This will be
			 **	the response for the hold.
			 */
			reply_ack(pjob->ji_preq);
			pjob->ji_preq = NULL;
		}
		/*
		 ** Turn off TI_FLAGS_SAVECKP.
		 */
		for (ptask = (pbs_task *) GET_NEXT(pjob->ji_tasks);
		     ptask != NULL;
		     ptask = (pbs_task *) GET_NEXT(ptask->ti_jobtask)) {
			ptask->ti_flags &= ~TI_FLAGS_SAVECKP;
		}
		/* record that the job now has a checkpoint image */
		pjob->ji_qs.ji_svrflags |= JOB_SVFLG_CHKPT;
		(void) job_save(pjob);
		return;
	}
	/* error path from here on; the error flag has been consumed */
	pjob->ji_flags &= ~MOM_SISTER_ERR;

	/*
	 ** If we get here, an error happened.  Only try to recover
	 ** if we had abort set.
	 */
	if (pjob->ji_qs.ji_un.ji_momt.ji_exitstat == JOB_EXEC_CHKP)
		pjob->ji_qs.ji_un.ji_momt.ji_exitstat = 0;

	/*
	 ** If abort is on, I'm MS and there is a sisterhood, send restart.
	 */
	if (abort && (pjob->ji_qs.ji_svrflags & JOB_SVFLG_HERE) &&
	    pjob->ji_numnodes > 1) {
		int i;

		pjob->ji_mompost = post_restart;
		i = send_sisters(pjob, IM_RESTART, NULL);

		/* the result is compared with the number of other nodes; on a
		 * mismatch the job is killed */
		if (i != (pjob->ji_numnodes - 1)) {
			log_joberr(errno, __func__, "could not send restart",
				   pjob->ji_qs.ji_jobid);
			(void) kill_job(pjob, SIGKILL);
			return;
		}
	}

	/* look for any task that was checkpointed; ptask is NULL if none */
	for (ptask = (pbs_task *) GET_NEXT(pjob->ji_tasks);
	     ptask != NULL;
	     ptask = (pbs_task *) GET_NEXT(ptask->ti_jobtask)) {
		if (ptask->ti_flags & TI_FLAGS_CHKPT)
			break;
	}

	/*
	 ** If any tasks were checkpointed and abort was set, set a flag for
	 ** scan_for_exiting() to be able to deal with a failed checkpoint.
	 */
	if (ptask != NULL && abort) {
		pjob->ji_flags |= MOM_CHKPT_POST;
		return;
	}

	/*
	 ** No tasks were checkpointed, or abort was not set.
	 ** Get rid of incomplete checkpoint directory and
	 ** move old chkpt dir back to regular if it exists.
	 */
	(void) remtree(path);
	strcpy(oldname, path);
	strcat(oldname, ".old");
	if (stat(oldname, &statbuf) == 0) {
		/* if the old checkpoint cannot be restored the job has none */
		if (rename(oldname, path) == -1) {
			pjob->ji_qs.ji_svrflags &= ~JOB_SVFLG_CHKPT;
		}
	}

	/*
	 ** Set TI_FLAGS_CHKPT back on if it was on before this attempt
	 ** started.  Turn off TI_FLAGS_SAVECKP.
	 */
	for (ptask = (pbs_task *) GET_NEXT(pjob->ji_tasks);
	     ptask != NULL;
	     ptask = (pbs_task *) GET_NEXT(ptask->ti_jobtask)) {
		if (ptask->ti_flags & TI_FLAGS_SAVECKP)
			ptask->ti_flags |= TI_FLAGS_CHKPT;
		ptask->ti_flags &= ~TI_FLAGS_SAVECKP;
	}

	/* clear checkpoint active flag so a following checkpoint can happen */
	pjob->ji_flags &= ~MOM_CHKPT_ACTIVE;
	(void) job_save(pjob);
	return;
}

/**
 * @brief
 *	Checkpoint the local tasks of a job, in a child of MOM when a
 *	child can be created, otherwise in the calling process.
 *
 *	The parent records the child's pid in ji_momsubt and installs
 *	post_chkpt() in ji_mompost.  When fork_me() fails with ENOSYS the
 *	checkpoint runs in the caller and post_chkpt() is called directly.
 *
 * @param[in] pjob - pointer to job
 * @param[in] abort - non-zero selects the checkpoint-and-abort action
 * @param[in] preq - pointer to batch_request structure, may be NULL
 *
 * @return	int
 * @retval	PBSE_NONE	child started (returned in the parent)
 * @retval	PBSE_NOSUP	no native support and no action script
 * @retval	PBSE_MOMREJECT	a subtask or post function is already pending
 * @retval	PBSE_SYSTEM	fork_me() failed for a reason other than ENOSYS
 * @retval	other		result of the checkpoint when done in the caller
 *
 * @note	In the child this function does not return; it calls exit()
 *		with the checkpoint result.
 *
 */
int
local_checkpoint(job *pjob, int abort, struct batch_request *preq) /* may be null */
{
	attribute hold_attr;
	svrattrl *hold_ent;
	pbs_task *tp;
	pid_t child;
	int status;
	int hold_rc = 1; /* set_attr_generic() result; stays 1 when it is not called */

	DBPRT(("local_checkpoint: %s %s abort %s request\n",
	       pjob->ji_qs.ji_jobid,
	       abort ? "with" : "no", preq ? "with" : "no"))

	/* no checkpoint, reject request */
	status = (int) (abort ? ChkptAbtAction : ChkptAction);
	if (!mom_does_chkpnt && !mom_action[status].ma_script)
		return PBSE_NOSUP;

	/* refuse while another subtask or post function is outstanding */
	if (pjob->ji_momsubt || pjob->ji_mompost)
		return PBSE_MOMREJECT;

	/*
	 * Remember which tasks already had a checkpoint (TI_FLAGS_SAVECKP)
	 * and clear TI_FLAGS_CHKPT for this attempt.
	 */
	tp = (pbs_task *) GET_NEXT(pjob->ji_tasks);
	while (tp != NULL) {
		if (tp->ti_flags & TI_FLAGS_CHKPT)
			tp->ti_flags |= TI_FLAGS_SAVECKP;
		tp->ti_flags &= ~TI_FLAGS_CHKPT;
		tp = (pbs_task *) GET_NEXT(tp->ti_jobtask);
	}

	/* now try set up as child of MOM */
	child = fork_me(-1);
	if (child < 0) {
		if (errno != ENOSYS)
			return PBSE_SYSTEM; /* error on fork */
	} else if (child > 0) {
		/* parent, record pid in job for when child terminates */

		DBPRT(("local_checkpoint: %s pid %d\n", pjob->ji_qs.ji_jobid, child))
		pjob->ji_momsubt = child;
		pjob->ji_mompost = post_chkpt;
		pjob->ji_actalarm = 0;

		/* If we are going to have tasks dying, set a flag. */
		if (abort) {
			pjob->ji_flags |= MOM_CHKPT_ACTIVE;
			pjob->ji_qs.ji_un.ji_momt.ji_exitstat = JOB_EXEC_CHKP;
		}
		(void) job_save(pjob);

		return PBSE_NONE; /* parent return */
	}

	/* if fork available child does the checkpoint else by foreground */

	clear_attr(&hold_attr, &job_attr_def[(int) JOB_ATR_hold]);
	if (preq) {
		hold_ent = (svrattrl *) GET_NEXT(preq->rq_ind.rq_hold.rq_orig.rq_attr);
		if (hold_ent)
			hold_rc = set_attr_generic(&hold_attr, &job_attr_def[(int) JOB_ATR_hold], hold_ent->al_value, NULL, INTERNAL);
	}

	status = mom_checkpoint_job(pjob, abort);
	/* the site hook runs only after a good checkpoint and a decoded hold value */
	if (status == 0 && hold_rc == 0)
		status = site_mom_postchk(pjob, (int) hold_attr.at_val.at_long);

	if (child == 0)
		exit(status); /* zero exit tells main chkpnt ok */

	/* no child was created: finish the job bookkeeping in this process */
	pjob->ji_preq = preq;
	if (abort) {
		pjob->ji_flags |= MOM_CHKPT_ACTIVE;
		pjob->ji_qs.ji_un.ji_momt.ji_exitstat = JOB_EXEC_CHKP;
	}

	(void) job_save(pjob);

	post_chkpt(pjob, status);

	return (status);
}

/**
 * @brief
 * 	start_checkpoint - start a checkpoint going
 *
 *	checkpoint done from a child because it takes a while
 *
 *	The local checkpoint is started through local_checkpoint(); when the
 *	job spans more than one node the matching checkpoint command is then
 *	sent to the sisters.  On success the request is stored in ji_preq.
 *
 * @param[in] pjob - pointer to job
 * @param[in] abort - indication for abort true or false
 * @param[in] preq - pointer to batch_request structure, may be NULL;
 *		     it is rejected here on failure only when non-NULL
 *
 * @return 	int
 * @retval	0 	success
 * @retval	!0	Error (the local_checkpoint() result, or PBSE_SYSTEM
 *			when the command did not reach every sister)
 *
 */
int
start_checkpoint(job *pjob,
		 int abort,
		 struct batch_request *preq) /* may be null */
{
	int rc;

	DBPRT(("start_checkpoint: %s %s abort %s request\n",
	       pjob->ji_qs.ji_jobid,
	       abort ? "with" : "no", preq ? "with" : "no"))

	if ((rc = local_checkpoint(pjob, abort, preq)) != PBSE_NONE) {
		/* preq is optional: never hand a NULL request to req_reject() */
		if (preq != NULL)
			req_reject(rc, errno, preq);
		return rc;
	}

	/*
	 ** If there is a sisterhood, send command.
	 */
	if (pjob->ji_numnodes > 1) {
		int i;

		i = send_sisters(pjob, abort ? IM_CHECKPOINT_ABORT : IM_CHECKPOINT, NULL);

		/* the result is compared with the number of other nodes */
		if (i != (pjob->ji_numnodes - 1)) {
			pjob->ji_flags |= MOM_SISTER_ERR;
			if (preq != NULL)
				req_reject(PBSE_SYSTEM, errno, preq);
			return PBSE_SYSTEM;
		}
	}
	pjob->ji_preq = preq;

	return 0;
}

/**
 * @brief
 *	Restart the job.
 *	May be done in a child of MOM.
 *
 *	For every task of the job that has a checkpoint file in the job's
 *	checkpoint directory, the restart action script is tried first and
 *	native restart (mach_restart) is used when do_mom_action_script()
 *	returns -2.  Tasks without a checkpoint file are skipped.  The first
 *	task that fails ends the loop.
 *
 * @param[in] pjob - pointer to job
 *
 * @return 	int
 * @retval	0	no error (PBSE_NONE)
 * @retval	!0	error: errno after a failed site_mom_prerst(), EAGAIN
 *			when native restart failed with one of the retryable
 *			errno values listed below, otherwise ENOENT
 *
 */

int
mom_restart_job(job *pjob)
{
	int i; /* scratch: site_mom_prerst result, path length, action results */
	int rserr = ENOENT; /* value returned unless replaced below */
	char path[MAXPATHLEN + 1]; /* checkpoint directory, then one task file */
	char *filnam;		   /* end of the directory part of path */
	tm_task_id taskid;
	pbs_task *ptask;
	int tcount = 0; /* number of tasks restarted */
	struct stat sbuf;
	extern pid_t mom_pid;

	/* changing directory to job user's home */
	char *cwdname = NULL; /* directory to return to; allocated by getcwd() */
	struct passwd *pwdp = NULL;

	assert(pjob != NULL);
	DBPRT(("%s: %s\n", __func__, pjob->ji_qs.ji_jobid))

	/* perform any site required setup before restart */
	if ((i = site_mom_prerst(pjob)) != 0) {
		sprintf(log_buffer, "Pre-restart failed: return=%d errno=%d",
			i, errno);
		rserr = errno;
		goto done;
	}

	/* build the checkpoint directory name */
	pbs_strncpy(path, path_checkpoint, sizeof(path));
	if (*pjob->ji_qs.ji_fileprefix != '\0')
		strcat(path, pjob->ji_qs.ji_fileprefix);
	else
		strcat(path, pjob->ji_qs.ji_jobid);
	strcat(path, JOB_CKPT_SUFFIX);

	/* each task's file name is written at filnam, directly after the directory */
	i = strlen(path);
	filnam = &path[i];

	/* Change to user's home or PBS_JOBDIR to pick up .cpr */
#ifdef WIN32
	if ((cwdname = getcwd(NULL, _MAX_PATH + 2)) != NULL) {
		if ((is_jattr_set(pjob, JOB_ATR_sandbox)) &&
		    (strcasecmp(get_jattr_str(pjob, JOB_ATR_sandbox), "PRIVATE") == 0)) {
			/* "sandbox=PRIVATE" mode is enabled, so restart job in PBS_JOBDIR */
			pwdp = getpwnam(get_jattr_str(pjob, JOB_ATR_euser));
			if (pwdp != NULL) {
				if (chdir(jobdirname(pjob->ji_qs.ji_jobid,
					save_actual_homedir(pwdp, pjob))) == -1) 
					log_errf(-1, __func__, "chdir failed. ERR : %s", strerror(errno));				
			}
		} else {
			pwdp = getpwnam(get_jattr_str(pjob, JOB_ATR_euser));
			if (pwdp != NULL)
				if (chdir(save_actual_homedir(pwdp, pjob)) == -1) 
					log_errf(-1, __func__, "chdir failed. ERR : %s", strerror(errno));
		}
	}
#else
	if ((cwdname = getcwd(NULL, 0)) != NULL) {
		if ((is_jattr_set(pjob, JOB_ATR_sandbox)) &&
		    (strcasecmp(get_jattr_str(pjob, JOB_ATR_sandbox), "PRIVATE") == 0)) {
			/* "sandbox=PRIVATE" mode is enabled, so restart job in PBS_JOBDIR */
			pwdp = getpwnam(get_jattr_str(pjob, JOB_ATR_euser));
			if (pwdp != NULL)
				if (chdir(jobdirname(pjob->ji_qs.ji_jobid, pwdp->pw_dir)) == -1) 
					log_errf(-1, __func__, "chdir failed. ERR : %s", strerror(errno));		
		} else {
			pwdp = getpwnam(get_jattr_str(pjob, JOB_ATR_euser));
			if (pwdp != NULL)
				if (chdir(pwdp->pw_dir) == -1) {
					log_errf(-1, __func__, "chdir failed. ERR : %s", strerror(errno));			
			}
		}
	}
#endif

	for (ptask = (pbs_task *) GET_NEXT(pjob->ji_tasks);
	     ptask != NULL;
	     ptask = (pbs_task *) GET_NEXT(ptask->ti_jobtask)) {

		taskid = ptask->ti_qs.ti_task;
		sprintf(filnam, task_fmt, taskid);

		/* check to see if checkpoint file exists */
		if (stat(path, &sbuf) == -1) {
			if (errno == ENOENT)
				continue; /* no checkpoint file for this task */

			/* any other stat error ends the restart; rserr is left as is */
			sprintf(log_buffer,
				"checkpoint path %s stat failed %d",
				path, errno);
			goto done;
		}

		/*
		 **	Try action script with no post function.
		 */
		i = do_mom_action_script(RestartAction, pjob, ptask,
					 path, NULL);
		if (i == 0) { /* script worked */
			tcount++;
			continue;
		}

		/* if there is no script, try native support */
		if (i == -2) {
			i = mach_restart(ptask, path);
			if (i != -1) { /* it worked */
				tcount++;
				continue;
			}
			/*
			 ** Look to see if errno is any of the set
			 ** of values that should cause us to return
			 ** EAGAIN.  Don't need to do this for action
			 ** script since it runs in a child.
			 */
			switch (errno) {
#ifdef ERFLOCK
				case ERFLOCK:
#endif
#ifdef EQUSR
				case EQUSR:
#endif
#ifdef EQGRP
				case EQGRP:
#endif
#ifdef EQACT
				case EQACT:
#endif
#ifdef ENOSDS
				case ENOSDS:
#endif
				case EAGAIN:
				case ENOMEM:
				case ENOLCK:
				case ENOSPC:
				case ENFILE:
				case EDEADLK:
				case EBUSY:
					rserr = EAGAIN;
					break;
			}
		}

		/* reached when the script failed or native restart failed */
		sprintf(log_buffer,
			"restart of task %8.8X from file %s failed",
			taskid, path);
		goto done;
	}

	sprintf(log_buffer, "Restarted %d task(s)", tcount);
	rserr = PBSE_NONE;

done:
	/* log if not the main mom */
	if (getpid() != mom_pid) {
		log_event(PBSEVENT_JOB, PBS_EVENTCLASS_JOB, LOG_INFO,
			  pjob->ji_qs.ji_jobid, log_buffer);
	}

	/* return to MOM's rightful lair */
	if (cwdname) {
		if (chdir(cwdname) == -1) 
			log_errf(-1, __func__, "chdir failed. ERR : %s", strerror(errno));		
		free(cwdname);
	}
	return rserr;
}

/**
 * @brief
 * 	post_restart - post processor for a job restart
 *
 *	Installed in ji_mompost by local_restart() and by post_chkpt().
 *	(The original header names catch_child() as the caller; that caller
 *	is outside this section of the file.)
 *
 *	When a restart error was recorded (MOM_SISTER_ERR) the job is put
 *	into the EXITING substate.  Otherwise the job goes to SUSPEND when
 *	it carries JOB_SVFLG_Suspend, or else every checkpointed task is
 *	marked running and the job goes to RUNNING.  A saved batch request
 *	is acknowledged in both non-error cases.
 *
 * @param[in] pjob - pointer to job
 * @param[in] ev -  exit value of the child process
 *
 * @return	Void
 *
 */
void
post_restart(job *pjob, int ev)
{
	pbs_task *tp;

	DBPRT(("post_restart: %s err %d\n", pjob->ji_qs.ji_jobid, ev))

	/* a non-zero result from post_action() means there is nothing to do yet */
	if (post_action(pjob, IM_RESTART, ev))
		return;

	/*
	 * No more operations are waiting, so ji_mompost and the
	 * restart-active flag can be cleared.
	 */
	pjob->ji_mompost = NULL;
	pjob->ji_flags &= ~MOM_RESTART_ACTIVE;

	if (pjob->ji_flags & MOM_SISTER_ERR) {
		/* an error happened during the restart */
		set_job_substate(pjob, JOB_SUBSTATE_EXITING);
		exiting_tasks = 1;
		return;
	}

	/* The restart worked. */
	pjob->ji_flags &= ~MOM_SISTER_ERR;

	/* reset sample time for cpupercent, to start over */
	pjob->ji_sampletim = 0;

	if (pjob->ji_qs.ji_svrflags & JOB_SVFLG_Suspend) {
		set_job_substate(pjob, JOB_SUBSTATE_SUSPEND);
		stop_walltime(pjob);
	} else {
		/* Set all checkpointed tasks running. */
		tp = (pbs_task *) GET_NEXT(pjob->ji_tasks);
		while (tp != NULL) {
			if (tp->ti_flags & TI_FLAGS_CHKPT) {
				tp->ti_qs.ti_status = TI_STATE_RUNNING;
				/*
				 * KLUDGE
				 * The sid for the task is saved as a negative value in
				 * scan_for_exiting() when it goes into DEAD state. We
				 * need to keep it for the restarted task if a new sid
				 * has not been generated.
				 */
				if (tp->ti_qs.ti_sid < 0)
					tp->ti_qs.ti_sid = -tp->ti_qs.ti_sid;
				(void) task_save(tp);
			}
			tp = (pbs_task *) GET_NEXT(tp->ti_jobtask);
		}

		set_job_substate(pjob, JOB_SUBSTATE_RUNNING);
		start_walltime(pjob);

		if (mom_get_sample() != PBSE_NONE) {
			time_resc_updated = time_now;
			(void) mom_set_use(pjob);
		}
	}

	/*
	 * If there is a request saved, do an ack for it.  This will be
	 * the response for the release.
	 */
	if (pjob->ji_preq != NULL) {
		reply_ack(pjob->ji_preq);
		pjob->ji_preq = NULL;
	}
}

/**
 * @brief
 *	Restart the local tasks of a job, either in the calling process
 *	(foreground) or in a child of MOM (background).
 *
 *	Background mode follows restart_background, except that when a
 *	restart action script is configured it follows restart_transmogrify
 *	instead (foreground when that is set).
 *
 * @param[in] pjob - pointer to job
 * @param[in] preq - pointer to batch_request structure, may be NULL
 *
 * @return	int
 * @retval	PBSE_NONE	restart done (foreground) or child started
 * @retval	PBSE_NOSUP	no native support and no restart script
 * @retval	PBSE_CKPBSY	another operation is pending, or a foreground
 *				restart failed with a retryable error
 * @retval	PBSE_SYSTEM	foreground restart failed, or fork_me() failed
 *				for a reason other than ENOSYS
 * @retval	other		mom_restart_job() result when fork_me() failed
 *				with ENOSYS and the restart ran in the caller
 *
 * @note	In the child this function does not return; it calls exit()
 *		with the restart result.
 *
 */
int
local_restart(job *pjob,
	      struct batch_request *preq) /* may be null */
{
	pid_t pid;
	int rc;
	int background = restart_background;

	DBPRT(("local_restart: %s %s request\n",
	       pjob->ji_qs.ji_jobid, preq ? "with" : "without"))

	/* no restart, reject request */
	if ((mom_does_chkpnt == 0) &&
	    (mom_action[RestartAction].ma_script == NULL))
		return PBSE_NOSUP;

	/*
	 ** If a script is going to transmogrify, don't run in the
	 ** background, otherwise, do run in the background.
	 */
	if (mom_action[RestartAction].ma_script != NULL)
		background = (restart_transmogrify ? FALSE : TRUE);

	if ((pjob->ji_mompost != NULL) && (pjob->ji_mompost != post_restart))
		return PBSE_CKPBSY;

	/*
	 * If restart_background is NOT enabled, perform the restart
	 * in the foreground.
	 */
	if (background == FALSE) {
		rc = mom_restart_job(pjob);

		/* retry for any kind of changable thing */
		switch (rc) {
			case PBSE_NONE:
				break;
			/*
			 * mom_restart_job() reports the retryable class of
			 * failures as EAGAIN.  The literal 75 that was tested
			 * here before is kept so existing behavior for that
			 * value does not change.
			 */
			case EAGAIN:
#if EAGAIN != 75
			case 75:
#endif
				rc = PBSE_CKPBSY;
				pjob->ji_qs.ji_un.ji_momt.ji_exitstat = JOB_EXEC_RETRY;
				break;
			default:
				rc = PBSE_SYSTEM;
				pjob->ji_qs.ji_un.ji_momt.ji_exitstat =
					JOB_EXEC_BADRESRT;
				break;
		}
		/* post_restart gets called from start_exec or finish_exec */
		return rc;
	}

	/*
	 **	Check to see if anything is going on.
	 */
	if (pjob->ji_momsubt != 0)
		return PBSE_CKPBSY;

	/*
	 * If we get to this point, restart_background is enabled, perform
	 * the restart as a subtask of MOM.
	 */
	pid = fork_me(-1);
	if ((pid < 0) && (errno != ENOSYS))
		return PBSE_SYSTEM; /* error on fork */

	if (pid > 0) {
		/* parent, records pid in job for when child terminates */

		DBPRT(("local_restart: %s pid %d\n", pjob->ji_qs.ji_jobid, pid))
		pjob->ji_momsubt = pid;
		pjob->ji_mompost = post_restart;
		pjob->ji_actalarm = 0;
		pjob->ji_flags |= MOM_RESTART_ACTIVE;
		(void) job_save(pjob);
		return PBSE_NONE; /* parent return */
	}
	/* child - does the restart if fork available else by foreground */
	rc = mom_restart_job(pjob);
	if (pid) {
		/* fork_me() failed with ENOSYS: finish up in this process */
		pjob->ji_preq = preq;
		post_restart(pjob, rc);
		return (rc);
	} else
		exit(rc); /* zero exit tells main restart ok */
}

/**
 * @brief
 *	Parse a resourcedef file and return an array of resource names
 *
 *	The resource name is the first whitespace-delimited word of each
 *	line.  Blank lines and lines whose first word starts with '#' are
 *	skipped.  When a line is longer than the read buffer, only its
 *	first chunk is examined.
 *
 * @param[in] path - Path to a resourcedef file
 *
 * @par The return value is to be freed by the caller using a call to
 * free_str_array()
 *
 * @return 	an array of resource names.
 * @retval	NULL-terminated array of resource names; it holds only the
 *		terminating NULL when the file cannot be opened
 * @retval	NULL on failure (memory allocation)
 *
 */
static char **
get_resources_from_file(char *path)
{
	static const char blanks[] = " \t\r\n";
	FILE *fp;
	char line[256];
	char **resources;
	int numlines;
	int i;
	int at_line_start = 1; /* next fgets() chunk begins a new line */

	/* Assume no resources */
	resources = malloc(sizeof(char *));
	if (resources == NULL) {
		log_err(errno, __func__, MALLOC_ERR_MSG);
		return NULL;
	}

	resources[0] = NULL;

	/* Note that the absence of a file is not an error, it means that
	 * there are no resources defined
	 */
	if ((fp = fopen(path, "r")) == NULL) {
		return resources;
	}

	/* first pass: count fgets() chunks, an upper bound on the names */
	for (numlines = 0; fgets(line, sizeof(line), fp); numlines++)
		;

	(void) fseek(fp, 0, SEEK_SET);

	/* now that we have the number of resources defined, we allocate an
	 * array that can hold those resources names
	 */
	free(resources);
	resources = malloc((numlines + 1) * sizeof(char *));
	if (resources == NULL) {
		log_err(errno, __func__, MALLOC_ERR_MSG);
		fclose(fp);
		return NULL;
	}

	/* second pass: never store more names than the array was sized for,
	 * even if the file grew since the first pass
	 */
	i = 0;
	while (i < numlines && fgets(line, sizeof(line), fp) != NULL) {
		int new_line = at_line_start;
		char *name;

		/* a chunk without '\n' means the line continues in the next one */
		at_line_start = (strchr(line, '\n') != NULL);
		if (!new_line)
			continue; /* remainder of an over-long line */

		/* isolate the first word, dropping the trailing newline too */
		name = line + strspn(line, blanks);
		name[strcspn(name, blanks)] = '\0';
		if (name[0] == '\0' || name[0] == '#')
			continue; /* blank line or comment */

		resources[i] = strdup(name);
		if (resources[i] == NULL) {
			/* resources[i] is NULL here, so the array is terminated */
			log_err(errno, __func__, MALLOC_ERR_MSG);
			free_str_array(resources);
			fclose(fp);
			return NULL;
		}
		i++;
	}
	resources[i] = NULL;
	fclose(fp);

	return resources;
}

/**
 * @brief
 *	Work out which resource names disappeared between two name lists.
 *
 * @param[in] r1 - NULL-terminated array of 'old' resource names, i.e. the
 *		   names prior to the update
 * @param[in] r2 - NULL-terminated array of 'new' resource names
 *
 * @par The returned array is to be freed by the caller.  Its entries are
 * the very pointers stored in r1 (the strings are not copied), so they
 * stay valid only as long as r1's strings do.
 *
 * @return NULL-terminated array of the names that are in r1 but not in r2
 * @retval	names of resource	Success
 * @retval	NULL			Failure (memory allocation)
 *
 */

static char **
get_deleted_resources(char **r1, char **r2)
{
	char **gone;
	char **oldp;
	char **newp;
	int old_count = 0;
	int ngone = 0;

	while (r1[old_count] != NULL)
		old_count++;

	/* worst case is that all resources in r1 were deleted */
	gone = malloc((old_count + 1) * sizeof(char *));
	if (gone == NULL) {
		log_err(errno, __func__, MALLOC_ERR_MSG);
		return NULL;
	}

	for (oldp = r1; *oldp != NULL; oldp++) {
		int still_there = 0;

		for (newp = r2; *newp != NULL; newp++) {
			if (strcmp(*oldp, *newp) == 0) {
				still_there = 1;
				break;
			}
		}
		/* the old name is no longer reported in r2 */
		if (!still_there)
			gone[ngone++] = *oldp;
	}
	gone[ngone] = NULL;

	return gone;
}

/**
 * @brief
 *	Update vnodes when resource definitions have changed
 *
 *	Builds a new vnode list holding every attribute of the global list
 *	vnlp except "resources_available.<name>" attributes whose <name> is
 *	in deleted_resources.  If at least one attribute was dropped the new
 *	list replaces vnlp (with its vnl_modtime set to the current time);
 *	otherwise the new list is discarded and vnlp is left untouched.
 *
 * @param deleted_resources - NULL-terminated array of deleted resource
 *			      names; may be NULL or empty, in which case
 *			      nothing is done
 *
 * @return	Void
 *
 */
static void
update_vnodes_on_resourcedef_change(char **deleted_resources)
{
	extern vnl_t *vnlp;
	static const char attrprefix[] = "resources_available.";
	const size_t prefixlen = sizeof(attrprefix) - 1;
	int i, j, k;
	vnl_t *nv = NULL;
	int mod_vnlp = 0; /* track whether vnode list was modified */

	if (vnlp == NULL)
		return;

	/* The deleted resources may have a single NULL entry if no resources
	 * are defined
	 */
	if ((deleted_resources == NULL) || (deleted_resources[0] == NULL)) {
		return;
	}

	if (vnl_alloc(&nv) == NULL) {
		log_err(errno, __func__, "vnl_alloc failed!");
		return;
	}

	for (i = 0; i < vnlp->vnl_used; i++) {
		vnal_t *vnrlp = VNL_NODENUM(vnlp, i);
		for (j = 0; j < vnrlp->vnal_used; j++) {
			vna_t *vnrp = VNAL_NODENUM(vnrlp, j);
			int deleted = 0;

			/*
			 * An attribute names a deleted resource exactly when it
			 * starts with the prefix and the rest equals one of the
			 * deleted names.  Comparing in place avoids building
			 * "resources_available.<name>" in a heap buffer for
			 * every pair, as was done before.
			 */
			if (strncmp(vnrp->vna_name, attrprefix, prefixlen) == 0) {
				const char *resname = vnrp->vna_name + prefixlen;

				for (k = 0; deleted_resources[k] != NULL; k++) {
					if (strcmp(resname, deleted_resources[k]) == 0) {
						deleted = 1;
						break;
					}
				}
			}

			if (!deleted) {
				/* attribute not found in deleted resources list,
				 * add it to new vnode list
				 * NOTE(review): the result of vn_addvnr() is not
				 * checked, as in the original code */
				vn_addvnr(nv, vnrlp->vnal_id, vnrp->vna_name, vnrp->vna_val, vnrp->vna_type, vnrp->vna_flag, NULL);
			} else {
				/* attribute was deleted, do not add it to new
				 * vnode list, keep track of modification */
				mod_vnlp = 1;
			}
		}
	}

	if (mod_vnlp) {
		vnl_free(vnlp);
		nv->vnl_modtime = time(0);
		vnlp = nv;
	} else {
		vnl_free(nv);
	}
}

/**
 * @brief
 *	Receive a hook-related file.
 *
 *  @param[in] 	preq - Pointer to batch request structure for a Copy Hook
 *			request.
 *			The 'preq' parameter holds:
 * 			- preq->rq_ind.rq_hookfile.rq_filename - the basename
 *			  (including suffix) of the target hook file.
 *			- preq->rq_ind.rq_hookfile.rq_data contains the hook
 *			  data.
 *			- preq->rq_ind.rq_hookfile.rq_size is the size of
 *			  rq_data.
 * @note
 *	The idea is to put contents (preq->rq_ind.rq_hookfile.rq_data) into
 *	[PATH_HOOKS]/<preq->rq_ind.rq_hookfile.rq_filename>
 *
 *	If the file received is for a periodic hook, then an attempt is made
 *	to instantiate the hook, if none is queued up for execution.
 *
 *	This function expects 4 types of files and they are:
 *		<file_name>HOOK_FILE_SUFFIX
 *		<file_name>HOOK_CONFIG_SUFFIX
 *		<file_name>HOOK_SCRIPT_SUFFIX
 *		PBS_RESCDEF
 *
 * @return 	Void
 *
 */

void
req_copy_hookfile(struct batch_request *preq) /* ptr to the decoded request   */
{
	int filemode = 0700;
	int fds;
	char namebuf[MAXPATHLEN + 1];
	char *p;
	int is_hook_cntrl_file = 0;
	int is_hook_config_file = 0;
	int is_hook_script_file = 0;
	int is_hook_resourcedef_file = 0;
	hook *phook = NULL;
	char *hook_name;
	char hook_msg[HOOK_MSG_SIZE + 1];
	int oflag;
	char **prev_resources = NULL;

	if (reject_root_scripts == TRUE) {
		log_err(-1, __func__, msg_mom_reject_root_scripts);
		req_reject(PBSE_MOM_REJECT_ROOT_SCRIPTS, 0, preq);
		return;
	}

	p = strstr(preq->rq_ind.rq_hookfile.rq_filename, HOOK_FILE_SUFFIX);
	if ((p != NULL) && (strcmp(p, HOOK_FILE_SUFFIX) == 0)) {
		is_hook_cntrl_file = 1;
	}
	if (!is_hook_cntrl_file) {
		p = strstr(preq->rq_ind.rq_hookfile.rq_filename, HOOK_SCRIPT_SUFFIX);
		if ((p != NULL) && (strcmp(p, HOOK_SCRIPT_SUFFIX) == 0))
			is_hook_script_file = 1;
	}

	if (!is_hook_cntrl_file && !is_hook_script_file) {
		p = strstr(preq->rq_ind.rq_hookfile.rq_filename, HOOK_CONFIG_SUFFIX);
		if ((p != NULL) && (strcmp(p, HOOK_CONFIG_SUFFIX) == 0))
			is_hook_config_file = 1;
	}

	if (!is_hook_cntrl_file && !is_hook_script_file &&
	    !is_hook_config_file) {
		p = strstr(preq->rq_ind.rq_hookfile.rq_filename, PBS_RESCDEF);
		if ((p == NULL) || (strcmp(p, PBS_RESCDEF) != 0)) {
			log_err(errno, __func__, "malformed request");
			req_reject(PBSE_INTERNAL, 0, preq);
			return;
		}
		is_hook_resourcedef_file = 1;
	}

	snprintf(namebuf, sizeof(namebuf), "%s%s", path_hooks,
		 preq->rq_ind.rq_hookfile.rq_filename);

	/* Resources prior to update of the resourcedef file */
	if (is_hook_resourcedef_file) {
		prev_resources = get_resources_from_file(namebuf);
	}

	if (preq->rq_ind.rq_hookfile.rq_sequence == 0) { /* 1st chunk of data */
		oflag = O_TRUNC | O_RDWR | O_CREAT | O_Sync;
	} else {
		oflag = O_RDWR | O_APPEND | O_CREAT | O_Sync;
	}

	fds = open(namebuf, oflag, filemode);

	if (fds < 0) {
		log_err(errno, __func__, msg_hookfile_open);
		req_reject(PBSE_SYSTEM, 0, preq);
		free_str_array(prev_resources);
		return;
	}

#ifdef WIN32
	secure_file2(namebuf, "Administrators", READS_MASK | WRITES_MASK | STANDARD_RIGHTS_REQUIRED, "Everyone", READS_MASK | READ_CONTROL);
	setmode(fds, O_BINARY);
#endif /* WIN32 */

	if (write(fds, preq->rq_ind.rq_hookfile.rq_data,
		  (unsigned) preq->rq_ind.rq_hookfile.rq_size) !=
	    preq->rq_ind.rq_hookfile.rq_size) {
		log_err(errno, __func__, msg_hookfile_write);
		req_reject(PBSE_SYSTEM, 0, preq);
		(void) close(fds);
		free_str_array(prev_resources);
		return;
	}

	if (is_hook_cntrl_file) {
		FILE *fp = NULL;

		(void) lseek(fds, 0L, SEEK_SET);

		fp = fdopen(fds, "r");

		/* Load the contents of hook control file into memory */
		/* passed fp to hook_recov()  so file does not need to be */
		/* reopened */
		if ((phook = hook_recov(namebuf, fp, hook_msg, HOOK_MSG_SIZE,
					python_script_alloc, python_script_free)) == NULL) {
			log_err(-1, __func__, hook_msg);
			req_reject(PBSE_SYSTEM, 0, preq);
			if (fp != NULL)
				(void) fclose(fp);
			else
				(void) close(fds);
			return;
		} else {

			hook *phook2;
			int i;
			int j;

			if ((phook->event & HOOK_EVENT_EXECHOST_PERIODIC) &&
			    !has_task_by_parm1(phook)) {
				run_periodic_hook_bg(phook);
			}

			phook2 = (hook *) GET_NEXT(svr_allhooks);
			i = j = 0;
			for (phook2 = (hook *) GET_NEXT(svr_allhooks); phook2 != NULL;
			     phook2 = (hook *) GET_NEXT(phook2->hi_allhooks)) {
				if (update_joinjob_alarm_time &&
				    (phook2->enabled == TRUE) &&
				    ((phook2->event & HOOK_EVENT_EXECJOB_BEGIN) != 0)) {
					if (i == 0)
						joinjob_alarm_time = 0;
					joinjob_alarm_time += phook2->alarm;
					i++;
				} else if (update_job_launch_delay &&
					   (phook2->enabled == TRUE) &&
					   ((phook2->event & HOOK_EVENT_EXECJOB_PROLOGUE) != 0)) {
					if (j == 0)
						job_launch_delay = 0;
					job_launch_delay += phook2->alarm;
					j++;
				}
			}
			if (i > 0) {
				snprintf(log_buffer, sizeof(log_buffer), "joinjob_alarm_time updated to %ld", joinjob_alarm_time);
				log_err(-1, __func__, log_buffer);
			}
			if (j > 0) {
				snprintf(log_buffer, sizeof(log_buffer), "job_launch_delay updated to %ld", job_launch_delay);
				log_err(-1, __func__, log_buffer);
			}
		}
		if (fp != NULL)
			(void) fclose(fp);
		else
			(void) close(fds);

	} else { /* a hook script, resourcedef, or hook config  file*/

		phook = NULL;
		if (is_hook_script_file)
			p = strstr(namebuf, HOOK_SCRIPT_SUFFIX);
		else if (is_hook_config_file)
			p = strstr(namebuf, HOOK_CONFIG_SUFFIX);
		else
			p = NULL;

		if (p != NULL) {
			*p = '\0';
			hook_name = strrchr(namebuf, '/');
		} else {
			hook_name = NULL;
		}
		if (hook_name != NULL) { /* hook related */
			hook_name++;

			if ((phook = find_hook(hook_name)) != NULL) {
				if (is_hook_script_file) {
					strcat(p, HOOK_SCRIPT_SUFFIX);
					if (python_script_alloc(namebuf,
								(struct python_script **) &phook->script) == -1) {
						log_err(-1, __func__, "python_script_alloc call failed!");
						req_reject(PBSE_SYSTEM, 0, preq);
						(void) close(fds);
						return;
					}
				} else if (is_hook_config_file) {
					strcat(p, HOOK_CONFIG_SUFFIX);
				}
				if ((phook->event & HOOK_EVENT_EXECHOST_PERIODIC) &&
				    !has_task_by_parm1(phook)) {
					run_periodic_hook_bg(phook);
				}
			}
		}
		(void) close(fds);

		if (is_hook_resourcedef_file) {
			/* check if any deleted resources were set on the
			 * vnodes attribute list, and if so, update the vnodes
			 */
			char **new_resources;
			char **deleted_resources;

			new_resources = get_resources_from_file(namebuf);
			deleted_resources = get_deleted_resources(prev_resources, new_resources);
			if (deleted_resources != NULL) {
				update_vnodes_on_resourcedef_change(deleted_resources);
				free(deleted_resources);
			}
			free_str_array(new_resources);
			free_str_array(prev_resources);

			/* Call setup_resc() only if received
			 * resourcedef file is the one for path_rescdef,
 			 * which is set up at mom startup and used when
			 * HUP-ed.
			 */
			if ((path_rescdef != NULL) &&
			    (strcmp(path_rescdef, namebuf) == 0) &&
			    (setup_resc(1) != 0)) {
				/* log_buffer set in setup_resc */
				log_err(-1, "setup_resc",
					"warning: failed to setup resourcedef");
			}
		}
	}
	/* obtain new checksums after file is closed/flushed */
	if (is_hook_cntrl_file) {
		if (phook != NULL) {
			phook->hook_control_checksum = crc_file(namebuf);
		}
	} else if (is_hook_script_file) {
		if (phook != NULL) {
			phook->hook_script_checksum = crc_file(namebuf);
		}
	} else if (is_hook_config_file) {
		if (phook != NULL) {
			phook->hook_config_checksum = crc_file(namebuf);
		}
	} else if (is_hook_resourcedef_file) {
		hooks_rescdef_checksum = crc_file(namebuf);
	}

	reply_ack(preq);
}

/**
 * @brief
 *	Receive a request to delete a hook-related file.
 *
 *  @param[in]	preq - pointer to batch request structure for a Delete Hook
 *			request.
 *		     - contains preq->rq_ind.rq_hookfile.rq_filename which
 *			is the hook-related filename to delete. If this
 *			matches the hook control file (HOOK_FILE_SUFFIX), then
 *			the associated hook in memory is purged via
 *			hook_purge() instead of unlinking the file here.
 *		     - otherwise the name must end in HOOK_SCRIPT_SUFFIX or
 *			HOOK_CONFIG_SUFFIX, or be exactly PBS_RESCDEF; any
 *			other name is rejected with PBSE_INTERNAL.
 *
 * @note
 *	Exactly one reply (reply_ack() or req_reject()) is sent on every
 *	path. A file that is already absent (ENOENT) is treated as success.
 *
 * @return	Void
 *
 */

void
req_del_hookfile(struct batch_request *preq) /* ptr to the decoded request   */
{

	char namebuf[MAXPATHLEN + 1];
	char *p;
	char hook_name[MAXPATHLEN + 1];
	hook *phook;
	job *pjob = NULL;
	int hook_running = 0;

	p = strstr(preq->rq_ind.rq_hookfile.rq_filename, HOOK_FILE_SUFFIX);
	if ((p != NULL) && (strcmp(p, HOOK_FILE_SUFFIX) == 0)) {
		/* hook control file: hook name is the filename minus suffix; */
		/* copy just that prefix so the request itself is not modified */
		snprintf(hook_name, sizeof(hook_name), "%.*s",
			 (int) (p - preq->rq_ind.rq_hookfile.rq_filename),
			 preq->rq_ind.rq_hookfile.rq_filename);
		if ((phook = find_hook(hook_name)) != NULL) {
#ifndef WIN32
			pjob = (job *) GET_NEXT(svr_alljobs);
			while (pjob) {
				/* See if any asynchronous hook is running */
				if (pjob->ji_hook_running_bg_on) {
					hook_running = 1;
					break;
				}
				pjob = (job *) GET_NEXT(pjob->ji_alljobs);
			}
			if (hook_running && (phook->event & HOOK_EVENT_EXECJOB_END)) {
				/**
				 * This event runs the hook in the background,
				 * and the deferred task created while
				 * running the hook is required for graceful
				 * exit of the job. Acknowledge the request
				 * but leave the hook in place.
				 */
				reply_ack(preq);
				return;
			}
#endif
			delete_task_by_parm1_func(phook, NULL, DELETE_ONE);
			log_event(PBSEVENT_DEBUG3, PBS_EVENTCLASS_HOOK,
				  LOG_INFO, phook->hook_name,
				  "deleted any hook task entry");
			/* inside hook_purge() is where the hook control */
			/* file is deleted */
			hook_purge(phook, python_script_free);
		}
		reply_ack(preq);
		return;
	}

	/* not a control file: accept only script, resourcedef or config */
	p = strstr(preq->rq_ind.rq_hookfile.rq_filename, HOOK_SCRIPT_SUFFIX);
	if (((p == NULL) || (strcmp(p, HOOK_SCRIPT_SUFFIX) != 0)) &&
	    (strcmp(preq->rq_ind.rq_hookfile.rq_filename,
		    PBS_RESCDEF) != 0)) {
		p = strstr(preq->rq_ind.rq_hookfile.rq_filename, HOOK_CONFIG_SUFFIX);
		if ((p == NULL) || (strcmp(p, HOOK_CONFIG_SUFFIX) != 0)) {
			log_err(errno, __func__, "malformed request");
			req_reject(PBSE_INTERNAL, 0, preq);
			/* request already answered; must not fall through */
			return;
		}
	}

	snprintf(namebuf, sizeof(namebuf), "%s%s", path_hooks,
		 preq->rq_ind.rq_hookfile.rq_filename);

	if (unlink(namebuf) < 0) {
		if (errno != ENOENT) {
			snprintf(log_buffer, sizeof(log_buffer),
				 "Failed to delete hook file %s",
				 namebuf);
			log_err(errno, __func__, log_buffer);
			req_reject(PBSE_INTERNAL, 0, preq);
			mark_hook_file_bad(namebuf);
			/* request already answered; do not also ack it */
			return;
		}
		/* ENOENT: nothing to delete, fall through and ack */
	} else {
		/* resourcedef file is gone, so its cached checksum is stale */
		if (!strcmp(preq->rq_ind.rq_hookfile.rq_filename, PBS_RESCDEF))
			hooks_rescdef_checksum = 0LU;
	}

	reply_ack(preq);
}

#if defined(PBS_SECURITY) && (PBS_SECURITY == KRB5)
/**
 * @brief
 *	Receive a credential for a job, store it, and renew the job's
 *	credential if the job is known to this mom.
 *
 * @param[in]	preq - pointer to the decoded batch request.
 *		     - preq->rq_ind.rq_cred carries the job id, credential id,
 *			type, validity and the base64-encoded credential data.
 *
 * @note
 *	The decoded krb5_data and the duplicated base64 string are passed to
 *	store_or_update_cred() and are not freed here; presumably that
 *	function takes ownership -- TODO confirm.
 *	NOTE(review): the decoded output goes into a fixed CRED_DATA_SIZE
 *	buffer; confirm decode_block_base64() bounds its output.
 *
 * @return	Void
 */
void
req_cred(struct batch_request *preq) /* ptr to the decoded request */
{
	unsigned char out_data[CRED_DATA_SIZE];
	ssize_t out_len = 0;
	char buf[LOG_BUF_SIZE];
	krb5_data *data;
	char *data_base64 = NULL;
	job *pjob;

	if (decode_block_base64((unsigned char *) preq->rq_ind.rq_cred.rq_cred_data, preq->rq_ind.rq_cred.rq_cred_size, out_data, &out_len, buf, LOG_BUF_SIZE) != 0) {
		log_err(errno, __func__, buf);
		req_reject(PBSE_SYSTEM, 0, preq);
		return;
	}

	if ((data = (krb5_data *) malloc(sizeof(krb5_data))) == NULL) {
		log_err(errno, __func__, "Unable to allocate Memory!\n");
		req_reject(PBSE_SYSTEM, 0, preq);
		return;
	}

	if ((data->data = (char *) malloc(sizeof(unsigned char) * out_len)) == NULL) {
		log_err(errno, __func__, "Unable to allocate Memory!\n");
		/* do not leak the krb5_data container on this error path */
		free(data);
		req_reject(PBSE_SYSTEM, 0, preq);
		return;
	}

	data->length = out_len;
	memcpy(data->data, out_data, out_len);

	/* keep a copy of the still-encoded credential alongside the decoded one */
	if ((data_base64 = strdup(preq->rq_ind.rq_cred.rq_cred_data)) == NULL) {
		log_err(errno, __func__, "Unable to allocate Memory!\n");
		free(data->data);
		free(data);
		req_reject(PBSE_SYSTEM, 0, preq);
		return;
	}

	store_or_update_cred(preq->rq_ind.rq_cred.rq_jobid, preq->rq_ind.rq_cred.rq_credid, preq->rq_ind.rq_cred.rq_cred_type, data, data_base64, preq->rq_ind.rq_cred.rq_cred_validity);

	/* renew ticket for the job */
	if ((pjob = find_job(preq->rq_ind.rq_cred.rq_jobid)) != NULL) {
		/* send cred to sisters too */
		send_cred_sisters(pjob);

		/* new creds received - lets renew cred */
		renew_job_cred(pjob);
	}

	reply_ack(preq);
}
#endif
