/*
 * Copyright (C) 1994-2021 Altair Engineering, Inc.
 * For more information, contact Altair at www.altair.com.
 *
 * This file is part of both the OpenPBS software ("OpenPBS")
 * and the PBS Professional ("PBS Pro") software.
 *
 * Open Source License Information:
 *
 * OpenPBS is free software. You can redistribute it and/or modify it under
 * the terms of the GNU Affero General Public License as published by the
 * Free Software Foundation, either version 3 of the License, or (at your
 * option) any later version.
 *
 * OpenPBS is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Affero General Public
 * License for more details.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 *
 * Commercial License Information:
 *
 * PBS Pro is commercially licensed software that shares a common core with
 * the OpenPBS software.  For a copy of the commercial license terms and
 * conditions, go to: (http://www.pbspro.com/agreement.html) or contact the
 * Altair Legal Department.
 *
 * Altair's dual-license business model allows companies, individuals, and
 * organizations to create proprietary derivative works of OpenPBS and
 * distribute them - whether embedded or bundled with other software -
 * under a commercial license agreement.
 *
 * Use of Altair's trademarks, including but not limited to "PBS™",
 * "OpenPBS®", "PBS Professional®", and "PBS Pro™" and Altair's logos is
 * subject to Altair's trademark licensing policies.
 */

#ifndef PBSMOM_HTUNIT
#include <pbs_config.h> /* the master config generated by configure */

#include <assert.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <ctype.h>
#include <unistd.h>
#include <stddef.h>
#include <dirent.h>
#include <fcntl.h>
#include <errno.h>
#include <string.h>
#include <pwd.h>
#include <time.h>
#include <ftw.h>
#include <dlfcn.h>
#include <sys/types.h>
#include <sys/time.h>
#include <sys/param.h>
#include <sys/stat.h>
#ifdef __linux__
#include <sys/vfs.h>
#else
#include <sys/mount.h>
#endif
#include <sys/resource.h>
#include <sys/utsname.h>
#include <sys/wait.h>
#include <signal.h>

#include "mom_mach.h"
#include "pbs_error.h"
#include "portability.h"
#include "list_link.h"
#include "server_limits.h"
#include "attribute.h"
#include "resource.h"
#include "job.h"
#include "log.h"
#include "mom_func.h"
#include "resmon.h"
#include "../rm_dep.h"
#include "tpp.h"
#include "pbs_license.h"
#include "pbs_ifl.h"
#include "placementsets.h"
#include "mom_vnode.h"

/**
 * @file
 * @brief
 *	System dependent code to gather information for the resource
 *	monitor for a Linux i386 machine.
 *
 * @par Resources known by this code:
 *		cput		cpu time for a pid or session
 *		mem		memory size for a pid or session in KB
 *		resi		resident memory size for a pid or session in KB
 *		sessions	list of sessions in the system
 *		pids		list of pids in a session
 *		nsessions	number of sessions in the system
 *		nusers		number of users in the system
 *		totmem		total memory size in KB
 *		availmem	available memory size in KB
 *		ncpus		number of cpus
 *		physmem		physical memory size in KB
 *		size		size of a file or filesystem
 *		idletime	seconds of idle time (see mom_main.c)
 *		walltime	wall clock time for a pid
 *		loadave		current load average
 */

#ifndef TRUE
#define FALSE 0
#define TRUE 1
#endif /* TRUE */

#define TBL_INC 20
#define CPUT_POSSIBLE_FACTOR 5

static char procfs[] = "/proc";
static DIR *pdir = NULL;
static int pagesize;
static long hz;

/* convert between jiffies and seconds */
#define JTOS(x) (((x) + (hz / 2)) / hz)

static char *choose_procflagsfmt(void);

proc_stat_t *proc_info = NULL;
int nproc = 0;
int max_proc = 0;

extern char *ret_string;
extern char extra_parm[];
extern char no_parm[];
extern int exiting_tasks;
extern vnl_t *vnlp;

extern time_t time_now;

/*
 ** external functions and data
 */
extern int nice_val;
extern int rm_errno;
extern int reqnum;
extern double cputfactor;
extern double wallfactor;
extern pid_t mom_pid;
extern int num_acpus;
extern int num_pcpus;
extern int num_oscpus;
struct config *search(struct config *, char *);
struct rm_attribute *momgetattr(char *);

char *physmem(struct rm_attribute *attrib);

/*
 ** local functions and data
 */
static char *resi(struct rm_attribute *attrib);
static char *totmem(struct rm_attribute *attrib);
static char *availmem(struct rm_attribute *attrib);
static char *ncpus(struct rm_attribute *attrib);
static char *walltime(struct rm_attribute *attrib);

extern char *loadave(struct rm_attribute *attrib);
extern char *nullproc(struct rm_attribute *attrib);

time_t wait_time = 10;

/*
 * Memory summary returned by get_proc_mem().  As filled in there, each
 * field is in bytes and covers physical memory plus swap combined.
 */
typedef struct proc_mem {
	unsigned long total; /* MemTotal + SwapTotal */
	unsigned long used;  /* total minus free, for memory and swap */
	unsigned long free;  /* MemFree + SwapFree */
} proc_mem_t;

int mom_does_chkpnt = 0;
unsigned long totalmem;

static int myproc_max = 0;    /* entries in Proc_lnks  */
pbs_plinks *Proc_lnks = NULL; /* process links table head */
static time_t sampletime_ceil;
static time_t sampletime_floor;

/*
 ** Local resource array: maps each machine-dependent resource name to the
 ** function that produces its value.  The list ends with an entry whose
 ** name is NULL (handler nullproc).
 */
struct config dependent_config[] = {
	{"resi", {resi}},
	{"totmem", {totmem}},
	{"availmem", {availmem}},
	{"physmem", {physmem}},
	{"ncpus", {ncpus}},
	{"loadave", {loadave}},
	{"walltime", {walltime}},
	{NULL, {nullproc}},
};

unsigned linux_time = 0;
/**
 * @brief
 * 	support routine for getting system time -- sets linux_time
 *
 *	Scans /proc/stat for the line whose first word is "btime" and stores
 *	the unsigned value that follows it in the global linux_time.  If the
 *	file cannot be opened, or no "btime" line is found, linux_time is
 *	left unchanged.
 *
 * @return	Void
 *
 */
void
proc_get_btime(void)
{
	FILE *fp;
	char label[256];

	if ((fp = fopen("/proc/stat", "r")) == NULL)
		return;

	/* the field width keeps an over-long word from overflowing label */
	while (fscanf(fp, "%255s", label) == 1) {
		if (strcmp(label, "btime") == 0) {
			/* fscanf returns the number of items converted: 1 on success */
			if (fscanf(fp, "%u", &linux_time) != 1)
				log_errf(-1, __func__, "fscanf failed. ERR : %s", strerror(errno));
			fclose(fp);
			return;
		}

		/* not the line we want: discard the remainder of it */
		if (fscanf(fp, "%*[^\n]%*c") == EOF)
			break;
	}

	/* reaching end-of-file is normal; only a real read error is reported */
	if (ferror(fp))
		log_errf(-1, __func__, "fscanf failed. ERR : %s", strerror(errno));

	fclose(fp);
	return;
}

/*
 * Template for the scanf format used to parse /proc/<pid>/stat.
 * It contains a single "%s" (field 9, flags) and every other '%' is doubled,
 * so it is evidently meant to be passed through a printf-style call that
 * substitutes the flags conversion ("%u" or "%lu") -- presumably done by
 * choose_procflagsfmt(); verify there.
 * After expansion the format assigns 14 values; the remaining fields are
 * skipped with "%*".
 */
static char stat_str_pre[] =
	"%%d "	    /* 1  pid %d The process id */
	"(%%[^)]) " /* 2  comm %s The filename of the executable */
	"%%c "	    /* 3  state %c "RSDZTW" */
	"%%d "	    /* 4  ppid %d The PID of the parent */
	"%%d "	    /* 5  pgrp %d The process group ID */
	"%%d "	    /* 6  session %d The session ID */
	"%%*d "	    /* 7  ignored:  tty_nr */
	"%%*d "	    /* 8  ignored:  tpgid */
	"%s "	    /* 9  flags - %u or %lu */
	"%%*lu "    /* 10 ignored:  minflt */
	"%%*lu "    /* 11 ignored:  cminflt */
	"%%*lu "    /* 12 ignored:  majflt */
	"%%*lu "    /* 13 ignored:  cmajflt */
	"%%lu "	    /* 14 utime %lu */
	"%%lu "	    /* 15 stime %lu */
	"%%ld "	    /* 16 cutime %ld */
	"%%ld "	    /* 17 cstime %ld */
	"%%*ld "    /* 18 ignored:  priority %ld */
	"%%*ld "    /* 19 ignored:  nice %ld */
	"%%*ld "    /* 20 ignored:  num_threads %ld */
	"%%*ld "    /* 21 ignored:  itrealvalue %ld - no longer maintained */
	"%%llu "    /* 22 starttime (was %lu before Linux 2.6 - see proc(5) for conversion details) */
	"%%lu "	    /* 23 vsize (bytes) */
	"%%ld "	    /* 24 rss (number of pages) */
	;

/**
 * @brief
 *	returns the system memory summary (used, free, total) read from
 *	/proc/meminfo.
 *
 *	The MemTotal, MemFree, SwapTotal and SwapFree lines are parsed (values
 *	in kB), converted to bytes, and memory and swap are added together.
 *	A line that is missing from the file contributes zero.
 *
 * @return	structure handle
 * @retval	pointer to proc_mem_t structure 	Success
 * @retval	NULL					/proc/meminfo could not be opened
 *
 * @par Note:	the result lives in a static buffer that is overwritten by
 *		the next call.
 *
 */
proc_mem_t *
get_proc_mem(void)
{
	static proc_mem_t mm;
	FILE *fp;
	unsigned long m_tot = 0, m_use, m_free = 0;
	unsigned long s_tot = 0, s_use, s_free = 0;
	char strbuf[BUFSIZ];

	if ((fp = fopen("/proc/meminfo", "r")) == NULL)
		return NULL;

	/*
	 * Each line matches at most one of the patterns; a non-matching
	 * sscanf leaves its target untouched.  The destinations are
	 * unsigned long, so the conversion must be %lu.
	 */
	while (fgets(strbuf, sizeof(strbuf), fp) != NULL) {
		(void) sscanf(strbuf, "MemTotal: %lu k", &m_tot);
		(void) sscanf(strbuf, "MemFree: %lu k", &m_free);
		(void) sscanf(strbuf, "SwapTotal: %lu k", &s_tot);
		(void) sscanf(strbuf, "SwapFree: %lu k", &s_free);
	}
	fclose(fp);

	/* convert from kB to B */
	m_tot <<= 10;
	m_free <<= 10;
	s_tot <<= 10;
	s_free <<= 10;
	m_use = m_tot - m_free;
	s_use = s_tot - s_free;

	mm.total = m_tot + s_tot;
	mm.used = m_use + s_use;
	mm.free = m_free + s_free;

	return (&mm);
}

/**
 * @brief
 *	Report whether any vnode in the global 'vnlp' list carries an
 *	attribute named ATTR_NODE_TopologyInfo.
 *
 * @return int
 * @retval 1	- some entry of 'vnlp' has an ATTR_NODE_TopologyInfo attribute.
 * @retval 0	- no entry has it, or 'vnlp' is NULL.
 *
 */
static int
vnlp_has_topology_info(void)
{
	int node_idx;

	if (vnlp == NULL)
		return (0);

	/* walk every vnode, then every attribute of that vnode */
	for (node_idx = 0; node_idx < vnlp->vnl_used; node_idx++) {
		vnal_t *node = VNL_NODENUM(vnlp, node_idx);
		int attr_idx;

		for (attr_idx = 0; attr_idx < node->vnal_used; attr_idx++) {
			vna_t *attr = VNAL_NODENUM(node, attr_idx);

			if (strcmp(attr->vna_name, ATTR_NODE_TopologyInfo) == 0)
				return (1);
		}
	}

	return (0);
}

/**
 * @brief
 * 	dep_topology - compute and export platform-dependent topology information
 *
 * @return	void
 *
 * @par MT-Safe:	no
 * @par Side Effects:
 *	Whatever the called routines (alps_system_KNL, alps_inventory,
 *	system_to_vnodes_KNL, mom_topology) do; none are visible here.
 *
 * @par Note:	nominally, we use the Open-MPI hardware locality (a.k.a. hwloc)
 *		functions to export the topology information that it generates,
 *		but on Cray systems we instead export information via the
 *		alps_inventory() function.
 * @par A synopsis of the function call sequence (for vnode creation).
 *	  1. Process the System (BASIL 1.7) Query in alps_system_KNL(). This
 *	  	does not include KNL vnode creation.
 *	  2. Process the Inventory (BASIL 1.4) Query in alps_inventory() and
 *		create non-KNL vnodes.
 *	  	KNL vnodes returned by the earlier System Query (step 1) are
 *		filtered from the Inventory (1.4) response.
 *	  3. Create KNL vnodes in system_to_vnodes_KNL(), using information
 *		retrieved earlier in alps_system_KNL() (step 1).
 *
 * @see	alps_inventory
 * @see	mom_topology
 * @see alps_system_KNL
 * @see system_to_vnodes_KNL
 */
void
dep_topology(void)
{
#if MOM_ALPS
	/* This function is the entry point for System Query processing. */
	/* Activities include making a System XML Request & handling the XML Response. */
	alps_system_KNL();
	/*
	 * The call to physmem needs to take place before the ALPS inventory
	 * because a vnode for the "login node" will be created which
	 * must have the memory set.  physmem() is not called here; in this
	 * file dep_initialize() calls it just before calling dep_topology().
	 */
	/* Inventory (BASIL 1.4) Query processing. */
	/* Create non-KNL vnodes. */
	if (alps_inventory() != -1) {
		/* Create KNL VNodes. */
		system_to_vnodes_KNL();
	}
#endif
	if (!vnlp_has_topology_info()) {
		/* Populate "topology_info", only if the attribute */
		/* has not been set inside alps_inventory(). */
		mom_topology();
	}
}

/**
 * @brief
 *	One-time platform-dependent set-up: records the page size, opens the
 *	/proc directory handle used for process sampling, reads the boot time,
 *	then gathers cpu counts, memory information and topology.
 *
 *	If /proc cannot be opened the error is logged and the remaining steps
 *	are skipped.
 *
 * @return	Void
 *
 */
void
dep_initialize(void)
{
	pagesize = getpagesize();

	pdir = opendir(procfs);
	if (pdir == NULL) {
		log_err(errno, __func__, "opendir");
		return;
	}

	proc_get_btime();

	/* ncpus() is called for its side effect of setting the global cpu counts */
	(void) ncpus(NULL);

	/* likewise, physmem() is called to collect the memory information */
	(void) physmem(NULL);

	dep_topology();
}

/**
 * @brief
 *	Release the /proc directory handle opened by dep_initialize(), if any.
 *
 * @return	Void
 *
 */
void
dep_cleanup(void)
{
	if (pdir == NULL)
		return;

	closedir(pdir);
	pdir = NULL;
}

/**
 * @brief
 *	 Tell whether a session id belongs to one of the job's tasks.
 *
 *	 Tasks whose recorded session id is <= 1 are never matched.
 *
 * @param[in] pjob - job pointer
 * @param[in] sid - session id
 *
 * @return	Bool
 * @retval	TRUE	a task of the job has this session id
 * @retval	FALSE	no task matches
 *
 */
static int
injob(job *pjob, pid_t sid)
{
	task *tp = (task *) GET_NEXT(pjob->ji_tasks);

	while (tp != NULL) {
		if ((tp->ti_qs.ti_sid > 1) && (tp->ti_qs.ti_sid == sid))
			return TRUE;
		tp = (task *) GET_NEXT(tp->ti_jobtask);
	}

	return FALSE;
}

/**
 * @brief
 * 	Internal session cpu time decoding routine.
 *
 *	For every task of the job, adds up the cpu time of the sampled
 *	processes (proc_info[]) that are in the task's session and owned by
 *	the job's execution uid.  A task's recorded cpu time (ti_cput) only
 *	ever grows.  Tasks with no visible process are marked exited, after a
 *	kill(sid, 0) probe and a short grace period for young jobs.
 *
 *	Side effects: may update ti_cput and ti_status of tasks, call
 *	task_save(), set exiting_tasks, set MOM_NO_PROC in pjob->ji_flags,
 *	and reset sampletime_floor / sampletime_ceil when an implausible
 *	value is seen.
 *
 * @param[in] pjob - a job pointer.
 *
 * @return	unsigned long
 * @retval	sum of all cpu time consumed for all tasks executed by the job, in seconds,
 *		adjusted by cputfactor.
 * @retval	0 if a per-process or per-job value exceeds the plausibility bound
 *		num_oscpus * (elapsed seconds + 1) * CPUT_POSSIBLE_FACTOR.
 *
 */
static unsigned long
cput_sum(job *pjob)
{
	int i;
	unsigned long cputime = 0;
	int nps = 0;
	int active_tasks = 0;
	int taskprocs;
	proc_stat_t *ps;
	task *ptask;
	unsigned long pcput, tcput;

	for (ptask = (task *) GET_NEXT(pjob->ji_tasks);
	     ptask != NULL;
	     ptask = (task *) GET_NEXT(ptask->ti_jobtask)) {

		/* DEAD task */
		if (ptask->ti_qs.ti_sid <= 1) {
			cputime += ptask->ti_cput;
			continue;
		}

		active_tasks++;
		tcput = 0;
		taskprocs = 0;
		for (i = 0; i < nproc; i++) {
			ps = &proc_info[i];

			/* is this process part of the task? */
			if (ptask->ti_qs.ti_sid != ps->session)
				continue;

			/*
			 * is the owner of this process the job owner?
			 * prevents random PID matches after reboot/restart
			 */
			if (ps->uid != pjob->ji_qs.ji_un.ji_momt.ji_exuid)
				continue;

			nps++;
			taskprocs++;

			/* don't include zombie unless it is the top proc */
			if ((ps->state == 'Z') && (ps->pid != ps->session) &&
			    (ps->ppid != mom_pid))
				continue;

			pcput = (ps->utime + ps->stime +
				 ps->cutime + ps->cstime);

			if (pcput > num_oscpus * (sampletime_ceil + 1 - pjob->ji_qs.ji_stime) * CPUT_POSSIBLE_FACTOR) {
				/* conversions match the argument types: %lu for unsigned long, %ld for the cast time difference */
				sprintf(log_buffer,
					"cput for process %d impossible (%lus > %lds * %d), ignoring",
					ps->pid,
					pcput,
					(long) (sampletime_ceil + 1 - pjob->ji_qs.ji_stime),
					num_oscpus);
				log_event(PBSEVENT_DEBUG, PBS_EVENTCLASS_JOB,
					  LOG_DEBUG, pjob->ji_qs.ji_jobid,
					  log_buffer);
				sampletime_floor = pjob->ji_qs.ji_stime;
				sampletime_ceil = pjob->ji_qs.ji_stime;
				return 0;

			} else {
				tcput += pcput;
			}

			DBPRT(("%s: task %8.8X ses %d pid %d cputime %lu\n",
			       __func__, ptask->ti_qs.ti_task,
			       ps->session, ps->pid, tcput))
		}
		/* a task's cpu time never goes backwards */
		if (tcput > ptask->ti_cput)
			ptask->ti_cput = tcput;
		cputime += ptask->ti_cput;
		DBPRT(("%s: task %8.8X cput %lu total %lu\n", __func__,
		       ptask->ti_qs.ti_task, ptask->ti_cput, cputime))

		if (taskprocs == 0) {
			/*
			 * Linux seems to be able to forget about a
			 * process on rare occasions.  See if the
			 * kill system call can see it.
			 */
			if (kill(ptask->ti_qs.ti_sid, 0) == 0) {
				sprintf(log_buffer,
					"active processes for task %8.8X "
					"session %d exist but are not "
					"reported in /proc",
					ptask->ti_qs.ti_task,
					(int) ptask->ti_qs.ti_sid);
				log_event(PBSEVENT_DEBUG3, PBS_EVENTCLASS_JOB,
					  LOG_DEBUG, pjob->ji_qs.ji_jobid,
					  log_buffer);
				/*
				 * Fake a non-zero nps so the job is not killed.
				 */
				nps++;
				continue;
			}

			/*
			 * Don't declare a running task exited without a small
			 * grace time.
			 */
			if ((ptask->ti_qs.ti_status == TI_STATE_RUNNING) &&
			    ((time_now - pjob->ji_qs.ji_stime) < 10)) {
				sprintf(log_buffer,
					"no active processes for task %8.8X "
					"session %d exist but the job is "
					"only %ld secs old",
					ptask->ti_qs.ti_task,
					(int) ptask->ti_qs.ti_sid,
					(long) (time_now - pjob->ji_qs.ji_stime));
				log_event(PBSEVENT_DEBUG3, PBS_EVENTCLASS_JOB,
					  LOG_DEBUG, pjob->ji_qs.ji_jobid,
					  log_buffer);
				/*
				 * Fake a non-zero nps so the job is not killed.
				 */
				nps++;
				continue;
			}
			sprintf(log_buffer,
				"no active process for task %8.8X",
				ptask->ti_qs.ti_task);
			log_event(PBSEVENT_JOB, PBS_EVENTCLASS_JOB,
				  LOG_INFO, pjob->ji_qs.ji_jobid,
				  log_buffer);
			ptask->ti_qs.ti_status = TI_STATE_EXITED;
			task_save(ptask);
			exiting_tasks = 1;
		}
	}

	if (active_tasks == 0) {
		sprintf(log_buffer, "no active tasks");
		log_event(PBSEVENT_JOB, PBS_EVENTCLASS_JOB,
			  LOG_INFO, pjob->ji_qs.ji_jobid, log_buffer);
	}
	if (nps == 0)
		pjob->ji_flags |= MOM_NO_PROC;

	if (cputime > num_oscpus * (sampletime_ceil + 1 - pjob->ji_qs.ji_stime) * CPUT_POSSIBLE_FACTOR) {
		sprintf(log_buffer,
			"cput for job impossible (%lus > %lds * %d), ignoring",
			cputime,
			(long) (sampletime_ceil + 1 - pjob->ji_qs.ji_stime),
			num_oscpus);

		log_event(PBSEVENT_DEBUG, PBS_EVENTCLASS_JOB,
			  LOG_DEBUG, pjob->ji_qs.ji_jobid,
			  log_buffer);
		sampletime_floor = pjob->ji_qs.ji_stime;
		sampletime_ceil = pjob->ji_qs.ji_stime;
		return 0;
	}

	return ((unsigned long) ((double) cputime * cputfactor));
}

/**
 * @brief
 * 	Internal session memory usage function.
 *
 *	Adds the vsize of every sampled process whose session belongs to one
 *	of the job's tasks.
 *
 * @param[in] pjob - job pointer
 *
 * @return	unsigned long
 * @retval	the total number of bytes of address
 *		space consumed by all current processes within the job.
 *
 */
static unsigned long
mem_sum(job *pjob)
{
	unsigned long total = 0;
	int idx;

	for (idx = 0; idx < nproc; idx++) {
		proc_stat_t *ps = &proc_info[idx];

		if (injob(pjob, ps->session)) {
			total += ps->vsize;
			DBPRT(("%s: pid: %d  pr_size: %lu  total: %lu\n",
			       __func__, ps->pid, (unsigned long) ps->vsize, total))
		}
	}

	return (total);
}

/**
 * @brief
 * 	Internal session workingset size function.
 *
 *	Adds rss * pagesize for every sampled process whose session belongs
 *	to one of the job's tasks.
 *
 * @param[in] pjob - job pointer
 *
 * @return	unsigned long
 * @retval	summed resident set size of the job's processes
 *		(0 when no sampled process belongs to the job)
 *
 */
static unsigned long
resi_sum(job *pjob)
{
	unsigned long total = 0;
	int idx;

	for (idx = 0; idx < nproc; idx++) {
		proc_stat_t *ps = &proc_info[idx];

		if (injob(pjob, ps->session))
			total += ps->rss * pagesize;
	}

	return (total);
}

/**
 * @brief
 * 	Establish system-enforced limits for the job.
 *
 *	Walks the job's resource list and checks the value of each resource
 *	recognised here (cput, pcput, pvmem, pmem, walltime, nice, file).
 *	The starting mem and vmem limits are taken from the local node limits
 *	of the job (hn_nrlimit), not from the job's resource list.
 *
 * @param[in] pjob - job pointer
 * @param[in] set_mode - setting mode
 *
 *	If set_mode is SET_LIMIT_SET, the limits are also applied to the
 *	calling process with nice() and setrlimit() (RLIMIT_FSIZE, RLIMIT_AS,
 *	RLIMIT_RSS, RLIMIT_CPU).  For cput/pcput, pvmem and pmem the smallest
 *	value seen wins.  A zero-length resource list is not an error.
 *
 *	Entry conditions documented for SET_LIMIT_SET:
 *	    1.	MOM has already forked, and we are called from the child.
 *	    2.	The child is still running as root.
 *	    3.  Standard error is open to the user's file.
 *
 *	If set_mode is SET_LIMIT_ALTER, we are being called to modify
 *	existing limits; values are validated only, because limits set by
 *	setrlimit (kernel) cannot be altered from a different process.
 *
 * @return	int
 * @retval	PBSE_NONE	Success
 * @retval	PBSE_*		Error (value returned by error())
 *
 */
int
mom_set_limits(job *pjob, int set_mode)
{
	resource *rsc;
	struct rlimit lim;
	unsigned long val; /* scratch for the decoded resource value */
	unsigned long mem_cap;
	unsigned long vmem_cap;
	unsigned long cput_cap = 0;
	int rc;

	DBPRT(("%s: entered\n", __func__))
	assert(pjob != NULL);
	assert((get_jattr(pjob, JOB_ATR_resource))->at_type == ATR_TYPE_RESC);

	/* mem and vmem limits come from the local node limits, not the job */
	mem_cap = pjob->ji_hosts[pjob->ji_nodeid].hn_nrlimit.rl_mem << 10;
	vmem_cap = pjob->ji_hosts[pjob->ji_nodeid].hn_nrlimit.rl_vmem << 10;

	/* examine each requested resource in turn */
	for (rsc = (resource *) GET_NEXT(get_jattr_list(pjob, JOB_ATR_resource));
	     rsc != NULL;
	     rsc = (resource *) GET_NEXT(rsc->rs_link)) {
		char *rname;

		assert(rsc->rs_defin != NULL);
		rname = rsc->rs_defin->rs_name;
		assert(rname != NULL);
		assert(*rname != '\0');

		if (strcmp(rname, "cput") == 0 || strcmp(rname, "pcput") == 0) {
			rc = local_gettime(rsc, &val);
			if (rc != PBSE_NONE)
				return (error(rname, rc));
			if ((cput_cap == 0) || (val < cput_cap))
				cput_cap = val;
		} else if (strcmp(rname, "pvmem") == 0) {
			rc = local_getsize(rsc, &val);
			if (rc != PBSE_NONE)
				return (error(rname, rc));
			if ((vmem_cap == 0) || (val < vmem_cap))
				vmem_cap = val;
		} else if (strcmp(rname, "pmem") == 0) {
			rc = local_getsize(rsc, &val);
			if (rc != PBSE_NONE)
				return (error(rname, rc));
			if ((mem_cap == 0) || (val < mem_cap))
				mem_cap = val;
		} else if (strcmp(rname, "walltime") == 0) {
			/* validated only; nothing is set for walltime here */
			rc = local_gettime(rsc, &val);
			if (rc != PBSE_NONE)
				return (error(rname, rc));
		} else if (strcmp(rname, "nice") == 0) {
			if (set_mode == SET_LIMIT_SET) {
				/* nice() may legitimately return -1, so errno decides */
				errno = 0;
				if ((nice((int) rsc->rs_value.at_val.at_long) == -1) && (errno != 0))
					return (error(rname, PBSE_BADATVAL));
			}
		} else if (strcmp(rname, "file") == 0) {
			if (set_mode == SET_LIMIT_SET) {
				rc = local_getsize(rsc, &val);
				if (rc != PBSE_NONE)
					return (error(rname, rc));
				lim.rlim_cur = lim.rlim_max = val;
				if (setrlimit(RLIMIT_FSIZE, &lim) < 0)
					return (error(rname, PBSE_SYSTEM));
			}
		}
	}

	if (set_mode != SET_LIMIT_SET)
		return (PBSE_NONE);

	/* if either vmem or pvmem was given, set sys limit to lesser */
	if (vmem_cap != 0) {
		lim.rlim_cur = lim.rlim_max = vmem_cap;
		if (setrlimit(RLIMIT_AS, &lim) < 0)
			return (error("RLIMIT_AS", PBSE_SYSTEM));
	}

	/* if either mem or pmem was given, set sys limit to lesser */
	if (mem_cap != 0) {
		lim.rlim_cur = lim.rlim_max = mem_cap;
		if (setrlimit(RLIMIT_RSS, &lim) < 0)
			return (error("RLIMIT_RSS", PBSE_SYSTEM));
	}

	/* if either cput or pcput was given, set sys limit to lesser */
	if (cput_cap != 0) {
		lim.rlim_cur = lim.rlim_max =
			(unsigned long) ((double) cput_cap / cputfactor);
		if (setrlimit(RLIMIT_CPU, &lim) < 0)
			return (error("RLIMIT_CPU", PBSE_SYSTEM));
	}

	return (PBSE_NONE);
}

/**
 * @brief
 * 	State whether MOM main loop has to poll this job to determine if some
 * 	limits are being exceeded.
 *
 *	Polling is needed as soon as the job's resource list contains one of
 *	walltime, cput, mem, vmem or ncpus.
 *
 * @param[in] pjob - job pointer
 *
 * @return	int
 * @retval	TRUE	if polling is necessary
 * @retval	FALSE 	otherwise.
 *
 * NOTE: Actual polling is done using the mom_over_limit machine-dependent function.
 *
 */
int
mom_do_poll(job *pjob)
{
	static const char *const polled[] = {
		"walltime", "cput", "mem", "vmem", "ncpus"};
	resource *rsc;

	DBPRT(("%s: entered\n", __func__))
	assert(pjob != NULL);
	assert((get_jattr(pjob, JOB_ATR_resource))->at_type == ATR_TYPE_RESC);

	for (rsc = (resource *) GET_NEXT(get_jattr_list(pjob, JOB_ATR_resource));
	     rsc != NULL;
	     rsc = (resource *) GET_NEXT(rsc->rs_link)) {
		char *rname;
		size_t k;

		assert(rsc->rs_defin != NULL);
		rname = rsc->rs_defin->rs_name;
		assert(rname != NULL);
		assert(*rname != '\0');

		for (k = 0; k < sizeof(polled) / sizeof(polled[0]); k++) {
			if (strcmp(rname, polled[k]) == 0)
				return (TRUE);
		}
	}

	return (FALSE);
}

/**
 * @brief
 * 	Setup for polling.
 *	Records the page size and allocates the initial process table
 *	(proc_info) with room for TBL_INC entries.
 *
 * @return	int
 * @retval	PBSE_NONE		Success
 * @retval	PBSE_SYSTEM		The table could not be allocated
 *
 */
int
mom_open_poll(void)
{
	DBPRT(("%s: entered\n", __func__))

	pagesize = getpagesize();

	proc_info = (proc_stat_t *) malloc(TBL_INC * sizeof(proc_stat_t));
	if (proc_info != NULL) {
		max_proc = TBL_INC;
		return (PBSE_NONE);
	}

	log_err(errno, __func__, "malloc");
	return (PBSE_SYSTEM);
}

/**
 * @brief
 * 	Declare start of polling loop.
 *
 *	Re-reads /proc and rebuilds the proc_info[] table (nproc entries)
 *	from the "stat" file of every process that is not owned by root.
 *	Jiffy counters are converted to seconds, and the table grows by
 *	TBL_INC entries whenever it fills up.  Also sets time_last_sample,
 *	sampletime_floor and sampletime_ceil.
 *
 *	In mock run mode nothing is sampled.
 *
 * @return	int
 * @retval	PBSE_INTERNAL	Dir pdir is NULL, or the stat format could
 *				not be obtained
 * @retval	PBSE_NONE	Success
 *
 */
int
mom_get_sample(void)
{
	struct dirent *dent = NULL;
	FILE *fd = NULL;
	static char path[MAXPATHLEN + 1];
	char procname[MAXPATHLEN + 1]; /* space for dent->d_name plus extra */
	char procid[MAXPATHLEN + 1];
	struct stat sb;
	proc_stat_t *ps = NULL;
	int nprocs = 0;
	int ncached = 0;
	int ncantstat = 0;
	int nnomem = 0;
	unsigned long long starttime;
	int nskipped = 0;
	extern time_t time_last_sample;
	char *stat_str = NULL;

	/* There are no job tasks created in mock run mode, so no need to walk the proc table */
	if (mock_run)
		return PBSE_NONE;

	DBPRT(("%s: entered\n", __func__))
	if (pdir == NULL)
		return PBSE_INTERNAL;

	rewinddir(pdir);
	nproc = 0;
	if (hz == 0)
		hz = sysconf(_SC_CLK_TCK);
	time_last_sample = time(0);
	sampletime_floor = time_last_sample;
	/* errno is cleared before each readdir so a NULL return can be classified */
	while (errno = 0, (dent = readdir(pdir)) != NULL) {
		int nomem = 0;
		struct stat sbuf;

		nprocs++;

		/*
		 ** Check to see if we have /proc/pid or /proc/.pid
		 ** (isdigit requires an unsigned char value)
		 */
		if (!isdigit((unsigned char) dent->d_name[0])) {
			if (dent->d_name[0] == '.' &&
			    isdigit((unsigned char) dent->d_name[1])) {
				nomem = 1;
				nnomem++;
			} else
				continue;
		}
		snprintf(procid, sizeof(procid), "/proc/%s", dent->d_name);
		if ((stat(procid, &sbuf) == -1) || (sbuf.st_uid == 0)) {
			/* ignore root-owned processes */
			nskipped++;
			continue;
		}
		snprintf(procname, sizeof(procname), "/proc/%s/stat", dent->d_name);

		if ((fd = fopen(procname, "r")) == NULL) {
			ncantstat++;
			continue;
		}

		ps = &proc_info[nproc];
		stat_str = choose_procflagsfmt();
		if (stat_str == NULL) {
			log_err(errno, __func__, "choose_procflagsfmt allocation failed");
			/* do not leak the stat file opened above */
			fclose(fd);
			return PBSE_INTERNAL;
		}
		if (fscanf(fd, stat_str,
			   &ps->pid,	 /* "%d "	1  pid %d The process id */
			   path,	 /* "(%[^)]) "	2  comm %s The filename of the executable */
			   &ps->state,	 /* "%c "	3  state %c "RSDZTW" */
			   &ps->ppid,	 /* "%d "	4  ppid %d The PID of the parent */
			   &ps->pgrp,	 /* "%d "	5  pgrp %d The process group ID */
			   &ps->session, /* "%d "	6  session %d The session ID */
			   /* "%*d "	7  ignored:  tty_nr */
			   /* "%*d "	8  ignored:  tpgid */
			   &ps->flags, /* "%u or %lu"	9  flags */
			   /* "%*lu "	10 ignored:  minflt */
			   /* "%*lu "	11 ignored:  cminflt */
			   /* "%*lu "	12 ignored:  majflt */
			   /* "%*lu "	13 ignored:  cmajflt */
			   &ps->utime,	/* "%lu "	14 utime %lu */
			   &ps->stime,	/* "%lu "	15 stime %lu */
			   &ps->cutime, /* "%ld "	16 cutime %ld */
			   &ps->cstime, /* "%ld "	17 cstime %ld */
			   /* "%*ld "	18 ignored:  priority %ld */
			   /* "%*ld "	19 ignored:  nice %ld */
			   /* "%*ld "	20 ignored:  num_threads %ld */
			   /* "%*ld "	21 ignored:  itrealvalue %ld - no longer maintained */
			   &starttime, /* "%llu "	22 starttime (was %lu before Linux 2.6 - see proc(5) for conversion details) */
			   &ps->vsize, /* "%lu "	23 vsize (bytes) */
			   &ps->rss    /* "%ld "	24 rss (number of pages) */
			   ) != 14) {
			/* all 14 assigned fields must parse, else skip the process */
			ncantstat++;
			fclose(fd);
			continue;
		}

		/* the owner of the stat file is recorded as the process uid */
		if (fstat(fileno(fd), &sb) == -1) {
			fclose(fd);
			continue;
		}
		ps->uid = sb.st_uid;
		fclose(fd);

		/*
		 ** A .pid thread shows the memory of the process
		 ** but we only want to count it once.
		 */
		if (nomem) {
			ps->vsize = 0;
			ps->rss = 0;
		}

		/* starttime is in jiffies; linux_time is the value read from "btime" */
		ps->start_time = linux_time + (starttime / hz);
		snprintf(ps->comm, sizeof(ps->comm), "%.*s",
			 (int) (sizeof(ps->comm) - 1), path);

		ps->utime = JTOS(ps->utime);
		ps->stime = JTOS(ps->stime);
		ps->cutime = JTOS(ps->cutime);
		ps->cstime = JTOS(ps->cstime);
		if (++nproc == max_proc) {
			void *hold;
			DBPRT(("%s: alloc more proc table space %d\n", __func__, nproc))
			max_proc += TBL_INC;
			hold = realloc((void *) proc_info,
				       max_proc * sizeof(proc_stat_t));
			assert(hold != NULL);
			proc_info = (proc_stat_t *) hold;
		}
	}
	if (errno != 0 && errno != ENOENT)
		log_err(errno, __func__, "readdir");
	sampletime_ceil = time_last_sample;
	/* NOTE(review): "nprocs - 2" presumably discounts "." and ".." - confirm */
	sprintf(log_buffer,
		"nprocs:  %d, cantstat:  %d, nomem:  %d, skipped:  %d, "
		"cached:  %d",
		nprocs - 2, ncantstat, nnomem, nskipped,
		ncached);
	log_event(PBSEVENT_DEBUG4, 0, LOG_DEBUG, __func__, log_buffer);
	return (PBSE_NONE);
}

/**
 * @brief
 * 	Update the resources used.<attributes> of a job.
 *
 * @param[in]	pjob - job in question.
 *
 * @note
 *	The first time this is called for a job, set up resource entries for
 *	each resource that can be reported for this machine (ncpus, cput,
 *	cpupercent, vmem, mem).  On later calls cput, vmem and mem are
 *	refreshed and never decrease.
 *	If a resource attribute has been set in a mom hook, then its value
 *	will not be updated here. This allows a mom  hook to override
 *	resource value.
 *	Nothing is updated while the job is suspended.
 *
 * @return int
 * @retval PBSE_NONE	for success.
 */
int
mom_set_use(job *pjob)
{
	resource *pres;
	resource *pres_req;
	attribute *at;
	attribute *at_req;
	resource_def *rd;
	u_Long *lp_sz, lnum_sz;
	unsigned long *lp, lnum, oldcput;
	long ncpus_req;

	assert(pjob != NULL);
	at = get_jattr(pjob, JOB_ATR_resc_used);
	assert(at->at_type == ATR_TYPE_RESC);

	if ((pjob->ji_qs.ji_svrflags & JOB_SVFLG_Suspend) != 0)
		return (PBSE_NONE); /* job suspended, don't track it */

	DBPRT(("%s: entered %s\n", __func__, pjob->ji_qs.ji_jobid))

	at->at_flags |= (ATR_VFLAG_MODIFY | ATR_VFLAG_SET);

	rd = &svr_resc_def[RESC_NCPUS];
	pres = find_resc_entry(at, rd);
	if (pres == NULL) {
		pres = add_resource_entry(at, rd);
		mark_attr_set(&pres->rs_value);
		pres->rs_value.at_type = ATR_TYPE_LONG;

		/*
		 * get pointer to list of resources *requested* for the job
		 * so the ncpus used can be set to ncpus requested
		 */
		at_req = get_jattr(pjob, JOB_ATR_resource);
		/* check the attribute just fetched, not the "used" one again */
		assert(at_req->at_type == ATR_TYPE_RESC);

		pres_req = find_resc_entry(at_req, rd);
		if ((pres_req != NULL) &&
		    ((ncpus_req = pres_req->rs_value.at_val.at_long) != 0))
			pres->rs_value.at_val.at_long = ncpus_req;
		else
			pres->rs_value.at_val.at_long = 0;
	}

	rd = &svr_resc_def[RESC_CPUT];
	pres = find_resc_entry(at, rd);
	if (pres == NULL) {
		pres = add_resource_entry(at, rd);
		mark_attr_set(&pres->rs_value);
		pres->rs_value.at_type = ATR_TYPE_LONG;
		pres->rs_value.at_val.at_long = 0;
	}
	lp = (unsigned long *) &pres->rs_value.at_val.at_long;
	oldcput = *lp;
	lnum = cput_sum(pjob);
	/* reported cput never decreases */
	lnum = MAX(*lp, lnum);
	if ((pres->rs_value.at_flags & ATR_VFLAG_HOOK) == 0) {
		/* don't conflict with hook setting a value */
		*lp = lnum;
	}

	rd = &svr_resc_def[RESC_CPUPERCENT];
	pres = find_resc_entry(at, rd);
	if (pres == NULL) {
		pres = add_resource_entry(at, rd);
		mark_attr_set(&pres->rs_value);
		pres->rs_value.at_type = ATR_TYPE_LONG;
		pres->rs_value.at_val.at_long = 0;
	}
	if ((pres->rs_value.at_flags & ATR_VFLAG_HOOK) == 0) {
		/* now calculate weighted moving average cpu usage */
		/* percentage */
		calc_cpupercent(pjob, oldcput, lnum, sampletime_ceil);
	}
	pjob->ji_sampletim = sampletime_floor;

	rd = &svr_resc_def[RESC_VMEM];
	pres = find_resc_entry(at, rd);
	if (pres == NULL) {
		/* first call: only create the entry, the value is filled in later */
		pres = add_resource_entry(at, rd);
		mark_attr_set(&pres->rs_value);
		pres->rs_value.at_type = ATR_TYPE_SIZE;
		pres->rs_value.at_val.at_size.atsv_shift = 10; /* KB */
		pres->rs_value.at_val.at_size.atsv_units = ATR_SV_BYTESZ;
	} else if ((pres->rs_value.at_flags & ATR_VFLAG_HOOK) == 0) {
		lp_sz = &pres->rs_value.at_val.at_size.atsv_num;
		lnum_sz = (mem_sum(pjob) + 1023) >> 10; /* as KB */
		*lp_sz = MAX(*lp_sz, lnum_sz);
	}

	/* update walltime usage */
	update_walltime(pjob);

	rd = &svr_resc_def[RESC_MEM];
	pres = find_resc_entry(at, rd);
	if (pres == NULL) {
		/* first call: only create the entry, the value is filled in later */
		pres = add_resource_entry(at, rd);
		mark_attr_set(&pres->rs_value);
		pres->rs_value.at_type = ATR_TYPE_SIZE;
		pres->rs_value.at_val.at_size.atsv_shift = 10; /* KB */
		pres->rs_value.at_val.at_size.atsv_units = ATR_SV_BYTESZ;
	} else if ((pres->rs_value.at_flags & ATR_VFLAG_HOOK) == 0) {
		lp_sz = &pres->rs_value.at_val.at_size.atsv_num;
		lnum_sz = (resi_sum(pjob) + 1023) >> 10; /* as KB */
		*lp_sz = MAX(*lp_sz, lnum_sz);
	}

	return (PBSE_NONE);
}

/**
 * @brief
 * 	bld_ptree - establish links (parent, child, and sibling) for processes
 * 	in a given session.
 *
 *	The PBS_PROC_* macros are defined in resmom/.../mom_mach.h
 *	to refer to the correct machine dependent table.
 *	Linkage scope changed from static to default as this gets referred
 *	from scan_for_terminated(), declaration	added in the mom_mach.h.
 *
 * @param[in] sid - session id
 *
 * @return	int
 * @retval	number of processes in session	Success
 *
 */
int
bld_ptree(pid_t sid)
{
	int myproc_ct; /* count of processes in a session */
	int i, j;

	if (Proc_lnks == NULL) {
		Proc_lnks = (pbs_plinks *) malloc(TBL_INC * sizeof(pbs_plinks));
		assert(Proc_lnks != NULL);
		myproc_max = TBL_INC;
	}

	/*
	 * Build links for processes in the session in question.
	 * First, load with the processes in the session.
	 */

	myproc_ct = 0;
	for (i = 0; i < nproc; i++) {
		if (PBS_PROC_PID(i) <= 1)
			continue;
		if ((int) PBS_PROC_SID(i) == sid) {
			Proc_lnks[myproc_ct].pl_pid = PBS_PROC_PID(i);
			Proc_lnks[myproc_ct].pl_ppid = PBS_PROC_PPID(i);
			Proc_lnks[myproc_ct].pl_parent = -1;
			Proc_lnks[myproc_ct].pl_sib = -1;
			Proc_lnks[myproc_ct].pl_child = -1;
			Proc_lnks[myproc_ct].pl_done = 0;
			if (++myproc_ct == myproc_max) {
				void *hold;

				myproc_max += TBL_INC;
				hold = realloc((void *) Proc_lnks,
					       myproc_max * sizeof(pbs_plinks));
				assert(hold != NULL);
				Proc_lnks = (pbs_plinks *) hold;
			}
		}
	}

	/* Now build the tree for those processes */

	for (i = 0; i < myproc_ct; i++) {
		/*
		 * Find all the children for this process, establish links.
		 */
		for (j = 0; j < myproc_ct; j++) {
			if (j == i)
				continue;
			if (Proc_lnks[j].pl_ppid == Proc_lnks[i].pl_pid) {
				Proc_lnks[j].pl_parent = i;
				Proc_lnks[j].pl_sib = Proc_lnks[i].pl_child;
				Proc_lnks[i].pl_child = j;
			}
		}
	}
	return (myproc_ct); /* number of processes in session */
}

/**
 * @brief
 *	kill_ptree - recursively walk the Proc_lnks tree rooted at idx and
 *	signal every entry that has not been signalled yet (pl_done == 0).
 *
 * @param[in]	idx:	index into Proc_lnks of the subtree root
 * @param[in]	flag:	non-zero - signal the root before its children
 *			zero     - signal the root after its children
 * @param[in]	sig:	the signal to send
 *
 * @return	Void
 *
 */
static void
kill_ptree(int idx, int flag, int sig)
{
	pid_t kid;

	/* top down: the parent goes first */
	if (flag != 0 && Proc_lnks[idx].pl_done == 0) {
		DBPRT(("%s: top down %d\n", __func__, Proc_lnks[idx].pl_pid));
		(void) kill(Proc_lnks[idx].pl_pid, sig);
		Proc_lnks[idx].pl_done = 1;
	}

	/* descend into each child, following the sibling chain */
	for (kid = Proc_lnks[idx].pl_child; kid != -1; kid = Proc_lnks[kid].pl_sib)
		kill_ptree(kid, flag, sig);

	/* bottom up: the parent goes last */
	if (flag == 0 && Proc_lnks[idx].pl_done == 0) {
		DBPRT(("%s: bottom up %d\n", __func__, Proc_lnks[idx].pl_pid));
		(void) kill(Proc_lnks[idx].pl_pid, sig);
		Proc_lnks[idx].pl_done = 1;
	}
}
/**
 * @brief
 *	Signal the session of a task; thin wrapper around kill_session().
 *
 * @param[in] ptask - pointer to pbs_task structure (its ti_qs.ti_sid is used)
 * @param[in] sig - signal number
 * @param[in] dir - indication how to kill
 *		    0 - kill child first
 *		    1 - kill parent first
 *
 * @return	int
 * @retval	value returned by kill_session()
 *
 */
int
kill_task(pbs_task *ptask, int sig, int dir)
{
	pid_t sesid = ptask->ti_qs.ti_sid;

	return kill_session(sesid, sig, dir);
}

/**
 * @brief
 *	Kill a task session.
 *	Call with the task pointer and a signal number.
 *
 * @param[in] sesid - session id
 * @param[in] sig - signal number
 * @param[in] dir - indication how to kill
 *                  0 - kill child first
 *	            1 - kill parent first
 *
 * @return	int
 * @retval      number of tasks
 *
 */
int
kill_session(pid_t sesid, int sig, int dir)
{
	int ct = 0;
	int i;

	DBPRT(("%s: entered sid %d\n", __func__, sesid))
	if (sesid <= 1)
		return 0;

	(void) mom_get_sample();
	ct = bld_ptree(sesid);
	DBPRT(("%s: bld_ptree %d\n", __func__, ct))

	/*
	 ** Find index into the Proc_lnks table for the session lead.
	 */
	for (i = 0; i < ct; i++) {
		if (Proc_lnks[i].pl_pid == sesid) {
			kill_ptree(i, dir, sig);
			break;
		}
	}
	/*
	 ** Do a linear pass.
	 */
	for (i = 0; i < ct; i++) {
		if (Proc_lnks[i].pl_done)
			continue;
		DBPRT(("%s: cleanup %d\n", __func__, Proc_lnks[i].pl_pid))
		kill(Proc_lnks[i].pl_pid, sig);
	}

	/*
	 ** Kill the process group in case anything was missed reading /proc
	 */
	if ((sig == SIGKILL) || (ct == 0))
		killpg(sesid, sig);

	return ct;
}

/**
 * @brief
 *	Release the polling resources: close the directory stream held in
 *	pdir and free the proc_info table.
 *
 * @return	int
 * @retval	PBSE_NONE	Success
 * @retval	PBSE_SYSTEM	closedir() failed (proc_info is left untouched)
 *
 */
int
mom_close_poll(void)
{
	DBPRT(("%s: entered\n", __func__))

	if (pdir != NULL) {
		if (closedir(pdir) != 0) {
			log_err(errno, __func__, "closedir");
			return PBSE_SYSTEM;
		}
		pdir = NULL;
	}

	if (proc_info != NULL) {
		free(proc_info);
		proc_info = NULL;
		max_proc = 0;
	}

	return PBSE_NONE;
}

/**
 * @brief
 * 	Checkpoint the job.
 *	This implementation performs no work; all arguments are ignored.
 *
 * @param[in] ptask - pointer to task (unused)
 * @param[in] file - filename (unused)
 * @param[in] abort - value indicating abort (unused)
 *
 * @return	int
 * @retval	-1	always
 */
int
mach_checkpoint(task *ptask, char *file, int abort)
{
	return -1;
}

/**
 * @brief
 * 	Restart the job from the checkpoint file.
 *	This implementation performs no work; all arguments are ignored.
 *
 * @param[in] ptask - pointer to task (unused)
 * @param[in] file - filename (unused)
 *
 * @return      long
 * @retval	-1	always
 */
long
mach_restart(task *ptask, char *file)
{
	return -1;
}

/**
 * @brief
 *	Make sure a process sample exists for the current request.
 *	A new sample is taken only when reqnum differs from the value
 *	remembered at the last successful sample.
 *
 * @return	int
 * @retval	1	proc table is available
 * @retval	0	mom_get_sample() failed
 */
int
getprocs(void)
{
	static unsigned int lastproc = 0; /* reqnum of the last good sample */

	if (lastproc != reqnum) {
		if (mom_get_sample() != PBSE_NONE)
			return 0;
		lastproc = reqnum;
	}
	return 1;
}

#define dsecs(val) ((double) (val))

/**
 * @brief
 *	Sum the cutime and cstime fields of every sampled process whose
 *	session equals jobid and format the sum, scaled by cputfactor,
 *	into ret_string.  No new sample is taken here.
 *
 * @param[in] jobid - session id of the job
 *
 * @return	string
 * @retval	cputime		Success
 * @retval	NULL		no process in that session (rm_errno = RM_ERR_EXIST)
 *
 */
char *
cput_job(pid_t jobid)
{
	double total = 0.0;
	int matched = 0;
	int idx;

	for (idx = 0; idx < nproc; idx++) {
		proc_stat_t *ps = &proc_info[idx];
		double delta;

		if (ps->session != jobid)
			continue;

		matched = 1;
		delta = dsecs(ps->cutime) + dsecs(ps->cstime);
		total += delta;
		DBPRT(("%s: total %.2f pid %d %.2f\n", __func__, total,
		       ps->pid, delta))
	}

	if (!matched) {
		rm_errno = RM_ERR_EXIST;
		return NULL;
	}

	sprintf(ret_string, "%.2f", total * cputfactor);
	return ret_string;
}

/**
 * @brief
 *      Take a fresh sample and report utime + stime of the process with
 *	the given pid, scaled by cputfactor, in ret_string.
 *
 * @param[in] pid - process id
 *
 * @return      string
 * @retval      cputime         Success
 * @retval      NULL            pid not in the sample (rm_errno = RM_ERR_EXIST)
 *
 */
char *
cput_proc(pid_t pid)
{
	proc_stat_t *hit = NULL;
	double cputime;
	int idx;

	mom_get_sample();
	for (idx = 0; idx < nproc; idx++) {
		if (proc_info[idx].pid == pid) {
			hit = &proc_info[idx];
			break;
		}
	}
	if (hit == NULL) {
		rm_errno = RM_ERR_EXIST;
		return NULL;
	}

	cputime = dsecs(hit->utime) + dsecs(hit->stime);
	sprintf(ret_string, "%.2f", cputime * cputfactor);
	return ret_string;
}

/**
 * @brief
 *	Resource-monitor entry point for cpu time: validates the request
 *	and dispatches to cput_job() ("session" qualifier) or cput_proc()
 *	("proc" qualifier).
 *
 * @param[in] attrib - pointer to rm_attribute structure
 *
 * @return	string
 * @retval	cputime		Success
 * @retval	NULL		Error (rm_errno is set)
 *
 */
char *
cput(struct rm_attribute *attrib)
{
	int id;

	if (attrib == NULL) {
		log_err(-1, __func__, no_parm);
		rm_errno = RM_ERR_NOPARAM;
		return NULL;
	}

	/* a value that converts to 0 is rejected */
	id = atoi(attrib->a_value);
	if (id == 0) {
		sprintf(log_buffer, "bad param: %s", attrib->a_value);
		log_err(-1, __func__, log_buffer);
		rm_errno = RM_ERR_BADPARAM;
		return NULL;
	}

	/* exactly one attribute is accepted */
	if (momgetattr(NULL)) {
		log_err(-1, __func__, extra_parm);
		rm_errno = RM_ERR_BADPARAM;
		return NULL;
	}

	if (strcmp(attrib->a_qualifier, "session") == 0)
		return cput_job((pid_t) id);
	if (strcmp(attrib->a_qualifier, "proc") == 0)
		return cput_proc((pid_t) id);

	rm_errno = RM_ERR_BADPARAM;
	return NULL;
}

/**
 * @brief
 *      Take a fresh sample and report the summed vsize of all processes
 *	in session sid, shifted right by 10 and suffixed "kb".
 *
 * @param[in] sid - session id
 *
 * @return      string
 * @retval      memsize         Success
 * @retval      NULL            sum is zero (rm_errno = RM_ERR_EXIST)
 *
 */
char *
mem_job(pid_t sid)
{
	unsigned long total = 0;
	int idx;

	mom_get_sample();
	for (idx = 0; idx < nproc; idx++) {
		proc_stat_t *ps = &proc_info[idx];

		if (ps->session == sid)
			total += ps->vsize;
	}

	if (total == 0) {
		rm_errno = RM_ERR_EXIST;
		return NULL;
	}

	sprintf(ret_string, "%lukb", total >> 10); /* KB */
	return ret_string;
}

/**
 * @brief
 *      Take a fresh sample and report the vsize of the process with the
 *	given pid, shifted right by 10 and suffixed "kb".
 *
 * @param[in] pid - process id
 *
 * @return      string
 * @retval      memsize         Success
 * @retval      NULL            pid not in the sample (rm_errno = RM_ERR_SYSTEM)
 *
 */
char *
mem_proc(pid_t pid)
{
	proc_stat_t *hit = NULL;
	int idx;

	mom_get_sample();
	for (idx = 0; idx < nproc; idx++) {
		if (proc_info[idx].pid == pid) {
			hit = &proc_info[idx];
			break;
		}
	}
	if (hit == NULL) {
		rm_errno = RM_ERR_SYSTEM;
		return NULL;
	}

	sprintf(ret_string, "%lukb", (unsigned long) hit->vsize >> 10); /* KB */
	return ret_string;
}

/**
 * @brief
 *      Resource-monitor entry point for memory size: validates the
 *	request and dispatches to mem_job() ("session" qualifier) or
 *	mem_proc() ("proc" qualifier).
 *
 * @param[in] attrib - pointer to rm_attribute structure
 *
 * @return      string
 * @retval      memsize         Success
 * @retval      NULL            Error (rm_errno is set)
 *
 */
char *
mem(struct rm_attribute *attrib)
{
	int id;

	if (attrib == NULL) {
		log_err(-1, __func__, no_parm);
		rm_errno = RM_ERR_NOPARAM;
		return NULL;
	}

	/* a value that converts to 0 is rejected */
	id = atoi(attrib->a_value);
	if (id == 0) {
		sprintf(log_buffer, "bad param: %s", attrib->a_value);
		log_err(-1, __func__, log_buffer);
		rm_errno = RM_ERR_BADPARAM;
		return NULL;
	}

	/* exactly one attribute is accepted */
	if (momgetattr(NULL)) {
		log_err(-1, __func__, extra_parm);
		rm_errno = RM_ERR_BADPARAM;
		return NULL;
	}

	if (strcmp(attrib->a_qualifier, "session") == 0)
		return mem_job((pid_t) id);
	if (strcmp(attrib->a_qualifier, "proc") == 0)
		return mem_proc((pid_t) id);

	rm_errno = RM_ERR_BADPARAM;
	return NULL;
}

/**
 * @brief
 *	computes and returns resident set size for job
 *
 * @param[in] jobid - pid for job
 *
 * @return	string
 * @retval	resident set size	Success
 * @retval	NULL			Error
 *
 */
static char *
resi_job(pid_t jobid)
{
	int i;
	unsigned long resisize;
	int found = 0;
	proc_stat_t *ps;

	resisize = 0;
	mom_get_sample();

	for (i = 0; i < nproc; i++) {

		ps = &proc_info[i];

		if (jobid != ps->session)
			continue;

		found = 1;
		resisize += ps->rss;
	}
	if (found) {
		/* in KB */
		sprintf(ret_string, "%lukb", (resisize * (unsigned long) pagesize) >> 10);
		return ret_string;
	}

	rm_errno = RM_ERR_EXIST;
	return NULL;
}

/**
 * @brief
 *      Take a fresh sample and report the resident set size of one
 *	process: rss * pagesize, shifted right by 10, with a "kb" suffix.
 *
 * @param[in] pid - process id
 *
 * @return      string
 * @retval      resident set size       Success
 * @retval      NULL                    pid not in the sample
 *					(rm_errno = RM_ERR_EXIST)
 *
 */
static char *
resi_proc(pid_t pid)
{
	proc_stat_t *hit = NULL;
	int idx;

	mom_get_sample();
	for (idx = 0; idx < nproc; idx++) {
		if (proc_info[idx].pid == pid) {
			hit = &proc_info[idx];
			break;
		}
	}
	if (hit == NULL) {
		rm_errno = RM_ERR_EXIST;
		return NULL;
	}

	/* in KB */
	sprintf(ret_string, "%lukb", ((unsigned long) hit->rss * (unsigned long) pagesize) >> 10);
	return ret_string;
}

/**
 * @brief
 *      Resource-monitor entry point for resident set size: validates the
 *	request and dispatches to resi_job() ("session" qualifier) or
 *	resi_proc() ("proc" qualifier).
 *
 * @param[in] attrib - pointer to rm_attribute structure
 *
 * @return      string
 * @retval      resident set size     	Success
 * @retval      NULL            	Error (rm_errno is set)
 *
 */
static char *
resi(struct rm_attribute *attrib)
{
	int id;

	if (attrib == NULL) {
		log_err(-1, __func__, no_parm);
		rm_errno = RM_ERR_NOPARAM;
		return NULL;
	}

	/* a value that converts to 0 is rejected */
	id = atoi(attrib->a_value);
	if (id == 0) {
		sprintf(log_buffer, "bad param: %s", attrib->a_value);
		log_err(-1, __func__, log_buffer);
		rm_errno = RM_ERR_BADPARAM;
		return NULL;
	}

	/* exactly one attribute is accepted */
	if (momgetattr(NULL)) {
		log_err(-1, __func__, extra_parm);
		rm_errno = RM_ERR_BADPARAM;
		return NULL;
	}

	if (strcmp(attrib->a_qualifier, "session") == 0)
		return resi_job((pid_t) id);
	if (strcmp(attrib->a_qualifier, "proc") == 0)
		return resi_proc((pid_t) id);

	rm_errno = RM_ERR_BADPARAM;
	return NULL;
}

/**
 * @brief
 *	Build the list of distinct session ids found in a fresh process
 *	sample.  Processes with uid 0 or session 0 are skipped.  The ids are
 *	written to ret_string, each preceded by a single blank.
 *
 * @param[in] attrib - pointer to rm_attribute structure (must be NULL)
 *
 * @return	string
 * @retval	session id list (empty string when none)	Success
 * @retval	NULL						error (rm_errno is set)
 *
 */
char *
sessions(struct rm_attribute *attrib)
{
	char *fmt;
	int i, j;
	proc_stat_t *ps;
	int njids = 0;
	pid_t *jids, *hold;
	static int maxjid = 200; /* capacity, in elements, kept across calls */
	pid_t jobid;

	if (attrib) {
		log_err(-1, __func__, extra_parm);
		rm_errno = RM_ERR_BADPARAM;
		return NULL;
	}
	if ((jids = (pid_t *) calloc(maxjid, sizeof(pid_t))) == NULL) {
		log_err(errno, __func__, "no memory");
		rm_errno = RM_ERR_SYSTEM;
		return NULL;
	}

	mom_get_sample();

	/*
	 ** Collect each distinct session id once.
	 */
	for (i = 0; i < nproc; i++) {
		ps = &proc_info[i];

		if (ps->uid == 0)
			continue;
		if ((jobid = ps->session) == 0)
			continue;
		DBPRT(("%s[%d]: pid %d sid %d\n",
		       __func__, njids, ps->pid, jobid))

		for (j = 0; j < njids; j++) {
			if (jids[j] == jobid)
				break;
		}
		if (j == njids) {	       /* not found */
			if (njids == maxjid) { /* need more space */
				maxjid += 100;
				/*
				 * realloc() takes a size in bytes; maxjid
				 * counts elements, so scale it by the
				 * element size.
				 */
				hold = (pid_t *) realloc(jids,
							 (size_t) maxjid * sizeof(pid_t));
				if (hold == NULL) {
					log_err(errno, __func__, "realloc");
					rm_errno = RM_ERR_SYSTEM;
					free(jids);
					return NULL;
				}
				jids = hold;
			}
			jids[njids++] = jobid; /* add jobid to list */
		}
	}

	/*
	 * Start from an empty string so that a sample with no qualifying
	 * session does not return whatever an earlier request left behind.
	 */
	ret_string[0] = '\0';
	fmt = ret_string;
	for (j = 0; j < njids; j++) {
		checkret(&fmt, 100);
		sprintf(fmt, " %d", (int) jids[j]);
		fmt += strlen(fmt);
	}
	free(jids);
	return ret_string;
}

/**
 * @brief
 *	Report the number of sessions: calls sessions() and counts the
 *	blanks in the list it returns, then overwrites ret_string with
 *	that count.
 *
 * @param[in] attrib - pointer to rm_attribute structure
 *
 * @return      string
 * @retval      session count	Success
 * @retval      NULL		sessions() failed
 *
 */
char *
nsessions(struct rm_attribute *attrib)
{
	char *list;
	char *cur;
	int count = 0;

	list = sessions(attrib);
	if (list == NULL)
		return list;

	/* every id in the list is preceded by exactly one blank */
	cur = list;
	while (*cur != '\0') {
		if (*cur == ' ')
			count++;
		cur++;
	}

	sprintf(ret_string, "%d", count);
	return ret_string;
}

/**
 * @brief
 *      List the pids of all processes in a session.  A fresh sample is
 *	taken and each matching pid is written to ret_string followed by
 *	a blank.
 *
 * @param[in] attrib - pointer to rm_attribute structure; the qualifier
 *		       must be "session" and the value the session id
 *
 * @return      string
 * @retval      pid list	Success
 * @retval      NULL            error, or no process in the session
 *				(rm_errno is set)
 *
 */
char *
pids(struct rm_attribute *attrib)
{
	char *fmt;
	int i;
	pid_t jobid;
	proc_stat_t *ps;
	int num_pids;

	if (attrib == NULL) {
		log_err(-1, __func__, no_parm);
		rm_errno = RM_ERR_NOPARAM;
		return NULL;
	}
	if ((jobid = (pid_t) atoi(attrib->a_value)) == 0) {
		sprintf(log_buffer, "bad param: %s", attrib->a_value);
		log_err(-1, __func__, log_buffer);
		rm_errno = RM_ERR_BADPARAM;
		return NULL;
	}
	if (momgetattr(NULL)) {
		log_err(-1, __func__, extra_parm);
		rm_errno = RM_ERR_BADPARAM;
		return NULL;
	}

	if (strcmp(attrib->a_qualifier, "session") != 0) {
		rm_errno = RM_ERR_BADPARAM;
		return NULL;
	}

	mom_get_sample();

	/*
	 ** Search for members of session
	 */
	fmt = ret_string;
	num_pids = 0;

	for (i = 0; i < nproc; i++) {

		ps = &proc_info[i];
		DBPRT(("%s[%d]: pid: %d sid %d\n",
		       __func__, num_pids, ps->pid, ps->session))
		if (jobid != ps->session)
			continue;

		/*
		 * Same guard sessions() applies before each append, so a
		 * session with many processes cannot run off the end of
		 * ret_string.
		 */
		checkret(&fmt, 100);
		sprintf(fmt, "%d ", ps->pid);
		fmt += strlen(fmt);
		num_pids++;
	}
	if (num_pids == 0) {
		rm_errno = RM_ERR_EXIST;
		return NULL;
	}
	return ret_string;
}

/**
 * @brief
 *      Count the distinct non-zero uids that own a process in a fresh
 *	sample and write the count to ret_string.
 *
 * @param[in] attrib - pointer to rm_attribute structure (must be NULL)
 *
 * @return      string
 * @retval      user count	Success
 * @retval      NULL            error (rm_errno is set)
 *
 */
char *
nusers(struct rm_attribute *attrib)
{
	int i;
	int j;
	proc_stat_t *ps;
	int nuids = 0;
	uid_t *uids, *hold;
	static int maxuid = 200; /* capacity, in elements, kept across calls */
	uid_t uid;

	if (attrib) {
		log_err(-1, __func__, extra_parm);
		rm_errno = RM_ERR_BADPARAM;
		return NULL;
	}
	if ((uids = (uid_t *) calloc(maxuid, sizeof(uid_t))) == NULL) {
		log_err(errno, __func__, "no memory");
		rm_errno = RM_ERR_SYSTEM;
		return NULL;
	}

	mom_get_sample();
	for (i = 0; i < nproc; i++) {
		ps = &proc_info[i];

		if ((uid = ps->uid) == 0)
			continue;

		DBPRT(("%s[%d]: pid %d uid %d\n",
		       __func__, nuids, ps->pid, uid))

		for (j = 0; j < nuids; j++) {
			if (uids[j] == uid)
				break;
		}
		if (j == nuids) {	       /* not found */
			if (nuids == maxuid) { /* need more space */
				maxuid += 100;
				/*
				 * realloc() takes a size in bytes; maxuid
				 * counts elements, so scale it by the
				 * element size.
				 */
				hold = (uid_t *) realloc(uids,
							 (size_t) maxuid * sizeof(uid_t));
				if (hold == NULL) {
					log_err(errno, __func__, "realloc");
					rm_errno = RM_ERR_SYSTEM;
					free(uids);
					return NULL;
				}
				uids = hold;
			}
			uids[nuids++] = uid; /* add uid to list */
		}
	}

	sprintf(ret_string, "%d", nuids);
	free(uids);
	return ret_string;
}

/**
 * @brief
 *	Return the pids of all sampled processes as an array terminated
 *	by -1.  The array is held in a function-static pointer and is
 *	freed and re-allocated on every call.
 *
 * @return	pid_t *
 * @retval	pid array	Success
 * @retval	NULL		allocation failed
 *
 */
pid_t *
allpids(void)
{
	static pid_t *pidlist = NULL;
	int idx;

	getprocs();

	/* drop the previous result; free(NULL) is a no-op */
	free(pidlist);
	pidlist = (pid_t *) calloc(nproc + 1, sizeof(pid_t));
	if (pidlist == NULL) {
		log_err(errno, __func__, "no memory");
		return NULL;
	}

	for (idx = 0; idx < nproc; idx++)
		pidlist[idx] = proc_info[idx].pid; /* add pid to list */

	pidlist[nproc] = -1; /* end marker */
	return pidlist;
}

/**
 * @brief
 *	Report the "total" field from get_proc_mem(), shifted right by 10,
 *	as a string with a "kb" suffix.
 *
 * @param[in] attrib - pointer to rm_attribute structure (must be NULL)
 *
 * @return      string
 * @retval      total memory    	Success
 * @retval      NULL    		Error (rm_errno is set)
 *
 */
static char *
totmem(struct rm_attribute *attrib)
{
	proc_mem_t *mm;

	if (attrib != NULL) {
		log_err(-1, __func__, extra_parm);
		rm_errno = RM_ERR_BADPARAM;
		return NULL;
	}

	mm = get_proc_mem();
	if (mm == NULL) {
		log_err(errno, __func__, "get_proc_mem");
		rm_errno = RM_ERR_SYSTEM;
		return NULL;
	}

	DBPRT(("%s: total mem=%lu\n", __func__, mm->total))
	sprintf(ret_string, "%lukb", (unsigned long) mm->total >> 10); /* KB */
	return ret_string;
}

/**
 * @brief
 *      Report the "free" field from get_proc_mem(), shifted right by 10,
 *	as a string with a "kb" suffix.
 *
 * @param[in] attrib - pointer to rm_attribute structure (must be NULL)
 *
 * @return      string
 * @retval      free memory		Success
 * @retval      NULL 			Error (rm_errno is set)
 *
 */
static char *
availmem(struct rm_attribute *attrib)
{
	proc_mem_t *mm;

	if (attrib != NULL) {
		log_err(-1, __func__, extra_parm);
		rm_errno = RM_ERR_BADPARAM;
		return NULL;
	}

	mm = get_proc_mem();
	if (mm == NULL) {
		log_err(errno, __func__, "get_proc_mem");
		rm_errno = RM_ERR_SYSTEM;
		return NULL;
	}

	DBPRT(("%s: free mem=%lu\n", __func__, mm->free))
	sprintf(ret_string, "%lukb", (unsigned long) mm->free >> 10); /* KB */
	return ret_string;
}

/**
 * @brief	find and remember the current Linux release number
 *
 *		The first successful call copies u->release into heap
 *		memory and logs it; later calls return that same copy
 *		without looking at the argument.
 *
 * @param[in]	u - result of uname(2)
 *
 * @return	copy of the utsname release[] member
 * @retval	NULL	the copy could not be allocated
 */
static char *
uname2release(struct utsname *u)
{
	static char *u_release = NULL;
	size_t nbytes;

	if (u_release != NULL)
		return u_release;

	nbytes = strlen(u->release) + 1;
	u_release = malloc(nbytes);
	if (u_release == NULL)
		return NULL;

	memcpy(u_release, u->release, nbytes);
	sprintf(log_buffer, "uname release:  %s", u_release);
	log_event(PBSEVENT_DEBUG4, 0, LOG_DEBUG, __func__, log_buffer);
	return u_release;
}

/**
 * @brief	choose the format for the /proc "flags" field
 *
 * @param[in]	release - kernel release string, expected to look like
 *			  <major>.<minor>.<micro>[-extra]
 *
 * @return	"%lu" for /proc before Linux version 2.6.22
 * @return	"%u" for  /proc Linux version 2.6.22 and later
 * @return	NULL when fewer than three numeric components could be
 *		examined and none of them already decided the answer
 *
 * @note	To derive release information, we're at the mercy of whoever
 *		configures the kernel's UTS_RELEASE value when it's built.
 *		Components are separated by '.' or '-'; each is parsed with
 *		sscanf() so trailing non-digit characters are ignored.  A
 *		component that does not start with a number is skipped but
 *		still counts as a position.
 *
 *		The final component is also examined when the string ends
 *		without a trailing separator, so a plain "2.6.22" is
 *		recognized.
 *
 *		This code is not designed to work for Linux versions < 2.
 *
 * @par MT-Safe:	yes
 */
static char *
procflagsfmt(char *release)
{
	static char before[] = "%lu";
	static char after[] = "%u";
	char *ver_begin = release; /* start of the component being parsed */
	char *p;
	int nseparators_seen = 0;
	int minor = 0; /* stays 0 if the minor component is not numeric */
	int ver;

	for (p = release;; p++) {
		int at_end = (*p == '\0');

		if (!at_end && *p != '.' && *p != '-')
			continue;

		/* p sits on a separator or on the terminator: parse what precedes it */
		if (sscanf(ver_begin, "%d", &ver) == 1) {
			if (nseparators_seen == 0) {
				/* major */
				if (ver > 2)
					return (after);
			} else if (nseparators_seen == 1) {
				minor = ver;
				if (minor > 6)
					return (after);
			} else {
				/* "flags %u (%lu before Linux 2.6.22)" */
				if ((minor == 6) && (ver >= 22))
					return (after);
				return (before);
			}
		}

		if (at_end)
			break; /* never step past the terminator */

		ver_begin = p + 1;
		nseparators_seen++;
	}

	return NULL;
}

/**
 * @brief	return the stdio format directive for the /proc flags field
 *
 *		On the first successful call the format is built by
 *		substituting the result of procflagsfmt() into stat_str_pre
 *		and a heap copy is remembered; later calls return that copy.
 *		A failed call is not remembered, so the next call tries again.
 *
 * @return	static char *
 * @retval	NULL	uname(), uname2release(), procflagsfmt() or
 *			strdup() failed (an error is logged)
 *
 * @see	procflagsfmt
 * @see	uname2release
 */
static char *
choose_procflagsfmt(void)
{
	static char *fmtstr = NULL;
	static int initialized = 0;
	char buf[1024];
	struct utsname u;
	char *release;
	char *fffs; /* the flags field format string */

	if (initialized)
		return fmtstr;

	if (uname(&u) == -1) {
		log_err(errno, __func__, "uname");
		return NULL;
	}

	release = uname2release(&u);
	if (release == NULL) {
		log_err(-1, __func__, "uname2release returned NULL");
		return NULL;
	}

	fffs = procflagsfmt(release);
	if (fffs == NULL) {
		log_err(-1, __func__, "procflagsfmt returned NULL");
		return NULL;
	}

	sprintf(buf, stat_str_pre, fffs);
	fmtstr = strdup(buf);
	if (fmtstr == NULL) {
		log_err(-1, __func__, "strdup returned NULL");
		return NULL;
	}

	initialized = 1;
	return fmtstr;
}

#else /* PBSMOM_HTUNIT */

/*
 **	This is code to compile the ncpus function for unit testing.
 **	What follows is a bit of cruft needed to make a correct program.
 */
#include <stdio.h>
#include <stdlib.h>
#include <limits.h>
#include <string.h>
#include <assert.h>
#include <sys/utsname.h>

char log_buffer[4096];
char ret_string[4096];

/**
 * @brief
 *	Unit-test stand-in for log_event(): prints "<id>: <mess>" on stdout.
 *
 * @param[in] a - ignored
 * @param[in] b - ignored
 * @param[in] c - ignored
 * @param[in] id - id to indicate log from which object
 * @param[in] mess - message to be logged
 *
 * @return	Void
 *
 */
void
log_event(int a, int b, int c, char *id, char *mess)
{
	(void) a;
	(void) b;
	(void) c;
	printf("%s: %s\n", id, mess);
}
/**
 * @brief
 *      Unit-test stand-in for log_err(): prints
 *	"error <a> <id>: <mess>" on stdout.
 *
 * @param[in] a - error number
 * @param[in] id - id to indicate log from which object
 * @param[in] mess - message to be logged
 *
 * @return      Void
 *
 */
void
log_err(int a, char *id, char *mess)
{
	(void) printf("error %d %s: %s\n", a, id, mess);
}

struct rm_attribute;

static char *ncpus(struct rm_attribute *);

#define PBSEVENT_SYSTEM 0
#define LOG_NOTICE 0
#define RM_ERR_BADPARAM 0
#define pbs_strsep strsep

int num_pcpus, num_acpus, num_oscpus, rm_errno;
char extra_parm[] = "extra_parm";

/* Unit-test driver: run ncpus() once and print what it found. */
int
main()
{
	char *answer = ncpus(NULL);

	if (answer != NULL)
		printf("ncpus = %s\n", ret_string);
	printf("physical %d  logical %d\n", num_pcpus, num_oscpus);
	return 0;
}
#endif /* PBSMOM_HTUNIT */

/**
 * @brief
 *	Match a label at the start of a line and step over it.
 *	If str begins with skip, the result points just past the label
 *	and past any following run of tab, blank or ':' characters.
 *
 * @param[in] str - line to examine
 * @param[in] skip - label expected at the start of str
 *
 * @return	string
 * @retval	NULL			str does not begin with skip
 * @retval	pointer into str	Success
 *
 */

char *
skipstr(char *str, char *skip)
{
	int len = strlen(skip);
	char *rest;

	if (strncmp(str, skip, len) != 0)
		return NULL;

	rest = str + len;
	rest += strspn(rest, "\t :");
	return rest;
}

int linenum;
int errflag = 0;

char badformat[] = "warning: /proc/cpuinfo format not recognized";

/**
 * @brief
 *	Log the message currently in log_buffer under the "ncpus" id.
 *	The first call after errflag was cleared logs the badformat
 *	text first and sets errflag.
 *
 * @return	Void
 *
 */
void
warning(void)
{
	if (errflag == 0) {
		log_event(PBSEVENT_SYSTEM, 0, LOG_NOTICE, "ncpus", badformat);
		errflag = 1;
	}
	log_event(PBSEVENT_SYSTEM, 0, LOG_NOTICE, "ncpus", log_buffer);
}

/**
 * @brief
 *	Convert a decimal string to int with strtol().
 *	A NULL or empty string, or trailing characters after the number,
 *	produce a warning() that mentions the current linenum.
 *
 * @param[in] str - string to be processed
 *
 * @return	int
 * @retval	value parsed by strtol (also when trailing characters
 *		were present)
 * @retval	0	str was NULL or empty
 *
 */
int
getnum(char *str)
{
	char *tail;
	long parsed;

	if (str == NULL || str[0] == '\0') {
		sprintf(log_buffer, "line %d: number needed", linenum);
		warning();
		return 0;
	}

	parsed = strtol(str, &tail, 10);
	if (tail[0] != '\0') {
		sprintf(log_buffer, "line %d: bad number %s", linenum, str);
		warning();
	}
	return (int) parsed;
}

#define LABLELEN 2048

/* Distinct physid/coreid pairs recorded by proc_new(); proc_num entries. */
struct {
	int physid;
	int coreid;
} *proc_array = NULL;
int proc_num = 0;

/**
 * @brief
 *	Record a physid/coreid pair in proc_array[] unless it is already
 *	there.  The number of unique pairs is what matters, because
 *	HyperThread(tm) "cpus" report duplicate physid/coreid values.
 *	Pairs with a negative member are ignored.
 *
 * @param[in] physid - physical id
 * @param[in] coreid - core id
 *
 * @return	Void
 *
 */
static void
proc_new(int physid, int coreid)
{
	int slot = 0;

	if (physid < 0 || coreid < 0)
		return;

	/* look for an existing entry with the same pair */
	while (slot < proc_num &&
	       !(proc_array[slot].physid == physid &&
		 proc_array[slot].coreid == coreid))
		slot++;
	if (slot < proc_num)
		return; /* already recorded */

	/* not found: grow the array by one and fill the new last slot */
	proc_num++;
	proc_array = realloc(proc_array, sizeof(*proc_array) * proc_num);
	assert(proc_array != NULL);
	proc_array[slot].physid = physid;
	proc_array[slot].coreid = coreid;
}

/**
 * @brief
 *	Return the number of cpus as a decimal string.
 *
 *	The first successful call parses /proc/cpuinfo line by line and
 *	stores the number of "processor" entries (or the "cpus detected"
 *	value) in num_pcpus, num_acpus and num_oscpus.  Later calls return
 *	the cached num_pcpus without re-reading the file.
 *
 *	While parsing, a separate estimate of physical processors is kept
 *	(procs / the proc_array tuple count); in this function it is used
 *	only to log whether hyperthreading appears enabled.
 *
 * @param[in] attrib - pointer to rm_attribute structure (must be NULL)
 *
 * @return	string
 * @retval	number of cpus	Success
 * @retval	NULL		attrib given (rm_errno = RM_ERR_BADPARAM) or
 *				/proc/cpuinfo could not be opened
 *
 */
static char *
ncpus(struct rm_attribute *attrib)
{
	char *file = "/proc/cpuinfo";
	char label[LABLELEN];
	char *cp;
	FILE *fp;
	int procs, logical;
	int skip = 0;
	int siblings = 0;
	int coreid = -1;
	int physid = -1;
	int maxsib = 0;
	int maxsibcpu = 0;
	int procnum = -1;
	int htseen, htany;
	int intelany;
	static int oldlinux = -1; /* -1 = not yet determined */
	int len;

	if (attrib) {
		log_err(-1, __func__, extra_parm);
		rm_errno = RM_ERR_BADPARAM;
		return NULL;
	}

	/* answer from the cached value once it has been computed */
	if (num_pcpus > 0) {
		sprintf(ret_string, "%d", num_pcpus);
		return ret_string;
	}

	if ((fp = fopen(file, "r")) == NULL)
		return NULL;

	/* determined once: a 2.4.x kernel on x86_64 counts as "old linux" */
	if (oldlinux == -1) {
		struct utsname ubuf;

		oldlinux = 0;
		if (uname(&ubuf) == 0) {
			if (strncmp(ubuf.release, "2.4.", 4) == 0 &&
			    strcmp(ubuf.machine, "x86_64") == 0)
				oldlinux = 1;
		}
	}

	errflag = 0;
	logical = procs = 0;
	linenum = 0;
	htany = intelany = 0;

	while (!feof(fp)) {
		if (fgets(label, LABLELEN, fp) == NULL)
			break;

		linenum++;
		/* strip the newline; a missing one means the line was cut short */
		len = strlen(label);
		if (label[len - 1] == '\n')
			label[len - 1] = '\0';
		else {
			sprintf(log_buffer, "line %d too long", linenum);
			warning();
		}

		/* x86 linux /proc/cpuinfo format is
		 ** processor 0
		 ** info about processor 0
		 ** processor 1
		 ** info about processor 1
		 ** etc.... Alpha linux just prints "cpus detected: X"
		 */
		if ((cp = skipstr(label, "processor")) != NULL) {
			/*
			 * A new processor stanza begins: record the tuple
			 * gathered for the previous one (ignored while the
			 * ids are still -1) and reset the per-stanza state.
			 */
			proc_new(physid, coreid);
			physid = coreid = -1;
			/* NOTE(review): htseen is assigned but never read in this function */
			htseen = 0;
			siblings = 0;
			procnum = getnum(cp);
			logical++;
			/* procs only advances while no sibling stanzas remain to be skipped */
			if (skip == 0)
				procs++;
		} else if ((cp = skipstr(label, "cpus detected")) != NULL) {
			/* single summary line: take the value and stop reading */
			logical = procs = getnum(cp);
			break;
		} else if ((cp = skipstr(label, "siblings")) != NULL ||
			   (cp = skipstr(label, "threads")) != NULL ||
			   (cp = skipstr(label, "Number of siblings")) != NULL) {
			siblings = getnum(cp);
			/* remember the largest sibling count and the cpu that reported it */
			if (siblings > maxsib) {
				maxsib = siblings;
				maxsibcpu = procnum;
			}
			/* start a countdown of sibling stanzas, or consume one */
			if (skip == 0)
				skip = siblings - 1;
			else
				skip--;
		} else if ((cp = skipstr(label, "physical id")) != NULL) {
			physid = getnum(cp);
		} else if ((cp = skipstr(label, "core id")) != NULL) {
			coreid = getnum(cp);
		} else if ((cp = skipstr(label, "vendor_id")) != NULL) {
			if (strcmp(cp, "GenuineIntel") == 0)
				intelany = 1;
		} else if ((cp = skipstr(label, "flags")) != NULL) {
			/* scan the blank-separated flag words for "ht" */
			while (cp != NULL) {
				char *flag = pbs_strsep(&cp, " ");

				if (flag == NULL)
					break;
				if (strcmp(flag, "ht") == 0) {
					htany = htseen = 1;
					break;
				}
			}
		}
	}
	fclose(fp);
	/* record the tuple of the last stanza */
	proc_new(physid, coreid);

	if (maxsib > logical) {
		sprintf(log_buffer, "cpu %d: siblings=%d but OS only "
				    "reports %d cpus",
			maxsibcpu, maxsib, logical);
		warning();
	}
	/* any warning() during parsing set errflag: fall back to the logical count */
	if (errflag)
		procs = logical;
	else if (htany || (oldlinux && intelany)) {
		/*
		 ** If the version of linux is new enough to have
		 ** physid and coreid, we can use the proc_num
		 ** count as the value of physical processors.
		 */
		if (proc_num > 0)
			procs = proc_num;
		sprintf(log_buffer, "hyperthreading %s",
			(procs < logical) ? "enabled" : "disabled");
		log_event(PBSEVENT_SYSTEM, 0, LOG_NOTICE, "ncpus", log_buffer);
	}

	/* all three counters are set to the logical count, not to procs */
	num_pcpus = num_acpus = num_oscpus = logical;
	/* the tuple table is only needed during one parse */
	if (proc_array != NULL) {
		free(proc_array);
		proc_array = NULL;
		proc_num = 0;
	}

	sprintf(ret_string, "%d", num_oscpus);
	return ret_string;
}

/**
 * @brief
 *	Report the physical memory of the machine.
 *	The MemTotal value is read from /proc/meminfo, stored in the
 *	global totalmem, multiplied by num_acpus / num_pcpus and written
 *	to ret_string with a "kb" suffix.
 *
 * @param[in] attrib - pointer to rm_attribute structure (must be NULL)
 *
 * @return      string
 * @retval      tot physical memory  	Success
 * @retval      NULL            	Error (rm_errno is set)
 *
 */

#ifndef PBSMOM_HTUNIT
char *
physmem(struct rm_attribute *attrib)
{
	char strbuf[256];
	FILE *fp;
	int ratio;

	if (attrib) {
		log_err(-1, __func__, extra_parm);
		rm_errno = RM_ERR_BADPARAM;
		return NULL;
	}
	if ((fp = fopen("/proc/meminfo", "r")) == NULL) {
		/* report the real failure, not the "extra parameter" text */
		log_err(errno, __func__, "fopen /proc/meminfo");
		rm_errno = RM_ERR_SYSTEM;
		return NULL;
	}
	/* the physmem of the machine is in MemTotal */
	while (fgets(strbuf, sizeof(strbuf), fp) != NULL) {
		if (sscanf(strbuf, "MemTotal: %s k", ret_string) != 1)
			continue;

		fclose(fp);
		totalmem = (unsigned long) atol(ret_string);

		/*
		 * num_pcpus is still 0 until the cpu count has been
		 * established; avoid dividing by zero and apply no scaling
		 * in that case.
		 */
		ratio = 1;
		if (num_pcpus != 0)
			ratio = num_acpus / num_pcpus;

		sprintf(ret_string, "%lukb", totalmem * ratio);
		return ret_string;
	}
	fclose(fp);
	rm_errno = RM_ERR_SYSTEM;
	return NULL;
}

/**
 * @brief
 *	Report the free space of a file system: f_bsize * f_bfree from
 *	statfs(), divided by 1024, with a "kb" suffix.
 *
 * @param[in] param - absolute path on the file system
 *
 * @return 	string
 * @retval	free space of file system	Success
 * @retval	NULL				path not absolute or statfs()
 *						failed (rm_errno = RM_ERR_BADPARAM)
 *
 */
char *
size_fs(char *param)
{
	struct statfs fsbuf;
	double free_bytes;

	if (param[0] != '/') {
		sprintf(log_buffer, "%s: not full path filesystem name: %s",
			__func__, param);
		log_err(-1, __func__, log_buffer);
		rm_errno = RM_ERR_BADPARAM;
		return NULL;
	}

	if (statfs(param, &fsbuf) == -1) {
		log_err(errno, __func__, "statfs");
		rm_errno = RM_ERR_BADPARAM;
		return NULL;
	}

	free_bytes = (double) fsbuf.f_bsize * (double) fsbuf.f_bfree;
	sprintf(ret_string, "%lukb", (unsigned long) (free_bytes / 1024.0)); /* KB */
	return ret_string;
}

/**
 * @brief
 *	Report the size of a file: st_size from stat(), shifted right by
 *	10, with a "kb" suffix.
 *
 * @param[in] param - absolute path of the file
 *
 * @return	string
 * @retval	size of file	Success
 * @retval	NULL		path not absolute or stat() failed
 *				(rm_errno = RM_ERR_BADPARAM)
 *
 */
char *
size_file(char *param)
{
	struct stat sbuf;
	unsigned long kbytes;

	if (param[0] != '/') {
		sprintf(log_buffer, "%s: not full path filesystem name: %s",
			__func__, param);
		log_err(-1, __func__, log_buffer);
		rm_errno = RM_ERR_BADPARAM;
		return NULL;
	}

	if (stat(param, &sbuf) == -1) {
		log_err(errno, __func__, "stat");
		rm_errno = RM_ERR_BADPARAM;
		return NULL;
	}

	kbytes = (unsigned long) (sbuf.st_size >> 10); /* KB */
	sprintf(ret_string, "%lukb", kbytes);
	return ret_string;
}

/**
 * @brief
 *	Resource-monitor entry point for sizes: dispatches to size_file()
 *	("file" qualifier) or size_fs() ("fs" qualifier) with the
 *	attribute value as the path.
 *
 * @param[in] attrib - pointer to rm_attribute structure
 *
 * @return	string
 * @retval      size string     Success
 * @retval      NULL            Error (rm_errno is set)
 *
 */
char *
size(struct rm_attribute *attrib)
{
	char *path;

	if (attrib == NULL) {
		log_err(-1, __func__, no_parm);
		rm_errno = RM_ERR_NOPARAM;
		return NULL;
	}

	/* exactly one attribute is accepted */
	if (momgetattr(NULL)) {
		log_err(-1, __func__, extra_parm);
		rm_errno = RM_ERR_BADPARAM;
		return NULL;
	}

	path = attrib->a_value;
	if (strcmp(attrib->a_qualifier, "file") == 0)
		return size_file(path);
	if (strcmp(attrib->a_qualifier, "fs") == 0)
		return size_fs(path);

	rm_errno = RM_ERR_BADPARAM;
	return NULL;
}

/**
 * @brief
 *	Report elapsed wall time for a process ("proc" qualifier) or a
 *	session ("session" qualifier): the current time minus the earliest
 *	start_time among the matching sampled processes, multiplied by
 *	wallfactor.
 *
 * @param[in] attrib - pointer to rm_attribute structure
 *
 * @return	string
 * @retval	walltime	Success
 * @retval	NULL		Error (rm_errno is set)
 *
 */
static char *
walltime(struct rm_attribute *attrib)
{
	int idx;
	int id;
	int by_session;
	int matched = 0;
	time_t now, earliest;

	if (attrib == NULL) {
		log_err(-1, __func__, no_parm);
		rm_errno = RM_ERR_NOPARAM;
		return NULL;
	}

	/* a value that converts to 0 is rejected */
	id = atoi(attrib->a_value);
	if (id == 0) {
		sprintf(log_buffer, "bad param: %s", attrib->a_value);
		log_err(-1, __func__, log_buffer);
		rm_errno = RM_ERR_BADPARAM;
		return NULL;
	}

	/* exactly one attribute is accepted */
	if (momgetattr(NULL)) {
		log_err(-1, __func__, extra_parm);
		rm_errno = RM_ERR_BADPARAM;
		return NULL;
	}

	if (strcmp(attrib->a_qualifier, "proc") == 0) {
		by_session = 0;
	} else if (strcmp(attrib->a_qualifier, "session") == 0) {
		by_session = 1;
	} else {
		rm_errno = RM_ERR_BADPARAM;
		return NULL;
	}

	now = time(NULL);
	if (now <= 0) {
		log_err(errno, __func__, "time");
		rm_errno = RM_ERR_SYSTEM;
		return NULL;
	}
	mom_get_sample();

	/* find the earliest start among the matching processes */
	earliest = now;
	for (idx = 0; idx < nproc; idx++) {
		proc_stat_t *ps = &proc_info[idx];

		if (by_session && id != ps->session)
			continue;
		if (!by_session && id != ps->pid)
			continue;

		matched = 1;
		earliest = MIN(earliest, ps->start_time);
	}

	if (!matched) {
		rm_errno = RM_ERR_EXIST;
		return NULL;
	}

	sprintf(ret_string, "%ld",
		(long) ((double) (now - earliest) * wallfactor));
	return ret_string;
}

/**
 * @brief
 *	Read the first number in /proc/loadavg and hand it back as the
 *	load average.
 *
 * @param[out] rv - var to hold load avg (written only on success)
 *
 * @return	int
 * @retval	0		Success
 * @retval	RM_ERR_SYSTEM	open or parse failed (also stored in rm_errno)
 *
 */
int
get_la(double *rv)
{
	FILE *fp;
	float load;

	fp = fopen("/proc/loadavg", "r");
	if (fp == NULL)
		return (rm_errno = RM_ERR_SYSTEM);

	if (fscanf(fp, "%f", &load) == 1) {
		*rv = (double) load;
		(void) fclose(fp);
		return 0;
	}

	log_err(errno, __func__, "fscanf of load in /proc/loadavg");
	(void) fclose(fp);
	return (rm_errno = RM_ERR_SYSTEM);
}

/**
 * @brief
 *	Number of seconds from now until the absolute time secs.
 *
 * @param[in] secs - absolute time, compared against time(NULL)
 *
 * @return	u_long
 * @retval	secs - now	secs lies in the future
 * @retval	0		otherwise
 */
u_long
gracetime(u_long secs)
{
	time_t now = time(NULL);

	return (secs > now) ? (secs - now) : 0;
}

/**
 * @brief
 *	Apply nice_val to this process with setpriority().
 *	Nothing is done when nice_val is 0; a failure is logged.
 *
 * @return	Void
 *
 */
void
mom_nice(void)
{
	if (nice_val == 0)
		return;
	if (setpriority(PRIO_PROCESS, 0, nice_val) != -1)
		return;

	(void) sprintf(log_buffer, "failed to nice(%d) mom", nice_val);
	log_err(errno, __func__, log_buffer);
}

/**
 * @brief
 *      Reset the priority of this process to 0 with setpriority().
 *	Nothing is done when nice_val is 0; a failure is logged.
 *
 * @return      Void
 *
 */
void
mom_unnice(void)
{
	if (nice_val == 0)
		return;
	if (setpriority(PRIO_PROCESS, 0, 0) != -1)
		return;

	(void) sprintf(log_buffer, "failed to nice(%d) mom", nice_val);
	log_err(errno, __func__, log_buffer);
}

/**
 * @brief
 *	Get the info required for tm_attach: look up pid in the process
 *	sample and return its session id, uid and command name.
 *
 * @param[in]  pid - process id
 * @param[out] sid - session id of the process
 * @param[out] uid - user id of the process
 * @param[out] comm - buffer receiving the command name; it is zero
 *		      filled first and at most len - 1 bytes are copied,
 *		      so the result is always NUL terminated
 * @param[in]  len - size of the comm buffer; when 0, comm is not touched
 *
 * @return	int
 * @retval	TM_OKAY		Success
 * @retval	TM_ENOPROC	pid not found in the sample
 *
 */
int
dep_procinfo(pid_t pid, pid_t *sid, uid_t *uid, char *comm, size_t len)
{
	int i;
	proc_stat_t *ps;

	getprocs();
	for (i = 0; i < nproc; i++) {
		ps = &proc_info[i];
		if (ps->pid != pid)
			continue;

		*sid = ps->session;
		*uid = ps->uid;
		/*
		 * len is unsigned: with len == 0, len - 1 would wrap to
		 * the maximum value and the copy would write
		 * sizeof(ps->comm) bytes into a zero-length buffer.
		 */
		if (len > 0) {
			memset(comm, '\0', len);
			memcpy(comm, ps->comm,
			       MIN(len - 1, sizeof(ps->comm)));
		}
		return TM_OKAY;
	}
	return TM_ENOPROC;
}

#ifdef NAS_UNKILL /* localmod 011 */
/**
 * @brief
 *	Get the info required for tracking killed processes: look up pid
 *	in the process sample and return its parent pid and start time.
 *
 * @param[in]  pid - process id
 * @param[out] ppid - parent process id
 * @param[out] start_time - start time of process
 *
 * @return      int
 * @retval      TM_OKAY         Success
 * @retval      TM_ENOPROC      pid not found in the sample
 *
 */
int
kill_procinfo(pid_t pid, pid_t *ppid, u_Long *start_time)
{
	int idx;

	getprocs();
	for (idx = 0; idx < nproc; idx++) {
		proc_stat_t *ps = &proc_info[idx];

		if (ps->pid != pid)
			continue;
		*ppid = ps->ppid;
		*start_time = ps->start_time;
		return TM_OKAY;
	}
	return TM_ENOPROC;
}
#endif /* localmod 011 */

/**
 * @brief
 *	Machine-dependent hook for attaching a task.
 *	This implementation does nothing with the task and reports success.
 *
 * @param[in] ptask - pointer to task structure (unused)
 *
 * @return	int
 * @retval	TM_OKAY		always
 *
 */
int
dep_attach(task *ptask)
{
	(void) ptask;
	return TM_OKAY;
}

/**
 * @brief
 *	adjusts the reserved mem attribute to make it hold in space
 *
 * @param[in] vp - pointer to vnl_t structure( vnode list)
 *
 * @return	Void
 *
 */
#endif /* PBSMOM_HTUNIT */
