--- torque-2.5.10/src/resmom/mom_main.c 2012-01-12 16:34:39.000000000 -0500 +++ torque-2.5.10-nvml/src/resmom/mom_main.c 2012-02-17 16:23:58.528731483 -0500 @@ -150,6 +150,7 @@ #include "mcom.h" + #ifdef PENABLE_LINUX26_CPUSETS void initialize_root_cpuset(); #endif @@ -267,6 +268,8 @@ extern time_t pbs_tcp_timeout; extern long MaxConnectTimeout; +int mom_gpu_initialized = 0; + char tmpdir_basename[MAXPATHLEN]; /* for $TMPDIR */ char rcp_path[MAXPATHLEN]; @@ -8310,7 +8313,8 @@ mom_server_all_update_stat(); #ifdef NVIDIA_GPUS - mom_server_all_update_gpustat(); + if (mom_gpu_initialized) + mom_server_all_update_gpustat(); #endif /* NVIDIA_GPUS */ LastServerUpdateTime = time_now; @@ -8531,7 +8535,8 @@ /* shutdown mom */ #if defined(NVIDIA_GPUS) && defined(NVML_API) - shut_nvidia_nvml(); + if (mom_gpu_initialized) + shut_nvidia_nvml(); #endif /* NVIDIA_GPUS and NVML_API */ mom_close_poll(); --- torque-2.5.10/src/resmom/mom_server.c 2012-01-12 16:34:39.000000000 -0500 +++ torque-2.5.10-nvml/src/resmom/mom_server.c 2012-02-17 16:24:32.205092174 -0500 @@ -299,6 +299,7 @@ extern char *conf_res(char *resline, struct rm_attribute *attr); extern char *dependent(char *res, struct rm_attribute *attr); extern char *reqgres(struct rm_attribute *); +extern int mom_gpu_initialized; #ifdef NVIDIA_GPUS extern int find_file(char *, char *); @@ -1255,8 +1256,15 @@ rc = nvmlInit(); - if (rc == NVML_SUCCESS) + if (rc == NVML_SUCCESS){ + mom_gpu_initialized = 1; return (TRUE); + } + + if (rc == NVML_ERROR_DRIVER_NOT_LOADED){ + mom_gpu_initialized = 0; + return(TRUE); + } log_nvml_error (rc, NULL, id); @@ -2039,6 +2047,9 @@ /* if node does not have Nvidia recognized driver version then forget it */ + if (!mom_gpu_initialized) + return(PBSE_NONE); + if (MOMNvidiaDriverVersion < 260) return(PBSE_NONE); --- torque-2.5.10/src/resmom/req_quejob.c 2011-08-02 17:42:26.000000000 -0400 +++ torque-2.5.10-nvml/src/resmom/req_quejob.c 2012-02-17 16:48:15.945163233 -0500 @@ -129,6 +129,8 @@ extern const char *PJobSubState[]; +extern int mom_gpu_initialized; + /* sync w/enum job_file TJobFileType[]) */ const char *TJobFileType[] = @@ -1046,7 +1048,8 @@ * if so, then update gpu status */ if (((pj->ji_wattr[JOB_ATR_exec_gpus].at_flags & ATR_VFLAG_SET) != 0) && - (pj->ji_wattr[JOB_ATR_exec_gpus].at_val.at_str != NULL)) + (pj->ji_wattr[JOB_ATR_exec_gpus].at_val.at_str != NULL) && + mom_gpu_initialized) { mom_server_all_update_gpustat(); } --- torque-2.5.10/src/resmom/start_exec.c 2012-01-16 13:10:09.000000000 -0500 +++ torque-2.5.10-nvml/src/resmom/start_exec.c 2012-02-17 16:48:00.989219530 -0500 @@ -222,6 +222,8 @@ extern int src_login_batch; extern int src_login_interactive; +extern int mom_gpu_initialized; + /* Local Variables */ static int script_in; /* script file, will be stdin */ --- torque-2.5.10/src/resmom/requests.c 2011-10-26 12:04:09.000000000 -0400 +++ torque-2.5.10-nvml/src/resmom/requests.c 2012-02-17 16:24:50.332595006 -0500 @@ -169,6 +169,8 @@ extern char *TNoSpoolDirList[]; extern char path_checkpoint[]; +extern int mom_gpu_initialized; + /* Local Data Items */ static uid_t useruid;