[From nobody Tue Nov 29 08:28:30 2005
Message-ID: &lt;438422CE.5090605@obs.ujf-grenoble.fr&gt;
Date: Wed, 23 Nov 2005 08:05:34 +0000
From: Ghislain ESCORNE &lt;ghislain.escorne@obs.ujf-grenoble.fr&gt;
Organization: LGIT
User-Agent: Mozilla Thunderbird 1.0.2-6 (X11/20050513)
X-Accept-Language: en-us, en
MIME-Version: 1.0
To: torqueusers@supercluster.org,  mauiusers@supercluster.org
Subject: node is in state Idle
Content-Type: text/plain; charset=ISO-8859-1; format=flowed
Content-Transfer-Encoding: 7bit

/Hi,

I have a problem when I try to submit many jobs which need to run on 
more than one node.



----------------------script --------------------------------------
-bash-3.00$ more testpbs
#PBS -l nodes=2:ppn=2,walltime=00:05:00
###PBS -m abe
# Ca c'est bon
echo `cat $PBS_NODEFILE`
# Ca c'est pas bon
echo test : $PBS_NODEFILE
echo $PBS_O_WORKDIR
#echo $PBS_WORKDIR
echo $PBS_JOBID


----------------------------pbsnodes -a --------------------------------
-bash-3.00$ pbsnodes -a
compute-0-0.local
    state = free
    np = 2
    ntype = cluster
    status = arch=linux,uname=Linux compute-0-0.local 2.6.9-5.0.5.ELsmp 
#1 SMP Wed Apr 20 00:16:40 BST 2005 i686,sessions=? 0,nsessions=? 
0,nusers=0,idletime=618735,totmem=8250696kb,availmem=7832220kb,physmem=4154132kb,ncpus=4,loadave=0.00,netload=3650089177,state=free,rectime=1132671043 


compute-0-1.local
    state = free
    np = 2
    ntype = cluster
    status = arch=linux,uname=Linux compute-0-1.local 2.6.9-5.0.5.ELsmp 
#1 SMP Wed Apr 20 00:16:40 BST 2005 i686,sessions=? 0,nsessions=? 
0,nusers=0,idletime=1230804,totmem=8250696kb,availmem=7988232kb,physmem=4154132kb,ncpus=4,loadave=0.03,netload=3146652115,state=free,rectime=1132671047 



--------------------------------qstat -n --------------------------------
-bash-3.00$ qstat -n

rock-lgit.obs.ujf-grenoble.fr:
                                                           Req'd  
Req'd   Elap
Job ID          Username Queue    Jobname    SessID NDS TSK Memory Time  
S Time
--------------- -------- -------- ---------- ------ --- --- ------ ----- 
- -----
66.rock-lgit.ob gescorne short_mp testpbs       --    2  --    --  00:05 
Q   --
  compute-0-1+compute-0-1+compute-0-0+compute-0-0

------------------------ qstat -f --------------------------------------
-bash-3.00$ qstat -f
Job Id: 66.rock-lgit.obs.ujf-grenoble.fr
   Job_Name = testpbs
   Job_Owner = gescorne@rock-lgit.obs.ujf-grenoble.fr
   job_state = Q
   queue = short_mpi
   server = rock-lgit.obs.ujf-grenoble.fr
   Checkpoint = u
   ctime = Tue Nov 22 16:08:03 2005
   Error_Path = rock-lgit.obs.ujf-grenoble.fr:/home/gescorne/testpbs.e66
   exec_host = 
compute-0-1.local/1+compute-0-1.local/0+compute-0-0.local/1+com
       pute-0-0.local/0
   Hold_Types = n
   Join_Path = n
   Keep_Files = n
   Mail_Points = a
   mtime = Tue Nov 22 16:12:17 2005
   Output_Path = rock-lgit.obs.ujf-grenoble.fr:/home/gescorne/testpbs.o66
   Priority = 0
   qtime = Tue Nov 22 16:08:03 2005
   Rerunable = True
   Resource_List.nodect = 2
   Resource_List.nodes = 2:ppn=2
   Resource_List.walltime = 00:05:00
   Variable_List = PBS_O_HOME=/home/gescorne,PBS_O_LANG=en_US.iso885915,
       PBS_O_LOGNAME=gescorne,
       
PBS_O_PATH=/usr/kerberos/bin:/usr/local/pbs/bin:/usr/local/MAUI/bin:/u
       
sr/local/bin:/bin:/usr/bin:/usr/X11R6/bin:/opt/ganglia/bin:/usr/java/jd
       k1.5.0/bin:/opt/rocks/bin:/opt/rocks/sbin,
       PBS_O_MAIL=/var/spool/mail/gescorne,PBS_O_SHELL=/bin/bash,
       
PBS_O_HOST=rock-lgit.obs.ujf-grenoble.fr,PBS_O_WORKDIR=/home/gescorne,
       PBS_O_QUEUE=feed
   etime = Tue Nov 22 16:08:03 2005


---------------------- showq ------------------------------------
-bash-3.00$ showq
ACTIVE JOBS--------------------
JOBNAME            USERNAME      STATE  PROC   REMAINING            
STARTTIME

64                 gescorne    Running     4    00:05:00  Tue Nov 22 
15:59:46

    1 Active Job        4 of    4 Processors Active (100.00%)
                        2 of    2 Nodes Active      (100.00%)

IDLE JOBS----------------------
JOBNAME            USERNAME      STATE  PROC     WCLIMIT            
QUEUETIME


0 Idle Jobs

BLOCKED JOBS----------------
JOBNAME            USERNAME      STATE  PROC     WCLIMIT            
QUEUETIME

Total Jobs: 1   Active Jobs: 1   Idle Jobs: 0   Blocked Jobs: 0

-------------------------------- showstate 
-----------------------------------------------
[root@rock-lgit ~]# showstate
cluster state summary for Tue Nov 22 16:13:42


   JobName            S      User    Group Procs   Remaining            
StartTime
   ------------------ - --------- -------- ----- -----------  
-------------------

(A) 66                 R  gescorne     1110     4    00:05:00  Tue Nov 
22 16:13:42

usage summary:  1 active jobs  2 active nodes

              [0]
              [1]

Frame      01: [A]

Key:  [?]:Unknown [*]:Down w/Job [#]:Down [ ]:Idle [@] Busy w/No Job [!] 
Drained

job 66 has allocated node compute-0-1.local (node is in state Idle)


---------------------- maui.log ------------------------------------


11/22 16:26:56 INFO:     MNode[000] ' compute-0-0.local'      Idle 
VM:     4001 Mem:  4056 Dk:     1 Cl: [verylong_serial 2
:2][long_mpi 2:2][short_serial 2:2][medium_serial 2:2][feed 
2:2][medium_mpi 2:2][verylong_mpi 2:2][short_mpi 2:2][long_seri
al 2:2] [NONE]
11/22 16:26:56 INFO:     MNode[000] ' compute-0-0.local' C/A/D procs:  
2/2/0
11/22 16:26:56 MRMNodePostUpdate(compute-0-0.local,Idle)
11/22 16:26:56 __MPBSGetNodeState(Name,State,PNode)
11/22 16:26:56 INFO:     PBS node compute-0-1.local set to state Idle 
(free)
11/22 16:26:56 MNodeFind(compute-0-1.local,N)
11/22 16:26:56 MRMNodePreUpdate(compute-0-1.local,Idle,rock-lgit)
11/22 16:26:56 MNodeCheckAllocation(compute-0-1.local)
11/22 16:26:56 ALERT:    node 'compute-0-1.local' expected Busy but is 
Idle and not allocated
11/22 16:26:56 
MPBSNodeUpdate(compute-0-1.local,compute-0-1.local,Idle,rock-lgit)
11/22 16:26:56 
__MPBSIGetSSSStatus(compute-0-1.local,arch=linux,uname=Linux 
compute-0-1.local 2.6.9-5.0.5.ELsmp #1 SMP Wed
Apr 20 00:16:40 BST 2005 i686,sessions=? 0,nsessions=? 
0,nusers=0,idletime=1232963,totmem=8250696kb,availmem=7962200kb,phys
mem=4154132kb,ncpus=4,loadave=0.16,netload=3168017740,state=free,rectime=1132673206) 

11/22 16:26:56 MPBSLoadQueueInfo(rock-lgit,compute-0-1.local,SC)
11/22 16:26:56 INFO:     queue 'verylong_serial' priority set to 50
11/22 16:26:56 INFO:     queue 'verylong_serial' started state set to True
11/22 16:26:56 INFO:     queue 'long_mpi' priority set to 100
11/22 16:26:56 INFO:     queue 'long_mpi' started state set to True
11/22 16:26:56 INFO:     queue 'short_serial' priority set to 50
11/22 16:26:56 INFO:     queue 'short_serial' started state set to True
11/22 16:26:56 INFO:     queue 'medium_serial' priority set to 50
11/22 16:26:56 INFO:     queue 'medium_serial' started state set to True
11/22 16:26:56 INFO:     queue 'feed' started state set to True
11/22 16:26:56 INFO:     queue 'medium_mpi' priority set to 100
11/22 16:26:56 INFO:     queue 'medium_mpi' started state set to True
11/22 16:26:56 INFO:     queue 'verylong_mpi' priority set to 100
11/22 16:26:56 INFO:     queue 'verylong_mpi' started state set to True
11/22 16:26:56 INFO:     queue 'short_mpi' priority set to 100
11/22 16:26:56 INFO:     queue 'short_mpi' started state set to True
11/22 16:26:56 INFO:     queue 'long_serial' priority set to 50
11/22 16:26:56 INFO:     queue 'long_serial' started state set to True
11/22 16:26:56 INFO:     MNode[001] ' compute-0-1.local'      Idle 
VM:     4001 Mem:  4056 Dk:     1 Cl: [verylong_serial 2
:2][long_mpi 2:2][short_serial 2:2][medium_serial 2:2][feed 
2:2][medium_mpi 2:2][verylong_mpi 2:2][short_mpi 2:2][long_seri
al 2:2] [NONE]
11/22 16:26:56 INFO:     MNode[001] ' compute-0-1.local' C/A/D procs:  
2/2/0
11/22 16:26:56 MRMNodePostUpdate(compute-0-1.local,Idle)
11/22 16:26:56 INFO:     2 PBS resources detected on RM rock-lgit
11/22 16:26:56 INFO:     resources detected: 2
11/22 16:26:56 MRMWorkloadQuery()
11/22 16:26:56 MPBSWorkloadQuery(rock-lgit,JCount,SC)
11/22 16:26:56 MClassFind(short_mpi,C)
11/22 16:26:56 MJobFind('66',J,0)
11/22 16:26:56 INFO:     job '66'  hash 1432
11/22 16:26:56 INFO:     job '66' found at hash[1432] 1 '66' (J-&gt;Name: 66)
11/22 16:26:56 MRMJobPreUpdate(66)
11/22 16:26:56 
MPBSJobUpdate(66,66.rock-lgit.obs.ujf-grenoble.fr,TaskList,0)
11/22 16:26:56 INFO:     job '66' changed states from Running to Idle
11/22 16:26:56 MReqSetAttr(66,RQ,ReqClass,Value,1,2)
11/22 16:26:56 __MPBSGetTaskList(66,2:ppn=2,TaskList,0)
11/22 16:26:56 MReqSetAttr(66,RQ,ReqNodeFeature,Value,1,2)
11/22 16:26:56 INFO:     0 host task(s) located for job
11/22 16:26:56 INFO:     job 66 starttime: 1132673215 (00:00:01)  
presenttime: 1132673216  wclimit: 300  mtime: 1132673215
etime: 1132672083  walltime: 0  state: Idle
11/22 16:26:56 MRMJobPostUpdate(66,TaskList,Idle,rock-lgit)
11/22 16:26:56 INFO:     job flags for job 66: 800
11/22 16:26:56 MJobSetAttr(66,GAttr,Value,1,5)
11/22 16:26:56 MJobCheckClassJLimits(66,C,0,Buffer,BufSize)
11/22 16:26:56 INFO:     1 PBS jobs detected on RM rock-lgit
11/22 16:26:56 INFO:     jobs detected: 1
11/22 16:26:56 MStatClearUsage(node,Active)
11/22 16:26:56 MClusterUpdateNodeState()
11/22 16:26:56 INFO:     node 'compute-0-0.local' C/A/D procs:  2/2/0
11/22 16:26:56 INFO:     node 'compute-0-1.local' C/A/D procs:  2/2/0
11/22 16:26:56 MParUpdate(ALL)
11/22 16:26:56 INFO:     P[ALL]:  Total 2:4  Up 2:4  Idle 0:0  Active 2:4
11/22 16:26:56 INFO:     MNode[compute-0-0.local] added to MPar[DEFAULT] 
(2:2)
11/22 16:26:56 INFO:     MNode[compute-0-1.local] added to MPar[DEFAULT] 
(2:2)
11/22 16:26:56 INFO:     P[ALL]:  Total 2:4  Up 2:4  Idle 2:4  Active 0:0
11/22 16:26:56 INFO:     jobs in queue
11/22 16:26:56 MResAdjustDRes(NULL,FALSE)
11/22 16:26:56 MQueueSelectAllJobs(Q,HARD,ALL,JIList,DP,Msg)
11/22 16:26:56 MJobGetStartPriority(66,0,Priority,NULL)
11/22 16:26:56 INFO:     job '66' Priority:       18
11/22 16:26:56 INFO:     Cred:      0(00.0)  FS:      0(00.0)  
Attr:      0(00.0)  Serv:     18(00.0)  Targ:      0(00.0)
Res:      0(00.0)  Us:      0(00.0)
11/22 16:26:56 INFO:     job '66'  priority:    18.10
11/22 16:26:56 MStatClearUsage([NONE],Active)
11/22 16:26:56 MPolicyAdjustUsage(NULL,66,NULL,idle,PU,[ALL],1,NULL)
11/22 16:26:56 MPolicyAdjustUsage(NULL,66,NULL,idle,NULL,[ALL],1,NULL)
11/22 16:26:56 INFO:     job[00] '66' added to master list
11/22 16:26:56 INFO:     total jobs selected (ALL): 1/1
11/22 16:26:56 INFO:     jobs selected:
[000:   1]
11/22 16:26:56 MQueueSelectAllJobs(Q,SOFT,ALL,JIList,DP,Msg)
11/22 16:26:56 MJobGetStartPriority(66,0,Priority,NULL)
11/22 16:26:56 INFO:     job '66' Priority:       18
11/22 16:26:56 INFO:     Cred:      0(00.0)  FS:      0(00.0)  
Attr:      0(00.0)  Serv:     18(00.0)  Targ:      0(00.0)
Res:      0(00.0)  Us:      0(00.0)
11/22 16:26:56 INFO:     job '66'  priority:    18.10
11/22 16:26:56 MStatClearUsage([NONE],Idle)
11/22 16:26:56 MPolicyAdjustUsage(NULL,66,NULL,idle,PU,[ALL],1,NULL)
11/22 16:26:56 MPolicyAdjustUsage(NULL,66,NULL,idle,NULL,[ALL],1,NULL)
11/22 16:26:56 INFO:     job[00] '66' added to master list
11/22 16:26:56 INFO:     total jobs selected (ALL): 1/1
11/22 16:26:56 INFO:     jobs selected:
[000:   1]
11/22 16:26:56 
MQueueSelectJobs(SrcQ,DstQ,HARD,5120,4096,2140000000,EVERY,FReason,FALSE)
11/22 16:26:56 MLocalCheckFairnessPolicy(NULL,1132673216,Message)
11/22 16:26:56 INFO:     checking job[0] '66'
11/22 16:26:56 MJobCheckLimits(66,HARD,P,8,Message)
11/22 16:26:56 MJobCheckPolicies(66,HARD,2,ALL,RIndex,NULL,2140000000)
11/22 16:26:56 MJobCheckLimits(66,HARD,P,2,Message)
11/22 16:26:56 MLocalCheckFairnessPolicy(66,1132673216,Message)
11/22 16:26:56 INFO:     job '66' added to queue at slot 0
11/22 16:26:56 INFO:     total jobs selected in partition ALL: 1/1
11/22 16:26:56 MQueueScheduleSJobs(Q)
11/22 16:26:56 MQueueScheduleRJobs(Q)
11/22 16:26:56 INFO:     checking job 66 in MQueueScheduleRJobs()
11/22 16:26:56 INFO:     located job '66' reserved to start           
Tue Nov 22 16:26:55
11/22 16:26:56 MJobCheckLimits(66,SOFT,P,2,Message)
11/22 16:26:56 MJobSelectMNL(66,DEFAULT,NULL,MNodeList,NodeMap,MaxSpeed,1)
11/22 16:26:56 MReqGetFNL(66,0,DEFAULT,NULL,DstNL,NC,TC,2140000000,0)
11/22 16:26:56 MReqCheckResourceMatch(66,0,compute-0-0.local,NULL)
11/22 16:26:56 INFO:     node compute-0-0.local can provide resources 
for job 66:0
11/22 16:26:56 MNodeCheckPolicies(66,compute-0-0.local,2)
11/22 16:26:56 MJobCheckNRes(66,compute-0-0.local,RQ[0],  
INFINITY,TCAvail,1.000,RIndex,NULL,FeasCheck)
11/22 16:26:56 MReqCheckResourceMatch(66,0,compute-0-0.local,RIndex)
11/22 16:26:56 INFO:     node compute-0-0.local can provide resources 
for job 66:0
11/22 16:26:56 INFO:     node compute-0-0.local added to feasible list 
(2 tasks)
11/22 16:26:56 MReqCheckResourceMatch(66,0,compute-0-1.local,NULL)
11/22 16:26:56 INFO:     node compute-0-1.local can provide resources 
for job 66:0
11/22 16:26:56 MNodeCheckPolicies(66,compute-0-1.local,2)
11/22 16:26:56 MJobCheckNRes(66,compute-0-1.local,RQ[0],  
INFINITY,TCAvail,1.000,RIndex,NULL,FeasCheck)
11/22 16:26:56 MReqCheckResourceMatch(66,0,compute-0-1.local,RIndex)
11/22 16:26:56 INFO:     node compute-0-1.local can provide resources 
for job 66:0
11/22 16:26:56 INFO:     node compute-0-1.local added to feasible list 
(2 tasks)
11/22 16:26:56 INFO:     4 feasible tasks found for job 66:0 in 
partition DEFAULT (4 Needed)
11/22 16:26:56 MJobGetINL(66,FNL,INL,DEFAULT,NodeCount,TaskCount)
11/22 16:26:56 MNodeCheckPolicies(66,compute-0-0.local,2)
11/22 16:26:56 INFO:     idle node compute-0-0.localx2 located (D: 0)
11/22 16:26:56 MNodeCheckPolicies(66,compute-0-1.local,2)
11/22 16:26:56 INFO:     idle node compute-0-1.localx2 located (D: 0)
11/22 16:26:56 INFO:     idle resources (4 tasks/2 nodes) found with 
feasible list specified
11/22 16:26:56 INFO:     adequate idle nodes/tasks located
11/22 16:26:56 
MNodeSelectIdleTasks(66,0,SrcNL,IdleMNL,TC,NC,NMap,RCount,RejReason)
11/22 16:26:56 
MJobCheckNRes(66,compute-0-0.local,RQ[0],00:00:00,TCAvail,1.000,RIndex,Affinity,FeasCheck) 

11/22 16:26:56 
MJobCheckNStartTime(66,RQ,compute-0-0.local,00:00:00,TasksAllowed,1.000000,RIndex,Affinity) 

11/22 16:26:56 
MJobGetSNRange(66,0,compute-0-0.local,(2@00:00:00),1,Affinity,Type,ARange,BRes) 

11/22 16:26:56 MRECheck(compute-0-0.local,MJobGetSNRange-Start,FORCE)
11/22 16:26:56 INFO:     N[compute-0-0.local]-&gt;RE[000] S 66(0)  
-00:00:01 R: 'PROCS: 1'x2
11/22 16:26:56 INFO:     N[compute-0-0.local]-&gt;RE[001] E 66(0)  00:04:59 
R: 'PROCS: 1'x2
11/22 16:26:56 INFO:     checking reservation 66 start at time -00:00:01 
(O: 299)
11/22 16:26:56 INFO:     resources available at time -00:00:01 during 66 
start
11/22 16:26:56 INFO:     performing starttime check (1)
11/22 16:26:56 INFO:     checking reservation 66 end at time 00:04:59 
(O: 299)
11/22 16:26:56 INFO:     node compute-0-0.local supports 2 tasks of job 
66:0 for   INFINITY at 00:00:00
11/22 16:26:56 INFO:     node[0] compute-0-0.local added to task list (2 
tasks : 2 tasks total)
11/22 16:26:56 
MJobCheckNRes(66,compute-0-1.local,RQ[0],00:00:00,TCAvail,1.000,RIndex,Affinity,FeasCheck) 

11/22 16:26:56 
MJobCheckNStartTime(66,RQ,compute-0-1.local,00:00:00,TasksAllowed,1.000000,RIndex,Affinity) 

11/22 16:26:56 
MJobGetSNRange(66,0,compute-0-1.local,(2@00:00:00),1,Affinity,Type,ARange,BRes) 

11/22 16:26:56 MRECheck(compute-0-1.local,MJobGetSNRange-Start,FORCE)
11/22 16:26:56 INFO:     N[compute-0-1.local]-&gt;RE[000] S 66(0)  
-00:00:01 R: 'PROCS: 1'x2
11/22 16:26:56 INFO:     N[compute-0-1.local]-&gt;RE[001] E 66(0)  00:04:59 
R: 'PROCS: 1'x2
11/22 16:26:56 INFO:     checking reservation 66 start at time -00:00:01 
(O: 299)
11/22 16:26:56 INFO:     resources available at time -00:00:01 during 66 
start
11/22 16:26:56 INFO:     performing starttime check (1)
11/22 16:26:56 INFO:     checking reservation 66 end at time 00:04:59 
(O: 299)
11/22 16:26:56 INFO:     node compute-0-1.local supports 2 tasks of job 
66:0 for   INFINITY at 00:00:00
11/22 16:26:56 INFO:     node[1] compute-0-1.local added to task list (2 
tasks : 4 tasks total)
11/22 16:26:56 INFO:     4(0) tasks/2(0) nodes found for job 66 in 
MJobSelectMNL
11/22 16:26:56 MJobNLDistribute(66,SrcMNL,DstMNL)
11/22 16:26:56 INFO:     resources found for job 66 tasks: 4+0 of 4  
nodes: 2+0 of 0
11/22 16:26:56 
MJobAllocMNL(66,MFeasibleList,NodeMap,NULL,LASTAVAILABLE,1132673216)
11/22 16:26:56 INFO:     affinity level 0 nodes: 0
11/22 16:26:56 INFO:     affinity level 1 nodes: 2
11/22 16:26:56 INFO:     affinity level 2 nodes: 0
11/22 16:26:56 INFO:     affinity level 3 nodes: 0
11/22 16:26:56 INFO:     affinity level 4 nodes: 0
11/22 16:26:56 INFO:     affinity level 5 nodes: 0
11/22 16:26:56 INFO:     nodelist[0] compute-0-0.local  2  2
11/22 16:26:56 INFO:     nodelist[1] compute-0-1.local  2  2
11/22 16:26:56 INFO:     ignoring pass 1 for job 66:0 (node set forced 
in feasible list)
11/22 16:26:56 INFO:     evaluating nodes on alloc iteration 0 for job 66:0
11/22 16:26:56 INFO:     evaluating nodes on alloc iteration 1 for job 66:0
11/22 16:26:56 INFO:     evaluating nodes on alloc iteration 2 for job 66:0
11/22 16:26:56 INFO:     evaluating nodes on alloc iteration 3 for job 66:0
11/22 16:26:56 INFO:     evaluating nodes on alloc iteration 4 for job 66:0
11/22 16:26:56 INFO:     evaluating nodes on alloc iteration 5 for job 66:0
11/22 16:26:56 INFO:     tasks located for job 66:  4 of 4 required (0 
feasible)
11/22 16:26:56 INFO:     allocated MNode[000]x2 'compute-0-1.local' to 66:0
11/22 16:26:56 INFO:     allocated MNode[001]x2 'compute-0-0.local' to 66:0
11/22 16:26:56 MJobStart(66)
11/22 16:26:56 MJobDistributeTasks(66,rock-lgit,NodeList,TaskMap)
11/22 16:26:56 INFO:     2 tasks assigned to node[0] compute-0-1.local
11/22 16:26:56 INFO:     2 tasks assigned to node[1] compute-0-0.local
11/22 16:26:56 INFO:     2 node(s)/4 task(s) added to 66:0
11/22 16:26:56 INFO:     MNode[000] 'compute-0-1.local'(x2) added to job 
'66'
[001] compute-0-1.local: (P:2,S:7775,M:4056,D:1) 
[Idle][DEFAULT][linux]&lt;0.160000&gt; C:[verylong_serial 2:2][long_mpi 2:2][sho
rt_serial 2:2][medium_serial 2:2][feed 2:2][medium_mpi 2:2][verylong_mpi 
2:2][short_mpi 2:2][long_serial 2:2][DEFAULT] [NON
E] [verylong_serial 2:2][long_mpi 2:2][short_serial 2:2][medium_serial 
2:2][feed 2:2][medium_mpi 2:2][verylong_mpi 2:2][sho
rt_mpi 2:2][long_serial 2:2]
11/22 16:26:56 INFO:     adding 2 task(s) from node compute-0-1.local
11/22 16:26:56 INFO:     task[000] 'compute-0-1.local' added
11/22 16:26:56 INFO:     task[001] 'compute-0-1.local' added
11/22 16:26:56 INFO:     MNode[001] 'compute-0-0.local'(x2) added to job 
'66'
[000] compute-0-0.local: (P:2,S:7647,M:4056,D:1) 
[Idle][DEFAULT][linux]&lt;0.090000&gt; C:[verylong_serial 2:2][long_mpi 2:2][sho
rt_serial 2:2][medium_serial 2:2][feed 2:2][medium_mpi 2:2][verylong_mpi 
2:2][short_mpi 2:2][long_serial 2:2][DEFAULT] [NON
E] [verylong_serial 2:2][long_mpi 2:2][short_serial 2:2][medium_serial 
2:2][feed 2:2][medium_mpi 2:2][verylong_mpi 2:2][sho
rt_mpi 2:2][long_serial 2:2]
11/22 16:26:56 INFO:     adding 2 task(s) from node compute-0-0.local
11/22 16:26:56 INFO:     task[002] 'compute-0-0.local' added
11/22 16:26:56 INFO:     task[003] 'compute-0-0.local' added
11/22 16:26:56 INFO:     all tasks located
11/22 16:26:56 INFO:     end of list reached.  2 nodes found
11/22 16:26:56 INFO:     tasks distributed: 4 (Round-Robin)
11/22 16:26:56 MAMAllocJReserve(66,RIndex,ErrMsg)
11/22 16:26:56 MRMJobStart(66,Msg,SC)
11/22 16:26:56 MPBSJobStart(66,rock-lgit,Msg,SC)
11/22 16:26:56 INFO:     checking node 'compute-0-1.local' for tasklist
11/22 16:26:56 INFO:     checking node 'compute-0-0.local' for tasklist
11/22 16:26:56 
MPBSJobModify(66,Resource_List,Resource,compute-0-1.local:ppn=2+compute-0-0.local:ppn=2) 

11/22 16:26:56 INFO:     hostlist for job '66' set to 
'compute-0-1.local:ppn=2+compute-0-0.local:ppn=2'
11/22 16:26:56 MPBSJobModify(66,Resource_List,Resource,2:ppn=2)
11/22 16:26:56 WARNING:  cannot set job 
'66.rock-lgit.obs.ujf-grenoble.fr' attr 'Resource_List:neednodes' to 
'2:ppn=2' (rc:
15001 'Unknown Job Id')
11/22 16:26:56 WARNING:  cannot reset hostlist for job '66')
11/22 16:26:56 INFO:     job '66' successfully started
11/22 16:26:56 MResDestroy(66)
11/22 16:26:56 MResChargeAllocation(66,2)
11/22 16:26:56 
MRECheck(compute-0-0.local,MResDeallocateResources-Start,FORCE)
11/22 16:26:56 INFO:     N[compute-0-0.local]-&gt;RE[000] S 66(0)  
-00:00:01 R: 'PROCS: 1'x2
11/22 16:26:56 INFO:     N[compute-0-0.local]-&gt;RE[001] E 66(0)  00:04:59 
R: 'PROCS: 1'x2
11/22 16:26:56 
MRECheck(compute-0-0.local,MResDeallocateResources-End,FORCE)
11/22 16:26:56 INFO:     node 'compute-0-0.local' released from reservation
11/22 16:26:56 
MRECheck(compute-0-1.local,MResDeallocateResources-Start,FORCE)
11/22 16:26:56 INFO:     N[compute-0-1.local]-&gt;RE[000] S 66(0)  
-00:00:01 R: 'PROCS: 1'x2
11/22 16:26:56 INFO:     N[compute-0-1.local]-&gt;RE[001] E 66(0)  00:04:59 
R: 'PROCS: 1'x2
11/22 16:26:56 
MRECheck(compute-0-1.local,MResDeallocateResources-End,FORCE)
11/22 16:26:56 INFO:     node 'compute-0-1.local' released from reservation
11/22 16:26:56 INFO:     2 nodes/4 tasks released from reservation
11/22 16:26:56 MJobFind('66',J,0)
11/22 16:26:56 INFO:     job '66'  hash 1432
11/22 16:26:56 INFO:     job '66' found at hash[1432] 1 '66' (J-&gt;Name: 66)
11/22 16:26:56 INFO:     job '66' reservation released (tasks requested: 4)
11/22 16:26:56 MResAdjustDRes(66,TRUE)
11/22 16:26:56 MRECheck(compute-0-0.local,MResAdjustDRes-Start,FORCE)
11/22 16:26:56 INFO:     updating cr reservations for job '66'
11/22 16:26:56 MRECheck(compute-0-0.local,MResAdjustDRes-End,FORCE)
11/22 16:26:56 MRECheck(compute-0-1.local,MResAdjustDRes-Start,FORCE)
11/22 16:26:56 INFO:     updating cr reservations for job '66'
11/22 16:26:56 MRECheck(compute-0-1.local,MResAdjustDRes-End,FORCE)
11/22 16:26:56 INFO:     reservation '66' released
11/22 16:26:56 MJobSetState(66,Running)
11/22 16:26:56 MQueueAddAJob(66)
11/22 16:26:56 MStatUpdateActiveJobUsage(66)
11/22 16:26:56 INFO:     updating statistics for Grid[time: 2][proc: 2]
11/22 16:26:56 MPolicyAdjustUsage(NULL,66,NULL,active,NULL,[ALL],1,NULL)
11/22 16:26:56 INFO:     updating statistics for user gescorne (UID: 1243)
11/22 16:26:56 INFO:     updating statistics for group 1110 (GID -1)
11/22 16:26:56 INFO:     job '66' has no account assigned
11/22 16:26:56 INFO:     updating statistics for QOS DEFAULT
11/22 16:26:56 INFO:     updating statistics for class short_mpi
11/22 16:26:56 INFO:     previously detected active Job[001] '66' added 
to MAQ
11/22 16:26:56 INFO:     job '66' added to MAQ at slot 0
11/22 16:26:56 INFO:     MAQ: [1 : 66 : 300]
11/22 16:26:56 MUNLGetMinAVal(NL,NAttr,N,Val)
11/22 16:26:56 MNodeAdjustState(compute-0-1.local,State)
11/22 16:26:56 INFO:     node compute-0-1.local state set to Busy
11/22 16:26:56 MNodeAdjustState(compute-0-0.local,State)
11/22 16:26:56 INFO:     node compute-0-0.local state set to Busy
11/22 16:26:56 MUNLGetMinAVal(NL,NAttr,N,Val)
11/22 16:26:56 MResJCreate(66,MNodeList,00:00:00,ActiveJob,Res)
11/22 16:26:56 MUNLGetMinAVal(NL,NAttr,N,Val)
11/22 16:26:56 INFO:     linking nodes to reservation '66'
11/22 16:26:56 MResAddNode(66,compute-0-1.local,2,0)
11/22 16:26:56 MRECheck(compute-0-1.local,MResAddNode-Start,FORCE)
11/22 16:26:56 MRECheck(compute-0-1.local,MResAddNode-End,FORCE)
11/22 16:26:56 INFO:     N[compute-0-1.local]-&gt;RE[000] S 66(0)  00:00:00 
R: 'PROCS: 1'x2
11/22 16:26:56 INFO:     N[compute-0-1.local]-&gt;RE[001] E 66(0)  00:05:00 
R: 'PROCS: 1'x2
11/22 16:26:56 MResAddNode(66,compute-0-0.local,2,0)
11/22 16:26:56 MRECheck(compute-0-0.local,MResAddNode-Start,FORCE)
11/22 16:26:56 MRECheck(compute-0-0.local,MResAddNode-End,FORCE)
11/22 16:26:56 INFO:     N[compute-0-0.local]-&gt;RE[000] S 66(0)  00:00:00 
R: 'PROCS: 1'x2
11/22 16:26:56 INFO:     N[compute-0-0.local]-&gt;RE[001] E 66(0)  00:05:00 
R: 'PROCS: 1'x2
11/22 16:26:56 MResAdjustDRes(66,FALSE)
11/22 16:26:56 MRECheck(compute-0-0.local,MResAdjustDRes-Start,FORCE)
11/22 16:26:56 INFO:     N[compute-0-0.local]-&gt;RE[000] S 66(0)  00:00:00 
R: 'PROCS: 1'x2
11/22 16:26:56 INFO:     N[compute-0-0.local]-&gt;RE[001] E 66(0)  00:05:00 
R: 'PROCS: 1'x2
11/22 16:26:56 INFO:     updating cr reservations for job '66'
11/22 16:26:56 INFO:     N[compute-0-0.local]-&gt;RE[00] (66 00:00:00)
11/22 16:26:56 INFO:     N[compute-0-0.local]-&gt;RE[01] (66 00:05:00)
11/22 16:26:56 MRECheck(compute-0-0.local,MResAdjustDRes-End,FORCE)
11/22 16:26:56 INFO:     N[compute-0-0.local]-&gt;RE[000] S 66(0)  00:00:00 
R: 'PROCS: 1'x2
11/22 16:26:56 INFO:     N[compute-0-0.local]-&gt;RE[001] E 66(0)  00:05:00 
R: 'PROCS: 1'x2
11/22 16:26:56 MRECheck(compute-0-1.local,MResAdjustDRes-Start,FORCE)
11/22 16:26:56 INFO:     N[compute-0-1.local]-&gt;RE[000] S 66(0)  00:00:00 
R: 'PROCS: 1'x2
11/22 16:26:56 INFO:     N[compute-0-1.local]-&gt;RE[001] E 66(0)  00:05:00 
R: 'PROCS: 1'x2
11/22 16:26:56 INFO:     updating cr reservations for job '66'
11/22 16:26:56 INFO:     N[compute-0-1.local]-&gt;RE[00] (66 00:00:00)
11/22 16:26:56 INFO:     N[compute-0-1.local]-&gt;RE[01] (66 00:05:00)
11/22 16:26:56 MRECheck(compute-0-1.local,MResAdjustDRes-End,FORCE)
11/22 16:26:56 INFO:     N[compute-0-1.local]-&gt;RE[000] S 66(0)  00:00:00 
R: 'PROCS: 1'x2
11/22 16:26:56 INFO:     N[compute-0-1.local]-&gt;RE[001] E 66(0)  00:05:00 
R: 'PROCS: 1'x2
11/22 16:26:56 MPolicyAdjustUsage(NULL,66,NULL,idle,PU,[ALL],-1,NULL)
11/22 16:26:56 MPolicyAdjustUsage(NULL,66,NULL,idle,NULL,[ALL],-1,NULL)
11/22 16:26:56 MParUpdate(DEFAULT)
11/22 16:26:56 INFO:     P[DEFAULT]:  Total 2:4  Up 2:4  Idle 2:4  
Active 0:0
11/22 16:26:56 INFO:     MNode[compute-0-0.local] added to MPar[DEFAULT] 
(0:2)
11/22 16:26:56 INFO:     MNode[compute-0-1.local] added to MPar[DEFAULT] 
(0:2)
11/22 16:26:56 INFO:     P[DEFAULT]:  Total 2:4  Up 2:4  Idle 0:0  
Active 2:4
11/22 16:26:56 MJobAddToNL(66,NULL)
11/22 16:26:56 INFO:     node compute-0-1.local added to job 66.  PSlot: 
[verylong_serial 2:2][long_mpi 2:2][short_serial 2
:2][medium_serial 2:2][feed 2:2][medium_mpi 2:2][verylong_mpi 
2:2][short_mpi 0:2][long_serial 2:2]
11/22 16:26:56 INFO:     node compute-0-0.local added to job 66.  PSlot: 
[verylong_serial 2:2][long_mpi 2:2][short_serial 2
:2][medium_serial 2:2][feed 2:2][medium_mpi 2:2][verylong_mpi 
2:2][short_mpi 0:2][long_serial 2:2]
11/22 16:26:56 INFO:     starting job '66'
11/22 16:26:56 INFO:     reserved job '66' started
11/22 16:26:56 INFO:     reserved jobs scheduled: 1
11/22 16:26:56 
MQueueSelectJobs(SrcQ,DstQ,SOFT,5120,4096,2140000000,EVERY,FReason,TRUE)
11/22 16:26:56 MLocalCheckFairnessPolicy(NULL,1132673216,Message)
11/22 16:26:56 INFO:     checking job[0] '66'
11/22 16:26:56 INFO:     job 66 rejected (job in non-idle state 'Running')
11/22 16:26:56 INFO:     total jobs selected in partition ALL: 0/1 
[State: 1]
11/22 16:26:56 
MQueueSelectJobs(SrcQ,DstQ,HARD,5120,4096,2140000000,EVERY,FReason,TRUE)
11/22 16:26:56 MLocalCheckFairnessPolicy(NULL,1132673216,Message)
11/22 16:26:56 INFO:     checking job[0] '66'
11/22 16:26:56 INFO:     job 66 rejected (job in non-idle state 'Running')
11/22 16:26:56 INFO:     total jobs selected in partition ALL: 0/1 
[State: 1]
11/22 16:26:56 INFO:     cannot finalize RM cycle (RM 'rock-lgit' does 
not support function 'cyclefinalize')
11/22 16:26:56 
MQueueSelectJobs(SrcQ,DstQ,SOFT,5120,4096,2140000000,EVERY,FReason,TRUE)
11/22 16:26:56 MLocalCheckFairnessPolicy(NULL,1132673216,Message)
11/22 16:26:56 INFO:     checking job[0] '66'
11/22 16:26:56 INFO:     job 66 rejected (job in non-idle state 'Running')
11/22 16:26:56 INFO:     total jobs selected in partition ALL: 0/1 
[State: 1]
11/22 16:26:56 MSchedUpdateStats()
11/22 16:26:56 INFO:     iteration:  388   scheduling time:  0.058 seconds
11/22 16:26:56 MResUpdateStats()
11/22 16:26:56 INFO:     current util[388]:  2/2 (100.00%)  PH: 43.95%  
active jobs: 1 of 2 (completed: 49)
11/22 16:26:56 MQueueCheckStatus()
11/22 16:26:56 INFO:     checking purge criteria for job '66'
11/22 16:26:56 MNodeCheckStatus()
11/22 16:26:56 INFO:     checking node 'compute-0-0.local'
11/22 16:26:56 INFO:     checking node 'compute-0-1.local'
11/22 16:26:56 MSysCheck()
11/22 16:26:56 MLimitEnforceAll(ALL)
11/22 16:26:56 MUClearChild(PID)
11/22 16:26:56 MParUpdate(ALL)
11/22 16:26:56 INFO:     P[ALL]:  Total 2:4  Up 2:4  Idle 0:0  Active 2:4
11/22 16:26:56 INFO:     MNode[compute-0-0.local] added to MPar[DEFAULT] 
(0:2)
11/22 16:26:56 INFO:     MNode[compute-0-1.local] added to MPar[DEFAULT] 
(0:2)
11/22 16:26:56 INFO:     P[ALL]:  Total 2:4  Up 2:4  Idle 0:0  Active 2:4
11/22 16:26:56 MResCheckStatus(NULL)
11/22 16:26:56 INFO:     checking R[000]: '66'  end: 00:05:00
11/22 16:26:56 INFO:     scheduling complete.  sleeping 60 seconds
11/22 16:26:56 INFO:     connect request from 195.220.79.5
11/22 16:26:56 INFO:     socket linger enabled
11/22 16:26:56 INFO:     received service request from host 
'rock-lgit.obs.ujf-grenoble.fr'
11/22 16:26:56 INFO:     TCP client connected at sd 11
11/22 16:26:56 MSURecvPacket(11,BufP,4,NULL,100000)
11/22 16:26:56 MSUSelectRead(11,100000)
11/22 16:26:56 INFO:     4 of 4 bytes read from sd 11
11/22 16:26:56 INFO:     PBS command 16777216 received
11/22 16:26:56 INFO:     selecting next client (0 seconds left)
11/22 16:26:57 ServerProcessRequests()
11/22 16:26:57 MLogRoll(NULL,0,1)
11/22 16:26:57 INFO:     not rolling logs (26844 &lt; 10000000)
11/22 16:26:57 MResAdjust(NULL,0,0)
11/22 16:26:57 MJobSetAttr(,PAL,Value,1,2)
11/22 16:26:57 INFO:     job flags for job : 0
11/22 16:26:57 MJobSetAttr(,GAttr,Value,1,5)
11/22 16:26:57 MStatInitializeActiveSysUsage()
11/22 16:26:57 MStatClearUsage([NONE],Active)
11/22 16:26:57 ServerUpdate()
11/22 16:26:57 MSysUpdateTime()
11/22 16:26:57 INFO:     starting iteration 389
11/22 16:26:57 MSchedProcessJobs()
11/22 16:26:57 MRMGetInfo()
11/22 16:26:57 MClusterClearUsage()
11/22 16:26:57 MRMClusterQuery()
11/22 16:26:57 MPBSClusterQuery(rock-lgit,RCount,SC)
11/22 16:26:57 __MPBSGetNodeState(Name,State,PNode)
11/22 16:26:57 INFO:     PBS node compute-0-0.local set to state Idle 
(free)
11/22 16:26:57 MNodeFind(compute-0-0.local,N)
11/22 16:26:57 MRMNodePreUpdate(compute-0-0.local,Idle,rock-lgit)
11/22 16:26:57 MNodeCheckAllocation(compute-0-0.local)
11/22 16:26:57 ALERT:    node 'compute-0-0.local' expected Busy but is 
Idle and not allocated
11/22 16:26:57 
MPBSNodeUpdate(compute-0-0.local,compute-0-0.local,Idle,rock-lgit)
11/22 16:26:57 
__MPBSIGetSSSStatus(compute-0-0.local,arch=linux,uname=Linux 
compute-0-0.local 2.6.9-5.0.5.ELsmp #1 SMP Wed
Apr 20 00:16:40 BST 2005 i686,sessions=? 0,nsessions=? 
0,nusers=0,idletime=620898,totmem=8250696kb,availmem=7830708kb,physm
em=4154132kb,ncpus=4,loadave=0.09,netload=3655992773,state=free,rectime=1132673206) 














11/22 15:28:24 INFO:     resources detected: 8
11/22 15:28:24 MRMWorkloadQuery()
11/22 15:28:24 MPBSWorkloadQuery(rock-lgit,JCount,SC)
11/22 15:28:24 
MPBSJobUpdate(64,64.rock-lgit.obs.ujf-grenoble.fr,TaskList,0)
11/22 15:28:24 INFO:     job '64' changed states from Running to Idle
11/22 15:28:24 INFO:     1 PBS jobs detected on RM rock-lgit
11/22 15:28:24 INFO:     jobs detected: 1
11/22 15:28:24 MStatClearUsage(node,Active)
11/22 15:28:24 MClusterUpdateNodeState()
11/22 15:28:24 MQueueSelectAllJobs(Q,HARD,ALL,JIList,DP,Msg)
11/22 15:28:24 INFO:     job '64' Priority:     1802
11/22 15:28:24 INFO:     Cred:      0(00.0)  FS:      0(00.0)  
Attr:      0(00.0)  Serv:   1802(00.0)  Targ:      0(00.0)
Res:      0(00.0)  Us:      0(00.0)
11/22 15:28:24 MStatClearUsage([NONE],Active)
11/22 15:28:24 INFO:     total jobs selected (ALL): 1/1
11/22 15:28:24 MQueueSelectAllJobs(Q,SOFT,ALL,JIList,DP,Msg)
11/22 15:28:24 INFO:     job '64' Priority:     1802
11/22 15:28:24 INFO:     Cred:      0(00.0)  FS:      0(00.0)  
Attr:      0(00.0)  Serv:   1802(00.0)  Targ:      0(00.0)
Res:      0(00.0)  Us:      0(00.0)
11/22 15:28:24 MStatClearUsage([NONE],Idle)
11/22 15:28:24 INFO:     total jobs selected (ALL): 1/1
11/22 15:28:24 
MQueueSelectJobs(SrcQ,DstQ,HARD,5120,4096,2140000000,EVERY,FReason,FALSE)
11/22 15:28:24 INFO:     total jobs selected in partition ALL: 1/1
11/22 15:28:24 MQueueScheduleRJobs(Q)
11/22 15:28:24 INFO:     located job '64' reserved to start           
Tue Nov 22 15:28:23
11/22 15:28:24 INFO:     4 feasible tasks found for job 64:0 in 
partition DEFAULT (4 Needed)
11/22 15:28:24 INFO:     tasks located for job 64:  4 of 4 required (0 
feasible)
11/22 15:28:24 MJobStart(64)
11/22 15:28:24 MJobDistributeTasks(64,rock-lgit,NodeList,TaskMap)
11/22 15:28:24 MAMAllocJReserve(64,RIndex,ErrMsg)
11/22 15:28:24 MRMJobStart(64,Msg,SC)
11/22 15:28:24 MPBSJobStart(64,rock-lgit,Msg,SC)
11/22 15:28:24 
MPBSJobModify(64,Resource_List,Resource,compute-0-1.local:ppn=2+compute-0-0.local:ppn=2) 

11/22 15:28:24 MPBSJobModify(64,Resource_List,Resource,2:ppn=2)
11/22 15:28:24 WARNING:  cannot set job 
'64.rock-lgit.obs.ujf-grenoble.fr' attr 'Resource_List:neednodes' to 
'2:ppn=2' (rc:
15001 'Unknown Job Id')
11/22 15:28:24 INFO:     job '64' successfully started
11/22 15:28:24 MResDestroy(64)
11/22 15:28:24 MResChargeAllocation(64,2)
11/22 15:28:24 MStatUpdateActiveJobUsage(64)
11/22 15:28:24 MResJCreate(64,MNodeList,00:00:00,ActiveJob,Res)
11/22 15:28:24 INFO:     starting job '64'
11/22 15:28:24 INFO:     reserved job '64' started
11/22 15:28:24 INFO:     reserved jobs scheduled: 1
11/22 15:28:24 
MQueueSelectJobs(SrcQ,DstQ,SOFT,5120,4096,2140000000,EVERY,FReason,TRUE)
11/22 15:28:24 INFO:     total jobs selected in partition ALL: 0/1 
[State: 1]
11/22 15:28:24 
MQueueSelectJobs(SrcQ,DstQ,HARD,5120,4096,2140000000,EVERY,FReason,TRUE)
11/22 15:28:24 INFO:     total jobs selected in partition ALL: 0/1 
[State: 1]
11/22 15:28:24 
MQueueSelectJobs(SrcQ,DstQ,SOFT,5120,4096,2140000000,EVERY,FReason,TRUE)
11/22 15:28:24 INFO:     total jobs selected in partition ALL: 0/1 
[State: 1]
11/22 15:28:24 MSchedUpdateStats()
11/22 15:28:24 INFO:     iteration: 107986   scheduling time:  0.054 
seconds
11/22 15:28:24 MResUpdateStats()
11/22 15:28:24 INFO:     current util[107986]:  2/2 (100.00%)  PH: 
43.81%  active jobs: 1 of 2 (completed: 47)
11/22 15:28:24 MQueueCheckStatus()
11/22 15:28:24 MNodeCheckStatus()
11/22 15:28:24 MUClearChild(PID)
11/22 15:28:24 INFO:     scheduling complete.  sleeping 60 seconds
11/22 15:28:24 INFO:     connect request from 195.220.79.5
11/22 15:28:24 INFO:     received service request from host 
'rock-lgit.obs.ujf-grenoble.fr'
11/22 15:28:24 MSURecvPacket(10,BufP,4,NULL,100000)
11/22 15:28:25 ServerProcessRequests()
11/22 15:28:25 INFO:     not rolling logs (31483 &lt; 10000000)
11/22 15:28:25 MResAdjust(NULL,0,0)
11/22 15:28:25 MStatInitializeActiveSysUsage()
11/22 15:28:25 MStatClearUsage([NONE],Active)
11/22 15:28:25 ServerUpdate()
11/22 15:28:25 MSysUpdateTime()
11/22 15:28:25 INFO:     starting iteration 107987
11/22 15:28:25 MRMGetInfo()
11/22 15:28:25 MClusterClearUsage()
11/22 15:28:25 MRMClusterQuery()
11/22 15:28:25 MPBSClusterQuery(rock-lgit,RCount,SC)
11/22 15:28:25 __MPBSGetNodeState(Name,State,PNode)



]