[torquedev] saving inodes with lots of TM tasks
Garrick Staples
garrick at clusterresources.com
Fri Feb 16 20:33:28 MST 2007
I have an immediate requirement to support hundreds of thousands of TM
tasks within a single job. The limiting factor at this point is that pbs_mom
saves a TM struct in a separate file for each task. After a few hundred thousand
tasks, the filesystem runs out of inodes!
The only purpose of the disk files is to be read back in on a pbs_mom restart.
In normal use, they are never read.
So the solution seems simple: use 1 file. I've attached a patch that
implements this idea by opening 1 file and seeking (sizeof(struct) *
task_id) bytes into the file before writing. I didn't bother trying to
be backwards compatible at this point because I think this will only be
for 2.2.x.
Thoughts?
-------------- next part --------------
Index: src/resmom/mom_comm.c
===================================================================
--- src/resmom/mom_comm.c (revision 1249)
+++ src/resmom/mom_comm.c (working copy)
@@ -214,14 +214,11 @@
int fds;
int i;
char namebuf[MAXPATHLEN];
- char filnam[MAXPATHLEN];
int openflags;
strcpy(namebuf,path_jobs); /* job directory path */
strcat(namebuf,pjob->ji_qs.ji_fileprefix);
strcat(namebuf,JOB_TASKDIR_SUFFIX);
- sprintf(filnam,task_fmt,ptask->ti_qs.ti_task);
- strcat(namebuf,filnam);
openflags = O_WRONLY|O_CREAT|O_Sync;
@@ -246,12 +243,17 @@
return(-1);
}
- /* NOTE: to avoid partial write failures in fs full situations, */
- /* attempt write of empty buffer, if success, then write actual task? */
- /* (NYI) */
-
/* just write the "critical" base structure to the file */
+ if (lseek(fds,(off_t)sizeof(ptask->ti_qs) * ptask->ti_qs.ti_task,SEEK_SET) < 0)
+ {
+ log_err(errno,id,"lseek");
+
+ close(fds);
+
+ return(-1);
+ }
+
while ((i = write(
fds,
(char *)&ptask->ti_qs,
@@ -261,7 +263,7 @@
{
/* retry the write */
- if (lseek(fds,(off_t)0,SEEK_SET) < 0)
+ if (lseek(fds,(off_t)sizeof(ptask->ti_qs) * ptask->ti_qs.ti_task,SEEK_SET) < 0)
{
log_err(errno,id,"lseek");
@@ -496,72 +498,40 @@
static char id[] = "task_recov";
int fds;
task *pt;
- char dirname[MAXPATHLEN];
char namebuf[MAXPATHLEN];
- DIR *dir;
- struct dirent *pdirent;
struct taskfix task_save;
- strcpy(dirname,path_jobs); /* job directory path */
- strcat(dirname,pjob->ji_qs.ji_fileprefix);
- strcat(dirname,JOB_TASKDIR_SUFFIX);
+ strcpy(namebuf,path_jobs); /* job directory path */
+ strcat(namebuf,pjob->ji_qs.ji_fileprefix);
+ strcat(namebuf,JOB_TASKDIR_SUFFIX);
- if ((dir = opendir(dirname)) == NULL)
+ if ((fds = open(namebuf,O_RDONLY,0)) < 0)
{
+ log_err(errno,id,"open of task file");
+
return(-1);
}
- strcat(dirname,"/");
-
- while ((pdirent = readdir(dir)) != NULL)
+ /* read in task quick save sub-structure */
+ while (read(fds,(char *)&task_save,sizeof(task_save)) == sizeof(task_save))
{
- if (pdirent->d_name[0] == '.')
- continue;
- strcpy(namebuf,dirname);
- strcat(namebuf,pdirent->d_name);
-
- fds = open(namebuf,O_RDONLY,0);
-
- if (fds < 0)
- {
- log_err(errno,id,"open of task file");
-
- unlink(namebuf);
-
+ if (task_save.ti_task == 0)
continue;
- }
- /* read in task quick save sub-structure */
-
- if (read(fds,(char *)&task_save,sizeof(task_save)) != sizeof(task_save))
- {
- log_err(errno,id,"read");
-
- unlink(namebuf);
-
- close(fds);
-
- continue;
- }
-
if ((pt = pbs_task_create(pjob,TM_NULL_TASK)) == NULL)
{
log_err(errno,id,"cannot create task");
- unlink(namebuf);
-
close(fds);
continue;
}
pt->ti_qs = task_save;
-
- close(fds);
} /* END while ((pdirent = readdir(dir)) != NULL) */
- closedir(dir);
+ close(fds);
/* SUCCESS */
@@ -2185,23 +2155,6 @@
job_save(pjob,SAVEJOB_FULL);
- strcpy(namebuf,path_jobs); /* job directory path */
- strcat(namebuf,pjob->ji_qs.ji_fileprefix);
- strcat(namebuf,JOB_TASKDIR_SUFFIX);
-
- if (mkdir(namebuf,0700) == -1)
- {
- log_err(-1,id,"cannot create temporary directory");
-
- job_purge(pjob);
-
- /* cannot create temporary job directory */
-
- SEND_ERR(PBSE_SYSTEM)
-
- goto done;
- }
-
sprintf(log_buffer,"JOIN JOB as node %d",
nodeid);
Index: src/server/job_func.c
===================================================================
--- src/server/job_func.c (revision 1249)
+++ src/server/job_func.c (working copy)
@@ -1120,7 +1120,11 @@
strcpy(namebuf,path_jobs); /* job directory path */
strcat(namebuf,pjob->ji_qs.ji_fileprefix);
strcat(namebuf,JOB_TASKDIR_SUFFIX);
- remtree(namebuf);
+ if (unlink(namebuf) < 0)
+ {
+ if (errno != ENOENT)
+ log_err(errno,id,msg_err_purgejob);
+ }
#if MOM_CHECKPOINT == 1
{
Index: src/server/req_quejob.c
===================================================================
--- src/server/req_quejob.c (revision 1249)
+++ src/server/req_quejob.c (working copy)
@@ -524,28 +524,6 @@
return;
}
-
- strcpy(namebuf,path_jobs); /* job directory path */
- strcat(namebuf,basename);
- strcat(namebuf,JOB_TASKDIR_SUFFIX);
-
- if ((mkdir(namebuf,0700) == -1) && (errno != EEXIST))
- {
- /* FAILURE */
-
- char tmpLine[1024];
-
- sprintf(tmpLine,"cannot create directory '%s'",
- namebuf);
-
- log_err(errno,tmpLine,msg_init_abt);
-
- job_purge(pj);
-
- req_reject(PBSE_SYSTEM,0,preq,NULL,tmpLine);
-
- return;
- }
} /* END else (pj != NULL) */
#endif /* PBS_MOM */
More information about the torquedev
mailing list