#! /bin/bash
#
# Copyright (C) 2010 Q-Leap Networks
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
# Under Debian a copy can be found in /usr/share/common-licenses/GPL-3.
# Command-line handling: parses the options, then requires exactly the
# job id as the first remaining positional argument.
SCRIPTNAME="${0##*/}"
# The synopsis lists every option getopts accepts below (-h and -f) as
# well as the mandatory job id argument.
USAGE="usage: ${SCRIPTNAME} [-h] [-f] <jobid>"

# where to find the snapshot-reference
snap_ref_file="${HOME}/.qlu/torque/jobid2ompi_snap_ref"

# Set to true by -f; read later in this script to decide whether a node
# geometry mismatch is fatal.
force=false

while getopts :hf OPT; do
    case "$OPT" in
        h)
            echo "${USAGE}"
            exit 0
            ;;
        f)
            force=true
            ;;
        *)
            # unknown option: usage goes to stderr, exit status 2
            echo "${USAGE}" >&2
            exit 2
            ;;
    esac
done
# Drop the parsed options; $(( )) replaces the deprecated $[ ] form.
shift $(( OPTIND - 1 ))

jobid="$1"
if [ -z "$jobid" ]; then
    echo "${SCRIPTNAME}: parameter missing" >&2
    echo "${USAGE}" >&2
    exit 2
fi
#######################################
# Print the last line of the snapshot reference file whose first field
# matches the given job id.
#
# Matching rule:
#   - job id made only of digits: compared against the part of the first
#     field that precedes its first dot;
#   - anything else: compared against the whole first field.
# The comparison is always done on strings, so "12" does not match an
# entry recorded as "0012.host".
#
# Arguments: $1 - job id to look up
#            $2 - path of the snapshot reference file
# Outputs:   the matching line on stdout (an empty line if nothing
#            matched); diagnostics on stderr
# Returns:   1 if the file does not exist, 2 if the job id is empty,
#            otherwise the exit status of awk
#######################################
function get_snap_entry() {
    local jobid="$1" snap_ref_file="$2"

    if [ ! -f "$snap_ref_file" ]; then
        echo "${FUNCNAME[0]}(): no such file: \"${snap_ref_file}\"" >&2
        return 1
    fi
    if [ -z "$jobid" ]; then
        echo "${FUNCNAME[0]}(): parameter missing" >&2
        return 2
    fi

    # The file is fed through stdin so that a path containing "=" or
    # starting with "-" cannot be misread by awk as an assignment/option.
    awk -v jid="$jobid" '
        BEGIN {
            # decided once: does the requested id consist of digits only?
            digits_only = (jid ~ /^[0-9]+$/)
        }
        {
            key = $1
            if (digits_only) {
                # keep everything up to the first dot
                sub(/\..*$/, "", key)
            }
            # Appending "" forces a string comparison; awk would otherwise
            # compare two numeric-looking values numerically.
            if ((key "") == (jid "")) {
                snap_ref = $0    # later matches overwrite earlier ones
            }
        }
        END {
            print snap_ref
        }
    ' < "${snap_ref_file}"
}
#######################################
# Summarise a node file as a geometry string.
#
# Identical lines of the file are counted; the result lists, for every
# distinct count (ascending), how many different lines occur that many
# times, written as "<lines>*<count>" and joined with "+".
# Example: a file with n1 twice, n2 twice and n3 once gives "1*1+2*2".
#
# Arguments: $1 - path of the node file
# Outputs:   the geometry string on stdout, without a trailing newline;
#            nothing if the file does not exist
# Returns:   0 if the file does not exist, otherwise the status of the
#            last pipeline stage
#######################################
function get_nodelist() {
    local nodefile="$1"

    # a missing node file is not an error: just produce no output
    [ -f "$nodefile" ] || return 0

    # stage 1-3: occurrences per distinct line
    # stage 4-5: how many lines share each occurrence count
    # stage 6:   join the "<lines>*<count>" terms with "+"
    sort "$nodefile" \
        | uniq -c \
        | awk '{ print $1 }' \
        | sort -n \
        | uniq -c \
        | awk '{ printf "%s%s*%s", joiner, $1, $2; joiner = "+" }'
}
# Main flow: look up the snapshot reference recorded for the job, compare
# the saved node geometry with the current one, then restart the job.
snap_entry=$(get_snap_entry "$jobid" "$snap_ref_file")
if [ -z "$snap_entry" ]; then
    echo "${SCRIPTNAME}: no matching snapshot reference for job \"$jobid\"" >&2
    exit 1
fi

# The entry is split on whitespace: field 2 is the snapshot reference
# handed to ompi-restart, field 3 the node geometry saved with it.
read -r _entry_jobid snap_ref saved_nodelist _entry_rest <<< "$snap_entry"

# Refuse to call ompi-restart with an empty argument when the matching
# line carries no snapshot reference.
if [ -z "$snap_ref" ]; then
    echo "${SCRIPTNAME}: malformed snapshot reference entry for job \"$jobid\"" >&2
    exit 1
fi

nodelist=$(get_nodelist "${PBS_NODEFILE}")
if [ "$saved_nodelist" != "$nodelist" ]; then
    echo "Saved node geometry ($saved_nodelist) differs from current ($nodelist)" >&2
    # compare as a string instead of executing the variable's value
    if [ "$force" = true ]; then
        echo "Continuing as you request" >&2
    else
        echo "To continue run this script with the option \"-f\"" >&2
        exit 1
    fi
fi

# Last command: its exit status becomes the exit status of the script.
ompi-restart "$snap_ref"