#! /bin/bash
#
# Copyright (C) 2010 Q-Leap Networks
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
# Under Debian a copy can be found in /usr/share/common-licenses/GPL-3.
#
# Purpose:
#   Look up the snapshot reference recorded for a job id in
#   ${HOME}/.qlu/torque/jobid2ompi_snap_ref and hand it to ompi-restart.
#   Before restarting, the node geometry derived from ${PBS_NODEFILE} is
#   compared with the geometry stored alongside the snapshot reference;
#   on a mismatch the script aborts unless -f is given.
#
# Options:
#   -h  print the usage line and exit
#   -f  continue even if the saved and current node geometry differ
#
# Exit status:
#   2  usage error (unknown option or missing job id)
#   1  no usable snapshot reference, or geometry mismatch without -f
#   otherwise the exit status of ompi-restart

SCRIPTNAME="${0##*/}"
USAGE="usage: ${SCRIPTNAME} [-h] [-f] <jobid>"

# where to find the snapshot-reference
snap_ref_file=${HOME}/.qlu/torque/jobid2ompi_snap_ref

force=false

while getopts :hf OPT; do
    case $OPT in
        h)
            echo "${USAGE}"
            exit
            ;;
        f)
            force=true
            ;;
        *)
            echo "${USAGE}" >&2
            exit 2
            ;;
    esac
done
shift $(( OPTIND - 1 ))

jobid="$1"

if [ -z "$jobid" ]; then
    echo "${SCRIPTNAME}: parameter missing" >&2
    echo "${USAGE}" >&2
    exit 2
fi

#######################################
# Print the last line of the snapshot reference file whose first field
# matches the given job id.
# Arguments:
#   $1 - job id to look for
#   $2 - path of the snapshot reference file
# Outputs:
#   The complete matching line on stdout (an empty line if nothing
#   matched); diagnostics on stderr.
# Returns:
#   1 if the file does not exist, 2 if the job id is empty,
#   otherwise the exit status of awk.
#######################################
get_snap_entry() {
    local jobid="$1" snap_ref_file="$2"

    if [ ! -f "$snap_ref_file" ]; then
        echo "${FUNCNAME[0]}(): no such file: \"${snap_ref_file}\"" >&2
        return 1
    fi
    if [ -z "$jobid" ]; then
        echo "${FUNCNAME[0]}(): parameter missing" >&2
        return 2
    fi

    # take the last match
    # DONE allow only a number as job id
    # if job id consists entirely of numbers
    # then match everything up to the first dot
    awk -v jid="$jobid" '
        {
            if (jid ~ /^[0-9]*$/) {
                split($1, arr_jid, /\./)
                jid_saved = arr_jid[1]
            } else {
                jid_saved = $1
            }
        }
        jid_saved == jid { snap_ref = $0 }
        END { print snap_ref }
    ' "${snap_ref_file}"
}

#######################################
# Summarise a node file as a geometry string.
# Counts how often each distinct line occurs in the file, then how many
# distinct lines share each occurrence count, and prints the result as
# "<lines>*<occurrences>" terms joined by "+", ordered by ascending
# occurrence count, without a trailing newline (for example "2*4+1*8").
# Arguments:
#   $1 - path of the node file
# Outputs:
#   The geometry string on stdout; nothing if the file does not exist.
# Returns:
#   0 if the file does not exist, otherwise the pipeline's exit status.
#######################################
get_nodelist() {
    local nodefile="$1"

    if [ ! -f "$nodefile" ]; then
        return 0
    fi

    sort "$nodefile" | uniq -c | awk '{print $1}' | sort -n \
        | uniq -c \
        | awk '
            BEGIN { ORS = ""; firstline = 1 }
            {
                if (firstline) {
                    firstline = 0
                } else {
                    print "+"
                }
                print $1 "*" $2;
            }
        '
}

snap_entry=$(get_snap_entry "$jobid" "$snap_ref_file")
if [ -z "$snap_entry" ]; then
    echo "${SCRIPTNAME}: no matching snapshot reference for job \"$jobid\"" >&2
    exit 1
fi

# Entry layout: <jobid> <snapshot reference> <saved node geometry> ...
# read splits on blanks/tabs just like awk's default field splitting.
read -r _entry_jid snap_ref saved_nodelist _entry_rest <<<"$snap_entry"

# An entry without a second field would otherwise call ompi-restart "".
if [ -z "$snap_ref" ]; then
    echo "${SCRIPTNAME}: malformed snapshot reference entry for job \"$jobid\"" >&2
    exit 1
fi

nodelist=$(get_nodelist "${PBS_NODEFILE}")

if [ "$saved_nodelist" != "$nodelist" ]; then
    echo "Saved node geometry ($saved_nodelist) differs from current ($nodelist)" >&2
    if $force; then
        echo "Continuing as you request" >&2
    else
        echo "To continue run this script with the option \"-f\"" >&2
        exit 1
    fi
fi

# Last command: its exit status becomes the script's exit status.
ompi-restart "$snap_ref"