-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathpegasus-glidein
executable file
·202 lines (161 loc) · 4.48 KB
/
pegasus-glidein
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
#!/bin/bash
#
# pegasus-glidein
#
# Downloads an HTCondor release tarball into a scratch work directory,
# generates a condor_config that runs the MASTER and STARTD daemons against
# a given central manager, installs an authentication token, then starts
# condor_master and waits for it to exit before cleaning up.
#
# Abort on the first unhandled command failure.
set -e
#######################################
# Print the command line help on stderr and terminate the script.
# Arguments:
#   $1 - exit status to terminate with (optional, defaults to 0 so that
#        a plain "usage" call keeps its historical behaviour)
# Outputs:
#   the help text on stderr
#######################################
function usage()
{
    cat >&2 <<EOF
Usage: pegasus-glidein [-h] [-w PATH] [-r HOURS] [-s START] [-u URL] -t TOKEN -c HOST

  -h        Show this help message.
  -c HOST   The hostname of the HTCondor central manager.
  -t TOKEN  The HTCondor token used for authentication.
  -s START  Start expression for the STARTD.
  -r HOURS  Number of hours to accept new jobs (defaults to 12)
  -w PATH   Use PATH as work directory. If not specified, \$TMP (or /tmp) is used.
  -u URL    URL to a HTCondor tarball, if you do not want the default.
EOF
    exit "${1:-0}"
}

#######################################
# Stop the log tail and the condor_master, then remove the local directory.
# Safe to call more than once (signal trap + normal exit path).
# Globals:
#   CLEAN_UP_DONE (read/written), TAIL_PID, MASTER_PID,
#   _CONDOR_LOCAL_DIR (read)
# Returns:
#   always 0
#######################################
function cleanup()
{
    # prevent multiple cleanups
    if [[ "${CLEAN_UP_DONE:-}" == "1" ]]; then
        return 0
    fi
    export CLEAN_UP_DONE=1

    # Every step below is best-effort. The script runs under "set -e", and
    # on the normal exit path the master is already gone, so an unguarded
    # "kill" or "wait" failure would abort before the directory is removed.
    if [[ -n "${TAIL_PID:-}" ]]; then
        kill "$TAIL_PID" >/dev/null 2>&1 || true
        wait "$TAIL_PID" 2>/dev/null || true
    fi
    if [[ -n "${MASTER_PID:-}" ]]; then
        kill "$MASTER_PID" >/dev/null 2>&1 || true
        wait "$MASTER_PID" 2>/dev/null || true
    fi

    echo
    echo "Cleaning up..."
    # never run "rm -rf" on an empty path
    if [[ -n "${_CONDOR_LOCAL_DIR:-}" ]]; then
        rm -rf -- "$_CONDOR_LOCAL_DIR"
    fi
    return 0
}

#######################################################################
#
# defaults
#

START_EXPR="True"
HOURS_UNTIL_RETIREMENT=12
CONDOR_TARBALL="https://research.cs.wisc.edu/htcondor/tarball/current/23.9.6/release/condor-23.9.6-x86_64_AlmaLinux8-stripped.tar.gz"

#######################################################################
#
# main
#

#######################################
# Parse the command line into the global settings.
# The retirement time is taken from -r: the option string used to list
# "h" twice, and getopts only ever matched the first (argument-less) one,
# so "-h HOURS" always printed the help instead of setting the hours.
# Globals:
#   CONDOR_HOST, TOKEN, START_EXPR, HOURS_UNTIL_RETIREMENT, WORK_DIR,
#   CONDOR_TARBALL (written)
# Arguments:
#   the script's command line ("$@")
# Returns:
#   0 on success; exits 0 after -h, exits 1 on an invalid command line
#######################################
function parse_args()
{
    local OPTIND=1 OPTARG opt

    # leading ':' = silent mode, so missing arguments arrive as ':'
    while getopts ":hc:t:s:r:w:u:" opt; do
        case "$opt" in
            h)
                usage
                ;;
            c)
                CONDOR_HOST=$OPTARG
                ;;
            t)
                TOKEN=$OPTARG
                ;;
            s)
                START_EXPR=$OPTARG
                ;;
            r)
                # the value is later used in shell arithmetic; only accept
                # plain digits and strip leading zeros (no octal surprises)
                if ! [[ "$OPTARG" =~ ^[0-9]+$ ]]; then
                    echo "-r requires a whole number of hours, got '$OPTARG'" >&2
                    usage 1
                fi
                HOURS_UNTIL_RETIREMENT=$((10#$OPTARG))
                ;;
            w)
                WORK_DIR=$OPTARG
                ;;
            u)
                CONDOR_TARBALL=$OPTARG
                ;;
            :)
                echo "Option -$OPTARG requires an argument" >&2
                usage 1
                ;;
            \?)
                echo "Unknown option -$OPTARG" >&2
                usage 1
                ;;
        esac
    done
}

parse_args "$@"
# The central manager host and the token are both mandatory. A missing one
# is an error, so the script must end with a failure status. usage
# terminates the shell it runs in, so it is confined to a subshell and the
# exit status is chosen here.
if [[ -z "${CONDOR_HOST:-}" ]]; then
    echo "-c is a required parameter" >&2
    (usage) || true
    exit 1
fi

if [[ -z "${TOKEN:-}" ]]; then
    echo "-t is a required parameter" >&2
    (usage) || true
    exit 1
fi
# empty PATH? fall back to a minimal one so the tools below can be found
if [[ -z "${PATH:-}" ]]; then
    export PATH=/usr/bin:/bin
fi

# workdir: an explicitly requested directory wins, then $TMP, then /tmp
if [[ -z "${WORK_DIR:-}" ]]; then
    WORK_DIR=${TMP:-/tmp}
fi

# private, unpredictable sub directory for this glidein instance
WORK_DIR=$(mktemp -d "$WORK_DIR/htcondor-local.XXXXXXXX")
cd "$WORK_DIR"

# Make the path absolute. With a relative work directory, every later
# "$WORK_DIR/..." reference would otherwise be resolved against the new
# current directory and point to a non-existing location.
WORK_DIR=$PWD
echo "Work dir is $WORK_DIR"
# download HTCondor into the current (work) directory
echo "Downloading and setting up HTCondor..."
# wget runs quietly, so report a failed download ourselves
if ! wget -q -O condor.tar.gz "$CONDOR_TARBALL"; then
    echo "Unable to download $CONDOR_TARBALL" >&2
    exit 1
fi
tar xzf condor.tar.gz
rm -f condor.tar.gz

# The tarball is expected to unpack into exactly one condor-* directory.
# Check that before renaming instead of letting "mv" fail (or move the
# wrong thing) on zero or several matches.
EXTRACTED_DIRS=(condor-*/)
if (( ${#EXTRACTED_DIRS[@]} != 1 )) || [[ ! -d "${EXTRACTED_DIRS[0]}" ]]; then
    echo "Expected exactly one condor-* directory in the HTCondor tarball" >&2
    exit 1
fi
mv -- "${EXTRACTED_DIRS[0]%/}" condor

export _CONDOR_RELEASE_DIR="$WORK_DIR/condor"
export PATH="$_CONDOR_RELEASE_DIR/bin:$_CONDOR_RELEASE_DIR/sbin:$PATH"
# temp directory for this instance
export _CONDOR_LOCAL_DIR="$WORK_DIR/var"
echo
echo "HTCondor Local directory for this glidein is $_CONDOR_LOCAL_DIR"

# stop the daemons and remove the local directory when interrupted
trap cleanup INT TERM

# The token directory has to be the one the generated configuration
# (SEC_TOKEN_DIRECTORY) and the token installation use, which lives under
# the release directory and not under the local directory.
mkdir -p "$WORK_DIR/condor/etc/condor/tokens.d"
mkdir -p "$_CONDOR_LOCAL_DIR/execute"
mkdir -p "$_CONDOR_LOCAL_DIR/log"
mkdir -p "$_CONDOR_LOCAL_DIR/spool"

# condor config
export CONDOR_CONFIG="$WORK_DIR/condor/etc/condor_config"

# Assign and export separately so a failing lookup is not masked, and fall
# back to the short name when the fully qualified one cannot be determined.
MY_HOSTNAME=$(hostname -f 2>/dev/null) || MY_HOSTNAME=$(hostname)
export MY_HOSTNAME
# Generate the HTCondor configuration. The here-document is expanded by the
# shell, so \$(LOCAL_DIR) is escaped to reach the file as an HTCondor macro.
# The redirect target is quoted: an unquoted path containing whitespace is
# rejected by bash as an ambiguous redirect.
cat >"$CONDOR_CONFIG" <<EOF

# HTCondor config generated by pegasus-glidein

DAEMON_LIST = MASTER, STARTD

CONDOR_HOST = $CONDOR_HOST
CCB_ADDRESS = $CONDOR_HOST
PRIVATE_NETWORK_NAME = $MY_HOSTNAME
UID_DOMAIN = $MY_HOSTNAME
FILESYSTEM_DOMAIN = $MY_HOSTNAME

LOCAL_DIR=$_CONDOR_LOCAL_DIR
LOG=\$(LOCAL_DIR)/log

UPDATE_COLLECTOR_WITH_TCP = True

SEC_DEFAULT_AUTHENTICATION = REQUIRE
SEC_DEFAULT_AUTHENTICATION_METHODS = FS, IDTOKEN
SEC_TOKEN_DIRECTORY = $WORK_DIR/condor/etc/condor/tokens.d

# dynamic slots
SLOT_TYPE_1 = cpus=100%, memory=100%, disk=100%, swap=100%
SLOT_TYPE_1_PARTITIONABLE = TRUE
NUM_SLOTS = 1
NUM_SLOTS_TYPE_1 = 1

# default policy
START = $START_EXPR
SUSPEND = False
CONTINUE = True
PREEMPT = False
KILL = False

EOF

# handle SLURM_NTASKS: when set, advertise only that many cpus and the
# matching fraction of memory, disk and swap.
# "grep -c" exits non-zero when nothing matches, hence the "|| true".
TOTAL_CORES=$(grep -c '^processor' /proc/cpuinfo 2>/dev/null || true)
TOTAL_CORES=${TOTAL_CORES:-0}
if [[ -n "${SLURM_NTASKS:-}" ]]; then
    {
        echo "NUM_CPUS = $SLURM_NTASKS"
        # without a known core count the fraction would be N/0, so keep
        # the 100% slot definition written above in that case
        if (( TOTAL_CORES > 0 )); then
            echo "SLOT_TYPE_1 = cpus=$SLURM_NTASKS, memory=$SLURM_NTASKS/$TOTAL_CORES, disk=$SLURM_NTASKS/$TOTAL_CORES, swap=$SLURM_NTASKS/$TOTAL_CORES"
        fi
    } >>"$CONDOR_CONFIG"
fi
# install the token used to authenticate against the central manager
echo "Setting the token"
TOKEN_DIR="$WORK_DIR/condor/etc/condor/tokens.d"
# make sure the directory exists before writing into it
mkdir -p "$TOKEN_DIR"
# Create the file with a restrictive umask so the secret is never readable
# by others, not even between creation and the chmod below. The subshell
# keeps the umask change local.
(
    umask 077
    printf '%s\n' "$TOKEN" >"$TOKEN_DIR/glidein.token"
)
chmod 600 "$TOKEN_DIR/glidein.token"
condor_token_list
echo "Starting HTCondor..."

# stream the daemon logs to our stdout while the glidein is running
touch "$_CONDOR_LOCAL_DIR/log/MasterLog" "$_CONDOR_LOCAL_DIR/log/StartLog"
tail -F "$_CONDOR_LOCAL_DIR/log/MasterLog" "$_CONDOR_LOCAL_DIR/log/StartLog" &
TAIL_PID=$!

MINS_UNTIL_RETIREMENT=$((HOURS_UNTIL_RETIREMENT * 60))
condor_master -f -r "$MINS_UNTIL_RETIREMENT" &
MASTER_PID=$!

# Capture the master's status instead of letting "set -e" end the script
# here: a non-zero status (master failure, or a trapped signal interrupting
# the wait) must not skip the cleanup and leave the tail process behind.
MASTER_STATUS=0
wait "$MASTER_PID" || MASTER_STATUS=$?

# best-effort: a failing step inside cleanup must not hide the real status
cleanup || true
exit "$MASTER_STATUS"