Skip to content

Commit

Permalink
Merge remote-tracking branch 'origin/develop' into feature/remove_fin…
Browse files Browse the repository at this point in the history
…ddate

* origin/develop:
  Jenkins Pipeline updates for Canceling Jobs (NOAA-EMC#2307)
  Ocean/ice product generation for GFS and GEFS (NOAA-EMC#2286)
  • Loading branch information
KateFriedman-NOAA committed Feb 14, 2024
2 parents d837f40 + 1aaef05 commit 6e01467
Show file tree
Hide file tree
Showing 50 changed files with 1,414 additions and 1,637 deletions.
6 changes: 4 additions & 2 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,6 @@ fix/gsi
fix/lut
fix/mom6
fix/orog
fix/reg2grb2
fix/sfc_climo
fix/ugwd
fix/verif
Expand Down Expand Up @@ -99,6 +98,9 @@ parm/post/postxconfig-NT-GFS-WAFS.txt
parm/post/postxconfig-NT-GFS.txt
parm/post/postxconfig-NT-gefs-aerosol.txt
parm/post/postxconfig-NT-gefs-chem.txt
parm/post/ocean.csv
parm/post/ice.csv
parm/post/ocnicepost.nml.jinja2
parm/ufs/noahmptable.tbl
parm/ufs/model_configure.IN
parm/ufs/MOM_input_*.IN
Expand Down Expand Up @@ -137,7 +139,6 @@ sorc/radmon_bcor.fd
sorc/radmon_time.fd
sorc/rdbfmsua.fd
sorc/recentersigp.fd
sorc/reg2grb2.fd
sorc/supvit.fd
sorc/syndat_getjtbul.fd
sorc/syndat_maksynrc.fd
Expand All @@ -147,6 +148,7 @@ sorc/tocsbufr.fd
sorc/upp.fd
sorc/vint.fd
sorc/webtitle.fd
sorc/ocnicepost.fd

# Ignore scripts from externals
#------------------------------
Expand Down
71 changes: 40 additions & 31 deletions Jenkinsfile
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ pipeline {

options {
skipDefaultCheckout()
buildDiscarder(logRotator(numToKeepStr: '2'))
parallelsAlwaysFailFast()
}

stages { // This initial stage is used to get the Machine name from the GitHub labels on the PR
Expand Down Expand Up @@ -45,14 +45,14 @@ pipeline {
properties([parameters([[$class: 'NodeParameterDefinition', allowedSlaves: ['built-in', 'Hera-EMC', 'Orion-EMC'], defaultSlaves: ['built-in'], name: '', nodeEligibility: [$class: 'AllNodeEligibility'], triggerIfResult: 'allCases']])])
HOME = "${WORKSPACE}/TESTDIR"
commonworkspace = "${WORKSPACE}"
sh(script: "mkdir -p ${HOME}/RUNTESTS", returnStatus: true)
sh(script: "mkdir -p ${HOME}/RUNTESTS")
pullRequest.addLabel("CI-${Machine}-Building")
if (pullRequest.labels.any { value -> value.matches("CI-${Machine}-Ready") }) {
pullRequest.removeLabel("CI-${Machine}-Ready")
}
}
}
}
}

stage('Build System') {
matrix {
Expand All @@ -71,35 +71,41 @@ pipeline {
steps {
script {
def HOMEgfs = "${HOME}/${system}" // local HOMEgfs is used to build the system on per system basis under the common workspace HOME
sh(script: "mkdir -p ${HOMEgfs}", returnStatus: true)
sh(script: "mkdir -p ${HOMEgfs}")
ws(HOMEgfs) {
env.MACHINE_ID = machine // MACHINE_ID is used in the build scripts to determine the machine and is added to the shell environment
if (fileExists("${HOMEgfs}/sorc/BUILT_semaphor")) { // if the system is already built, skip the build in the case of re-runs
sh(script: "cat ${HOMEgfs}/sorc/BUILT_semaphor", returnStdout: true).trim() // TODO: and user configurable control to manage build semphore
ws(commonworkspace) { pullRequest.comment("Cloned PR already built (or build skipped) on ${machine} in directory ${HOMEgfs}") }
pullRequest.comment("Cloned PR already built (or build skipped) on ${machine} in directory ${HOMEgfs}<br>Still doing a checkout to get the latest changes")
sh(script: 'source workflow/gw_setup.sh; git pull --recurse-submodules')
dir('sorc') {
sh(script: './link_workflow.sh')
}
} else {
checkout scm
sh(script: 'source workflow/gw_setup.sh;which git;git --version;git submodule update --init --recursive', returnStatus: true)
sh(script: 'source workflow/gw_setup.sh;which git;git --version;git submodule update --init --recursive')
def builds_file = readYaml file: 'ci/cases/yamls/build.yaml'
def build_args_list = builds_file['builds']
def build_args = build_args_list[system].join(' ').trim().replaceAll('null', '')
dir("${HOMEgfs}/sorc") {
sh(script: "${build_args}", returnStatus: true)
sh(script: './link_workflow.sh', returnStatus: true)
sh(script: "echo ${HOMEgfs} > BUILT_semaphor", returnStatus: true)
sh(script: "${build_args}")
sh(script: './link_workflow.sh')
sh(script: "echo ${HOMEgfs} > BUILT_semaphor")
}
}
if (pullRequest.labels.any { value -> value.matches("CI-${Machine}-Building") }) {
pullRequest.removeLabel("CI-${Machine}-Building")
}
pullRequest.addLabel("CI-${Machine}-Running")
}
if (env.CHANGE_ID && system == 'gfs') {
if (pullRequest.labels.any { value -> value.matches("CI-${Machine}-Building") }) {
pullRequest.removeLabel("CI-${Machine}-Building")
}
pullRequest.addLabel("CI-${Machine}-Running")
}
}
}
}
}
}
}
}
}

stage('Run Tests') {
matrix {
Expand All @@ -108,19 +114,19 @@ pipeline {
axis {
name 'Case'
// TODO add dynamic list of cases from env vars (needs addtional plugins)
values 'C48_ATM', 'C48_S2SWA_gefs', 'C48_S2SW', 'C96_atm3DVar', 'C48mx500_3DVarAOWCDA', 'C96C48_hybatmDA', 'C96_atmsnowDA'
values 'C48_ATM', 'C48_S2SWA_gefs', 'C48_S2SW', 'C96_atm3DVar', 'C96C48_hybatmDA', 'C96_atmsnowDA' // 'C48mx500_3DVarAOWCDA'
}
}
stages {
stage('Create Experiment') {
steps {
script {
sh(script: "sed -n '/{.*}/!p' ${HOME}/gfs/ci/cases/pr/${Case}.yaml > ${HOME}/gfs/ci/cases/pr/${Case}.yaml.tmp", returnStatus: true)
sh(script: "sed -n '/{.*}/!p' ${HOME}/gfs/ci/cases/pr/${Case}.yaml > ${HOME}/gfs/ci/cases/pr/${Case}.yaml.tmp")
def yaml_case = readYaml file: "${HOME}/gfs/ci/cases/pr/${Case}.yaml.tmp"
system = yaml_case.experiment.system
def HOMEgfs = "${HOME}/${system}" // local HOMEgfs is used to populate the XML on per system basis
env.RUNTESTS = "${HOME}/RUNTESTS"
sh(script: "${HOMEgfs}/ci/scripts/utils/ci_utils_wrapper.sh create_experiment ${HOMEgfs}/ci/cases/pr/${Case}.yaml", returnStatus: true)
sh(script: "${HOMEgfs}/ci/scripts/utils/ci_utils_wrapper.sh create_experiment ${HOMEgfs}/ci/cases/pr/${Case}.yaml")
}
}
}
Expand All @@ -130,16 +136,27 @@ pipeline {
HOMEgfs = "${HOME}/gfs" // common HOMEgfs is used to launch the scripts that run the experiments
ws(HOMEgfs) {
pslot = sh(script: "${HOMEgfs}/ci/scripts/utils/ci_utils_wrapper.sh get_pslot ${HOME}/RUNTESTS ${Case}", returnStdout: true).trim()
pullRequest.comment("**Running** experiment: ${Case} on ${Machine}<br>With the experiment in directory:<br>`${HOME}/RUNTESTS/${pslot}`")
try {
sh(script: "${HOMEgfs}/ci/scripts/run-check_ci.sh ${HOME} ${pslot}", returnStatus: true)
} catch (Exception e) {
// pullRequest.comment("**Running** experiment: ${Case} on ${Machine}<br>With the experiment in directory:<br>`${HOME}/RUNTESTS/${pslot}`")
err = sh(script: "${HOMEgfs}/ci/scripts/run-check_ci.sh ${HOME} ${pslot}")
if (err != 0) {
pullRequest.comment("**FAILURE** running experiment: ${Case} on ${Machine}")
sh(script: "${HOMEgfs}/ci/scripts/utils/ci_utils_wrapper.sh cancel_all_batch_jobs ${HOME}/RUNTESTS")
ws(HOME) {
if (fileExists('RUNTESTS/error.logs')) {
def fileContent = readFile 'RUNTESTS/error.logs'
def lines = fileContent.readLines()
for (line in lines) {
echo "archiving: ${line}"
archiveArtifacts artifacts: "${line}", fingerprint: true
}
}
}
error("Failed to run experiments ${Case} on ${Machine}")
}
pullRequest.comment("**SUCCESS** running experiment: ${Case} on ${Machine}")
// pullRequest.comment("**SUCCESS** running experiment: ${Case} on ${Machine}")
}
}

}
}
}
Expand Down Expand Up @@ -175,14 +192,6 @@ pipeline {
def timestamp = new Date().format('MM dd HH:mm:ss', TimeZone.getTimeZone('America/New_York'))
pullRequest.comment("**CI FAILED** ${Machine} at ${timestamp}<br>Built and ran in directory `${HOME}`")
}
if (fileExists('${HOME}/RUNTESTS/ci.log')) {
def fileContent = readFile '${HOME}/RUNTESTS/ci.log'
fileContent.eachLine { line ->
if (line.contains('.log')) {
archiveArtifacts artifacts: "${line}", fingerprint: true
}
}
}
}
}
}
Expand Down
3 changes: 2 additions & 1 deletion ci/cases/pr/C48mx500_3DVarAOWCDA.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ arguments:
start: warm
yaml: {{ HOMEgfs }}/ci/cases/yamls/soca_gfs_defaults_ci.yaml

skip_ci_on_hosts:
skip_ci_on_host:
- orion
- hera
- hercules
16 changes: 9 additions & 7 deletions ci/scripts/run-check_ci.sh
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ pslot=${2:-${pslot:-?}} # Name of the experiment being tested by this scr
# TODO: Make this configurable (for now all scripts run from gfs for CI at runtime)
HOMEgfs="${TEST_DIR}/gfs"
RUNTESTS="${TEST_DIR}/RUNTESTS"
run_check_logfile="${RUNTESTS}/ci-run_check.log"

# Source modules and setup logging
echo "Source modules."
Expand Down Expand Up @@ -77,15 +78,16 @@ while true; do
{
echo "Experiment ${pslot} Terminated with ${num_failed} tasks failed at $(date)" || true
echo "Experiment ${pslot} Terminated: *FAILED*"
} >> "${RUNTESTS}/ci.log"

} | tee -a "${run_check_logfile}"
error_logs=$(rocotostat -d "${db}" -w "${xml}" | grep -E 'FAIL|DEAD' | awk '{print "-c", $1, "-t", $2}' | xargs rocotocheck -d "${db}" -w "${xml}" | grep join | awk '{print $2}') || true
{
echo "Error logs:"
echo "${error_logs}"
} >> "${RUNTESTS}/ci.log"
sed -i "s/\`\`\`//2g" "${RUNTESTS}/ci.log"
sacct --format=jobid,jobname%35,WorkDir%100,stat | grep "${pslot}" | grep "${pr}\/RUNTESTS" | awk '{print $1}' | xargs scancel || true
} | tee -a "${run_check_logfile}"
# rm -f "${RUNTESTS}/error.logs"
for log in ${error_logs}; do
echo "RUNTESTS${log#*RUNTESTS}" >> "${RUNTESTS}/error.logs"
done
rc=1
break
fi
Expand All @@ -95,8 +97,7 @@ while true; do
echo "Experiment ${pslot} Completed at $(date)" || true
echo "with ${num_succeeded} successfully completed jobs" || true
echo "Experiment ${pslot} Completed: *SUCCESS*"
} >> "${RUNTESTS}/ci.log"
sed -i "s/\`\`\`//2g" "${RUNTESTS}/ci.log"
} | tee -a "${run_check_logfile}"
rc=0
break
fi
Expand All @@ -107,3 +108,4 @@ while true; do
done

exit "${rc}"

8 changes: 8 additions & 0 deletions ci/scripts/utils/ci_utils.sh
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,14 @@ function get_pslot () {

}

function cancel_all_batch_jobs () {
local RUNTESTS="${1}"
pslot_list=$(get_pslot_list "${RUNTESTS}")
for pslot in ${pslot_list}; do
cancel_batch_jobs "${pslot}"
done
}

function create_experiment () {

local yaml_config="${1}"
Expand Down
7 changes: 7 additions & 0 deletions env/HERA.env
Original file line number Diff line number Diff line change
Expand Up @@ -199,6 +199,13 @@ elif [[ "${step}" = "atmos_products" ]]; then

export USE_CFP="YES" # Use MPMD for downstream product generation on Hera

elif [[ "${step}" = "oceanice_products" ]]; then

nth_max=$((npe_node_max / npe_node_oceanice_products))

export NTHREADS_OCNICEPOST=${nth_oceanice_products:-1}
export APRUN_OCNICEPOST="${launcher} -n 1 --cpus-per-task=${NTHREADS_OCNICEPOST}"

elif [[ "${step}" = "ecen" ]]; then

nth_max=$((npe_node_max / npe_node_ecen))
Expand Down
10 changes: 10 additions & 0 deletions env/HERCULES.env
Original file line number Diff line number Diff line change
Expand Up @@ -207,10 +207,20 @@ case ${step} in
[[ ${NTHREADS_UPP} -gt ${nth_max} ]] && export NTHREADS_UPP=${nth_max}
export APRUN_UPP="${launcher} -n ${npe_upp} --cpus-per-task=${NTHREADS_UPP}"
;;

"atmos_products")

export USE_CFP="YES" # Use MPMD for downstream product generation
;;

"oceanice_products")

nth_max=$((npe_node_max / npe_node_oceanice_products))

export NTHREADS_OCNICEPOST=${nth_oceanice_products:-1}
export APRUN_OCNICEPOST="${launcher} -n 1 --cpus-per-task=${NTHREADS_OCNICEPOST}"
;;

"ecen")

nth_max=$((npe_node_max / npe_node_ecen))
Expand Down
7 changes: 7 additions & 0 deletions env/JET.env
Original file line number Diff line number Diff line change
Expand Up @@ -190,6 +190,13 @@ elif [[ "${step}" = "atmos_products" ]]; then

export USE_CFP="YES" # Use MPMD for downstream product generation

elif [[ "${step}" = "oceanice_products" ]]; then

nth_max=$((npe_node_max / npe_node_oceanice_products))

export NTHREADS_OCNICEPOST=${nth_oceanice_products:-1}
export APRUN_OCNICEPOST="${launcher} -n 1 --cpus-per-task=${NTHREADS_OCNICEPOST}"

elif [[ "${step}" = "ecen" ]]; then

nth_max=$((npe_node_max / npe_node_ecen))
Expand Down
7 changes: 7 additions & 0 deletions env/ORION.env
Original file line number Diff line number Diff line change
Expand Up @@ -209,6 +209,13 @@ elif [[ "${step}" = "atmos_products" ]]; then

export USE_CFP="YES" # Use MPMD for downstream product generation

elif [[ "${step}" = "oceanice_products" ]]; then

nth_max=$((npe_node_max / npe_node_oceanice_products))

export NTHREADS_OCNICEPOST=${nth_oceanice_products:-1}
export APRUN_OCNICEPOST="${launcher} -n 1 --cpus-per-task=${NTHREADS_OCNICEPOST}"

elif [[ "${step}" = "ecen" ]]; then

nth_max=$((npe_node_max / npe_node_ecen))
Expand Down
7 changes: 7 additions & 0 deletions env/S4.env
Original file line number Diff line number Diff line change
Expand Up @@ -177,6 +177,13 @@ elif [[ "${step}" = "atmos_products" ]]; then

export USE_CFP="YES" # Use MPMD for downstream product generation

elif [[ "${step}" = "oceanice_products" ]]; then

nth_max=$((npe_node_max / npe_node_oceanice_products))

export NTHREADS_OCNICEPOST=${nth_oceanice_products:-1}
export APRUN_OCNICEPOST="${launcher} -n 1 --cpus-per-task=${NTHREADS_OCNICEPOST}"

elif [[ "${step}" = "ecen" ]]; then

nth_max=$((npe_node_max / npe_node_ecen))
Expand Down
7 changes: 7 additions & 0 deletions env/WCOSS2.env
Original file line number Diff line number Diff line change
Expand Up @@ -193,6 +193,13 @@ elif [[ "${step}" = "atmos_products" ]]; then

export USE_CFP="YES" # Use MPMD for downstream product generation

elif [[ "${step}" = "oceanice_products" ]]; then

nth_max=$((npe_node_max / npe_node_oceanice_products))

export NTHREADS_OCNICEPOST=${nth_oceanice_products:-1}
export APRUN_OCNICEPOST="${launcher} -n 1 -ppn ${npe_node_oceanice_products} --cpu-bind depth --depth ${NTHREADS_OCNICEPOST}"

elif [[ "${step}" = "ecen" ]]; then

nth_max=$((npe_node_max / npe_node_ecen))
Expand Down
5 changes: 2 additions & 3 deletions jobs/JGLOBAL_ARCHIVE
Original file line number Diff line number Diff line change
Expand Up @@ -14,16 +14,15 @@ YMD=${PDY} HH=${cyc} generate_com -rx COM_ATMOS_ANALYSIS COM_ATMOS_BUFR COM_ATMO
COM_ATMOS_TRACK COM_ATMOS_WMO \
COM_CHEM_HISTORY COM_CHEM_ANALYSIS\
COM_MED_RESTART \
COM_ICE_HISTORY COM_ICE_INPUT COM_ICE_RESTART \
COM_ICE_HISTORY COM_ICE_INPUT COM_ICE_RESTART COM_ICE_GRIB \
COM_OBS COM_TOP \
COM_OCEAN_HISTORY COM_OCEAN_INPUT COM_OCEAN_RESTART COM_OCEAN_XSECT COM_OCEAN_2D COM_OCEAN_3D \
COM_OCEAN_HISTORY COM_OCEAN_INPUT COM_OCEAN_RESTART COM_OCEAN_GRIB COM_OCEAN_NETCDF \
COM_OCEAN_ANALYSIS \
COM_WAVE_GRID COM_WAVE_HISTORY COM_WAVE_STATION \
COM_ATMOS_OZNMON COM_ATMOS_RADMON COM_ATMOS_MINMON COM_CONF

for grid in "0p25" "0p50" "1p00"; do
YMD=${PDY} HH=${cyc} GRID=${grid} generate_com -rx "COM_ATMOS_GRIB_${grid}:COM_ATMOS_GRIB_GRID_TMPL"
YMD=${PDY} HH=${cyc} GRID=${grid} generate_com -rx "COM_OCEAN_GRIB_${grid}:COM_OCEAN_GRIB_GRID_TMPL"
done

###############################################################
Expand Down
Loading

0 comments on commit 6e01467

Please sign in to comment.