Skip to content

Commit

Permalink
Merge pull request #45 from UM-Bridge/hpc-fix
Browse files Browse the repository at this point in the history
Automatically clears url txt files and stops existing HQ server before starting new HQ server
  • Loading branch information
linusseelinger authored Jan 17, 2024
2 parents 1662173 + 3c13ef2 commit e27fdd9
Show file tree
Hide file tree
Showing 2 changed files with 14 additions and 4 deletions.
12 changes: 11 additions & 1 deletion hpc/LoadBalancer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,14 @@ void create_directory_if_not_existing(std::string directory) {
}
}

/// Removes every entry directly inside `directory` whose filename is exactly
/// `url-<digits>.txt` (e.g. `url-12.txt`).
///
/// The whole filename must match the pattern, so names such as `url-.txt`,
/// `xurl-1.txt` or `url-1.txt.bak` are left untouched. Sub-directories are
/// not descended into.
///
/// @param directory Path of the directory to scan.
void clear_url(std::string directory) {
    // Build the pattern once; constructing std::regex per entry recompiles it
    // on every iteration for no benefit.
    const std::regex url_file_pattern("url-\\d+\\.txt");

    for (const auto& entry : std::filesystem::directory_iterator(directory)) {
        const std::string entry_name = entry.path().filename().string();
        if (std::regex_match(entry_name, url_file_pattern)) {
            std::filesystem::remove(entry.path());
        }
    }
}

std::string get_hostname() {
char hostname[HOST_NAME_MAX];
gethostname(hostname, HOST_NAME_MAX);
Expand All @@ -39,6 +47,8 @@ int main(int argc, char *argv[])
{
create_directory_if_not_existing("urls");
create_directory_if_not_existing("sub-jobs");
clear_url("urls");
std::system("hq server stop &> /dev/null");

// Read environment variables for configuration
char const *port_cstr = std::getenv("PORT");
Expand Down Expand Up @@ -71,4 +81,4 @@ int main(int argc, char *argv[])
std::cout << "Load balancer running on host " << get_hostname()
<< " and bound to 0.0.0.0:" << port << std::endl;
umbridge::serveModels(LB_ptr_vector, "0.0.0.0", port, false);
}
}
6 changes: 3 additions & 3 deletions hpc/LoadBalancer.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ std::string getCommandOutput(const std::string command)
}

// Check every 100 ms; wait for at most `time_out` seconds
bool waitForFile(const std::string &filename, int time_out = 20)
bool waitForFile(const std::string &filename, int time_out = 100000)
{
auto start_time = std::chrono::steady_clock::now();
auto timeout = std::chrono::seconds(time_out); // wait for maximum 10 seconds
Expand Down Expand Up @@ -116,14 +116,14 @@ std::string submitHQJob()

++i;
std::cout << "Waiting for job " << job_id << " to start." << std::endl;
} while (waitForHQJobState(job_id, "RUNNING") == false && i < 3 && waitForFile("./urls/url-" + job_id + ".txt", 10) == false);
} while (waitForHQJobState(job_id, "RUNNING") == false && i < 3 && waitForFile("./urls/url-" + job_id + ".txt", 100000) == false);
// Wait for the HQ Job to start
// Also wait until job is running and url file is written
// Try maximum 3 times

std::cout << "Job " << job_id << " started." << std::endl;
// Check if the job is running
if (waitForHQJobState(job_id, "RUNNING") == false || waitForFile("./urls/url-" + job_id + ".txt", 10) == false)
if (waitForHQJobState(job_id, "RUNNING") == false || waitForFile("./urls/url-" + job_id + ".txt", 100000) == false)
{
std::cout << "Submit job failure." << std::endl;
exit(-1);
Expand Down

0 comments on commit e27fdd9

Please sign in to comment.