diff --git a/api/boinc_api.cpp b/api/boinc_api.cpp
index be7b345d3e9..6b1709f6d09 100644
--- a/api/boinc_api.cpp
+++ b/api/boinc_api.cpp
@@ -1052,7 +1052,8 @@ int boinc_report_app_status_aux(
double _fraction_done,
int other_pid,
double _bytes_sent,
- double _bytes_received
+ double _bytes_received,
+ double wss
) {
char msg_buf[MSG_CHANNEL_SIZE], buf[1024];
if (standalone) return 0;
@@ -1081,6 +1082,10 @@ int boinc_report_app_status_aux(
sprintf(buf, "%d\n", ac_state);
strlcat(msg_buf, buf, sizeof(msg_buf));
}
+ if (wss) {
+ sprintf(buf, "%f\n", wss);
+ strlcat(msg_buf, buf, sizeof(msg_buf));
+ }
#ifdef MSGS_FROM_FILE
if (fout) {
fputs(msg_buf, fout);
@@ -1100,7 +1105,7 @@ int boinc_report_app_status(
double _fraction_done
){
return boinc_report_app_status_aux(
- cpu_time, checkpoint_cpu_time, _fraction_done, 0, 0, 0
+ cpu_time, checkpoint_cpu_time, _fraction_done, 0, 0, 0, 0
);
}
diff --git a/api/boinc_api.h b/api/boinc_api.h
index acb65bb8452..758cf7e55b9 100644
--- a/api/boinc_api.h
+++ b/api/boinc_api.h
@@ -138,7 +138,8 @@ extern int boinc_upload_status(std::string& name);
extern char* boinc_msg_prefix(char*, int);
extern int boinc_report_app_status_aux(
double cpu_time, double checkpoint_cpu_time, double _fraction_done,
- int other_pid, double bytes_sent, double bytes_received
+ int other_pid, double bytes_sent, double bytes_received,
+ double wss=0    // working set size reported by the app; 0 (the default) means "not reported", so existing 6-argument callers still compile
);
extern int boinc_temporary_exit(
int delay, const char* reason=NULL, bool is_notice=false
diff --git a/client/app.cpp b/client/app.cpp
index eda742eb22b..8fbfba2c773 100644
--- a/client/app.cpp
+++ b/client/app.cpp
@@ -110,6 +110,7 @@ ACTIVE_TASK::ACTIVE_TASK() {
peak_disk_usage = 0;
once_ran_edf = false;
+ wss_from_app = 0;
fraction_done = 0;
fraction_done_elapsed_time = 0;
first_fraction_done = 0;
@@ -420,6 +421,8 @@ void ACTIVE_TASK_SET::get_memory_usage() {
// at least on Windows. Use the VM size instead.
//
pi.working_set_size_smoothed = atp->wup->rsc_memory_bound;
+ } else if (atp->wss_from_app > 0) {
+ pi.working_set_size_smoothed = .5*(pi.working_set_size_smoothed + atp->wss_from_app);
} else {
pi.working_set_size_smoothed = .5*(pi.working_set_size_smoothed + pi.working_set_size);
}
diff --git a/client/app.h b/client/app.h
index d7b306bea2f..72534512ef8 100644
--- a/client/app.h
+++ b/client/app.h
@@ -53,12 +53,14 @@ typedef int PROCESS_ID;
// Represents a job in progress.
-// When an active task is created, it is assigned a "slot"
+// When a job is started, it is assigned a "slot"
// which determines the directory it runs in.
-// This doesn't change over the life of the active task;
-// thus the task can use the slot directory for temp files
+// This doesn't change over the life of the job;
+// so it can use the slot directory for temp files
// that BOINC doesn't know about.
+// If you add anything, initialize it in the constructor
+//
struct ACTIVE_TASK {
#ifdef _WIN32
HANDLE process_handle, shm_handle;
@@ -100,8 +102,12 @@ struct ACTIVE_TASK {
// most recent CPU time reported by app
bool once_ran_edf;
- // END OF ITEMS SAVED IN STATE FILE
+ // END OF ITEMS SAVED IN STATE FILES
+ double wss_from_app;
+ // working set size reported by the app
+ // (e.g. docker_wrapper does this).
+ // If nonzero, use this instead of procinfo data
double fraction_done;
// App's estimate of how much of the work unit is done.
// Passed from the application via an API call;
diff --git a/client/app_control.cpp b/client/app_control.cpp
index b03a9875db1..14c628461cb 100644
--- a/client/app_control.cpp
+++ b/client/app_control.cpp
@@ -1439,8 +1439,23 @@ bool ACTIVE_TASK::get_app_status_msg() {
}
}
}
- parse_double(msg_buf, "", current_cpu_time);
- parse_double(msg_buf, "", checkpoint_cpu_time);
+ if (parse_double(msg_buf, "", current_cpu_time)) {
+ if (current_cpu_time < 0) {
+ msg_printf(result->project, MSG_INFO,
+ "app reporting negative CPU: %f", current_cpu_time
+ );
+ current_cpu_time = 0;
+ }
+ }
+ if (parse_double(msg_buf, "", checkpoint_cpu_time)) {
+ if (checkpoint_cpu_time < 0) {
+ msg_printf(result->project, MSG_INFO,
+ "app reporting negative checkpoint CPU: %f", checkpoint_cpu_time
+ );
+ checkpoint_cpu_time = 0;
+ }
+ }
+ parse_double(msg_buf, "", wss_from_app);
parse_double(msg_buf, "", result->fpops_per_cpu_sec);
parse_double(msg_buf, "", result->fpops_cumulative);
parse_double(msg_buf, "", result->intops_per_cpu_sec);
@@ -1470,18 +1485,6 @@ bool ACTIVE_TASK::get_app_status_msg() {
if (parse_int(msg_buf, "", i)) {
sporadic_ac_state = (SPORADIC_AC_STATE)i;
}
- if (current_cpu_time < 0) {
- msg_printf(result->project, MSG_INFO,
- "app reporting negative CPU: %f", current_cpu_time
- );
- current_cpu_time = 0;
- }
- if (checkpoint_cpu_time < 0) {
- msg_printf(result->project, MSG_INFO,
- "app reporting negative checkpoint CPU: %f", checkpoint_cpu_time
- );
- checkpoint_cpu_time = 0;
- }
return true;
}
diff --git a/client/app_test.cpp b/client/app_test.cpp
index 62b9abebd31..57a2a862cfd 100644
--- a/client/app_test.cpp
+++ b/client/app_test.cpp
@@ -26,7 +26,7 @@
// input/output files, attributes, etc.
// It currently has several test cases, selected with #ifdef
// - build the BOINC client with these changes
-// - make a BOINC data directory, say 'test'
+// - Linux: make a BOINC data directory, say 'test'
// (or you can use an existing BOINC data directory,
// in which case the client will also run existing jobs)
// - make a directory test/slots/app_test
@@ -224,6 +224,20 @@ void CLIENT_STATE::app_test_init() {
*make_file(app->project, "Dockerfile_copy", "Dockerfile", INPUT_FILE, true)
);
#endif
+#ifdef APP_DOCKER_WRAPPER_MOUNT
+ av->app_files.push_back(
+ *make_file(app->project, "docker_wrapper.exe", NULL, MAIN_PROG, false)
+ );
+ av->app_files.push_back(
+ *make_file(app->project, "worker", NULL, INPUT_FILE, false)
+ );
+ av->app_files.push_back(
+ *make_file(app->project, "job_copy.toml", "job.toml", INPUT_FILE, true)
+ );
+ av->app_files.push_back(
+ *make_file(app->project, "Dockerfile_copy", "Dockerfile", INPUT_FILE, true)
+ );
+#endif
// can put other stuff here like
#if 0
@@ -243,11 +257,17 @@ void CLIENT_STATE::app_test_init() {
);
#endif
#ifdef APP_DOCKER_WRAPPER_COPY
+ wu->command_line = "--verbose";
wu->input_files.push_back(
*make_file(proj, "infile", "in", INPUT_FILE, true)
);
#endif
-
+#ifdef APP_DOCKER_WRAPPER_MOUNT
+ wu->command_line = "--verbose";
+ wu->input_files.push_back(
+ *make_file(proj, "infile", "in", INPUT_FILE, false)
+ );
+#endif
RESULT *result = make_result(av, wu);
////////////// OUTPUT FILES /////////////////
@@ -262,6 +282,11 @@ void CLIENT_STATE::app_test_init() {
*make_file(proj, "outfile", "out", OUTPUT_FILE, true)
);
#endif
+#ifdef APP_DOCKER_WRAPPER_MOUNT
+ result->output_files.push_back(
+ *make_file(proj, "outfile", "out", OUTPUT_FILE, false)
+ );
+#endif
// tell the client not to get work or run benchmarks
//
diff --git a/samples/docker_wrapper/docker_wrapper.cpp b/samples/docker_wrapper/docker_wrapper.cpp
index 70d9df0039b..d31ebe6948d 100644
--- a/samples/docker_wrapper/docker_wrapper.cpp
+++ b/samples/docker_wrapper/docker_wrapper.cpp
@@ -30,7 +30,7 @@
// this is the first run of the job
// if the image doesn't already exist
// build image with 'docker build'
-// (need a log around the above?)
+// (need a lock around the above?)
// create the container with -v to mount slot, project dirs
// copy input files as needed
// start container
@@ -42,7 +42,7 @@
// image name
// name: lower case letters, digits, separators (. _ -); max 4096 chars
// tag: max 128 chars
-// in the universal model, each WU has a different image
+// in the universal model, each WU must have a different image
// so we'll use: boinc____
//
// container name:
@@ -54,7 +54,7 @@
// image name: boinc
// container name: boinc
// slot dir: .
-// project dir: project/
+// project dir (mount mode): project/
// enable standalone tests on Win
//
@@ -79,14 +79,16 @@ using std::string;
using std::vector;
#define POLL_PERIOD 1.0
+#define STATUS_PERIOD 10
+ // poll the container and report status once per this many main-loop iterations (each iteration sleeps POLL_PERIOD sec)
enum JOB_STATUS {JOB_IN_PROGRESS, JOB_SUCCESS, JOB_FAIL};
struct RSC_USAGE {
- double cpu_time;
+ double cpu_frac;    // CPU usage as a fraction of one CPU; get_stats() sets it to CPUPerc/100
double wss;
void clear() {
- cpu_time = 0;
+ cpu_frac = 0;
wss = 0;
}
};
@@ -127,7 +129,7 @@ char container_name[512];
APP_INIT_DATA aid;
CONFIG config;
bool running;
-bool verbose = true;
+bool verbose = false;
const char* config_file = "job.toml";
const char* dockerfile = "Dockerfile";
const char* cli_prog;
@@ -217,7 +219,24 @@ inline int run_docker_command(char* cmd, vector &out) {
retval = read_from_pipe(
ctl_wc.out_read, ctl_wc.proc_handle, output, TIMEOUT, "EOM"
);
- if (retval) return retval;
+ if (retval) {
+ const char* msg = "";
+ switch (retval) {
+ case PROC_DIED:
+ msg = "Process died";
+ break;
+ case TIMEOUT:
+ msg = "Timeout";
+ break;
+ case READ_ERROR:
+ msg = "Read Error";
+ break;
+ default:
+ break;
+ }
+ fprintf(stderr, "read_from_pipe() error: %s\n", msg);
+ return retval;
+ }
out = split(output, '\n');
#else
retval = run_command(cmd, out);
@@ -248,8 +267,9 @@ int image_exists(bool &exists) {
sprintf(cmd, "%s images", cli_prog);
int retval = run_docker_command(cmd, out);
if (retval) return retval;
+ string image_name_space = image_name + string(" ");
for (string line: out) {
- if (line.find(image_name) != string::npos) {
+ if (line.find(image_name_space) != string::npos) {
exists = true;
return 0;
}
@@ -301,7 +321,7 @@ int container_exists(bool &exists) {
int retval;
vector out;
- sprintf(cmd, "%s ps --filter \"name=%s\"",
+ sprintf(cmd, "%s ps --all --filter \"name=%s\"",
cli_prog, container_name
);
retval = run_docker_command(cmd, out);
@@ -442,7 +462,10 @@ void poll_client_msgs() {
}
}
-JOB_STATUS poll_app(RSC_USAGE &ru) {
+// check whether job has exited
+// Note: on both Podman and Docker this takes significant CPU time
+// (like .03 sec) so do it infrequently (main() does it once every STATUS_PERIOD polls)
+JOB_STATUS poll_app() {
char cmd[1024];
vector out;
int retval;
@@ -461,6 +484,48 @@ JOB_STATUS poll_app(RSC_USAGE &ru) {
return JOB_FAIL;
}
+// Get the container's CPU and memory usage using the CLI 'stats' command.
+// On success (returns 0), ru.cpu_frac is the CPU usage as a fraction
+// of one CPU (CPUPerc/100) and ru.wss is the memory figure scaled by
+// its unit (KILO/MEGA/GIGA).
+// Returns -1 if the command fails or its output can't be parsed;
+// ru is left unchanged in that case.
+// This is also surprisingly slow
+int get_stats(RSC_USAGE &ru) {
+    char cmd[1024];
+    vector<string> out;
+
+    // bounded write: never overrun cmd, whatever cli_prog/container_name hold
+    snprintf(cmd, sizeof(cmd),
+        "%s stats --no-stream --format \"{{.CPUPerc}} {{.MemUsage}}\" %s",
+        cli_prog, container_name
+    );
+    if (run_docker_command(cmd, out)) return -1;
+    if (out.empty()) return -1;
+    // output is like
+    // 0.00% 420KiB / 503.8GiB
+    // Only the first memory figure is parsed; mem_unit is the first
+    // letter of its unit suffix.
+    double cpu_pct, mem;
+    char mem_unit;
+    // sscanf() returns an int (EOF on empty input), so keep the result signed
+    const int n = sscanf(out[0].c_str(), "%lf%% %lf%c", &cpu_pct, &mem, &mem_unit);
+    if (n != 3) return -1;
+    switch (mem_unit) {
+    case 'G': case 'g': mem *= GIGA; break;
+    case 'M': case 'm': mem *= MEGA; break;
+    case 'K': case 'k': mem *= KILO; break;
+    case 'B': case 'b': break;
+    default:
+        // unrecognized unit: report failure rather than a wrong size
+        return -1;
+    }
+    // commit results only after everything parsed successfully
+    ru.cpu_frac = cpu_pct/100.;
+    ru.wss = mem;
+    return 0;
+}
+
#ifdef _WIN32
// find a WSL distro with Docker and set up a command link to it
//
@@ -528,6 +593,7 @@ int main(int argc, char** argv) {
boinc_init_options(&options);
if (boinc_is_standalone()) {
+ verbose = true;
strcpy(image_name, "boinc");
strcpy(container_name, "boinc");
strcpy(aid.project_dir, "./project");
@@ -543,6 +609,14 @@ int main(int argc, char** argv) {
}
if (verbose) config.print();
+ if (sporadic) {
+ retval = boinc_sporadic_dir(".");
+ if (retval) {
+ fprintf(stderr, "can't create sporadic files\n");
+ boinc_finish(retval);
+ }
+ }
+
#ifdef _WIN32
retval = wsl_init();
if (retval) {
@@ -578,18 +652,38 @@ int main(int argc, char** argv) {
boinc_finish(1);
}
running = true;
- while (1) {
+ double cpu_time = 0;
+ for (int i=0; ; i++) {
poll_client_msgs();
- switch(poll_app(ru)) {
- case JOB_FAIL:
- cleanup();
- boinc_finish(1);
- break;
- case JOB_SUCCESS:
- copy_files_from_container();
- cleanup();
- boinc_finish(0);
- break;
+ if (i%STATUS_PERIOD == 0) {
+ switch(poll_app()) {
+ case JOB_FAIL:
+ cleanup();
+ boinc_finish(1);
+ break;
+ case JOB_SUCCESS:
+ copy_files_from_container();
+ cleanup();
+ boinc_finish(0);
+ break;
+ default:
+ break;
+ }
+ retval = get_stats(ru);
+ if (!retval) {
+ cpu_time += STATUS_PERIOD*ru.cpu_frac;
+ if (verbose) {
+ fprintf(stderr, "reporting CPU %f WSS %f\n", cpu_time, ru.wss);
+ }
+ boinc_report_app_status_aux(
+ cpu_time,
+ 0, // checkpoint CPU time
+ 0, // frac done
+ 0, // other PID
+ 0,0, // bytes send/received
+ ru.wss
+ );
+ }
}
boinc_sleep(POLL_PERIOD);
}
diff --git a/samples/docker_wrapper/test_copy/in b/samples/docker_wrapper/test_copy/infile
similarity index 100%
rename from samples/docker_wrapper/test_copy/in
rename to samples/docker_wrapper/test_copy/infile
diff --git a/samples/vboxwrapper/vbox_common.cpp b/samples/vboxwrapper/vbox_common.cpp
index 42664be0208..cd42a816ef1 100644
--- a/samples/vboxwrapper/vbox_common.cpp
+++ b/samples/vboxwrapper/vbox_common.cpp
@@ -316,7 +316,8 @@ void VBOX_VM::report_clean(
fraction_done,
vm_pid,
bytes_sent,
- bytes_received
+ bytes_received,
+ 0
);
}
diff --git a/samples/vboxwrapper/vboxwrapper.cpp b/samples/vboxwrapper/vboxwrapper.cpp
index f2392a2916c..efc478aae00 100644
--- a/samples/vboxwrapper/vboxwrapper.cpp
+++ b/samples/vboxwrapper/vboxwrapper.cpp
@@ -890,7 +890,8 @@ int main(int argc, char** argv) {
fraction_done,
pVM->vm_pid,
bytes_sent,
- bytes_received
+ bytes_received,
+ 0
);
// Wait for up to 5 minutes for the VM to switch states.
@@ -1389,7 +1390,8 @@ int main(int argc, char** argv) {
fraction_done,
pVM->vm_pid,
bytes_sent,
- bytes_received
+ bytes_received,
+ 0
);
if (!retval) {