diff --git a/.github/workflows/ubuntu-bionic.yml b/.github/workflows/ubuntu-bionic.yml index f6abfd7da..e6ec813f7 100644 --- a/.github/workflows/ubuntu-bionic.yml +++ b/.github/workflows/ubuntu-bionic.yml @@ -66,7 +66,7 @@ jobs: echo "LD_LIBRARY_PATH=/opt/omnitrace/lib:${LD_LIBRARY_PATH}" >> $GITHUB_ENV - name: Configure CMake - timeout-minutes: 5 + timeout-minutes: 10 run: cmake --version && if [ -z "${{ matrix.mpi }}" ]; then USE_MPI=OFF; else USE_MPI=ON; fi && diff --git a/.github/workflows/ubuntu-focal-external-rocm.yml b/.github/workflows/ubuntu-focal-external-rocm.yml index 6f2d4296d..0524bc5ea 100644 --- a/.github/workflows/ubuntu-focal-external-rocm.yml +++ b/.github/workflows/ubuntu-focal-external-rocm.yml @@ -48,7 +48,7 @@ jobs: echo "LD_LIBRARY_PATH=/opt/omnitrace/lib:/opt/dyninst/lib:/opt/elfutils/lib:${LD_LIBRARY_PATH}" >> $GITHUB_ENV - name: Configure CMake - timeout-minutes: 5 + timeout-minutes: 10 run: git config --global --add safe.directory ${PWD} && cmake --version && diff --git a/.github/workflows/ubuntu-focal-external.yml b/.github/workflows/ubuntu-focal-external.yml index 072a12d76..7a355af65 100644 --- a/.github/workflows/ubuntu-focal-external.yml +++ b/.github/workflows/ubuntu-focal-external.yml @@ -76,7 +76,7 @@ jobs: echo "LD_LIBRARY_PATH=/opt/omnitrace/lib:${LD_LIBRARY_PATH}" >> $GITHUB_ENV - name: Configure CMake - timeout-minutes: 5 + timeout-minutes: 10 run: git config --global --add safe.directory ${PWD} && cmake --version && diff --git a/.github/workflows/ubuntu-focal.yml b/.github/workflows/ubuntu-focal.yml index ed51386ff..de61f9e09 100644 --- a/.github/workflows/ubuntu-focal.yml +++ b/.github/workflows/ubuntu-focal.yml @@ -40,7 +40,7 @@ jobs: echo "LD_LIBRARY_PATH=/opt/omnitrace/lib:${LD_LIBRARY_PATH}" >> $GITHUB_ENV - name: Configure CMake - timeout-minutes: 5 + timeout-minutes: 10 run: cmake --version && if [ -z "${{ matrix.mpi }}" ]; then USE_MPI=OFF; else USE_MPI=ON; fi && diff --git a/VERSION b/VERSION index 9084fa2f7..524cb5524 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -1.1.0 +1.1.1 diff --git a/source/docs/features.md b/source/docs/features.md index bd6a3a49a..a8d9ab5bf 100644 --- a/source/docs/features.md +++ b/source/docs/features.md @@ -34,6 +34,7 @@ manage extensions, resources, data, etc. - Built-in MPI support - Kokkos-Tools support +- OpenMP-Tools (OMPT) ### GPU Metrics @@ -66,7 +67,6 @@ manage extensions, resources, data, etc. ### Third-party API support -- OpenMP-Tools (OMPT) - TAU - LIKWID - Caliper diff --git a/source/docs/python.md b/source/docs/python.md index 6505c6df2..0d7fae4f7 100644 --- a/source/docs/python.md +++ b/source/docs/python.md @@ -124,26 +124,26 @@ Using `omnitrace-python ./example.py` with `OMNITRACE_USE_TIMEMORY=ON` and `OMNI | LABEL | COUNT | DEPTH | METRIC | SUM | |---------------------------------------------------|--------|--------|------------|--------| | |0>>> run | 1 | 0 | trip_count | 1 | -| |0>>> |_fib | 10946 | 1 | trip_count | 10946 | -| |0>>> |_fib | 4181 | 2 | trip_count | 4181 | -| |0>>> |_fib | 2584 | 3 | trip_count | 2584 | -| |0>>> |_fib | 1597 | 4 | trip_count | 1597 | -| |0>>> |_fib | 987 | 5 | trip_count | 987 | -| |0>>> |_fib | 610 | 6 | trip_count | 610 | -| |0>>> |_fib | 377 | 7 | trip_count | 377 | -| |0>>> |_fib | 233 | 8 | trip_count | 233 | -| |0>>> |_fib | 144 | 9 | trip_count | 144 | -| |0>>> |_fib | 89 | 10 | trip_count | 89 | -| |0>>> |_fib | 55 | 11 | trip_count | 55 | -| |0>>> |_fib | 34 | 12 | trip_count | 34 | -| |0>>> |_fib | 21 | 13 | trip_count | 21 | -| |0>>> |_fib | 13 | 14 | trip_count | 13 | -| |0>>> |_fib | 8 | 15 | trip_count | 8 | -| |0>>> |_fib | 5 | 16 | trip_count | 5 | -| |0>>> |_fib | 3 | 17 | trip_count | 3 | -| |0>>> |_fib | 2 | 18 | trip_count | 2 | -| |0>>> |_fib | 1 | 19 | trip_count | 1 | -| |0>>> |_fib | 1 | 20 | trip_count | 1 | +| |0>>> |_fib | 1 | 1 | trip_count | 1 | +| |0>>> |_fib | 2 | 2 | trip_count | 2 | +| |0>>> |_fib | 4 | 3 | trip_count | 4 | +| |0>>> |_fib | 8 | 4 | trip_count | 8 | +| |0>>> |_fib | 16 | 5 | trip_count | 16 | +| |0>>> |_fib | 32 | 6 | trip_count | 32 | +| |0>>> |_fib | 64 | 7 | trip_count | 64 | +| |0>>> |_fib | 128 | 8 | trip_count | 128 | +| |0>>> |_fib | 256 | 9 | trip_count | 256 | +| |0>>> |_fib | 512 | 10 | trip_count | 512 | +| |0>>> |_fib | 1024 | 11 | trip_count | 1024 | +| |0>>> |_fib | 2026 | 12 | trip_count | 2026 | +| |0>>> |_fib | 3632 | 13 | trip_count | 3632 | +| |0>>> |_fib | 5020 | 14 | trip_count | 5020 | +| |0>>> |_fib | 4760 | 15 | trip_count | 4760 | +| |0>>> |_fib | 2942 | 16 | trip_count | 2942 | +| |0>>> |_fib | 1152 | 17 | trip_count | 1152 | +| |0>>> |_fib | 274 | 18 | trip_count | 274 | +| |0>>> |_fib | 36 | 19 | trip_count | 36 | +| |0>>> |_fib | 2 | 20 | trip_count | 2 | | |0>>> |_inefficient | 1 | 1 | trip_count | 1 | |-------------------------------------------------------------------------------------------| ``` @@ -205,26 +205,26 @@ The results for both of the source-level instrumentation modes are identical to | LABEL | COUNT | DEPTH | METRIC | SUM | |---------------------------------------------------|--------|--------|------------|--------| | |0>>> run | 1 | 0 | trip_count | 1 | -| |0>>> |_fib | 10946 | 1 | trip_count | 10946 | -| |0>>> |_fib | 4181 | 2 | trip_count | 4181 | -| |0>>> |_fib | 2584 | 3 | trip_count | 2584 | -| |0>>> |_fib | 1597 | 4 | trip_count | 1597 | -| |0>>> |_fib | 987 | 5 | trip_count | 987 | -| |0>>> |_fib | 610 | 6 | trip_count | 610 | -| |0>>> |_fib | 377 | 7 | trip_count | 377 | -| |0>>> |_fib | 233 | 8 | trip_count | 233 | -| |0>>> |_fib | 144 | 9 | trip_count | 144 | -| |0>>> |_fib | 89 | 10 | trip_count | 89 | -| |0>>> |_fib | 55 | 11 | trip_count | 55 | -| |0>>> |_fib | 34 | 12 | trip_count | 34 | -| |0>>> |_fib | 21 | 13 | trip_count | 21 | -| |0>>> |_fib | 13 | 14 | trip_count | 13 | -| |0>>> |_fib | 8 | 15 | trip_count | 8 | -| |0>>> |_fib | 5 | 16 | trip_count | 5 | -| |0>>> |_fib | 3 | 17 | trip_count | 3 | -| |0>>> |_fib | 2 | 18 | trip_count | 2 | -| |0>>> |_fib | 1 | 19 | trip_count | 1 | -| |0>>> |_fib | 1 | 20 | trip_count | 1 | +| |0>>> |_fib | 1 | 1 | trip_count | 1 | +| |0>>> |_fib | 2 | 2 | trip_count | 2 | +| |0>>> |_fib | 4 | 3 | trip_count | 4 | +| |0>>> |_fib | 8 | 4 | trip_count | 8 | +| |0>>> |_fib | 16 | 5 | trip_count | 16 | +| |0>>> |_fib | 32 | 6 | trip_count | 32 | +| |0>>> |_fib | 64 | 7 | trip_count | 64 | +| |0>>> |_fib | 128 | 8 | trip_count | 128 | +| |0>>> |_fib | 256 | 9 | trip_count | 256 | +| |0>>> |_fib | 512 | 10 | trip_count | 512 | +| |0>>> |_fib | 1024 | 11 | trip_count | 1024 | +| |0>>> |_fib | 2026 | 12 | trip_count | 2026 | +| |0>>> |_fib | 3632 | 13 | trip_count | 3632 | +| |0>>> |_fib | 5020 | 14 | trip_count | 5020 | +| |0>>> |_fib | 4760 | 15 | trip_count | 4760 | +| |0>>> |_fib | 2942 | 16 | trip_count | 2942 | +| |0>>> |_fib | 1152 | 17 | trip_count | 1152 | +| |0>>> |_fib | 274 | 18 | trip_count | 274 | +| |0>>> |_fib | 36 | 19 | trip_count | 36 | +| |0>>> |_fib | 2 | 20 | trip_count | 2 | | |0>>> |_inefficient | 1 | 1 | trip_count | 1 | |-------------------------------------------------------------------------------------------| ``` diff --git a/source/docs/user_api.md b/source/docs/user_api.md index a6c93093e..2d452fa7b 100644 --- a/source/docs/user_api.md +++ b/source/docs/user_api.md @@ -125,7 +125,7 @@ custom_push_region(const char* name) ### User API Output ```console -$ omnitrace -l --min-address-range=0 --min-address-range-loop=0 --min-instructions=8 -E custom_push_region -o -- ./user-api +$ omnitrace -l --min-instructions=8 -E custom_push_region -o -- ./user-api ... $ export OMNITRACE_USE_TIMEMORY=ON $ export OMNITRACE_USE_PID=OFF @@ -150,63 +150,83 @@ Pushing custom region :: initialization [./user-api.inst] Iterations: 100 [./user-api.inst] fibonacci(20)... Pushing custom region :: thread_creation -Pushing custom region :: run(20) x 100 Pushing custom region :: thread_wait Pushing custom region :: run(20) x 100 Pushing custom region :: run(20) x 100 Pushing custom region :: run(20) x 100 Pushing custom region :: run(20) x 100 +Pushing custom region :: run(20) x 100 [./user-api.inst] fibonacci(20) x 4 = 3382500 +[omnitrace][86267][0][omnitrace_finalize] finalizing... -[omnitrace][2637959][0] omnitrace : 2.716905 sec wall_clock, 1.216 mb peak_rss, 3.680000 sec cpu_clock, 135.4 % cpu_util [laps: 1] -[omnitrace][2637959][0] user-api.inst/thread-0 : 2.715708 sec wall_clock, 2.354223 sec thread_cpu_clock, 86.7 % thread_cpu_util, 1.216 mb peak_rss [laps: 1] -[omnitrace][2637959][0] user-api.inst/thread-1 : 0.329802 sec wall_clock, 0.329739 sec thread_cpu_clock, 100.0 % thread_cpu_util, 0.000 mb peak_rss [laps: 1] -[omnitrace][2637959][0] user-api.inst/thread-2 : 0.355981 sec wall_clock, 0.335795 sec thread_cpu_clock, 94.3 % thread_cpu_util, 0.528 mb peak_rss [laps: 1] -[omnitrace][2637959][0] user-api.inst/thread-3 : 0.341329 sec wall_clock, 0.331214 sec thread_cpu_clock, 97.0 % thread_cpu_util, 0.456 mb peak_rss [laps: 1] -[omnitrace][2637959][0] user-api.inst/thread-4 : 0.360631 sec wall_clock, 0.330374 sec thread_cpu_clock, 91.6 % thread_cpu_util, 0.600 mb peak_rss [laps: 1] -[wall_clock]|0> Outputting 'omnitrace-example-output/wall_clock.json'... -[wall_clock]|0> Outputting 'omnitrace-example-output/wall_clock.tree.json'... -[wall_clock]|0> Outputting 'omnitrace-example-output/wall_clock.txt'... +[omnitrace][86267][0] omnitrace : 5.190895 sec wall_clock, 2.748 mb peak_rss, 6.330000 sec cpu_clock, 121.9 % cpu_util [laps: 1] +[omnitrace][86267][0] user-api.inst/thread-0 : 5.078713 sec wall_clock, 4.722415 sec thread_cpu_clock, 93.0 % thread_cpu_util, 1.276 mb peak_rss [laps: 1] +[omnitrace][86267][0] user-api.inst/thread-1 : 0.322248 sec wall_clock, 0.322191 sec thread_cpu_clock, 100.0 % thread_cpu_util, 1.000 mb peak_rss [laps: 1] +[omnitrace][86267][0] user-api.inst/thread-2 : 0.323255 sec wall_clock, 0.323194 sec thread_cpu_clock, 100.0 % thread_cpu_util, 0.000 mb peak_rss [laps: 1] +[omnitrace][86267][0] user-api.inst/thread-3 : 0.323569 sec wall_clock, 0.323484 sec thread_cpu_clock, 100.0 % thread_cpu_util, 1.092 mb peak_rss [laps: 1] +[omnitrace][86267][0] user-api.inst/thread-4 : 0.324178 sec wall_clock, 0.324057 sec thread_cpu_clock, 100.0 % thread_cpu_util, 1.184 mb peak_rss [laps: 1] +[omnitrace][86267][0] Post-processing 51 cpu frequency and memory usage entries... +[omnitrace][wall_clock]|0> Outputting 'omnitrace-user-api.inst-output/wall_clock.json'... +[omnitrace][wall_clock]|0> Outputting 'omnitrace-user-api.inst-output/wall_clock.tree.json'... +[omnitrace][wall_clock]|0> Outputting 'omnitrace-user-api.inst-output/wall_clock.txt'... -[metadata::manager::finalize]> Outputting 'omnitrace-example-output/metadata.json' and 'omnitrace-example-output/functions.json'... +[omnitrace][manager::finalize][metadata]> Outputting 'omnitrace-user-api.inst-output/metadata.json' and 'omnitrace-user-api.inst-output/functions.json'... +[omnitrace][86267][0][omnitrace_finalize] Finalized $ cat omnitrace-example-output/wall_clock.txt -|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| REAL-CLOCK TIMER (I.E. WALL-CLOCK TIMER) | -|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| LABEL | COUNT | DEPTH | METRIC | UNITS | SUM | MEAN | MIN | MAX | VAR | STDDEV | % SELF | -|-----------------------------------------------------------------------|---------|--------|------------|--------|----------|----------|----------|----------|----------|----------|--------| -| |0>>> ./user-api.inst | 1 | 0 | wall_clock | sec | 2.715611 | 2.715611 | 2.715611 | 2.715611 | 0.000000 | 0.000000 | 0.0 | -| |0>>> |_initialization | 1 | 1 | wall_clock | sec | 0.000001 | 0.000001 | 0.000001 | 0.000001 | 0.000000 | 0.000000 | 100.0 | -| |0>>> |_thread_creation | 1 | 1 | wall_clock | sec | 0.000170 | 0.000170 | 0.000170 | 0.000170 | 0.000000 | 0.000000 | 100.0 | -| |0>>> |_thread_wait | 1 | 1 | wall_clock | sec | 0.360751 | 0.360751 | 0.360751 | 0.360751 | 0.000000 | 0.000000 | 0.0 | -| |1>>> |_run(20) x 100 | 1 | 2 | wall_clock | sec | 0.329472 | 0.329472 | 0.329472 | 0.329472 | 0.000000 | 0.000000 | 100.0 | -| |3>>> |_run(20) x 100 | 1 | 2 | wall_clock | sec | 0.331028 | 0.331028 | 0.331028 | 0.331028 | 0.000000 | 0.000000 | 100.0 | -| |2>>> |_run(20) x 100 | 1 | 2 | wall_clock | sec | 0.335554 | 0.335554 | 0.335554 | 0.335554 | 0.000000 | 0.000000 | 100.0 | -| |4>>> |_run(20) x 100 | 1 | 2 | wall_clock | sec | 0.330220 | 0.330220 | 0.330220 | 0.330220 | 0.000000 | 0.000000 | 100.0 | -| |0>>> |_run | 1 | 1 | wall_clock | sec | 2.354618 | 2.354618 | 2.354618 | 2.354618 | 0.000000 | 0.000000 | 0.0 | -| |0>>> |_run(20) x 100 | 1 | 2 | wall_clock | sec | 2.354600 | 2.354600 | 2.354600 | 2.354600 | 0.000000 | 0.000000 | 48.3 | -| |0>>> |_fib | 1094600 | 3 | wall_clock | sec | 1.217671 | 0.000001 | 0.000000 | 0.000055 | 0.000000 | 0.000002 | 41.3 | -| |0>>> |_fib | 418100 | 4 | wall_clock | sec | 0.714197 | 0.000002 | 0.000000 | 0.000050 | 0.000000 | 0.000002 | 38.1 | -| |0>>> |_fib | 258400 | 5 | wall_clock | sec | 0.441874 | 0.000002 | 0.000000 | 0.000047 | 0.000000 | 0.000002 | 37.9 | -| |0>>> |_fib | 159700 | 6 | wall_clock | sec | 0.274224 | 0.000002 | 0.000000 | 0.000044 | 0.000000 | 0.000002 | 37.9 | -| |0>>> |_fib | 98700 | 7 | wall_clock | sec | 0.170399 | 0.000002 | 0.000000 | 0.000042 | 0.000000 | 0.000002 | 37.7 | -| |0>>> |_fib | 61000 | 8 | wall_clock | sec | 0.106093 | 0.000002 | 0.000000 | 0.000039 | 0.000000 | 0.000002 | 37.5 | -| |0>>> |_fib | 37700 | 9 | wall_clock | sec | 0.066316 | 0.000002 | 0.000000 | 0.000036 | 0.000000 | 0.000002 | 40.2 | -| |0>>> |_fib | 23300 | 10 | wall_clock | sec | 0.039640 | 0.000002 | 0.000000 | 0.000033 | 0.000000 | 0.000002 | 38.2 | -| |0>>> |_fib | 14400 | 11 | wall_clock | sec | 0.024504 | 0.000002 | 0.000000 | 0.000030 | 0.000000 | 0.000002 | 37.9 | -| |0>>> |_fib | 8900 | 12 | wall_clock | sec | 0.015219 | 0.000002 | 0.000000 | 0.000027 | 0.000000 | 0.000002 | 38.1 | -| |0>>> |_fib | 5500 | 13 | wall_clock | sec | 0.009417 | 0.000002 | 0.000000 | 0.000024 | 0.000000 | 0.000002 | 38.3 | -| |0>>> |_fib | 3400 | 14 | wall_clock | sec | 0.005806 | 0.000002 | 0.000000 | 0.000021 | 0.000000 | 0.000002 | 38.4 | -| |0>>> |_fib | 2100 | 15 | wall_clock | sec | 0.003576 | 0.000002 | 0.000000 | 0.000019 | 0.000000 | 0.000002 | 38.4 | -| |0>>> |_fib | 1300 | 16 | wall_clock | sec | 0.002201 | 0.000002 | 0.000000 | 0.000016 | 0.000000 | 0.000002 | 40.3 | -| |0>>> |_fib | 800 | 17 | wall_clock | sec | 0.001315 | 0.000002 | 0.000000 | 0.000014 | 0.000000 | 0.000002 | 42.1 | -| |0>>> |_fib | 500 | 18 | wall_clock | sec | 0.000762 | 0.000002 | 0.000000 | 0.000010 | 0.000000 | 0.000001 | 42.1 | -| |0>>> |_fib | 300 | 19 | wall_clock | sec | 0.000441 | 0.000001 | 0.000000 | 0.000008 | 0.000000 | 0.000001 | 47.8 | -| |0>>> |_fib | 200 | 20 | wall_clock | sec | 0.000230 | 0.000001 | 0.000000 | 0.000006 | 0.000000 | 0.000001 | 49.0 | -| |0>>> |_fib | 100 | 21 | wall_clock | sec | 0.000117 | 0.000001 | 0.000001 | 0.000003 | 0.000000 | 0.000000 | 84.5 | -| |0>>> |_fib | 100 | 22 | wall_clock | sec | 0.000018 | 0.000000 | 0.000000 | 0.000001 | 0.000000 | 0.000000 | 100.0 | -| |0>>> std::vector >::~vector | 1 | 0 | wall_clock | sec | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 100.0 | -|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| REAL-CLOCK TIMER (I.E. WALL-CLOCK TIMER) | +|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| LABEL | COUNT | DEPTH | METRIC | UNITS | SUM | MEAN | MIN | MAX | VAR | STDDEV | % SELF | +|---------------------------------------------------------------------------------|--------|--------|------------|--------|----------|----------|----------|----------|----------|----------|--------| +| |0>>> ./user-api.inst | 1 | 0 | wall_clock | sec | 5.078521 | 5.078521 | 5.078521 | 5.078521 | 0.000000 | 0.000000 | 0.0 | +| |0>>> |_initialization | 1 | 1 | wall_clock | sec | 0.000004 | 0.000004 | 0.000004 | 0.000004 | 0.000000 | 0.000000 | 100.0 | +| |0>>> |_thread_creation | 1 | 1 | wall_clock | sec | 0.000159 | 0.000159 | 0.000159 | 0.000159 | 0.000000 | 0.000000 | 100.0 | +| |0>>> |_thread_wait | 1 | 1 | wall_clock | sec | 0.355307 | 0.355307 | 0.355307 | 0.355307 | 0.000000 | 0.000000 | 0.0 | +| |0>>> |_std::vector >::begin | 1 | 2 | wall_clock | sec | 0.000001 | 0.000001 | 0.000001 | 0.000001 | 0.000000 | 0.000000 | 100.0 | +| |0>>> |_std::vector >::end | 1 | 2 | wall_clock | sec | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 100.0 | +| |0>>> |_pthread_join | 4 | 2 | wall_clock | sec | 0.355257 | 0.088814 | 0.000001 | 0.333144 | 0.026559 | 0.162970 | 100.0 | +| |2>>> |_start_thread | 1 | 3 | wall_clock | sec | 0.000032 | 0.000032 | 0.000032 | 0.000032 | 0.000000 | 0.000000 | 100.0 | +| |1>>> |_start_thread | 1 | 3 | wall_clock | sec | 0.000036 | 0.000036 | 0.000036 | 0.000036 | 0.000000 | 0.000000 | 100.0 | +| |3>>> |_start_thread | 1 | 3 | wall_clock | sec | 0.000034 | 0.000034 | 0.000034 | 0.000034 | 0.000000 | 0.000000 | 100.0 | +| |4>>> |_start_thread | 1 | 3 | wall_clock | sec | 0.000039 | 0.000039 | 0.000039 | 0.000039 | 0.000000 | 0.000000 | 100.0 | +| |0>>> |_run | 1 | 1 | wall_clock | sec | 4.722993 | 4.722993 | 4.722993 | 4.722993 | 0.000000 | 0.000000 | 0.0 | +| |0>>> |_std::char_traits::length | 1 | 2 | wall_clock | sec | 0.000001 | 0.000001 | 0.000001 | 0.000001 | 0.000000 | 0.000000 | 100.0 | +| |0>>> |_std::distance | 1 | 2 | wall_clock | sec | 0.000001 | 0.000001 | 0.000001 | 0.000001 | 0.000000 | 0.000000 | 100.0 | +| |0>>> |_std::operator+, std::allocator > | 2 | 2 | wall_clock | sec | 0.000002 | 0.000001 | 0.000001 | 0.000001 | 0.000000 | 0.000000 | 100.0 | +| |0>>> |_run(20) x 100 | 1 | 2 | wall_clock | sec | 4.722951 | 4.722951 | 4.722951 | 4.722951 | 0.000000 | 0.000000 | 0.0 | +| |0>>> |_run [{94,25}-{96,25}] | 1 | 3 | wall_clock | sec | 4.722925 | 4.722925 | 4.722925 | 4.722925 | 0.000000 | 0.000000 | 0.0 | +| |0>>> |_fib | 100 | 4 | wall_clock | sec | 4.722718 | 0.047227 | 0.046713 | 0.051987 | 0.000000 | 0.000625 | 0.0 | +| |0>>> |_fib | 200 | 5 | wall_clock | sec | 4.722302 | 0.023612 | 0.017827 | 0.034091 | 0.000032 | 0.005627 | 0.0 | +| |0>>> |_fib | 400 | 6 | wall_clock | sec | 4.721485 | 0.011804 | 0.006790 | 0.023003 | 0.000016 | 0.004024 | 0.0 | +| |0>>> |_fib | 800 | 7 | wall_clock | sec | 4.719858 | 0.005900 | 0.002564 | 0.016078 | 0.000006 | 0.002498 | 0.1 | +| |0>>> |_fib | 1600 | 8 | wall_clock | sec | 4.716572 | 0.002948 | 0.000977 | 0.011849 | 0.000002 | 0.001465 | 0.1 | +| |0>>> |_fib | 3200 | 9 | wall_clock | sec | 4.709918 | 0.001472 | 0.000371 | 0.008246 | 0.000001 | 0.000831 | 0.3 | +| |0>>> |_fib | 6400 | 10 | wall_clock | sec | 4.696775 | 0.000734 | 0.000140 | 0.005111 | 0.000000 | 0.000461 | 0.6 | +| |0>>> |_fib | 12800 | 11 | wall_clock | sec | 4.670093 | 0.000365 | 0.000050 | 0.003166 | 0.000000 | 0.000253 | 1.1 | +| |0>>> |_fib | 25600 | 12 | wall_clock | sec | 4.617496 | 0.000180 | 0.000017 | 0.001959 | 0.000000 | 0.000137 | 2.3 | +| |0>>> |_fib | 51200 | 13 | wall_clock | sec | 4.512671 | 0.000088 | 0.000004 | 0.001212 | 0.000000 | 0.000074 | 4.6 | +| |0>>> |_fib | 102400 | 14 | wall_clock | sec | 4.304142 | 0.000042 | 0.000000 | 0.000752 | 0.000000 | 0.000039 | 9.6 | +| |0>>> |_fib | 202600 | 15 | wall_clock | sec | 3.892580 | 0.000019 | 0.000000 | 0.000469 | 0.000000 | 0.000021 | 19.0 | +| |0>>> |_fib | 363200 | 16 | wall_clock | sec | 3.151143 | 0.000009 | 0.000000 | 0.000293 | 0.000000 | 0.000011 | 33.2 | +| |0>>> |_fib | 502000 | 17 | wall_clock | sec | 2.105217 | 0.000004 | 0.000000 | 0.000183 | 0.000000 | 0.000006 | 49.1 | +| |0>>> |_fib | 476000 | 18 | wall_clock | sec | 1.071652 | 0.000002 | 0.000000 | 0.000114 | 0.000000 | 0.000004 | 63.6 | +| |0>>> |_fib | 294200 | 19 | wall_clock | sec | 0.390193 | 0.000001 | 0.000000 | 0.000071 | 0.000000 | 0.000003 | 75.3 | +| |0>>> |_fib | 115200 | 20 | wall_clock | sec | 0.096190 | 0.000001 | 0.000000 | 0.000043 | 0.000000 | 0.000002 | 84.4 | +| |0>>> |_fib | 27400 | 21 | wall_clock | sec | 0.015020 | 0.000001 | 0.000000 | 0.000025 | 0.000000 | 0.000001 | 91.1 | +| |0>>> |_fib | 3600 | 22 | wall_clock | sec | 0.001336 | 0.000000 | 0.000000 | 0.000013 | 0.000000 | 0.000001 | 96.3 | +| |0>>> |_fib | 200 | 23 | wall_clock | sec | 0.000050 | 0.000000 | 0.000000 | 0.000001 | 0.000000 | 0.000000 | 100.0 | +| |0>>> |_std::char_traits::length | 1 | 3 | wall_clock | sec | 0.000001 | 0.000001 | 0.000001 | 0.000001 | 0.000000 | 0.000000 | 100.0 | +| |0>>> |_std::distance | 1 | 3 | wall_clock | sec | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 100.0 | +| |0>>> |_std::operator+, std::allocator > | 2 | 3 | wall_clock | sec | 0.000001 | 0.000001 | 0.000000 | 0.000001 | 0.000000 | 0.000000 | 100.0 | +| |0>>> |_std::operator& | 1 | 1 | wall_clock | sec | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 100.0 | +| |0>>> std::vector >::~vector | 1 | 0 | wall_clock | sec | 0.000045 | 0.000045 | 0.000045 | 0.000045 | 0.000000 | 0.000000 | 32.7 | +| |0>>> |_std::thread::~thread | 4 | 1 | wall_clock | sec | 0.000030 | 0.000007 | 0.000007 | 0.000009 | 0.000000 | 0.000001 | 31.2 | +| |0>>> |_std::thread::joinable | 4 | 2 | wall_clock | sec | 0.000021 | 0.000005 | 0.000005 | 0.000006 | 0.000000 | 0.000001 | 89.4 | +| |0>>> |_std::thread::id::id | 4 | 3 | wall_clock | sec | 0.000001 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 100.0 | +| |0>>> |_std::operator== | 4 | 3 | wall_clock | sec | 0.000001 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 100.0 | +| |0>>> |_std::allocator_traits >::deallocate | 1 | 1 | wall_clock | sec | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 100.0 | +| |0>>> |_std::allocator::~allocator | 1 | 1 | wall_clock | sec | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 100.0 | +|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| ``` diff --git a/source/lib/omnitrace/library.cpp b/source/lib/omnitrace/library.cpp index 08f9db5f4..bbe92dab7 100644 --- a/source/lib/omnitrace/library.cpp +++ b/source/lib/omnitrace/library.cpp @@ -696,8 +696,10 @@ omnitrace_init_tooling_hidden() if(get_state() != State::Finalized) { if(get_use_sampling()) sampling::shutdown(); - if(thread_data::instance()) - thread_data::instance()->stop(); + auto& _thr_bundle = thread_data::instance(); + if(_thr_bundle && _thr_bundle->get() && + _thr_bundle->get()->get_is_running()) + _thr_bundle->stop(); } } }; (void) _thread_setup; @@ -744,6 +746,7 @@ omnitrace_init_tooling_hidden() _data.allocator.destroy(_v); _data.allocator.deallocate(_v, 1); _data.bundles.erase(_data.bundles.begin() + (i - 1)); + break; } } }; @@ -1122,7 +1125,6 @@ omnitrace_finalize_hidden(void) if(itr && itr->get() && !itr->get()->get_is_running()) { - continue; std::string _msg = JOIN("", *itr); auto _pos = _msg.find(">>> "); if(_pos != std::string::npos) _msg = _msg.substr(_pos + 5); diff --git a/source/lib/omnitrace/library/components/pthread_create_gotcha.cpp b/source/lib/omnitrace/library/components/pthread_create_gotcha.cpp index 7d1389000..030c6e245 100644 --- a/source/lib/omnitrace/library/components/pthread_create_gotcha.cpp +++ b/source/lib/omnitrace/library/components/pthread_create_gotcha.cpp @@ -109,6 +109,8 @@ pthread_create_gotcha::wrapper::wrapper(routine_t _routine, void* _arg, void* pthread_create_gotcha::wrapper::operator()() const { + using thread_bundle_data_t = thread_data; + if(is_shutdown && *is_shutdown) { if(m_promise) m_promise->set_value(); @@ -134,6 +136,10 @@ pthread_create_gotcha::wrapper::operator()() const if(_tid >= 0) { get_execution_time(_tid)->second = comp::wall_clock::record(); + auto& _thr_bundle = thread_bundle_data_t::instance(); + if(_thr_bundle && _thr_bundle->get() && + _thr_bundle->get()->get_is_running()) + _thr_bundle->stop(); pthread_create_gotcha::shutdown(_tid); } set_thread_state(ThreadState::Completed); @@ -146,6 +152,13 @@ pthread_create_gotcha::wrapper::operator()() const { _tid = threading::get_id(); threading::set_thread_name(TIMEMORY_JOIN(" ", "Thread", _tid).c_str()); + if(!thread_bundle_data_t::instances().at(_tid)) + { + thread_data::construct( + TIMEMORY_JOIN("", get_exe_name(), "/thread-", threading::get_id()), + quirk::config{}); + thread_bundle_data_t::instances().at(_tid)->start(); + } if(bundles && bundles_mutex) { std::unique_lock _lk{ *bundles_mutex }; diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 5656107fb..abd88ee9c 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -886,12 +886,15 @@ foreach(_VERSION ${OMNITRACE_PYTHON_VERSIONS}) omnitrace_add_python_test( NAME python-source-check - COMMAND ${OMNITRACE_CAT_COMMAND} + COMMAND + ${_PYTHON_EXECUTABLE} ${CMAKE_CURRENT_LIST_DIR}/validate-timemory-json.py + -l run fib fib fib fib fib inefficient _sum -c 5 5 10 20 30 10 5 5 -d 0 1 + 2 3 4 5 1 2 -m trip_count -i PYTHON_VERSION ${_VERSION} - FILE omnitrace-tests-output/python-source/${_VERSION}/trip_count.txt - PASS_REGEX - "(\\\| \\\|0>>> run \\\| 5).*(\\\| \\\|0>>> \\\|_fib \\\| 40).*(\\\| \\\|0>>> \\\|_fib \\\| 5).*(\\\| \\\|0>>> \\\|_inefficient \\\| 5).*(\\\| \\\|0>>> \\\|__sum \\\| 5)" + FILE omnitrace-tests-output/python-source/${_VERSION}/trip_count.json DEPENDS python-source-${_VERSION} + PASS_REGEX + "omnitrace-tests-output/python-source/${_VERSION}/trip_count.json validated" ENVIRONMENT "${_python_environment}") else() omnitrace_message( diff --git a/tests/validate-timemory-json.py b/tests/validate-timemory-json.py new file mode 100755 index 000000000..f915fc718 --- /dev/null +++ b/tests/validate-timemory-json.py @@ -0,0 +1,69 @@ +#!/usr/bin/env python3 + +import sys +import json +import argparse + + +def validate_json(data, labels, counts, depths): + expected = [] + for litr, citr, ditr in zip(labels, counts, depths): + entry = [] + _label = litr + if ditr > 0: + _label = "{}|_{}".format(" " * (ditr - 1), litr) + entry = [_label, citr, ditr] + expected.append(entry) + + for ditr, eitr in zip(data, expected): + _prefix = ditr["prefix"] + _depth = ditr["depth"] + _count = ditr["entry"]["laps"] + _idx = _prefix.find(">>>") + if _idx is not None: + _prefix = _prefix[(_idx + 4) :] + if _prefix != eitr[0]: + raise RuntimeError(f"Mismatched prefix: {_prefix} vs. {eitr[0]}") + if _count != eitr[1]: + raise RuntimeError(f"Mismatched depth: {_depth} vs. {eitr[2]}") + if _depth != eitr[2]: + raise RuntimeError(f"Mismatched depth: {_depth} vs. {eitr[2]}") + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument("-m", "--metric", type=str, help="JSON metric", required=True) + parser.add_argument( + "-l", "--labels", nargs="+", type=str, help="Expected labels", default=[] + ) + parser.add_argument( + "-c", "--counts", nargs="+", type=int, help="Expected counts", default=[] + ) + parser.add_argument( + "-d", "--depths", nargs="+", type=int, help="Expected depths", default=[] + ) + parser.add_argument("-i", "--input", type=str, help="Input file", required=True) + + args = parser.parse_args() + + if len(args.labels) != len(args.counts) or len(args.labels) != len(args.depths): + raise RuntimeError( + "The same number of labels, counts, and depths must be specified" + ) + + ret = 0 + with open(args.input) as f: + data = json.load(f) + try: + validate_json( + data["timemory"][args.metric]["ranks"][0]["graph"], + args.labels, + args.counts, + args.depths, + ) + except RuntimeError as e: + print(f"{e}") + ret = 1 + if ret == 0: + print(f"{args.input} validated") + sys.exit(ret)