diff --git a/src/gpu.cu b/src/gpu.cu index 7d00c6b..6fbb239 100644 --- a/src/gpu.cu +++ b/src/gpu.cu @@ -1866,7 +1866,7 @@ void GpuThread::run() { uint64_t scaled_total_inputs = stage.total_inputs * stage.inputs_multiplier; auto [scaled_input_speed, input_speed_unit] = scale_si(scaled_total_inputs / stage.total_time); auto [scaled_output_speed, output_speed_unit] = scale_si(stage.total_outputs / stage.total_time); - std::printf("%-20s - %9.3f ms | %7.3f %% | %12" PRIu64 " -> %12" PRIu64 + std::printf("%-20s - %9.3f ms | %7.3f %% | %16" PRIu64 " -> %12" PRIu64 " | 1 in %11.3f | %7.3f %cips | %7.3f %cops\n", stage.name.c_str(), stage.total_time * 1e3, stage.total_time / host_total_time * 100.0, @@ -1882,7 +1882,7 @@ void GpuThread::run() { auto [scaled_input_speed, input_speed_unit] = scale_si(total_inputs / host_total_time); auto [scaled_output_speed, output_speed_unit] = scale_si(total_outputs / host_total_time); std::printf( - "total - %9.3f ms | %7.3f %% | %12" PRIu64 + "total - %9.3f ms | %7.3f %% | %16" PRIu64 " -> %12" PRIu64 " | | %7.3f %cips | %7.3f %cops\n", host_total_time * 1e3, kernel_total_time / host_total_time * 100.0, total_inputs, total_outputs, scaled_input_speed, input_speed_unit, diff --git a/src/main.cpp b/src/main.cpp index bf6603c..449b2d9 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -14,12 +14,19 @@ #include #include #include +#include #include #include #include #include #include +static std::atomic_bool running{true}; + +static void signal_handler(int) { + running.store(false, std::memory_order_relaxed); +} + #ifdef NO_GPU constexpr bool no_gpu = true; #else @@ -244,7 +251,10 @@ int main_inner(int argc, char **argv) { } #endif - for (size_t i = 0;; i++) { + std::signal(SIGINT, signal_handler); + std::signal(SIGTERM, signal_handler); + + for (size_t i = 0; running.load(std::memory_order_relaxed); i++) { if (threads != 0) { std::lock_guard lock(cpu_outputs.mutex); while (!cpu_outputs.queue.empty()) { @@ -264,10 +274,20 @@ int main_inner(int argc, char **argv) { std::this_thread::sleep_for(std::chrono::seconds(1)); } + std::printf("\nShutting down...\n"); + #ifndef NO_GPU for (auto &thread : gpu_threads) { (*thread).stop(); } + std::printf("Waiting for GPU batches to finish...\n"); + for (auto &thread : gpu_threads) { + (*thread).join(); + } + { + uint64_t total_seeds_checked = seed_range.pos.load(std::memory_order_relaxed) - start_seed; + std::printf("Start seed: %" PRIi64 ", Total seeds checked: %" PRIu64 "\n", start_seed, total_seeds_checked); + } #endif #ifndef NO_CPU for (auto &thread : cpu_threads) { @@ -283,11 +303,6 @@ int main_inner(int argc, char **argv) { } #endif -#ifndef NO_GPU - for (auto &thread : gpu_threads) { - (*thread).join(); - } -#endif #ifndef NO_CPU for (auto &thread : cpu_threads) { (*thread).join();