LCOV - code coverage report
Current view: top level - Source - energyMonitor.cpp (source / functions) Coverage Total Hit
Test: coverage Lines: 43.0 % 135 58
Test Date: 2026-03-02 16:42:41 Functions: 59.1 % 22 13

            Line data    Source code
       1              : #include "energyMonitor.hpp"
       2              : 
       3              : #include <filesystem>
       4              : 
       5              : namespace {
       6              :    std::unique_ptr<EnergyMonitor> energyMonitor;
       7              : }
       8              : 
       9          208 : EnergyMonitor* getEnergyMonitor() { 
      10          208 :    return energyMonitor.get(); 
      11              : }
      12              : 
      13           22 : EnergyMonitor::EnergyMonitor(int period_ms): 
      14           22 :    _period(period_ms), 
      15           44 :    _ncores_phys(std::max(1, countPhysicalCores())) {}
      16              : 
      17           22 : EnergyMonitor::~EnergyMonitor() { 
      18           22 :    stop(); 
      19           22 : }
      20              : 
      21           22 : void EnergyMonitor::start() {
      22           22 :    _startTime = Clock::now();
      23           22 :    _domains = findEnergyDomains();
      24           22 :    if (_domains.empty()) {
      25           22 :       Logging::LogIt(Logging::logWarn) << "No RAPL energy domains found";
      26           22 :       return;
      27              :    }
      28              : 
      29            0 :    for (const auto& d : _domains) {
      30            0 :       const long long v = readUj(d.energy_path.c_str());
      31            0 :       if (v < 0) warnPermission(d.energy_path);
      32            0 :       _lastEnergy[d.energy_path] = v;
      33              :    }
      34              : 
      35              :    _running = true;
      36            0 :    _worker = std::thread(&EnergyMonitor::loop, this);
      37              : }
      38              : 
      39           43 : void EnergyMonitor::stop() {
      40              :    _running = false;
      41           43 :    if (_worker.joinable()) _worker.join();
      42           43 : }
      43              : 
      44            0 : double EnergyMonitor::cpuW() const { 
      45            0 :    return _cpuWLocal.load(std::memory_order_relaxed); 
      46              : }
      47              : 
      48            0 : double EnergyMonitor::ramW() const { 
      49            0 :    return _ramWLocal.load(std::memory_order_relaxed); 
      50              : }
      51              : 
      52              : #ifdef WITH_MPI
      53              : PowerResult EnergyMonitor::reducePower(int root, MPI_Comm comm) const {
      54              :    const double cpu = cpuW();
      55              :    const double ram = ramW();
      56              : 
      57              :    PowerResult result{0.0, 0.0};
      58              :    MPI_Reduce(&cpu, &result.cpu_sum, 1, MPI_DOUBLE, MPI_SUM, root, comm);
      59              :    MPI_Reduce(&ram, &result.ram_sum, 1, MPI_DOUBLE, MPI_SUM, root, comm);
      60              : 
      61              :    return result;
      62              : }
      63              : #endif
      64              : 
      65          229 : void EnergyMonitor::reportValues(double cpu, double ram, const std::string& unit, Logging::LogLevel level) const {
      66              :    if (Distributed::isMainProcess()) {
      67          229 :       Logging::LogIt(level) << "CPU " << cpu << " " << unit 
      68              :                             << ", RAM " << ram << " " << unit;
      69              :    }
      70          229 : }
      71              : 
      72            0 : void EnergyMonitor::reportPower(Logging::LogLevel level) const {
      73              : #ifdef WITH_MPI
      74              :    const PowerResult result = reducePower();
      75              :    if (Distributed::isMainProcess()) {
      76              :       reportValues(result.cpu_sum, result.ram_sum, "W", level);
      77              :    }
      78              : #else
      79            0 :    reportValues(cpuW(), ramW(), "W", level);
      80              : #endif
      81            0 : }
      82              : 
      83          229 : void EnergyMonitor::reportEnergy(TimeType durationMs, Logging::LogLevel level) const {
      84          229 :    const double hours = durationMs / 1000.0 / 3600.0;
      85              : #ifdef WITH_MPI
      86              :    const PowerResult result = reducePower();
      87              :    if (Distributed::isMainProcess()) {
      88              :       reportValues(result.cpu_sum * hours, result.ram_sum * hours, "Wh", level);
      89              :    }
      90              : #else
      91          229 :    reportValues(cpuW() * hours, ramW() * hours, "Wh", level);
      92              : #endif
      93          229 : }
      94              : 
      95          229 : void EnergyMonitor::reportCost(TimeType durationMs, Logging::LogLevel level) const {
      96          229 :    const double hours = durationMs / 1000.0 / 3600.0;
      97              : #ifdef WITH_MPI
      98              :    const PowerResult result = reducePower();
      99              :    if (Distributed::isMainProcess()) {
     100              :       const double cpuCost = (result.cpu_sum * hours) / 1000.0 * kWhPriceEuro;
     101              :       const double ramCost = (result.ram_sum * hours) / 1000.0 * kWhPriceEuro;
     102              :       Logging::LogIt(level) << "CPU " << cpuCost << " €"
     103              :                             << ", RAM " << ramCost << " €"
     104              :                             << " (" << kWhPriceEuro << " €/kWh)";
     105              :    }
     106              : #else
     107          229 :    const double cpuCost = (cpuW() * hours) / 1000.0 * kWhPriceEuro;
     108          229 :    const double ramCost = (ramW() * hours) / 1000.0 * kWhPriceEuro;
     109              :    if (Distributed::isMainProcess()) {
     110          229 :       Logging::LogIt(level) << "CPU " << cpuCost << " €"
     111              :                             << ", RAM " << ramCost << " €"
     112              :                             << " (" << kWhPriceEuro << " €/kWh)";
     113              :    }
     114              : #endif
     115          229 : }
     116              : 
     117            0 : CpuTimes EnergyMonitor::readCpuTimes() {
     118            0 :    std::ifstream f("/proc/stat");
     119              :    std::string   line;
     120            0 :    CpuTimes      t{};
     121            0 :    if (std::getline(f, line)) {
     122            0 :       std::istringstream ss(line);
     123              :       std::string        cpu;
     124            0 :       ss >> cpu >> t.user >> t.nice >> t.system >> t.idle >> t.iowait >> t.irq >> t.softirq >> t.steal;
     125            0 :    }
     126            0 :    return t;
     127            0 : }
     128              : 
     129            0 : double EnergyMonitor::computeLoadFraction(const CpuTimes& prev, const CpuTimes& cur) {
     130            0 :    const long long prevIdle  = prev.idle + prev.iowait;
     131            0 :    const long long curIdle   = cur.idle + cur.iowait;
     132            0 :    const long long prevTotal = prev.user + prev.nice + prev.system + prevIdle + prev.irq + prev.softirq + prev.steal;
     133            0 :    const long long curTotal  = cur.user + cur.nice + cur.system + curIdle + cur.irq + cur.softirq + cur.steal;
     134            0 :    const long long deltaTotal = curTotal - prevTotal;
     135            0 :    const long long deltaIdle  = curIdle - prevIdle;
     136              : 
     137            0 :    if (deltaTotal <= 0) return 0.0;
     138            0 :    return double(deltaTotal - deltaIdle) / double(deltaTotal);
     139              : }
     140              : 
     141           22 : int EnergyMonitor::countPhysicalCores() {
     142           22 :    std::ifstream f("/proc/cpuinfo");
     143              :    std::string   line;
     144              :    std::set<std::pair<int, int>> cores;
     145              :    int phys = -1, core = -1;
     146              : 
     147         2486 :    while (std::getline(f, line)) {
     148         2464 :       if (line.find("physical id") != std::string::npos)
     149          176 :          phys = std::stoi(line.substr(line.find(":") + 1));
     150         2376 :       else if (line.find("core id") != std::string::npos) {
     151           88 :          core = std::stoi(line.substr(line.find(":") + 1));
     152          176 :          if (phys >= 0 && core >= 0) cores.insert({phys, core});
     153              :       }
     154              :    }
     155           22 :    if (!cores.empty()) return static_cast<int>(cores.size());
     156              : #if defined(_SC_NPROCESSORS_ONLN)
     157            0 :    return static_cast<int>(sysconf(_SC_NPROCESSORS_ONLN));
     158              : #else
     159              :    const unsigned int n = std::thread::hardware_concurrency();
     160              :    return static_cast<int>(n > 0 ? n : 1u);
     161              : #endif
     162           22 : }
     163              : 
     164            0 : long long EnergyMonitor::readUj(const char* path) {
     165            0 :    std::ifstream f(path);
     166            0 :    if (!f) return -1;
     167            0 :    long long v = 0;
     168              :    f >> v;
     169            0 :    return v;
     170            0 : }
     171              : 
     172            0 : std::string EnergyMonitor::readFile(const std::string& path) {
     173            0 :    std::ifstream f(path);
     174              :    std::string   s;
     175            0 :    std::getline(f, s);
     176            0 :    return s;
     177            0 : }
     178              : 
     179            0 : void EnergyMonitor::warnPermission(const std::string& p) {
     180              :    static std::atomic<bool> warned{false};
     181            0 :    if (!warned.exchange(true)) {
     182            0 :       Logging::LogIt(Logging::logWarn) << "\n[WARNING] Cannot read RAPL energy counters.\n"
     183              :                                        << "Check permissions on:\n  " << p << "\n"
     184              :                                        << "Try: sudo chmod a+r /sys/class/powercap/*/energy_uj\n";
     185              :    }
     186            0 : }
     187              : 
     188           22 : std::vector<EnergyDomain> EnergyMonitor::findEnergyDomains() const {
     189           22 :    std::vector<EnergyDomain> out;
     190           66 :    for (const auto& p : std::filesystem::directory_iterator("/sys/class/powercap")) {
     191            0 :       const auto energy = p.path() / "energy_uj";
     192            0 :       const auto name   = p.path() / "name";
     193              : 
     194              :       if (std::filesystem::exists(energy) && std::filesystem::exists(name)) {
     195              :          EnergyDomain d;
     196            0 :          d.energy_path = energy.string();
     197            0 :          d.name        = readFile(name.string());
     198            0 :          out.push_back(d);
     199              :       }
     200            0 :    }
     201           22 :    return out;
     202            0 : }
     203              : 
     204            0 : void EnergyMonitor::loop() {
     205            0 :    CpuTimes prevCpu = readCpuTimes();
     206              : 
     207            0 :    while (_running) {
     208            0 :       const auto t0     = std::chrono::steady_clock::now();
     209              :       const auto eStart = _lastEnergy;
     210              : 
     211            0 :       std::this_thread::sleep_for(std::chrono::milliseconds(_period));
     212              : 
     213              :       double cpuJ = 0.0;
     214              :       double ramJ = 0.0;
     215              : 
     216            0 :       for (const auto& d : _domains) {
     217            0 :          const long long e = readUj(d.energy_path.c_str());
     218            0 :          if (e < 0) continue;
     219              : 
     220            0 :          const double deltaJ = (e - eStart.at(d.energy_path)) * 1e-6;
     221            0 :          _lastEnergy[d.energy_path] = e;
     222              : 
     223              :          std::string lname = d.name;
     224            0 :          for (auto& c : lname) c = std::tolower(c);
     225              : 
     226            0 :          if (lname.find("package") != std::string::npos || lname.find("cpu") != std::string::npos)
     227            0 :             cpuJ += deltaJ;
     228            0 :          else if (lname.find("dram") != std::string::npos || lname.find("mem") != std::string::npos)
     229            0 :             ramJ += deltaJ;
     230              :       }
     231              : 
     232            0 :       const auto   t1       = std::chrono::steady_clock::now();
     233              :       const double dt       = std::chrono::duration<double>(t1 - t0).count();
     234            0 :       const CpuTimes curCpu = readCpuTimes();
     235            0 :       const double loadFrac = computeLoadFraction(prevCpu, curCpu);
     236            0 :       prevCpu               = curCpu;
     237              : 
     238            0 :       double activePhys = loadFrac * _ncores_phys;
     239            0 :       if (activePhys < 0.1) activePhys = 1.0;
     240              : 
     241            0 :       _cpuWLocal.store(cpuJ / dt / activePhys, std::memory_order_relaxed);
     242            0 :       _ramWLocal.store(ramJ / dt / activePhys, std::memory_order_relaxed);
     243              :    }
     244            0 : }
     245              : 
     246              : namespace {
     247              :    struct EnergyMonitorInitializer {
     248           22 :       static void create(int period_ms = 500) {
     249           44 :          energyMonitor = std::make_unique<EnergyMonitor>(period_ms);
     250           22 :       }
     251              :       static void destroy() {
     252              :          energyMonitor.reset();
     253              :       }
     254              :    };
     255              : }
     256              : 
     257           22 : void initEnergyMonitor(int period_ms) {
     258           22 :    EnergyMonitorInitializer::create(period_ms);
     259           22 :    if (energyMonitor) energyMonitor->start();
     260           22 : }
     261              : 
     262           21 : void finalizeEnergyMonitor() {
     263           21 :    if (energyMonitor) {
     264           21 :       energyMonitor->stop();
     265              :       const TimeType totalDuration = getTimeDiff(energyMonitor->getStartTime());
     266              :       if (Distributed::isMainProcess()) {
     267           21 :          Logging::LogIt(Logging::logInfo) << "Total execution time: " << totalDuration << " ms";
     268           21 :          energyMonitor->reportEnergy(totalDuration, Logging::logInfo);
     269           21 :          energyMonitor->reportCost(totalDuration, Logging::logInfo);
     270              :       }
     271              :       EnergyMonitorInitializer::destroy();
     272              :    }
     273           21 : }
        

Generated by: LCOV version 2.0-1