From 05a3750b03200e3244e2e34280a49f7f7ddb76d8 Mon Sep 17 00:00:00 2001 From: localhorst Date: Sat, 2 May 2026 12:25:21 +0200 Subject: [PATCH] cleanup and shred multi-armed bandit --- include/reHDD.h | 2 +- include/shred.h | 56 +++++++-------- scripts/reHDDLogUploader.bash | 6 -- scripts/reHDDLogUploader.service | 18 ----- scripts/reHDDLogUploader.timer | 11 --- src/shred.cpp | 119 ++++++++++++++++++++++--------- 6 files changed, 113 insertions(+), 99 deletions(-) delete mode 100644 scripts/reHDDLogUploader.bash delete mode 100644 scripts/reHDDLogUploader.service delete mode 100644 scripts/reHDDLogUploader.timer diff --git a/include/reHDD.h b/include/reHDD.h index d63f7f0..f4e0af4 100644 --- a/include/reHDD.h +++ b/include/reHDD.h @@ -8,7 +8,7 @@ #ifndef REHDD_H_ #define REHDD_H_ -#define REHDD_VERSION "V1.4.0-dev" +#define REHDD_VERSION "V1.4.0" // Drive handling Settings #define WORSE_HOURS 19200 // mark drive if at this limit or beyond diff --git a/include/shred.h b/include/shred.h index 5949e1e..ecf5dcc 100644 --- a/include/shred.h +++ b/include/shred.h @@ -18,26 +18,22 @@ #include #include -// Adaptive chunk size optimization - uncomment to enable -#define ADAPTIVE_CHUNK_SIZE - +// Adaptive chunk size optimization with multi-armed bandit - always enabled // Chunk size configuration -#define CHUNK_SIZE_START 1024 * 1024 * 32 // Starting chunk size: 32MB -#define CHUNK_SIZE_MIN 1024 * 1024 * 4 // Minimum chunk size: 4MB -#define CHUNK_SIZE_MAX 1024 * 1024 * 128 // Maximum chunk size: 128MB -#define CHUNK_SIZE_STEP_UP 1024 * 1024 * 2 // Increase step: 2MB -#define CHUNK_SIZE_STEP_DOWN 1024 * 1024 * 4 // Decrease step: 4MB -#define CHUNK_MEASURE_INTERVAL 64 // Measure performance every 64 chunks +#define CHUNK_SIZE_START 1024 * 1024 * 32 // Starting chunk size: 32MB +#define CHUNK_SIZE_MIN 1024 * 1024 * 16 // Minimum chunk size: 16MB (increased from 4MB to prevent premature convergence) +#define CHUNK_SIZE_MAX 1024 * 1024 * 128 // Maximum chunk size: 128MB +#define 
CHUNK_SIZE_STEP_UP 1024 * 1024 * 4 // Increase step: 4MB (symmetric with step down) +#define CHUNK_SIZE_STEP_DOWN 1024 * 1024 * 4 // Decrease step: 4MB (symmetric exploration) +#define CHUNK_MEASURE_INTERVAL 64 // Measure performance every 64 chunks -#ifdef ADAPTIVE_CHUNK_SIZE -// Use max buffer size when adaptive mode is enabled +// Multi-armed bandit exploration parameters +#define EXPLORATION_EPSILON 0.10 // 10% exploration rate (epsilon-greedy) +#define REEXPLORATION_INTERVAL 500 // Force re-exploration every 500 chunks + +// Buffer sizes - always use maximum for adaptive mode #define CHUNK_SIZE CHUNK_SIZE_MAX #define TFNG_DATA_SIZE CHUNK_SIZE_MAX -#else -// Use fixed chunk size when adaptive mode is disabled -#define CHUNK_SIZE CHUNK_SIZE_START -#define TFNG_DATA_SIZE CHUNK_SIZE -#endif // #define DEMO_DRIVE_SIZE 1024*1024*256L // 256MB // #define DEMO_DRIVE_SIZE 1024*1024*1024L // 1GB @@ -52,47 +48,49 @@ protected: public: Shred(); ~Shred(); - int shredDrive(Drive* drive, int* ipSignalFd); + int shredDrive(Drive *drive, int *ipSignalFd); private: fileDescriptor randomSrcFileDiscr; fileDescriptor driveFileDiscr; -#ifdef ADAPTIVE_CHUNK_SIZE - unsigned char* caTfngData; // Dynamic buffer allocation for adaptive mode - unsigned char* caReadBuffer; // Dynamic buffer allocation for adaptive mode -#else - unsigned char caTfngData[TFNG_DATA_SIZE]; - unsigned char caReadBuffer[CHUNK_SIZE]; -#endif + unsigned char *caTfngData; + unsigned char *caReadBuffer; unsigned long ulDriveByteSize; unsigned long ulDriveByteOverallCount = 0; // all bytes shredded in all iterations + checking -> used for progress calculation double d32Percent = 0.0; double d32TmpPercent = 0.0; -#ifdef ADAPTIVE_CHUNK_SIZE // Adaptive chunk size optimization members size_t currentChunkSize; size_t bestChunkSize; unsigned int chunkCounter; + unsigned int totalChunkCounter; // Total chunks written (for periodic re-exploration) std::chrono::high_resolution_clock::time_point measurementStartTime; double 
bestThroughputMBps; double lastThroughputMBps; unsigned long bytesWrittenInMeasurement; bool throughputIncreasing; + // Multi-armed bandit exploration state + bool explorationMode; // Currently in exploration mode? + size_t explorationChunkSize; // Chunk size being tested during exploration + // Adaptive methods void startMeasurement(); - void evaluateThroughput(Drive* drive); - void adjustChunkSize(Drive* drive); + void evaluateThroughput(Drive *drive); + void adjustChunkSize(Drive *drive); size_t getCurrentChunkSize() const; -#endif + + // Multi-armed bandit methods + bool shouldExplore(); // Decide: explore or exploit? + void performExploration(Drive *drive); // Execute exploration phase inline double calcProgress(); int iRewindDrive(fileDescriptor file); long getDriveSizeInBytes(fileDescriptor file); - unsigned int uiCalcChecksum(fileDescriptor file, Drive* drive, int* ipSignalFd); + unsigned int uiCalcChecksum(fileDescriptor file, Drive *drive, int *ipSignalFd); void cleanup(); }; diff --git a/scripts/reHDDLogUploader.bash b/scripts/reHDDLogUploader.bash deleted file mode 100644 index b384aa2..0000000 --- a/scripts/reHDDLogUploader.bash +++ /dev/null @@ -1,6 +0,0 @@ -#!/bin/bash -# remove comment for the following to activate log telemetie -curl -k -T /root/reHDD/reHDD.log -u "__Place_your_token_here__:" -H 'X-Requested-With: XMLHttpRequest' https://schuttercloud.com/public.php/webdav/`echo $(date '+%Y-%m-%d_%H-%M')`_reHDD.log -rm -f /root/reHDD/reHDD.log - - diff --git a/scripts/reHDDLogUploader.service b/scripts/reHDDLogUploader.service deleted file mode 100644 index 724addd..0000000 --- a/scripts/reHDDLogUploader.service +++ /dev/null @@ -1,18 +0,0 @@ -[Unit] -Description=reHDD log uploader -After=syslog.target -After=network.target -After=network-online.target -Wants=network-online.target - -[Service] -Type=oneshot -User=root -Group=root -RemainAfterExit=yes -ExecStart=/usr/bin/bash /root/reHDD/scripts/reHDDLogUploader.bash - -[Install] 
-WantedBy=multi-user.target - - diff --git a/scripts/reHDDLogUploader.timer b/scripts/reHDDLogUploader.timer deleted file mode 100644 index 86cc3f6..0000000 --- a/scripts/reHDDLogUploader.timer +++ /dev/null @@ -1,11 +0,0 @@ -[Unit] -Description=reHDD log uploader timer - -[Timer] -OnActiveSec=30s -OnBootSec=10min -OnUnitActiveSec=12h - -[Install] -WantedBy=basic.target - diff --git a/src/shred.cpp b/src/shred.cpp index faac86a..7cdbacd 100644 --- a/src/shred.cpp +++ b/src/shred.cpp @@ -6,6 +6,8 @@ */ #include "../include/reHDD.h" +#include // For rand(), srand() +#include // For time() to seed random number generator using namespace std; #ifdef __cplusplus @@ -21,7 +23,9 @@ const static char *randomsrc = (char *)"/dev/urandom"; Shred::Shred() { -#ifdef ADAPTIVE_CHUNK_SIZE + // Seed random number generator for epsilon-greedy exploration + srand(static_cast(time(nullptr))); + // Allocate aligned buffers for maximum chunk size if (posix_memalign((void **)&caTfngData, 4096, CHUNK_SIZE_MAX) != 0) { @@ -39,19 +43,25 @@ Shred::Shred() currentChunkSize = CHUNK_SIZE_START; bestChunkSize = CHUNK_SIZE_START; chunkCounter = 0; + totalChunkCounter = 0; // Track total chunks for periodic re-exploration bestThroughputMBps = 0.0; lastThroughputMBps = 0.0; bytesWrittenInMeasurement = 0; throughputIncreasing = true; - Logger::logThis()->info("Adaptive chunk size optimization ENABLED - Starting with " + + // Initialize multi-armed bandit exploration state + explorationMode = false; + explorationChunkSize = CHUNK_SIZE_START; + + Logger::logThis()->info("Adaptive chunk size optimization ENABLED (Multi-Armed Bandit) - Starting with " + to_string(currentChunkSize / (1024 * 1024)) + " MB chunks"); -#endif + Logger::logThis()->info("Exploration strategy: " + to_string((int)(EXPLORATION_EPSILON * 100)) + + "% epsilon-greedy + periodic re-exploration every " + + to_string(REEXPLORATION_INTERVAL) + " chunks"); } Shred::~Shred() { -#ifdef ADAPTIVE_CHUNK_SIZE if (caTfngData != nullptr) { 
free(caTfngData);
@@ -62,10 +72,8 @@ Shred::~Shred()
         free(caReadBuffer);
         caReadBuffer = nullptr;
     }
-#endif
 }
 
-#ifdef ADAPTIVE_CHUNK_SIZE
 /**
  * \brief Start performance measurement interval
  * \return void
@@ -125,7 +133,55 @@ void Shred::evaluateThroughput(Drive *drive)
 }
 
 /**
- * \brief Adjust chunk size based on throughput trend
+ * \brief Decide whether the next adjustment should explore (periodic re-exploration or epsilon-greedy draw)
+ * \return true if should explore, false if should exploit
+ */
+bool Shred::shouldExplore()
+{
+    // Periodic re-exploration. NOTE(review): fires only if this is called while totalChunkCounter is an exact multiple of REEXPLORATION_INTERVAL - confirm the caller's cadence
+    if (totalChunkCounter > 0 && (totalChunkCounter % REEXPLORATION_INTERVAL) == 0)
+    {
+        return true;
+    }
+
+    // Epsilon-greedy: draw a value in [0, 1] and explore when it falls below EXPLORATION_EPSILON
+    double randomValue = static_cast<double>(rand()) / RAND_MAX;
+    return (randomValue < EXPLORATION_EPSILON);
+}
+
+/**
+ * \brief Perform exploration - switch currentChunkSize to a random size in [CHUNK_SIZE_MIN, CHUNK_SIZE_MAX] and set explorationMode
+ * \param pointer to Drive instance (its serial is written to the log line)
+ * \return void
+ */
+void Shred::performExploration(Drive *drive)
+{
+    size_t savedChunkSize = currentChunkSize;
+
+    // Number of CHUNK_SIZE_STEP_UP steps between MIN and MAX; the macros are unparenthesized products, so wrap them or the division binds to the first factor only
+    size_t numSteps = ((CHUNK_SIZE_MAX) - (CHUNK_SIZE_MIN)) / (CHUNK_SIZE_STEP_UP);
+    size_t randomStep = rand() % (numSteps + 1);
+    explorationChunkSize = (CHUNK_SIZE_MIN) + (randomStep * (CHUNK_SIZE_STEP_UP));
+
+    // Clamp to valid range (defensive; the computation above already stays within [MIN, MAX])
+    if (explorationChunkSize < CHUNK_SIZE_MIN)
+        explorationChunkSize = CHUNK_SIZE_MIN;
+    if (explorationChunkSize > CHUNK_SIZE_MAX)
+        explorationChunkSize = CHUNK_SIZE_MAX;
+
+    // Enter exploration mode: subsequent writes use the candidate size until adjustChunkSize() leaves exploration
+    explorationMode = true;
+    currentChunkSize = explorationChunkSize;
+
+    Logger::logThis()->info("EXPLORATION MODE: Testing " +
+                            to_string(explorationChunkSize / (1024 * 1024)) + " MB chunks " +
+                            "(was " + to_string(savedChunkSize / (1024 * 1024)) + " MB, best: " +
+                            to_string(bestChunkSize / (1024 * 1024)) + " MB)" +
+                            " - Drive: " + drive->getSerial());
+}
+
+/**
+ * \brief Adjust chunk size based
on throughput trend (Multi-Armed Bandit) * \param pointer to Drive instance * \return void */ @@ -133,9 +189,28 @@ void Shred::adjustChunkSize(Drive *drive) { size_t oldChunkSize = currentChunkSize; + // Check if we should explore instead of exploit + if (shouldExplore()) + { + performExploration(drive); + return; + } + + // Exit exploration mode if we were in it + if (explorationMode) + { + explorationMode = false; + currentChunkSize = bestChunkSize; // Return to best known chunk size + Logger::logThis()->info("EXPLORATION ENDED - Returning to best known: " + + to_string(bestChunkSize / (1024 * 1024)) + " MB" + + " - Drive: " + drive->getSerial()); + return; + } + + // Normal exploitation mode: hill-climbing with symmetric steps if (throughputIncreasing) { - // Throughput is improving - increase chunk size + // Throughput is improving - increase chunk size (symmetric step) currentChunkSize += CHUNK_SIZE_STEP_UP; // Clamp to maximum @@ -149,7 +224,7 @@ void Shred::adjustChunkSize(Drive *drive) } else { - // Throughput decreased - decrease chunk size to find sweet spot + // Throughput decreased - decrease chunk size (symmetric step) if (currentChunkSize > CHUNK_SIZE_STEP_DOWN) { currentChunkSize -= CHUNK_SIZE_STEP_DOWN; @@ -182,7 +257,6 @@ size_t Shred::getCurrentChunkSize() const { return currentChunkSize; } -#endif /** * \brief shred drive with shred @@ -233,14 +307,12 @@ int Shred::shredDrive(Drive *drive, int *ipSignalFd) const char *cpDrivePath = sDrivePath.c_str(); unsigned char ucKey[TFNG_KEY_SIZE]; -#ifdef ADAPTIVE_CHUNK_SIZE // Validate buffers were allocated if (caTfngData == nullptr || caReadBuffer == nullptr) { Logger::logThis()->error("Shred-Task: Aligned buffers not allocated! 
- Drive: " + drive->getSerial()); return -1; } -#endif // Open random source Logger::logThis()->info("Shred-Task: Opening random source: " + string(randomsrc) + " - Drive: " + drive->getSerial()); @@ -356,10 +428,8 @@ int Shred::shredDrive(Drive *drive, int *ipSignalFd) Logger::logThis()->info("Shred-Task: Bytes-Size of Drive: " + to_string(this->ulDriveByteSize) + " - Drive: " + drive->getSerial()); #endif -#ifdef ADAPTIVE_CHUNK_SIZE // Start first measurement interval startMeasurement(); -#endif // Main shredding loop for (unsigned int uiShredIterationCounter = 0U; uiShredIterationCounter < SHRED_ITERATIONS; uiShredIterationCounter++) { @@ -370,30 +440,18 @@ int Shred::shredDrive(Drive *drive, int *ipSignalFd) if (uiShredIterationCounter == (SHRED_ITERATIONS - 1)) { // last shred iteration --> overwrite (just the write chunk) bytes with zeros instead with random data -#ifdef ADAPTIVE_CHUNK_SIZE memset(caTfngData, 0U, CHUNK_SIZE_MAX); -#else - memset(caTfngData, 0U, CHUNK_SIZE); -#endif } while (ulDriveByteCounter < ulDriveByteSize) { -#ifdef ADAPTIVE_CHUNK_SIZE size_t activeChunkSize = getCurrentChunkSize(); -#else - size_t activeChunkSize = CHUNK_SIZE; -#endif int iBytesToShred = 0; if (uiShredIterationCounter != (SHRED_ITERATIONS - 1)) { -#ifdef ADAPTIVE_CHUNK_SIZE tfng_prng_genrandom(caTfngData, activeChunkSize); -#else - tfng_prng_genrandom(caTfngData, TFNG_DATA_SIZE); -#endif } if ((ulDriveByteSize - ulDriveByteCounter) < activeChunkSize) @@ -433,16 +491,15 @@ int Shred::shredDrive(Drive *drive, int *ipSignalFd) ulDriveByteCounter += iByteShredded; ulDriveByteOverallCount += iByteShredded; -#ifdef ADAPTIVE_CHUNK_SIZE bytesWrittenInMeasurement += iByteShredded; chunkCounter++; + totalChunkCounter++; // Track total chunks for periodic re-exploration // Evaluate throughput after measurement interval if (chunkCounter >= CHUNK_MEASURE_INTERVAL) { evaluateThroughput(drive); } -#endif d32Percent = this->calcProgress(); @@ -493,12 +550,10 @@ int 
Shred::shredDrive(Drive *drive, int *ipSignalFd) } } -#ifdef ADAPTIVE_CHUNK_SIZE Logger::logThis()->info("Shred completed - Optimal chunk size: " + to_string(bestChunkSize / (1024 * 1024)) + " MB, " + "Best throughput: " + to_string((int)bestThroughputMBps) + " MB/s" + " - Drive: " + drive->getSerial()); -#endif // All shred iterations completed successfully tfng_prng_seedkey(NULL); @@ -645,11 +700,7 @@ unsigned int Shred::uiCalcChecksum(fileDescriptor file, Drive *drive, int *ipSig Logger::logThis()->info("Check-Task: Starting checksum verification - Drive: " + drive->getSerial()); -#ifdef ADAPTIVE_CHUNK_SIZE size_t checkChunkSize = CHUNK_SIZE_MAX; -#else - size_t checkChunkSize = CHUNK_SIZE; -#endif while (ulDriveByteCounter < ulDriveByteSize) {