// Adaptive chunk size optimization with multi-armed bandit - always enabled
//
// Chunk size configuration (all sizes in bytes).
// Every expression macro is fully parenthesized so that it always expands as
// a single operand. Without the parentheses an expression such as
// `x / CHUNK_SIZE_MIN` or `x % CHUNK_SIZE_MAX` would bind to the first factor
// only (`x / 1024 * 1024 * 16`) and silently compute the wrong value.
#define CHUNK_SIZE_START (1024 * 1024 * 32)    // Starting chunk size: 32MB
#define CHUNK_SIZE_MIN (1024 * 1024 * 16)      // Minimum chunk size: 16MB (increased from 4MB to prevent premature convergence)
#define CHUNK_SIZE_MAX (1024 * 1024 * 128)     // Maximum chunk size: 128MB
#define CHUNK_SIZE_STEP_UP (1024 * 1024 * 4)   // Increase step: 4MB (symmetric with step down)
#define CHUNK_SIZE_STEP_DOWN (1024 * 1024 * 4) // Decrease step: 4MB (symmetric exploration)
#define CHUNK_MEASURE_INTERVAL 64              // Measure performance every 64 chunks
#define WARMUP_MEASUREMENTS 16                 // Skip first 16 measurements (cache writes)

// Multi-armed bandit exploration parameters
#define EXPLORATION_EPSILON 0.10   // 10% exploration rate (epsilon-greedy)
#define REEXPLORATION_INTERVAL 500 // Force re-exploration every 500 chunks

// Buffer sizes - always use maximum for adaptive mode, so a buffer sized with
// either macro can hold any chunk size the optimizer may select.
#define CHUNK_SIZE (CHUNK_SIZE_MAX)
#define TFNG_DATA_SIZE (CHUNK_SIZE_MAX)
+ void performExploration(Drive *drive); // Execute exploration phase + inline double calcProgress(); int iRewindDrive(fileDescriptor file); long getDriveSizeInBytes(fileDescriptor file); diff --git a/scripts/reHDDLogUploader.bash b/scripts/reHDDLogUploader.bash deleted file mode 100644 index b384aa2..0000000 --- a/scripts/reHDDLogUploader.bash +++ /dev/null @@ -1,6 +0,0 @@ -#!/bin/bash -# remove comment for the following to activate log telemetie -curl -k -T /root/reHDD/reHDD.log -u "__Place_your_token_here__:" -H 'X-Requested-With: XMLHttpRequest' https://schuttercloud.com/public.php/webdav/`echo $(date '+%Y-%m-%d_%H-%M')`_reHDD.log -rm -f /root/reHDD/reHDD.log - - diff --git a/scripts/reHDDLogUploader.service b/scripts/reHDDLogUploader.service deleted file mode 100644 index 724addd..0000000 --- a/scripts/reHDDLogUploader.service +++ /dev/null @@ -1,18 +0,0 @@ -[Unit] -Description=reHDD log uploader -After=syslog.target -After=network.target -After=network-online.target -Wants=network-online.target - -[Service] -Type=oneshot -User=root -Group=root -RemainAfterExit=yes -ExecStart=/usr/bin/bash /root/reHDD/scripts/reHDDLogUploader.bash - -[Install] -WantedBy=multi-user.target - - diff --git a/scripts/reHDDLogUploader.timer b/scripts/reHDDLogUploader.timer deleted file mode 100644 index 86cc3f6..0000000 --- a/scripts/reHDDLogUploader.timer +++ /dev/null @@ -1,11 +0,0 @@ -[Unit] -Description=reHDD log uploader timer - -[Timer] -OnActiveSec=30s -OnBootSec=10min -OnUnitActiveSec=12h - -[Install] -WantedBy=basic.target - diff --git a/src/shred.cpp b/src/shred.cpp index c37299a..2550e06 100644 --- a/src/shred.cpp +++ b/src/shred.cpp @@ -6,6 +6,8 @@ */ #include "../include/reHDD.h" +#include // For rand(), srand() +#include // For time() to seed random number generator using namespace std; #ifdef __cplusplus @@ -21,10 +23,306 @@ const static char *randomsrc = (char *)"/dev/urandom"; Shred::Shred() { + // Seed random number generator for epsilon-greedy exploration + 
srand(static_cast(time(nullptr))); + + // Allocate aligned buffers for maximum chunk size + if (posix_memalign((void **)&caTfngData, 4096, CHUNK_SIZE_MAX) != 0) + { + Logger::logThis()->error("Failed to allocate aligned buffer for tfng data"); + caTfngData = nullptr; + } + + if (posix_memalign((void **)&caReadBuffer, 4096, CHUNK_SIZE_MAX) != 0) + { + Logger::logThis()->error("Failed to allocate aligned buffer for read buffer"); + caReadBuffer = nullptr; + } + + // Initialize adaptive tracking variables + currentChunkSize = CHUNK_SIZE_START; + bestChunkSize = CHUNK_SIZE_START; + chunkCounter = 0; + totalChunkCounter = 0; // Track total chunks for periodic re-exploration + warmupCounter = 0; // Track warm-up measurements + bestThroughputMBps = 0.0; + lastThroughputMBps = 0.0; + bytesWrittenInMeasurement = 0; + throughputIncreasing = true; + + // Initialize multi-armed bandit exploration state + explorationMode = false; + explorationChunkSize = CHUNK_SIZE_START; + + Logger::logThis()->info("Adaptive chunk size optimization ENABLED (Multi-Armed Bandit) - Starting with " + + to_string(currentChunkSize / (1024 * 1024)) + " MB chunks"); + Logger::logThis()->info("Configuration: min=" + to_string(CHUNK_SIZE_MIN / (1024 * 1024)) + + "MB, max=" + to_string(CHUNK_SIZE_MAX / (1024 * 1024)) + + "MB, step=" + to_string(CHUNK_SIZE_STEP_UP / (1024 * 1024)) + "MB"); + Logger::logThis()->info("Exploration: " + to_string((int)(EXPLORATION_EPSILON * 100)) + + "% epsilon-greedy + periodic every " + to_string(REEXPLORATION_INTERVAL) + " chunks"); + Logger::logThis()->info("Warm-up: First " + to_string(WARMUP_MEASUREMENTS) + " measurements ignored (cold start protection)"); } Shred::~Shred() { + if (caTfngData != nullptr) + { + free(caTfngData); + caTfngData = nullptr; + } + if (caReadBuffer != nullptr) + { + free(caReadBuffer); + caReadBuffer = nullptr; + } +} + +/** + * \brief Start performance measurement interval + * \return void + */ +void Shred::startMeasurement() +{ + 
measurementStartTime = std::chrono::high_resolution_clock::now(); + bytesWrittenInMeasurement = 0; + chunkCounter = 0; +} + +/** + * \brief shred drive with shred + * \param pointer of Drive instance + * \param file descriptor for signaling + * \return 0 on success, -1 on error + */ +void Shred::evaluateThroughput(Drive *drive) +{ + auto measurementEndTime = std::chrono::high_resolution_clock::now(); + std::chrono::duration elapsed = measurementEndTime - measurementStartTime; + double elapsedSeconds = elapsed.count(); + + if (elapsedSeconds > 0.0) + { + double throughputMBps = (bytesWrittenInMeasurement / (1024.0 * 1024.0)) / elapsedSeconds; + lastThroughputMBps = throughputMBps; + + // Warm-up period - ignore first measurements + warmupCounter++; + bool isWarmup = (warmupCounter <= WARMUP_MEASUREMENTS); + + if (isWarmup) + { + Logger::logThis()->info("WARM-UP #" + to_string(warmupCounter) + "/" + to_string(WARMUP_MEASUREMENTS) + + " - ChunkSize: " + to_string(currentChunkSize / (1024 * 1024)) + " MB, " + + "Throughput: " + to_string((int)throughputMBps) + " MB/s (not used for optimization)" + + " - Drive: " + drive->getSerial()); + } + else + { + Logger::logThis()->info("Throughput measurement - ChunkSize: " + + to_string(currentChunkSize / (1024 * 1024)) + " MB, " + + "Throughput: " + to_string((int)throughputMBps) + " MB/s, " + + "Best: " + to_string((int)bestThroughputMBps) + " MB/s" + + " - Drive: " + drive->getSerial()); + + // Check if this is better than our best (only after warm-up) + if (throughputMBps > bestThroughputMBps) + { + bestThroughputMBps = throughputMBps; + bestChunkSize = currentChunkSize; + throughputIncreasing = true; + + Logger::logThis()->info("NEW BEST throughput: " + to_string((int)bestThroughputMBps) + + " MB/s with " + to_string(currentChunkSize / (1024 * 1024)) + + " MB chunks - Drive: " + drive->getSerial()); + } + else if (currentChunkSize == bestChunkSize) + { + // Update best throughput when measuring at best chunk size + // This 
ensures bestThroughputMBps reflects CURRENT performance, not old burst + if (throughputMBps < bestThroughputMBps) + { + Logger::logThis()->info("Updating best throughput: " + + to_string((int)bestThroughputMBps) + " MB/s -> " + + to_string((int)throughputMBps) + " MB/s " + + "(sustained performance at best chunk size: " + + to_string(bestChunkSize / (1024 * 1024)) + " MB)" + + " - Drive: " + drive->getSerial()); + bestThroughputMBps = throughputMBps; + } + throughputIncreasing = false; + } + else + { + throughputIncreasing = false; + } + } + } + + // Adjust chunk size for next measurement interval (skip during warm-up) + if (warmupCounter > WARMUP_MEASUREMENTS) + { + adjustChunkSize(drive); + } + + // Start new measurement + startMeasurement(); +} + +/** + * \brief Determine if we should explore (epsilon-greedy + periodic re-exploration) + * \return true if should explore, false if should exploit + */ +bool Shred::shouldExplore() +{ + // Periodic re-exploration: every REEXPLORATION_INTERVAL chunks + if (totalChunkCounter > 0 && (totalChunkCounter % REEXPLORATION_INTERVAL) == 0) + { + return true; + } + + // Epsilon-greedy: random exploration with probability EXPLORATION_EPSILON + double randomValue = static_cast(rand()) / RAND_MAX; + return (randomValue < EXPLORATION_EPSILON); +} + +/** + * \brief Perform exploration - try a random chunk size + * \param pointer to Drive instance + * \return void + */ +void Shred::performExploration(Drive *drive) +{ + size_t savedChunkSize = currentChunkSize; + + // Generate random chunk size between MIN and MAX (aligned to STEP boundaries) + // Calculate in MB to avoid overflow + size_t minMB = CHUNK_SIZE_MIN / (1024 * 1024); + size_t maxMB = CHUNK_SIZE_MAX / (1024 * 1024); + size_t stepMB = CHUNK_SIZE_STEP_UP / (1024 * 1024); + + // Number of possible steps: (max - min) / step + size_t numSteps = (maxMB - minMB) / stepMB; + + // Generate random step: 0 to numSteps (inclusive) + // Using proper modulo to ensure range [0, numSteps] 
+ int randVal = rand(); + size_t randomStep = static_cast(randVal) % (numSteps + 1); + + // Calculate exploration chunk size in MB, then convert to bytes + size_t explorationMB = minMB + (randomStep * stepMB); + explorationChunkSize = explorationMB * 1024 * 1024; + + // Clamp to valid range (safety check) + if (explorationChunkSize < CHUNK_SIZE_MIN) + explorationChunkSize = CHUNK_SIZE_MIN; + if (explorationChunkSize > CHUNK_SIZE_MAX) + explorationChunkSize = CHUNK_SIZE_MAX; + + // Enter exploration mode + explorationMode = true; + currentChunkSize = explorationChunkSize; + + // Enhanced logging with debug info + Logger::logThis()->info("EXPLORATION MODE: Testing " + + to_string(explorationChunkSize / (1024 * 1024)) + " MB chunks " + + "(randomStep=" + to_string(randomStep) + "/" + to_string(numSteps) + + ", rand=" + to_string(randVal) + ", " + + "was " + to_string(savedChunkSize / (1024 * 1024)) + " MB, " + + "best: " + to_string(bestChunkSize / (1024 * 1024)) + " MB)" + + " - Drive: " + drive->getSerial()); +} + +/** + * \brief Adjust chunk size based on throughput trend (Multi-Armed Bandit) + * \param pointer to Drive instance + * \return void + */ +void Shred::adjustChunkSize(Drive *drive) +{ + size_t oldChunkSize = currentChunkSize; + + // Check if we should explore instead of exploit + if (shouldExplore()) + { + performExploration(drive); + return; + } + + // Exit exploration mode if we were in it + if (explorationMode) + { + explorationMode = false; + + // CRITICAL: Return to best known chunk size, not current + if (currentChunkSize != bestChunkSize) + { + currentChunkSize = bestChunkSize; + Logger::logThis()->info("EXPLORATION ENDED - Returning to best known: " + + to_string(bestChunkSize / (1024 * 1024)) + " MB" + + " (exploration tested " + to_string(oldChunkSize / (1024 * 1024)) + " MB)" + + " - Drive: " + drive->getSerial()); + } + else + { + Logger::logThis()->info("EXPLORATION ENDED - Staying at current best: " + + to_string(bestChunkSize / (1024 * 
1024)) + " MB" + + " - Drive: " + drive->getSerial()); + } + return; + } + + // Normal exploitation mode: hill-climbing with symmetric steps + if (throughputIncreasing) + { + // Throughput is improving - increase chunk size (symmetric step) + currentChunkSize += CHUNK_SIZE_STEP_UP; + + // Clamp to maximum + if (currentChunkSize > CHUNK_SIZE_MAX) + { + currentChunkSize = CHUNK_SIZE_MAX; + Logger::logThis()->info("Reached maximum chunk size: " + + to_string(currentChunkSize / (1024 * 1024)) + " MB" + + " - Drive: " + drive->getSerial()); + } + } + else + { + // Throughput decreased - decrease chunk size (symmetric step) + if (currentChunkSize > CHUNK_SIZE_STEP_DOWN) + { + currentChunkSize -= CHUNK_SIZE_STEP_DOWN; + } + + // Clamp to minimum + if (currentChunkSize < CHUNK_SIZE_MIN) + { + currentChunkSize = CHUNK_SIZE_MIN; + Logger::logThis()->info("Reached minimum chunk size: " + + to_string(currentChunkSize / (1024 * 1024)) + " MB" + + " (best remains: " + to_string(bestChunkSize / (1024 * 1024)) + " MB)" + + " - Drive: " + drive->getSerial()); + } + } + + if (oldChunkSize != currentChunkSize) + { + Logger::logThis()->info("Adjusted chunk size: " + + to_string(oldChunkSize / (1024 * 1024)) + " MB -> " + + to_string(currentChunkSize / (1024 * 1024)) + " MB" + + " (best: " + to_string(bestChunkSize / (1024 * 1024)) + " MB)" + + " - Drive: " + drive->getSerial()); + } +} + +/** + * \brief Get current chunk size for adaptive mode + * \return current chunk size in bytes + */ +size_t Shred::getCurrentChunkSize() const +{ + return currentChunkSize; } /** @@ -76,6 +374,13 @@ int Shred::shredDrive(Drive *drive, int *ipSignalFd) const char *cpDrivePath = sDrivePath.c_str(); unsigned char ucKey[TFNG_KEY_SIZE]; + // Validate buffers were allocated + if (caTfngData == nullptr || caReadBuffer == nullptr) + { + Logger::logThis()->error("Shred-Task: Aligned buffers not allocated! 
- Drive: " + drive->getSerial()); + return -1; + } + // Open random source Logger::logThis()->info("Shred-Task: Opening random source: " + string(randomsrc) + " - Drive: " + drive->getSerial()); randomSrcFileDiscr = open(randomsrc, O_RDONLY | O_LARGEFILE); @@ -182,14 +487,16 @@ int Shred::shredDrive(Drive *drive, int *ipSignalFd) } Drive::ShredSpeed shredSpeed = drive->sShredSpeed.load(); - shredSpeed.chronoShredTimestamp = std::chrono::system_clock::now(); // set inital timestamp for speed metric - shredSpeed.ulSpeedMetricBytesWritten = 0U; // uses to calculate speed metric + shredSpeed.chronoShredTimestamp = std::chrono::system_clock::now(); + shredSpeed.ulSpeedMetricBytesWritten = 0U; drive->sShredSpeed.store(shredSpeed); #ifdef LOG_LEVEL_HIGH Logger::logThis()->info("Shred-Task: Bytes-Size of Drive: " + to_string(this->ulDriveByteSize) + " - Drive: " + drive->getSerial()); #endif + // Start first measurement interval + startMeasurement(); // Main shredding loop for (unsigned int uiShredIterationCounter = 0U; uiShredIterationCounter < SHRED_ITERATIONS; uiShredIterationCounter++) { @@ -200,44 +507,27 @@ int Shred::shredDrive(Drive *drive, int *ipSignalFd) if (uiShredIterationCounter == (SHRED_ITERATIONS - 1)) { // last shred iteration --> overwrite (just the write chunk) bytes with zeros instead with random data - memset(caTfngData, 0U, CHUNK_SIZE); + memset(caTfngData, 0U, CHUNK_SIZE_MAX); } while (ulDriveByteCounter < ulDriveByteSize) { - // Check if task was aborted - if (drive->state.load() != Drive::TaskState::SHRED_ACTIVE) - { - Logger::logThis()->info("Shred-Task: Aborted by user at " + to_string(d32Percent) + - "% in iteration " + to_string(uiShredIterationCounter + 1) + - " - Drive: " + drive->getSerial()); - drive->setTaskPercentage(0); - d32Percent = 0.00; - d32TmpPercent = 0.00; - cleanup(); - - // CRITICAL: Mark as NOT shredded on abort - drive->state = Drive::TaskState::NONE; - drive->bWasShredded = false; - drive->bWasChecked = false; - return -1; 
- } + size_t activeChunkSize = getCurrentChunkSize(); int iBytesToShred = 0; if (uiShredIterationCounter != (SHRED_ITERATIONS - 1)) { - // Generate random data for this chunk - tfng_prng_genrandom(caTfngData, TFNG_DATA_SIZE); + tfng_prng_genrandom(caTfngData, activeChunkSize); } - if ((ulDriveByteSize - ulDriveByteCounter) < CHUNK_SIZE) + if ((ulDriveByteSize - ulDriveByteCounter) < activeChunkSize) { iBytesToShred = (ulDriveByteSize - ulDriveByteCounter); } else { - iBytesToShred = CHUNK_SIZE; + iBytesToShred = activeChunkSize; } int iByteShredded = write(driveFileDiscr, caTfngData, iBytesToShred); @@ -267,7 +557,19 @@ int Shred::shredDrive(Drive *drive, int *ipSignalFd) ulDriveByteCounter += iByteShredded; ulDriveByteOverallCount += iByteShredded; + + bytesWrittenInMeasurement += iByteShredded; + chunkCounter++; + totalChunkCounter++; // Track total chunks for periodic re-exploration + + // Evaluate throughput after measurement interval + if (chunkCounter >= CHUNK_MEASURE_INTERVAL) + { + evaluateThroughput(drive); + } + d32Percent = this->calcProgress(); + #ifdef LOG_LEVEL_HIGH Logger::logThis()->info("Shred-Task: ByteCount: " + to_string(ulDriveByteCounter) + " - iteration: " + to_string((uiShredIterationCounter + 1)) + @@ -277,12 +579,23 @@ int Shred::shredDrive(Drive *drive, int *ipSignalFd) if ((d32Percent - d32TmpPercent) >= 0.01) { - // set shred percantage + // set shred percentage drive->setTaskPercentage(d32TmpPercent); d32TmpPercent = d32Percent; // signal process in shredding write(*ipSignalFd, "A", 1); } + + if (drive->state != Drive::TaskState::SHRED_ACTIVE) + { + drive->setTaskPercentage(0); + d32Percent = 0.00; + d32TmpPercent = 0.00; + ulDriveByteCounter = 0U; + Logger::logThis()->info("Aborted shred for: " + drive->getModelName() + "-" + drive->getSerial()); + cleanup(); + return -1; + } } Logger::logThis()->info("Shred-Task: Iteration " + to_string(uiShredIterationCounter + 1) + "/" + @@ -304,12 +617,17 @@ int Shred::shredDrive(Drive *drive, int 
*ipSignalFd) } } + Logger::logThis()->info("Shred completed - Optimal chunk size: " + + to_string(bestChunkSize / (1024 * 1024)) + " MB, " + + "Best throughput: " + to_string((int)bestThroughputMBps) + " MB/s" + + " - Drive: " + drive->getSerial()); + // All shred iterations completed successfully tfng_prng_seedkey(NULL); // ONLY mark as shredded if ALL iterations completed AND fsync succeeded drive->bWasShredded = true; - Logger::logThis()->info("Shred-Task finished successfully - Drive: " + drive->getModelName() + "-" + drive->getSerial() + " @" + address.str()); + Logger::logThis()->info("Shred-Task finished - Drive: " + drive->getModelName() + "-" + drive->getSerial() + " @" + address.str()); #ifdef ZERO_CHECK drive->state = Drive::TaskState::CHECK_ACTIVE; @@ -449,6 +767,8 @@ unsigned int Shred::uiCalcChecksum(fileDescriptor file, Drive *drive, int *ipSig Logger::logThis()->info("Check-Task: Starting checksum verification - Drive: " + drive->getSerial()); + size_t checkChunkSize = CHUNK_SIZE_MAX; + while (ulDriveByteCounter < ulDriveByteSize) { // Check if task was aborted @@ -459,14 +779,13 @@ unsigned int Shred::uiCalcChecksum(fileDescriptor file, Drive *drive, int *ipSig } int iBytesToCheck = 0; - - if ((ulDriveByteSize - ulDriveByteCounter) < CHUNK_SIZE) + if ((ulDriveByteSize - ulDriveByteCounter) < checkChunkSize) { iBytesToCheck = (ulDriveByteSize - ulDriveByteCounter); } else { - iBytesToCheck = CHUNK_SIZE; + iBytesToCheck = checkChunkSize; } int iReadBytes = read(file, caReadBuffer, iBytesToCheck); @@ -534,4 +853,4 @@ void Shred::cleanup() close(randomSrcFileDiscr); randomSrcFileDiscr = -1; } -} \ No newline at end of file +}