cleanup and shred multi-armed bandit

This commit is contained in:
2026-05-02 12:25:21 +02:00
parent 716ab5614f
commit 05a3750b03
6 changed files with 113 additions and 99 deletions
+1 -1
View File
@@ -8,7 +8,7 @@
#ifndef REHDD_H_
#define REHDD_H_
#define REHDD_VERSION "V1.4.0-dev"
#define REHDD_VERSION "V1.4.0"
// Drive handling Settings
#define WORSE_HOURS 19200 // mark drive if at this limit or beyond
+27 -29
View File
@@ -18,26 +18,22 @@
#include <string.h>
#include <chrono>
// Adaptive chunk size optimization - uncomment to enable
#define ADAPTIVE_CHUNK_SIZE
// Adaptive chunk size optimization with multi-armed bandit - always enabled
// Chunk size configuration
#define CHUNK_SIZE_START 1024 * 1024 * 32 // Starting chunk size: 32MB
#define CHUNK_SIZE_MIN 1024 * 1024 * 4 // Minimum chunk size: 4MB
#define CHUNK_SIZE_MAX 1024 * 1024 * 128 // Maximum chunk size: 128MB
#define CHUNK_SIZE_STEP_UP 1024 * 1024 * 2 // Increase step: 2MB
#define CHUNK_SIZE_STEP_DOWN 1024 * 1024 * 4 // Decrease step: 4MB
#define CHUNK_MEASURE_INTERVAL 64 // Measure performance every 64 chunks
// NOTE: every value is wrapped in parentheses so the macro expands as a single
// operand. Without them, an expression such as
//   (CHUNK_SIZE_MAX - CHUNK_SIZE_MIN) / CHUNK_SIZE_STEP_UP
// expands to "... / 1024 * 1024 * 4" and is evaluated left to right, yielding a
// wildly wrong result instead of the intended number of steps.
#define CHUNK_SIZE_START (1024 * 1024 * 32) // Starting chunk size: 32MB
#define CHUNK_SIZE_MIN (1024 * 1024 * 16) // Minimum chunk size: 16MB (increased from 4MB to prevent premature convergence)
#define CHUNK_SIZE_MAX (1024 * 1024 * 128) // Maximum chunk size: 128MB
#define CHUNK_SIZE_STEP_UP (1024 * 1024 * 4) // Increase step: 4MB (symmetric with step down)
#define CHUNK_SIZE_STEP_DOWN (1024 * 1024 * 4) // Decrease step: 4MB (symmetric exploration)
#define CHUNK_MEASURE_INTERVAL (64) // Measure performance every 64 chunks
#ifdef ADAPTIVE_CHUNK_SIZE
// Use max buffer size when adaptive mode is enabled
// Multi-armed bandit exploration parameters
#define EXPLORATION_EPSILON 0.10 // 10% exploration rate (epsilon-greedy)
#define REEXPLORATION_INTERVAL 500 // Force re-exploration every 500 chunks
// Buffer sizes - always use maximum for adaptive mode
#define CHUNK_SIZE CHUNK_SIZE_MAX
#define TFNG_DATA_SIZE CHUNK_SIZE_MAX
#else
// Use fixed chunk size when adaptive mode is disabled
#define CHUNK_SIZE CHUNK_SIZE_START
#define TFNG_DATA_SIZE CHUNK_SIZE
#endif
// #define DEMO_DRIVE_SIZE 1024*1024*256L // 256MB
// #define DEMO_DRIVE_SIZE 1024*1024*1024L // 1GB
@@ -52,47 +48,49 @@ protected:
public:
Shred();
~Shred();
int shredDrive(Drive* drive, int* ipSignalFd);
int shredDrive(Drive *drive, int *ipSignalFd);
private:
fileDescriptor randomSrcFileDiscr;
fileDescriptor driveFileDiscr;
#ifdef ADAPTIVE_CHUNK_SIZE
unsigned char* caTfngData; // Dynamic buffer allocation for adaptive mode
unsigned char* caReadBuffer; // Dynamic buffer allocation for adaptive mode
#else
unsigned char caTfngData[TFNG_DATA_SIZE];
unsigned char caReadBuffer[CHUNK_SIZE];
#endif
unsigned char *caTfngData;
unsigned char *caReadBuffer;
unsigned long ulDriveByteSize;
unsigned long ulDriveByteOverallCount = 0; // all bytes shredded in all iterations + checking -> used for progress calculation
double d32Percent = 0.0;
double d32TmpPercent = 0.0;
#ifdef ADAPTIVE_CHUNK_SIZE
// Adaptive chunk size optimization members
size_t currentChunkSize;
size_t bestChunkSize;
unsigned int chunkCounter;
unsigned int totalChunkCounter; // Total chunks written (for periodic re-exploration)
std::chrono::high_resolution_clock::time_point measurementStartTime;
double bestThroughputMBps;
double lastThroughputMBps;
unsigned long bytesWrittenInMeasurement;
bool throughputIncreasing;
// Multi-armed bandit exploration state
bool explorationMode; // Currently in exploration mode?
size_t explorationChunkSize; // Chunk size being tested during exploration
// Adaptive methods
void startMeasurement();
void evaluateThroughput(Drive* drive);
void adjustChunkSize(Drive* drive);
void evaluateThroughput(Drive *drive);
void adjustChunkSize(Drive *drive);
size_t getCurrentChunkSize() const;
#endif
// Multi-armed bandit methods
bool shouldExplore(); // Decide: explore or exploit?
void performExploration(Drive *drive); // Execute exploration phase
inline double calcProgress();
int iRewindDrive(fileDescriptor file);
long getDriveSizeInBytes(fileDescriptor file);
unsigned int uiCalcChecksum(fileDescriptor file, Drive* drive, int* ipSignalFd);
unsigned int uiCalcChecksum(fileDescriptor file, Drive *drive, int *ipSignalFd);
void cleanup();
};
-6
View File
@@ -1,6 +0,0 @@
#!/bin/bash
# Uploads the reHDD log file to a WebDAV share and removes the local copy afterwards.
# Replace the token placeholder in the curl call with a real share token to activate log telemetry.
# NOTE(review): -k disables TLS certificate verification while credentials are transmitted - confirm this is intended.

LOG_FILE="/root/reHDD/reHDD.log"
REMOTE_NAME="$(date '+%Y-%m-%d_%H-%M')_reHDD.log"

# --fail makes curl return a non-zero exit code on HTTP errors (e.g. 401/507),
# so the local log is only deleted once the upload has actually succeeded.
if curl -k --fail -T "${LOG_FILE}" -u "__Place_your_token_here__:" -H 'X-Requested-With: XMLHttpRequest' "https://schuttercloud.com/public.php/webdav/${REMOTE_NAME}"; then
    rm -f "${LOG_FILE}"
fi
-18
View File
@@ -1,18 +0,0 @@
[Unit]
Description=reHDD log uploader
After=syslog.target
After=network.target
After=network-online.target
Wants=network-online.target
[Service]
Type=oneshot
User=root
Group=root
RemainAfterExit=yes
ExecStart=/usr/bin/bash /root/reHDD/scripts/reHDDLogUploader.bash
[Install]
WantedBy=multi-user.target
-11
View File
@@ -1,11 +0,0 @@
[Unit]
Description=reHDD log uploader timer
[Timer]
OnActiveSec=30s
OnBootSec=10min
OnUnitActiveSec=12h
[Install]
WantedBy=basic.target
+85 -34
View File
@@ -6,6 +6,8 @@
*/
#include "../include/reHDD.h"
#include <cstdlib> // For rand(), srand()
#include <ctime> // For time() to seed random number generator
using namespace std;
#ifdef __cplusplus
@@ -21,7 +23,9 @@ const static char *randomsrc = (char *)"/dev/urandom";
Shred::Shred()
{
#ifdef ADAPTIVE_CHUNK_SIZE
// Seed random number generator for epsilon-greedy exploration
srand(static_cast<unsigned int>(time(nullptr)));
// Allocate aligned buffers for maximum chunk size
if (posix_memalign((void **)&caTfngData, 4096, CHUNK_SIZE_MAX) != 0)
{
@@ -39,19 +43,25 @@ Shred::Shred()
currentChunkSize = CHUNK_SIZE_START;
bestChunkSize = CHUNK_SIZE_START;
chunkCounter = 0;
totalChunkCounter = 0; // Track total chunks for periodic re-exploration
bestThroughputMBps = 0.0;
lastThroughputMBps = 0.0;
bytesWrittenInMeasurement = 0;
throughputIncreasing = true;
Logger::logThis()->info("Adaptive chunk size optimization ENABLED - Starting with " +
// Initialize multi-armed bandit exploration state
explorationMode = false;
explorationChunkSize = CHUNK_SIZE_START;
Logger::logThis()->info("Adaptive chunk size optimization ENABLED (Multi-Armed Bandit) - Starting with " +
to_string(currentChunkSize / (1024 * 1024)) + " MB chunks");
#endif
Logger::logThis()->info("Exploration strategy: " + to_string((int)(EXPLORATION_EPSILON * 100)) +
"% epsilon-greedy + periodic re-exploration every " +
to_string(REEXPLORATION_INTERVAL) + " chunks");
}
Shred::~Shred()
{
#ifdef ADAPTIVE_CHUNK_SIZE
if (caTfngData != nullptr)
{
free(caTfngData);
@@ -62,10 +72,8 @@ Shred::~Shred()
free(caReadBuffer);
caReadBuffer = nullptr;
}
#endif
}
#ifdef ADAPTIVE_CHUNK_SIZE
/**
* \brief Start performance measurement interval
* \return void
@@ -125,7 +133,55 @@ void Shred::evaluateThroughput(Drive *drive)
}
/**
* \brief Adjust chunk size based on throughput trend
* \brief Determine if we should explore (epsilon-greedy + periodic re-exploration)
* \return true if should explore, false if should exploit
*/
bool Shred::shouldExplore()
{
    // Forced path: exploration is due whenever the running chunk total is a
    // non-zero exact multiple of REEXPLORATION_INTERVAL.
    // NOTE(review): this only matches if the function is invoked at exactly such a
    // multiple; if it is only reached once per CHUNK_MEASURE_INTERVAL chunks, hits
    // will be sparser than the interval suggests - confirm against the caller.
    const bool periodicDue = (totalChunkCounter > 0) &&
                             ((totalChunkCounter % REEXPLORATION_INTERVAL) == 0);

    // Random path (epsilon-greedy): draw a value in [0, 1] and explore when it
    // falls below EXPLORATION_EPSILON. Thanks to short-circuit evaluation rand()
    // is only consumed when the forced path did not already trigger.
    return periodicDue ||
           ((static_cast<double>(rand()) / RAND_MAX) < EXPLORATION_EPSILON);
}
/**
* \brief Perform exploration - try a random chunk size
* \param pointer to Drive instance
* \return void
*/
void Shred::performExploration(Drive *drive)
{
    // Bind the configuration macros to typed locals before doing arithmetic.
    // The macros expand to bare products (e.g. 1024 * 1024 * 4); using them
    // directly as a divisor turns "x / STEP" into "x / 1024 * 1024 * 4", which
    // made the step count enormous and caused practically every exploration to
    // be clamped to CHUNK_SIZE_MAX.
    const size_t minChunkSize = CHUNK_SIZE_MIN;
    const size_t maxChunkSize = CHUNK_SIZE_MAX;
    const size_t stepSize = CHUNK_SIZE_STEP_UP;

    // Remember the size in use before exploring (only needed for the log line).
    const size_t previousChunkSize = currentChunkSize;

    // Pick a uniformly random grid point minChunkSize + k * stepSize with
    // k in [0, numSteps]. By construction the result always lies within
    // [minChunkSize, maxChunkSize], so no additional clamping is required.
    const size_t numSteps = (maxChunkSize - minChunkSize) / stepSize;
    const size_t randomStep = static_cast<size_t>(rand()) % (numSteps + 1);
    explorationChunkSize = minChunkSize + (randomStep * stepSize);

    // Enter exploration mode: the chosen size becomes the active chunk size.
    explorationMode = true;
    currentChunkSize = explorationChunkSize;

    Logger::logThis()->info("EXPLORATION MODE: Testing " +
                            to_string(explorationChunkSize / (1024 * 1024)) + " MB chunks " +
                            "(was " + to_string(previousChunkSize / (1024 * 1024)) + " MB, best: " +
                            to_string(bestChunkSize / (1024 * 1024)) + " MB)" +
                            " - Drive: " + drive->getSerial());
}
/**
* \brief Adjust chunk size based on throughput trend (Multi-Armed Bandit)
* \param pointer to Drive instance
* \return void
*/
@@ -133,9 +189,28 @@ void Shred::adjustChunkSize(Drive *drive)
{
size_t oldChunkSize = currentChunkSize;
// Check if we should explore instead of exploit
if (shouldExplore())
{
performExploration(drive);
return;
}
// Exit exploration mode if we were in it
if (explorationMode)
{
explorationMode = false;
currentChunkSize = bestChunkSize; // Return to best known chunk size
Logger::logThis()->info("EXPLORATION ENDED - Returning to best known: " +
to_string(bestChunkSize / (1024 * 1024)) + " MB" +
" - Drive: " + drive->getSerial());
return;
}
// Normal exploitation mode: hill-climbing with symmetric steps
if (throughputIncreasing)
{
// Throughput is improving - increase chunk size
// Throughput is improving - increase chunk size (symmetric step)
currentChunkSize += CHUNK_SIZE_STEP_UP;
// Clamp to maximum
@@ -149,7 +224,7 @@ void Shred::adjustChunkSize(Drive *drive)
}
else
{
// Throughput decreased - decrease chunk size to find sweet spot
// Throughput decreased - decrease chunk size (symmetric step)
if (currentChunkSize > CHUNK_SIZE_STEP_DOWN)
{
currentChunkSize -= CHUNK_SIZE_STEP_DOWN;
@@ -182,7 +257,6 @@ size_t Shred::getCurrentChunkSize() const
{
return currentChunkSize;
}
#endif
/**
* \brief shred drive with shred
@@ -233,14 +307,12 @@ int Shred::shredDrive(Drive *drive, int *ipSignalFd)
const char *cpDrivePath = sDrivePath.c_str();
unsigned char ucKey[TFNG_KEY_SIZE];
#ifdef ADAPTIVE_CHUNK_SIZE
// Validate buffers were allocated
if (caTfngData == nullptr || caReadBuffer == nullptr)
{
Logger::logThis()->error("Shred-Task: Aligned buffers not allocated! - Drive: " + drive->getSerial());
return -1;
}
#endif
// Open random source
Logger::logThis()->info("Shred-Task: Opening random source: " + string(randomsrc) + " - Drive: " + drive->getSerial());
@@ -356,10 +428,8 @@ int Shred::shredDrive(Drive *drive, int *ipSignalFd)
Logger::logThis()->info("Shred-Task: Bytes-Size of Drive: " + to_string(this->ulDriveByteSize) + " - Drive: " + drive->getSerial());
#endif
#ifdef ADAPTIVE_CHUNK_SIZE
// Start first measurement interval
startMeasurement();
#endif
// Main shredding loop
for (unsigned int uiShredIterationCounter = 0U; uiShredIterationCounter < SHRED_ITERATIONS; uiShredIterationCounter++)
{
@@ -370,30 +440,18 @@ int Shred::shredDrive(Drive *drive, int *ipSignalFd)
if (uiShredIterationCounter == (SHRED_ITERATIONS - 1))
{
// last shred iteration --> overwrite (just the write chunk) bytes with zeros instead with random data
#ifdef ADAPTIVE_CHUNK_SIZE
memset(caTfngData, 0U, CHUNK_SIZE_MAX);
#else
memset(caTfngData, 0U, CHUNK_SIZE);
#endif
}
while (ulDriveByteCounter < ulDriveByteSize)
{
#ifdef ADAPTIVE_CHUNK_SIZE
size_t activeChunkSize = getCurrentChunkSize();
#else
size_t activeChunkSize = CHUNK_SIZE;
#endif
int iBytesToShred = 0;
if (uiShredIterationCounter != (SHRED_ITERATIONS - 1))
{
#ifdef ADAPTIVE_CHUNK_SIZE
tfng_prng_genrandom(caTfngData, activeChunkSize);
#else
tfng_prng_genrandom(caTfngData, TFNG_DATA_SIZE);
#endif
}
if ((ulDriveByteSize - ulDriveByteCounter) < activeChunkSize)
@@ -433,16 +491,15 @@ int Shred::shredDrive(Drive *drive, int *ipSignalFd)
ulDriveByteCounter += iByteShredded;
ulDriveByteOverallCount += iByteShredded;
#ifdef ADAPTIVE_CHUNK_SIZE
bytesWrittenInMeasurement += iByteShredded;
chunkCounter++;
totalChunkCounter++; // Track total chunks for periodic re-exploration
// Evaluate throughput after measurement interval
if (chunkCounter >= CHUNK_MEASURE_INTERVAL)
{
evaluateThroughput(drive);
}
#endif
d32Percent = this->calcProgress();
@@ -493,12 +550,10 @@ int Shred::shredDrive(Drive *drive, int *ipSignalFd)
}
}
#ifdef ADAPTIVE_CHUNK_SIZE
Logger::logThis()->info("Shred completed - Optimal chunk size: " +
to_string(bestChunkSize / (1024 * 1024)) + " MB, " +
"Best throughput: " + to_string((int)bestThroughputMBps) + " MB/s" +
" - Drive: " + drive->getSerial());
#endif
// All shred iterations completed successfully
tfng_prng_seedkey(NULL);
@@ -645,11 +700,7 @@ unsigned int Shred::uiCalcChecksum(fileDescriptor file, Drive *drive, int *ipSig
Logger::logThis()->info("Check-Task: Starting checksum verification - Drive: " + drive->getSerial());
#ifdef ADAPTIVE_CHUNK_SIZE
size_t checkChunkSize = CHUNK_SIZE_MAX;
#else
size_t checkChunkSize = CHUNK_SIZE;
#endif
while (ulDriveByteCounter < ulDriveByteSize)
{