Improve shred throughput with adaptive chunksize (#98)

Measure Throughput
     │
     ▼
throughput > best?
     │
 ┌───┴───┐
YES     NO
 │       │
 │       ▼
 │   currentChunk == bestChunk?
 │       │
 │   ┌───┴───┐
 │  YES     NO
 │   │       │
 │   ▼       ▼
 │ throughput < best?  Do nothing
 │   │
 │ ┌─┴─┐
 │YES NO
 │ │   │
 │ ▼   ▼
 │UPDATE!
 │BEST!
 │
 ▼
UPDATE
BOTH!

Reviewed-on: #98
Co-authored-by: localhorst <localhorst@mosad.xyz>
Co-committed-by: localhorst <localhorst@mosad.xyz>
This commit was merged in pull request #98.
This commit is contained in:
2026-05-03 10:06:14 +02:00
committed by Hendrik Schutter
parent 55481b86fd
commit f0246a60d2
7 changed files with 399 additions and 71 deletions
+48 -4
View File
@@ -16,9 +16,25 @@
#include <fcntl.h>
#include <unistd.h>
#include <string.h>
#include <chrono>
// Fixed chunk size. The replacement list is parenthesized so the macro acts as
// one value inside larger expressions (e.g. `x / CHUNK_SIZE`, `x % CHUNK_SIZE`).
#define CHUNK_SIZE (1024 * 1024 * 32) // amount of bytes that are overwritten at once --> 32MB
#define TFNG_DATA_SIZE CHUNK_SIZE // amount of bytes used by tfng
// Adaptive chunk size optimization with multi-armed bandit - always enabled
// Chunk size configuration
// All byte-size macros are parenthesized so that they expand to a single value
// when used inside larger expressions (division, modulo, subtraction, ...).
#define CHUNK_SIZE_START (1024 * 1024 * 32) // Starting chunk size: 32MB
#define CHUNK_SIZE_MIN (1024 * 1024 * 16) // Minimum chunk size: 16MB (increased from 4MB to prevent premature convergence)
#define CHUNK_SIZE_MAX (1024 * 1024 * 128) // Maximum chunk size: 128MB
#define CHUNK_SIZE_STEP_UP (1024 * 1024 * 4) // Increase step: 4MB (symmetric with step down)
#define CHUNK_SIZE_STEP_DOWN (1024 * 1024 * 4) // Decrease step: 4MB (symmetric exploration)
#define CHUNK_MEASURE_INTERVAL 64 // Measure performance every 64 chunks
#define WARMUP_MEASUREMENTS 16 // Skip first 16 measurements (cache writes)
// Multi-armed bandit exploration parameters
#define EXPLORATION_EPSILON 0.10 // 10% exploration rate (epsilon-greedy)
#define REEXPLORATION_INTERVAL 500 // Force re-exploration every 500 chunks
// Buffer sizes - always use maximum for adaptive mode
// (buffers sized with these macros can hold any chunk size up to CHUNK_SIZE_MAX)
#define CHUNK_SIZE CHUNK_SIZE_MAX
#define TFNG_DATA_SIZE CHUNK_SIZE_MAX
// #define DEMO_DRIVE_SIZE 1024*1024*256L // 256MB
// #define DEMO_DRIVE_SIZE 1024*1024*1024L // 1GB
@@ -38,13 +54,41 @@ public:
private:
// File descriptors; judging by the names, one for the random data source and one for the drive being processed.
fileDescriptor randomSrcFileDiscr;
fileDescriptor driveFileDiscr;
// NOTE(review): caTfngData and caReadBuffer are each declared twice below, once as a
// fixed-size array and once as a pointer. This looks like the removed and the added
// side of a diff shown together — confirm that only the pointer form remains in the
// real header, and where the pointed-to buffers are allocated and released.
unsigned char caTfngData[TFNG_DATA_SIZE];
unsigned char caReadBuffer[CHUNK_SIZE];
unsigned char *caTfngData;
unsigned char *caReadBuffer;
// Size of the drive in bytes (no in-class initializer; presumably set before use — confirm).
unsigned long ulDriveByteSize;
unsigned long ulDriveByteOverallCount = 0; // all bytes shredded in all iterations + checking -> used for progress calculation
// Progress values, both starting at 0.0. NOTE(review): the "d32" prefix notwithstanding, the type is double.
double d32Percent = 0.0;
double d32TmpPercent = 0.0;
// Adaptive chunk size optimization members
// NOTE(review): unlike ulDriveByteOverallCount / d32Percent, none of the members below
// has an in-class initializer — confirm that the constructor sets all of them.
size_t currentChunkSize; // Chunk size in bytes currently selected (presumably what getCurrentChunkSize() reports)
size_t bestChunkSize; // Chunk size in bytes associated with bestThroughputMBps
unsigned int chunkCounter; // Chunk count; presumably counts towards the next measurement — TODO confirm
unsigned int totalChunkCounter; // Total chunks written (for periodic re-exploration)
unsigned int warmupCounter; // Count warm-up measurements
std::chrono::high_resolution_clock::time_point measurementStartTime; // Time stamp of a measurement start; presumably set by startMeasurement()
double bestThroughputMBps; // Highest throughput recorded so far (MB/s according to the name)
double lastThroughputMBps; // Throughput of the previous measurement (MB/s according to the name)
unsigned long bytesWrittenInMeasurement; // Bytes written during the measurement in progress (per the name)
bool throughputIncreasing; // NOTE(review): presumably tracks whether the last measurement improved — confirm usage
// Multi-armed bandit exploration state
bool explorationMode; // Currently in exploration mode?
size_t explorationChunkSize; // Chunk size being tested during exploration
// Adaptive methods
void startMeasurement(); // Begins a throughput measurement; details are in the implementation file
void evaluateThroughput(Drive *drive); // Evaluates measured throughput for the given drive; see implementation for the decision rules
void adjustChunkSize(Drive *drive); // Adjusts the chunk size for the given drive; see implementation for the step logic
size_t getCurrentChunkSize() const; // Const accessor returning a size_t chunk size
// Multi-armed bandit methods
bool shouldExplore(); // Decide: explore or exploit?
void performExploration(Drive *drive); // Execute exploration phase
// Returns a double; presumably the progress derived from ulDriveByteOverallCount
// (that member is documented as "used for progress calculation") — confirm in the implementation.
inline double calcProgress();
// NOTE(review): presumably repositions `file` to the start of the drive and returns an int status — confirm the return convention.
int iRewindDrive(fileDescriptor file);
// NOTE(review): presumably returns the size in bytes of the device behind `file` — confirm how errors are reported.
long getDriveSizeInBytes(fileDescriptor file);