add script to find duplicates

This commit is contained in:
Hendrik Schutter 2023-02-22 16:48:57 +01:00
parent 251859e989
commit b8de81a302
1 changed files with 79 additions and 0 deletions

79
find_duplicates.py Normal file
View File

@ -0,0 +1,79 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
""" Author: Hendrik Schutter, localhorst@mosad.xyz
Date of creation: 2023/02/22
Date of last modification: 2023/02/22
"""
import os
import sys
import time
import subprocess
import datetime
from dataclasses import dataclass
from tqdm import tqdm
import operator
@dataclass
class MediaFile:
    """Record describing one media file found during a directory scan."""

    name: str  # file name without its extension
    extension: str  # extension as produced by os.path.splitext, i.e. WITH the leading dot (".mp4")
    full_path: str  # containing directory joined with the complete file name
def supported_file_extension(filename):
    """Return True if *filename* ends with a supported video extension.

    The comparison is case-sensitive and recognises ``.mp4``, ``.mkv``
    and ``.m4v``.
    """
    # str.endswith accepts a tuple, so one call covers every suffix.
    return filename.endswith(('.mp4', '.mkv', '.m4v'))
def get_number_of_files(path):
    """Count the supported media files found recursively below *path*.

    Args:
        path: Root directory handed to os.walk.

    Returns:
        Number of file names accepted by supported_file_extension.
    """
    count = 0
    for _root, _dirs, files in os.walk(path):
        # Count lazily instead of building a filtered list per directory.
        count += sum(1 for name in files if supported_file_extension(name))
    return count
def cut_file_name(filename, max_lenght, ellipsis="..."):
    """Shorten *filename* to at most *max_lenght* characters.

    Args:
        filename: Text to shorten.
        max_lenght: Maximum length of the returned string.
        ellipsis: Marker appended when the text had to be cut.

    Returns:
        *filename* unchanged if it already fits; otherwise a prefix of it
        followed by *ellipsis*, together exactly *max_lenght* characters
        long. If *max_lenght* is too small to hold the ellipsis, a plain
        prefix of *filename* is returned instead.
    """
    if len(filename) <= max_lenght:
        return filename
    if max_lenght < len(ellipsis):
        # No room for the marker: a negative slice bound would otherwise
        # yield a result longer than the limit, so fall back to a hard cut.
        return filename[:max(max_lenght, 0)]
    return filename[:max_lenght - len(ellipsis)] + ellipsis
def scan_files(path):
    """Collect every supported media file below *path*.

    The tree is walked once up front to count the matching files, so the
    progress bar has a total, and then walked again to build the records.

    Args:
        path: Root directory to scan recursively.

    Returns:
        List of MediaFile records, one per supported file, in os.walk order.
    """
    total_numbers_to_scan = get_number_of_files(path)
    media_files = []  # stores all found files with metadata
    # The context manager closes the progress bar even if the walk raises.
    with tqdm(total=total_numbers_to_scan) as pbar:
        for root, _dirs, files in os.walk(path, topdown=True):
            for name in filter(supported_file_extension, files):
                # Pad/cut the name to 32 columns so the bar does not jump around.
                pbar.set_description(f"Processing {cut_file_name(name, 32):<32}")
                # splitext keeps the leading dot on the extension part.
                stem, extension = os.path.splitext(name)
                media_files.append(
                    MediaFile(
                        name=stem,
                        extension=extension,
                        full_path=os.path.join(root, name),
                    )
                )
                pbar.update(1)
    return media_files
def print_all(media_files, path):
    """Print the full path of every ``.mp4`` that has an ``.mkv`` twin in *path*.

    Args:
        media_files: Iterable of records exposing ``name``, ``extension``
            (with leading dot) and ``full_path``.
        path: Directory in which the ``<name>.mkv`` counterpart is looked up.
    """
    for media_file in media_files:
        if media_file.extension != ".mp4":
            continue
        # os.path.join supplies the separator; plain string concatenation
        # produced e.g. ".movie.mkv" whenever *path* had no trailing slash.
        file_test_path = os.path.join(path, media_file.name + ".mkv")
        # NOTE(review): the counterpart is looked up in *path* itself, not in
        # the directory of the .mp4 - confirm this is intended for nested files.
        if os.path.isfile(file_test_path):
            # Report only; the duplicate is not removed.
            print(media_file.full_path)
def main() -> None:
    """Scan a directory for media files and report the duplicates.

    When exactly one command-line argument is given it is used as the scan
    root; for any other argument count the current directory is scanned.
    """
    has_single_argument = len(sys.argv) == 2
    # sys.argv[0] is the script itself, so the directory is sys.argv[1].
    path = sys.argv[1] if has_single_argument else '.'
    media_files = scan_files(path)  # scan all media files
    print("")  # empty line before the results
    print_all(media_files, path)
# Run the scan only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()