add script to find duplicates
This commit is contained in:
parent
251859e989
commit
b8de81a302
79
find_duplicates.py
Normal file
79
find_duplicates.py
Normal file
@ -0,0 +1,79 @@
|
||||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
""" Author: Hendrik Schutter, localhorst@mosad.xyz
|
||||
Date of creation: 2023/02/22
|
||||
Date of last modification: 2023/02/22
|
||||
"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
import time
|
||||
import subprocess
|
||||
import datetime
|
||||
from dataclasses import dataclass
|
||||
from tqdm import tqdm
|
||||
import operator
|
||||
|
||||
@dataclass
class MediaFile:
    """Metadata describing a single media file found during a scan."""

    # File name with the extension stripped (first part of os.path.splitext).
    name: str
    # Extension exactly as returned by os.path.splitext, i.e. INCLUDING the
    # leading dot (".mp4", ".mkv", ...). Code comparing this field must
    # include the dot as well.
    extension: str
    # Directory and complete file name joined into one path.
    full_path: str
|
||||
|
||||
def supported_file_extension(filename):
    """Return True if *filename* ends with a supported video extension.

    Supported extensions are ``.mp4``, ``.mkv`` and ``.m4v``. The comparison
    is case-sensitive, so ``MOVIE.MP4`` is not considered supported.
    """
    # str.endswith accepts a tuple of suffixes and already returns a bool.
    return filename.endswith(('.mp4', '.mkv', '.m4v'))
|
||||
|
||||
def get_number_of_files(path):
    """Return how many supported media files exist below *path*.

    The directory tree is walked recursively with os.walk and every file
    name accepted by supported_file_extension() is counted.
    """
    # A generator avoids building one temporary list per directory.
    return sum(
        1
        for _root, _dirs, files in os.walk(path)
        for name in files
        if supported_file_extension(name)
    )
|
||||
|
||||
def cut_file_name(filename, max_lenght, ellipsis="..."):
    """Shorten *filename* to at most *max_lenght* characters.

    Names that already fit are returned unchanged. Longer names are cut and
    terminated with *ellipsis* so the result is exactly *max_lenght*
    characters long. If *max_lenght* is smaller than the ellipsis itself,
    the ellipsis is truncated instead, so the result never exceeds the limit.
    """
    if len(filename) <= max_lenght:
        return filename

    keep = max_lenght - len(ellipsis)
    if keep < 0:
        # No room for the full ellipsis: a negative slice index would return
        # a string longer than the limit, so cut the ellipsis instead.
        return ellipsis[:max(max_lenght, 0)]
    return filename[:keep] + ellipsis
|
||||
|
||||
def scan_files(path):
    """Walk *path* recursively and collect every supported media file.

    A tqdm progress bar is shown while scanning; its total comes from
    get_number_of_files(), which walks the tree once beforehand.

    Returns a list of MediaFile objects. Each entry holds the file name
    without extension, the extension as returned by os.path.splitext (with
    the leading dot) and the full path.
    """
    total_numbers_to_scan = get_number_of_files(path)

    media_files = []  # stores all found files with metadata

    # The context manager closes the progress bar even if the walk raises.
    with tqdm(total=total_numbers_to_scan) as pbar:
        for root, _dirs, files in os.walk(path, topdown=True):
            for name in filter(supported_file_extension, files):
                # Pad/cut the name to 32 columns so the bar does not jitter.
                pbar.set_description(
                    "Processing {:<32}".format(cut_file_name(name, 32))
                )
                stem, extension = os.path.splitext(name)
                media_files.append(
                    MediaFile(
                        name=stem,
                        extension=extension,
                        full_path=os.path.join(root, name),
                    )
                )
                pbar.update(1)

    return media_files
|
||||
|
||||
def print_all(media_files, path):
    """Print the full path of every .mp4 file that also exists as .mkv.

    For each entry in *media_files* whose extension is ``.mp4`` a file with
    the same base name and the extension ``.mkv`` is looked for in two
    places: next to the .mp4 file itself and directly inside *path* (the
    scan root). If either exists, the .mp4 path is printed.

    Nothing is deleted; the function only reports.
    """
    for media_file in media_files:
        if media_file.extension != ".mp4":
            continue

        mkv_name = media_file.name + ".mkv"
        # os.path.join inserts the separator that plain string concatenation
        # omitted, and the sibling check covers files in sub-directories.
        candidates = (
            os.path.join(os.path.dirname(media_file.full_path), mkv_name),
            os.path.join(path, mkv_name),
        )
        if any(os.path.isfile(candidate) for candidate in candidates):
            print(media_file.full_path)
|
||||
|
||||
|
||||
def main() -> None:
    """Scan a directory tree and report duplicated media files.

    If exactly one command-line argument is given, it is used as the
    directory to scan; in every other case the current working directory
    ('.') is scanned.
    """
    # sys.argv[0] is the script itself; the directory is the first real arg.
    scan_root = sys.argv[1] if len(sys.argv) == 2 else '.'

    found_files = scan_files(scan_root)  # scan all media files

    print("")
    print_all(found_files, scan_root)
|
||||
|
||||
|
||||
# Run the scan only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|
Loading…
Reference in New Issue
Block a user