forked from localhorst/media_management_scripts
		
	add script to find duplicates
This commit is contained in:
		
							
								
								
									
										79
									
								
								find_duplicates.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										79
									
								
								find_duplicates.py
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,79 @@ | ||||
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

""" Author:                     Hendrik Schutter, localhorst@mosad.xyz
    Date of creation:           2023/02/22
    Date of last modification:  2023/02/22
"""
|  | ||||
import datetime
import operator
import os
import subprocess
import sys
import time
from dataclasses import dataclass

from tqdm import tqdm
|  | ||||
@dataclass
class MediaFile:
    """Record describing one media file found during a directory scan."""

    # File name with the extension removed (first part of os.path.splitext).
    name: str
    # Extension INCLUDING the leading dot, e.g. ".mp4" — this is what
    # os.path.splitext yields and what print_all compares against.
    extension: str
    # Directory joined with the complete file name.
    full_path: str
|  | ||||
def supported_file_extension(filename):
    """Return True if *filename* ends with a supported video extension.

    Supported suffixes are ``.mp4``, ``.mkv`` and ``.m4v``. The comparison is
    case-sensitive, so ``"clip.MP4"`` is not accepted.
    """
    # str.endswith accepts a tuple, so one call covers every suffix.
    return filename.endswith(('.mp4', '.mkv', '.m4v'))
|  | ||||
def get_number_of_files(path):
    """Count the supported media files below *path*, including subdirectories.

    Returns 0 when nothing matches (os.walk yields nothing for a path that
    cannot be listed).
    """
    count = 0
    for _root, _dirs, files in os.walk(path):
        # Count matches lazily instead of building a throwaway list per directory.
        count += sum(1 for name in files if supported_file_extension(name))
    return count
|  | ||||
def cut_file_name(filename, max_lenght, ellipsis="..."):
    """Shorten *filename* so that it is at most *max_lenght* characters long.

    Names that already fit are returned unchanged. Longer names are cut and
    terminated with *ellipsis*; the ellipsis counts towards *max_lenght*.
    When *max_lenght* is too small to hold the ellipsis plus at least one
    character of the name, a truncated ellipsis is returned instead.
    """
    if len(filename) <= max_lenght:
        return filename

    keep = max_lenght - len(ellipsis)
    if keep <= 0:
        # A negative slice bound would count from the END of the name and
        # produce a result longer than max_lenght, so clamp explicitly.
        return ellipsis[:max(max_lenght, 0)]
    return filename[:keep] + ellipsis
|  | ||||
def scan_files(path):
    """Collect every supported media file below *path*.

    The tree is walked once up front to obtain the total for the progress
    bar, then a second time to build the records.

    Returns a list of MediaFile entries, one per matching file.
    """
    total_numbers_to_scan = get_number_of_files(path)

    media_files = []  # all found files with their metadata

    # The context manager closes the progress bar even if the walk raises.
    with tqdm(total=total_numbers_to_scan) as pbar:
        for root, _dirs, files in os.walk(path, topdown=True):
            for name in filter(supported_file_extension, files):
                # Pad/cut the name to a fixed 32 columns so the bar does not jump.
                pbar.set_description(f"Processing {cut_file_name(name, 32):<32}")
                stem, extension = os.path.splitext(name)  # extension keeps its dot
                media_files.append(
                    MediaFile(
                        name=stem,
                        extension=extension,
                        full_path=os.path.join(root, name),
                    )
                )
                pbar.update(1)
    return media_files
|  | ||||
def print_all(media_files, path):
    """Print the full path of every ``.mp4`` entry that also exists as ``.mkv``.

    For each entry whose extension is ``".mp4"``, a file with the same base
    name and the ``.mkv`` extension is looked up directly inside *path*. If
    it exists, the ``.mp4`` file's full path is printed. Nothing is deleted.

    media_files: iterable of objects with ``name``, ``extension`` and
                 ``full_path`` attributes (e.g. MediaFile).
    path:        directory in which the ``.mkv`` counterpart is searched.
    """
    for media_file in media_files:
        if media_file.extension != ".mp4":
            continue
        # os.path.join supplies the separator; plain string concatenation
        # produced e.g. ".movie.mkv" for path "." and never matched.
        file_test_path = os.path.join(path, media_file.name + ".mkv")
        if os.path.isfile(file_test_path):
            print(media_file.full_path)
|  | ||||
|  | ||||
def main() -> None:
    """Scan a directory for media files and list the duplicated ``.mp4`` files.

    The directory is taken from the first command-line argument when exactly
    one argument is given; in every other case the current working directory
    is used.
    """
    # sys.argv[0] is the script itself, so the directory is sys.argv[1].
    path = sys.argv[1] if len(sys.argv) == 2 else '.'

    media_files = scan_files(path)  # scan all media files

    print("")  # blank line between the scan output and the result list
    print_all(media_files, path)
|  | ||||
|  | ||||
# Run the scan only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
		Reference in New Issue
	
	Block a user