#!/usr/bin/env python3
# -*- coding: utf-8 -*-

""" Author:                     Hendrik Schutter, localhorst@mosad.xyz
    Date of creation:           2022/02/13
    Date of last modification:  2022/01/26

    Scan a directory tree for video files, probe each one with ``ffprobe``
    and print (a) the large-resolution files that are not yet stored in an
    efficient codec and (b) a codec usage summary.
"""

# NOTE(review): this script was recovered from a whitespace-collapsed
# `git format-patch` mail (commit 4cb989db866e645f7f1927339d08ac90630f27ea,
# subject "[PATCH] next", localhorst, Fri, 15 Aug 2025) that creates
# check_metadata.py with mode 100755.  The same patch also switches
# check_Names_and_Paths.sh and find_duplicates.py to mode 100755 and adds
# "nfo" to the extensions accepted by both extension checks in
# check_Names_and_Paths.sh.  Those non-Python hunks are summarised here
# because they cannot be represented inside a Python module.

import os
import sys
import time
import subprocess
import datetime
from collections import Counter
from dataclasses import dataclass
from typing import List, Tuple
import operator

try:
    from tqdm import tqdm
except ImportError:
    # The progress bar is purely cosmetic, so the scan still works without it.
    tqdm = None


# File name suffixes that are treated as video files (case-sensitive).
SUPPORTED_EXTENSIONS = ('.mp4', '.mkv', '.m4v')

# Codecs considered efficient enough that a file needs no re-encode.
# ffprobe reports H.265 streams as "hevc"; "h265" is kept so MediaFile
# objects built by other means keep working.
EFFICIENT_CODECS = frozenset({"av1", "vp9", "hevc", "h265"})

# Minimum frame width (pixels) for a file to be listed by print_all().
MIN_REENCODE_WIDTH = 1800


@dataclass
class MediaFile:
    name: str  # file name as found on disk (extension included)
    extension: str  # os.path.splitext() suffix, i.e. with the leading dot
    full_path: str
    codec: str  # ffprobe codec_name of the first video stream, "" if none
    size: int  # bytes
    resolution: Tuple[int, int]  # (width, height) in pixels, (0, 0) if unknown
    duration: float  # in sec, 0.0 if unknown


class _NullProgress:
    """Do-nothing stand-in for ``tqdm`` when the package is not installed."""

    def __enter__(self):
        return self

    def __exit__(self, *exc_info):
        return False

    def set_description(self, desc):
        pass

    def update(self, n=1):
        pass

    def close(self):
        pass


def supported_file_extension(filename: str) -> bool:
    """Return True if *filename* ends with one of SUPPORTED_EXTENSIONS."""
    return filename.endswith(SUPPORTED_EXTENSIONS)


def _run_ffprobe(options: List[str], filename: str) -> str:
    """Run ffprobe with *options* on *filename* and return its stripped stdout.

    Values are printed one per line without keys or section wrappers.
    ffprobe's stderr is discarded, so an unreadable file yields "".
    """
    result = subprocess.run(
        ["ffprobe", "-v", "error", *options,
         "-of", "default=noprint_wrappers=1:nokey=1", filename],
        stdout=subprocess.PIPE,
        stderr=subprocess.DEVNULL,
    )
    return result.stdout.decode("utf-8", errors="replace").strip()


def _parse_resolution(raw: str) -> Tuple[int, int]:
    """Turn ffprobe's "width\\nheight" output into an int pair, (0, 0) on failure."""
    fields = raw.split()
    try:
        return int(fields[0]), int(fields[1])
    except (IndexError, ValueError):
        # No video stream, unreadable file, or a non-numeric value like "N/A".
        return (0, 0)


def get_length(filename: str) -> float:
    """Return the container duration of *filename* in seconds (0.0 if unknown)."""
    raw = _run_ffprobe(["-show_entries", "format=duration"], filename)
    try:
        return float(raw)
    except ValueError:
        return 0.0


def get_codec(filename: str) -> str:
    """Return the codec name of the first video stream, or "" if there is none."""
    raw = _run_ffprobe(
        ["-select_streams", "v:0", "-show_entries", "stream=codec_name"],
        filename,
    )
    # Keep only the first line so a repeated entry cannot leak a newline
    # into the codec name.
    return raw.splitlines()[0] if raw else ""


def get_resolution(filename: str) -> Tuple[int, int]:
    """Return (width, height) of the first video stream, (0, 0) if unknown."""
    raw = _run_ffprobe(
        ["-select_streams", "v:0", "-show_entries", "stream=width,height"],
        filename,
    )
    return _parse_resolution(raw)


def get_number_of_files(path: str) -> int:
    """Count the supported video files below *path* (recursive)."""
    return sum(
        1
        for _root, _dirs, files in os.walk(path)
        for name in files
        if supported_file_extension(name)
    )


def human_readable_size(size, decimal_places=2):
    """Format a byte count with binary units, e.g. 1536 -> "1.50 KiB".

    Values of 1024 PiB and above stay in PiB.
    """
    for unit in ['B', 'KiB', 'MiB', 'GiB', 'TiB', 'PiB']:
        if size < 1024.0 or unit == 'PiB':
            break
        size /= 1024.0
    return f"{size:.{decimal_places}f} {unit}"


def cut_file_name(filename, max_lenght, ellipsis="..."):
    """Shorten *filename* to at most *max_lenght* characters.

    Names that fit are returned unchanged; longer names are truncated and
    end with *ellipsis*.  When *max_lenght* leaves no room for any part of
    the name, only the (possibly truncated) ellipsis is returned so the
    result never exceeds the limit.
    """
    if len(filename) <= max_lenght:
        return filename
    keep = max_lenght - len(ellipsis)
    if keep <= 0:
        return ellipsis[:max(max_lenght, 0)]
    return filename[:keep] + ellipsis


def scan_files(path: str) -> List[MediaFile]:
    """Probe every supported video file below *path*.

    Returns one MediaFile per file, in os.walk() order.  A progress bar is
    shown when tqdm is available.
    """
    total_numbers_to_scan = get_number_of_files(path)
    media_files = []  # stores all found files with metadata

    if tqdm is not None:
        progress = tqdm(total=total_numbers_to_scan)
    else:
        progress = _NullProgress()

    # The context manager closes the bar even when probing a file raises.
    with progress as pbar:
        for root, _dirs, files in os.walk(path, topdown=True):
            for name in filter(supported_file_extension, files):
                pbar.set_description(
                    "Processing {:<32}".format(cut_file_name(name, 32)))
                full_path = os.path.join(root, name)
                media_files.append(MediaFile(
                    name=os.path.basename(name),
                    extension=os.path.splitext(name)[1],
                    full_path=full_path,
                    codec=get_codec(full_path),
                    size=os.path.getsize(full_path),
                    resolution=get_resolution(full_path),
                    duration=get_length(full_path),
                ))
                pbar.update(1)
    return media_files


def _needs_reencode(media_file: MediaFile) -> bool:
    """Return True for wide files that are not stored in an efficient codec."""
    try:
        width = int(media_file.resolution[0])
    except (TypeError, ValueError, IndexError):
        # Unknown resolution (e.g. no video stream): never a candidate.
        width = 0
    return width >= MIN_REENCODE_WIDTH and media_file.codec not in EFFICIENT_CODECS


def print_all(media_files: List[MediaFile]) -> None:
    """Print the quoted names of all re-encode candidates on one line.

    Despite its name, only files at least MIN_REENCODE_WIDTH pixels wide
    whose codec is not in EFFICIENT_CODECS are printed.  *media_files* is
    sorted in place by size, largest first; no trailing newline is written.
    """
    media_files.sort(key=operator.attrgetter("size"), reverse=True)
    for media_file in media_files:
        if _needs_reencode(media_file):
            print('"' + media_file.name + '" ', end='')


def print_codecs(media_files: List[MediaFile]) -> None:
    """Print a table of codecs with file count and share, most frequent first."""
    total = len(media_files)
    occurrences = Counter(media_file.codec for media_file in media_files)

    # most_common() sorts by count and keeps first-seen order among ties.
    for codec, occurrence in occurrences.most_common():
        share = round(100 / total * occurrence, 1)
        print("{:<8} | {:<16} | {:<8}".format(
            codec, str(occurrence) + " files", str(share) + "%"))
    print("\ntotal " + str(total) + " files")


def main() -> None:
    """Scan the directory given as the only argument (default: cwd) and report."""
    # Any argument count other than exactly one falls back to the cwd.
    path = sys.argv[1] if len(sys.argv) == 2 else '.'

    media_files = scan_files(path)  # scan all media files

    print("")
    print_all(media_files)

    print("")
    print_codecs(media_files)


if __name__ == "__main__":
    main()