#!/usr/bin/env python3
# -*- coding: utf-8 -*-

""" Author:                     Hendrik Schutter, localhorst@mosad.xyz
    Date of creation:           2022/02/13
    Date of last modification:  2022/01/26

    check_metadata: walk a directory tree, query every .mp4/.mkv file with
    ffprobe and print one line per file followed by a per-codec summary.

    Usage:
        check_metadata.py [path]      (defaults to the current directory)

    Requirements:
        - ffmpeg (provides the ffprobe binary that is called here)
        - pip3 install tqdm (progress bar; scanning still works without it)

    Demo of the per-file table:
        The_Movie_01.mkv | 2:18:21 | 1.57 GiB | h264 | 1920x1080
        The_Movie_03.mkv | 2:23:11 | 1.05 GiB | av1  | 1920x804

    The summary lists "<codec> | <n> files | <share>%" sorted by the number
    of files, followed by "total <n> files".
"""

import os
import sys
import time
import subprocess
import datetime
from collections import Counter
from dataclasses import dataclass
from typing import Tuple

try:
    from tqdm import tqdm
except ImportError:
    # The progress bar is purely cosmetic, so a missing tqdm must not make
    # the whole module unusable; scan_files() falls back to _NoProgress.
    tqdm = None

# File extensions (lower case, with dot) that are treated as media files.
SUPPORTED_EXTENSIONS = ('.mp4', '.mkv')


@dataclass
class MediaFile:
    """Metadata collected for one scanned media file."""
    name: str  # file name as found on disk, including the extension
    extension: str  # extension including the leading dot, e.g. ".mkv"
    full_path: str
    codec: str  # codec name of the first video stream, "" if none reported
    size: int  # bytes
    resolution: Tuple[int, int]  # (width, height), (0, 0) if unknown
    duration: float  # in sec, 0.0 if unknown


class _NoProgress:
    """No-op stand-in offering the subset of the tqdm API used below."""

    def __init__(self, total=None):
        self.total = total

    def __enter__(self):
        return self

    def __exit__(self, *exc_info):
        return False  # never swallow exceptions

    def set_description(self, desc):
        pass

    def update(self, n=1):
        pass


def supported_file_extension(filename):
    """Return True if *filename* ends in a supported media extension.

    The comparison is case-insensitive so that e.g. "MOVIE.MKV" is found too.
    """
    return filename.lower().endswith(SUPPORTED_EXTENSIONS)


def _run_ffprobe(entries, filename, select_video_stream=False):
    """Run ffprobe on *filename* and return its stdout as text.

    entries             -- value for ffprobe's -show_entries option
    select_video_stream -- restrict the query to the first video stream (v:0)

    ffprobe's own error output is discarded; a file it cannot read simply
    yields an empty string, which the callers map to their fallback values.
    """
    command = ["ffprobe", "-v", "error"]
    if select_video_stream:
        command += ["-select_streams", "v:0"]
    command += ["-show_entries", entries,
                "-of", "default=noprint_wrappers=1:nokey=1", filename]
    result = subprocess.run(command,
                            stdout=subprocess.PIPE,
                            stderr=subprocess.DEVNULL)
    # errors="replace": odd bytes in the output must not abort the scan
    return result.stdout.decode("utf-8", errors="replace")


def get_length(filename):
    """Return the container duration of *filename* in seconds (0.0 if unknown)."""
    try:
        return float(_run_ffprobe("format=duration", filename))
    except ValueError:
        # empty output or a non-numeric value such as "N/A"
        return 0.0


def get_codec(filename):
    """Return the codec name of the first video stream ("" if none)."""
    fields = _run_ffprobe("stream=codec_name", filename,
                          select_video_stream=True).split()
    # Only the first token is kept so that unexpected multi-line output
    # cannot inject a line break into the printed table.
    return fields[0] if fields else ""


def get_resolution(filename):
    """Return (width, height) of the first video stream as ints.

    (0, 0) is returned when ffprobe reports no usable width/height.
    """
    fields = _run_ffprobe("stream=width,height", filename,
                          select_video_stream=True).split()
    try:
        return (int(fields[0]), int(fields[1]))
    except (IndexError, ValueError):
        return (0, 0)


def get_number_of_files(path):
    """Count the supported media files below *path* (recursively)."""
    return sum(1
               for _root, _dirs, files in os.walk(path)
               for name in files
               if supported_file_extension(name))


def human_readable_size(size, decimal_places=2):
    """Format a byte count using binary units, e.g. 1536 -> "1.50 KiB"."""
    for unit in ['B', 'KiB', 'MiB', 'GiB', 'TiB', 'PiB']:
        if size < 1024.0 or unit == 'PiB':
            break
        size /= 1024.0
    return f"{size:.{decimal_places}f} {unit}"


def cut_file_name(filename, max_lenght, ellipsis="..."):
    """Shorten *filename* to at most *max_lenght* characters.

    Names that are too long are truncated and terminated with *ellipsis*.
    If the limit leaves no room for the ellipsis, the name is simply cut.
    """
    if len(filename) <= max_lenght:
        return filename
    if max_lenght <= len(ellipsis):
        # a negative slice end would otherwise yield an over-long result
        return filename[:max(max_lenght, 0)]
    return filename[:max_lenght - len(ellipsis)] + ellipsis


def scan_files(path):
    """Collect a MediaFile entry for every supported file below *path*.

    A progress bar is shown while ffprobe is queried for each file.
    """
    total_numbers_to_scan = get_number_of_files(path)

    media_files = []  # stores all found files with metadata

    progress_cls = tqdm if tqdm is not None else _NoProgress
    # context manager: the bar is closed even if a probe raises
    with progress_cls(total=total_numbers_to_scan) as pbar:
        for root, _dirs, files in os.walk(path, topdown=True):
            for name in filter(supported_file_extension, files):
                pbar.set_description(
                    "Processing {:<32}".format(cut_file_name(name, 32)))
                full_path = os.path.join(root, name)
                media_files.append(MediaFile(
                    name=os.path.basename(name),
                    extension=os.path.splitext(name)[1],
                    full_path=full_path,
                    codec=get_codec(full_path),
                    size=os.path.getsize(full_path),
                    resolution=get_resolution(full_path),
                    duration=get_length(full_path)))
                pbar.update(1)
    return media_files


def print_all(media_files):
    """Print one table row (name, duration, size, codec, resolution) per file."""
    for media_file in media_files:
        # str(timedelta) may carry fractional seconds; keep only H:MM:SS
        duration = str(
            datetime.timedelta(seconds=media_file.duration)).split(".")[0]
        width, height = media_file.resolution
        print("{:<64} | {:<8} | {:<16} | {:<8} | {:<16}".format(
            cut_file_name(media_file.name, 64),
            duration,
            human_readable_size(media_file.size),
            media_file.codec,
            str(width) + "x" + str(height)))


def print_codecs(media_files):
    """Print how many files use each codec, most frequent codec first."""
    occurrences = Counter(media_file.codec for media_file in media_files)
    total = len(media_files)

    # most_common() sorts by occurrence (descending) and keeps first-seen
    # order for ties
    for codec, occurrence in occurrences.most_common():
        share = round(100 / total * occurrence, 1)
        print("{:<8} | {:<8} | {:<8}".format(
            codec, str(occurrence) + " files", str(share) + "%"))
    print("\ntotal " + str(total) + " files")


def main() -> None:
    """Scan the directory given as the only argument (default: cwd)."""
    if len(sys.argv) != 2:
        path = '.'  # use current pwd
    else:
        path = sys.argv[1]  # use first command line argument as path

    media_files = scan_files(path)  # scan all media files

    print("")
    print_all(media_files)

    print("")
    print_codecs(media_files)


if __name__ == "__main__":
    main()