using dataclass, codec statistics

This commit is contained in:
Hendrik Schutter 2022-02-26 00:14:42 +01:00
parent 5aa710156c
commit 06279c07ba
2 changed files with 157 additions and 2 deletions

View File

@ -17,7 +17,7 @@
![alt text](https://git.mosad.xyz/localhorst/media_management_scripts/raw/commit/e67c26a2bcbc6be4674bd0e852254d2afb481ea5/codec_visualizer/demo_output.png "demo output image")
# check_resolution
# check_metadata
## Usage
@ -34,11 +34,24 @@
## Requirements
- ffmpeg
- pip3 install tqdm
## Demo
```
The_Movie_01.mkv | 2:18:21 | 1.57 GiB | h264 | 1920x1080
The_Movie_02.mkv | 1:52:11 | 1.92 GiB | h264 | 1916x798
The_Movie_03.mkv | 2:23:11 | 1.05 GiB | av1 | 1920x804
...
The_Movie_04.mkv | 2:18:24 | 1.21 GiB | h264 | 720x560
TODO
h264 | 3 files | 87.7%
hevc | 8 files | 6.3%
mpeg4 | 11 files | 2.7%
av1 | 10 files | 2.4%
vp9 | 4 files | 1.0%
total 36 files
```
# convert
TODO

View File

@ -0,0 +1,142 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
""" Author: Hendrik Schutter, localhorst@mosad.xyz
Date of creation: 2022/02/13
    Date of last modification: 2022/02/26
"""
import datetime
import os
import subprocess
import sys
import time
from collections import Counter
from dataclasses import dataclass

from tqdm import tqdm
@dataclass
class MediaFile:
    """Metadata collected for a single scanned media file."""

    # File name as stored by scan_files: the basename, extension included.
    name: str
    # Suffix as returned by os.path.splitext, i.e. with the leading dot.
    extension: str
    # Path of the file, built by joining the walked directory and the name.
    full_path: str
    # Codec name of the first video stream as printed by ffprobe.
    codec: str
    # File size in bytes.
    size: int
    # (width, height) of the first video stream.
    resolution: tuple
    # Duration in seconds; get_length yields a float.
    duration: float
def supported_file_extension(filename):
    """Return True when *filename* ends with a supported container suffix.

    Only the lower-case suffixes ``.mp4`` and ``.mkv`` are accepted; the
    comparison is case-sensitive.
    """
    return filename.endswith(('.mp4', '.mkv'))
def get_length(filename):
    """Return the duration of *filename* in seconds as a float.

    ffprobe is asked for the ``format=duration`` entry only. When its output
    cannot be parsed as a number (for example because nothing was printed),
    0.0 is returned instead.
    """
    command = [
        "ffprobe", "-v", "error",
        "-show_entries", "format=duration",
        "-of", "default=noprint_wrappers=1:nokey=1",
        filename,
    ]
    probe = subprocess.run(command,
                           stdout=subprocess.PIPE,
                           stderr=subprocess.DEVNULL)
    try:
        return float(probe.stdout)
    except ValueError:
        return 0.0
def get_codec(filename):
    """Return the codec name of the first video stream of *filename*.

    The name is taken verbatim from ffprobe's output with trailing newlines
    removed; an empty string results when ffprobe prints nothing.
    """
    command = [
        "ffprobe", "-v", "error",
        "-select_streams", "v:0",
        "-show_entries", "stream=codec_name",
        "-of", "default=noprint_wrappers=1:nokey=1",
        filename,
    ]
    probe = subprocess.run(command,
                           stdout=subprocess.PIPE,
                           stderr=subprocess.DEVNULL)
    output = probe.stdout.decode("utf-8")
    return output.rstrip("\n")
def get_resolution(filename):
    """Return the (width, height) of the first video stream of *filename*.

    Args:
        filename: Path of the media file handed to ffprobe.

    Returns:
        A tuple of two ints. (0, 0) is returned when ffprobe does not print
        two numeric values, e.g. when it prints nothing for the file.
    """
    command = [
        "ffprobe", "-v", "error",
        "-select_streams", "v:0",
        "-show_entries", "stream=width,height",
        "-of", "default=noprint_wrappers=1:nokey=1",
        filename,
    ]
    probe = subprocess.run(command,
                           stdout=subprocess.PIPE,
                           stderr=subprocess.DEVNULL)
    try:
        # Width and height arrive on separate lines. Only the first two
        # values are used so additional output cannot end up in the height.
        values = probe.stdout.decode("utf-8").split()
        return (int(values[0]), int(values[1]))
    except (IndexError, ValueError):
        # Missing or non-numeric output (UnicodeDecodeError is a ValueError).
        return (0, 0)
def get_number_of_files(path):
    """Count the supported media files below *path*, including subdirectories."""
    return sum(
        1
        for _root, _dirs, files in os.walk(path)
        for name in files
        if supported_file_extension(name)
    )
def human_readable_size(size, decimal_places=2):
    """Format a byte count using binary units (B, KiB, ... PiB).

    The value is divided by 1024 until it drops below 1024 or the largest
    unit (PiB) is reached, then rendered with *decimal_places* decimals.
    """
    units = ('B', 'KiB', 'MiB', 'GiB', 'TiB', 'PiB')
    last = len(units) - 1
    index = 0
    while index < last and not size < 1024.0:
        size /= 1024.0
        index += 1
    return f"{size:.{decimal_places}f} {units[index]}"
def cut_file_name(filename, max_lenght, ellipsis="..."):
    """Shorten *filename* for display.

    Names of at most *max_lenght* characters are returned unchanged. Longer
    names are cut to ``max_lenght - len(ellipsis)`` characters and *ellipsis*
    is appended.
    """
    if len(filename) <= max_lenght:
        return filename
    keep = max_lenght - len(ellipsis)
    return filename[:keep] + ellipsis
def scan_files(path):
    """Collect metadata for every supported media file below *path*.

    The tree is walked twice: once to count the supported files so the
    progress bar has a total, and once to probe each file.

    Args:
        path: Directory that is scanned recursively.

    Returns:
        A list with one MediaFile per supported file, in os.walk order.
    """
    media_files = []  # stores all found files with metadata
    # The context manager closes the progress bar even when probing a file
    # raises, so the terminal is not left with a dangling bar.
    with tqdm(total=get_number_of_files(path)) as pbar:
        for root, _dirs, files in os.walk(path, topdown=True):
            for name in filter(supported_file_extension, files):
                pbar.set_description(
                    "Processing %s" % "{:<32}".format(cut_file_name(name, 32)))
                full_path = os.path.join(root, name)
                media_files.append(MediaFile(
                    name=os.path.basename(name),
                    extension=os.path.splitext(name)[1],
                    full_path=full_path,
                    codec=get_codec(full_path),
                    size=os.path.getsize(full_path),
                    resolution=get_resolution(full_path),
                    duration=get_length(full_path),
                ))
                pbar.update(1)
    return media_files
def print_all(media_files):
    """Print one aligned table row per media file.

    Columns: shortened name, duration, human-readable size, codec and
    resolution written as ``<width>x<height>``.
    """
    row_format = "{:<64} | {:<8} | {:<16} | {:<8} | {:<16}"
    for entry in media_files:
        shown_name = cut_file_name(entry.name, 64)
        # str(timedelta) may carry a fractional part after a dot; drop it.
        shown_duration = str(datetime.timedelta(seconds=entry.duration)).split(".")[0]
        shown_size = human_readable_size(entry.size)
        shown_resolution = str(entry.resolution[0]) + "x" + str(entry.resolution[1])
        print(row_format.format(shown_name, shown_duration, shown_size,
                                entry.codec, shown_resolution))
def print_codecs(media_files):
    """Print how often each codec occurs, most frequent first.

    One row is printed per codec with its file count and its share of all
    files in percent (one decimal), followed by a blank line and the total
    number of files.

    Args:
        media_files: Sequence of objects exposing a ``codec`` attribute.
    """
    occurrences = Counter(media_file.codec for media_file in media_files)
    total = len(media_files)
    # most_common() sorts by count, descending, and keeps the first-seen
    # order for codecs with equal counts.
    for codec_name, occurrence in occurrences.most_common():
        share = round(float(100 / total) * occurrence, 1)
        print("{:<8} | {:<8} | {:<8}".format(
            codec_name, str(occurrence) + " files", str(share) + "%"))
    print("\ntotal " + str(total) + " files")
def main() -> None:
    """Scan a directory and print the per-file table and the codec statistics.

    The directory is taken from the command line when exactly one argument
    is given; with any other argument count the current directory is used.
    """
    path = sys.argv[1] if len(sys.argv) == 2 else '.'
    media_files = scan_files(path)  # scan all media files
    print("")
    print_all(media_files)
    print("")
    print_codecs(media_files)
# Run the scan only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()