ultimate-photo-digitizer/extract_photos.py

126 lines
3.9 KiB
Python
Raw Normal View History

2023-07-23 13:29:44 +02:00
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
""" Author: Hendrik Schutter, mail@hendrikschutter.com
"""
2023-07-23 11:57:38 +02:00
import cv2
import sys
2023-07-23 13:29:44 +02:00
import os
import subprocess
2023-07-23 13:29:44 +02:00
2023-07-25 20:46:24 +02:00
number_of_photos_in_scan = 4
2023-07-25 20:59:12 +02:00
crop_height = 4.0 # offset in percent
crop_width = 4.0 # offset in percent
2023-07-25 20:26:32 +02:00
2023-07-23 13:29:44 +02:00
def auto_crop_scan(src_path, output_dir):
    """Cut the individual photos out of one scanned image and save them as PNGs.

    The scan is converted to grayscale, binarised with a fixed threshold of
    127 and searched for contours. Contour areas are ranked from largest to
    smallest; every contour whose area is strictly smaller than the largest
    one and at least as large as the area at rank ``number_of_photos_in_scan``
    is exported. (Presumably the largest contour is the scanner bed / page
    outline -- confirm against real scans.)

    For each exported contour the bounding rectangle is shrunk by
    ``crop_width`` / ``crop_height`` percent, kept centred, and the resulting
    region is written to ``<output_dir>/<scan basename>-<index>.png``.

    Args:
        src_path: Path of the scan image to read.
        output_dir: Directory the cropped photos are written to.

    Raises:
        Exception: If the scan cannot be read, if fewer than
            ``number_of_photos_in_scan + 1`` contours are found, or if a
            cropped photo cannot be written.
    """
    # cv2.imread signals failure by returning None instead of raising
    img_src = cv2.imread(src_path)
    if img_src is None:
        raise Exception("unable to read image " + str(src_path))

    # convert the image to grayscale (cvtColor raises on failure)
    img_gray = cv2.cvtColor(img_src, cv2.COLOR_BGR2GRAY)

    # apply threshold on the gray image to create a binary image
    _, thresh = cv2.threshold(img_gray, 127, 255, cv2.THRESH_BINARY)

    # find the contours
    contours, _ = cv2.findContours(
        thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE
    )

    # compute every contour area once and rank them from largest to smallest
    contour_areas = [cv2.contourArea(cnt) for cnt in contours]
    ranked_areas = sorted(contour_areas, reverse=True)
    if len(ranked_areas) < number_of_photos_in_scan + 1:
        raise Exception("unable to find all photos in scan")

    outer = ranked_areas[0]
    inter_min = ranked_areas[number_of_photos_in_scan]

    scan_name = os.path.splitext(os.path.basename(os.path.normpath(src_path)))[0]

    index = 0  # used for exported file name
    for cnt, area in zip(contours, contour_areas):
        # skip the largest contour(s) and everything below the photo threshold
        if not (inter_min <= area < outer):
            continue

        # compute the bounding rectangle of the contour
        pos_x, pos_y, width, height = cv2.boundingRect(cnt)

        # shrink the rectangle by the configured percentage, keeping it centred
        cropped_width = int(width - (width / 100.0 * crop_width))
        cropped_pos_x = int(pos_x + ((width - cropped_width) / 2.0))

        cropped_height = int(height - (height / 100.0 * crop_height))
        cropped_pos_y = int(pos_y + ((height - cropped_height) / 2.0))

        img_crop = img_src[
            cropped_pos_y : cropped_pos_y + cropped_height,
            cropped_pos_x : cropped_pos_x + cropped_width,
        ]

        export_file_name = scan_name + "-" + str(index) + ".png"
        export_file_path = os.path.join(output_dir, export_file_name)

        # cv2.imwrite returns False when the file could not be saved
        if not cv2.imwrite(export_file_path, img_crop):
            raise Exception("unable to write image " + str(export_file_path))
        index = index + 1
2023-07-23 13:29:44 +02:00
def main():
    """Extract the photos from every scan file in a source directory.

    Command-line arguments are given as ``key=value`` pairs:

    * ``source_dir=<path>``: directory containing the scans
    * ``destination_dir=<path>``: directory the extracted photos go to

    Every regular file directly inside the source directory is passed to
    ``auto_crop_scan``. A failure on one scan is printed and counted, and
    processing continues with the next file.

    The process always terminates via ``sys.exit``: status ``0`` when every
    scan was processed without error, ``-1`` when the arguments are missing,
    the source directory cannot be listed, or at least one scan failed.
    """
    print("starting ...")
    destination_dir_path = None
    source_dir_path = None

    # argv[0] is the script path, not an option, so it is skipped
    for argument in sys.argv[1:]:
        # split on the first "=" only, so paths that contain "=" stay intact
        _, separator, value = argument.partition("=")
        if not separator or not value:
            continue
        if argument.startswith("destination_dir"):
            destination_dir_path = value
        if argument.startswith("source_dir"):
            source_dir_path = value

    if destination_dir_path is None or source_dir_path is None:
        print("Unable to parse config!")
        print("Example usage:")
        print(
            "   python extract_photos.py source_dir=scans/ destination_dir=tmp/"
        )
        sys.exit(-1)

    try:
        entries = os.listdir(source_dir_path)
    except OSError as e:
        print("Unable to read source directory: " + str(e))
        sys.exit(-1)

    failed_extractions = 0
    for entry in entries:
        scan_path = os.path.join(source_dir_path, entry)
        if not os.path.isfile(scan_path):
            continue

        print("Found scan: " + str(scan_path), end=" ")
        # flush so the file name is visible while the scan is being processed
        sys.stdout.flush()
        try:
            auto_crop_scan(scan_path, destination_dir_path)
            print(" → Successfully extracted photos from scan")
        except Exception as e:
            # one broken scan must not abort the whole batch
            print(str(e))
            failed_extractions = failed_extractions + 1

    if failed_extractions == 0:
        print("\nExiting without error")
        sys.exit(0)
    else:
        print("Unable to extract " + str(failed_extractions) + " scans.")
        print("\nExiting with error")
        sys.exit(-1)
2023-07-23 13:29:44 +02:00
if __name__ == "__main__":
main()