#!/usr/bin/env python3
# -*- coding: utf-8 -*-
""" Author:                     Hendrik Schutter, mail@hendrikschutter.com

Extract the individual photos from scanned pages.

Every file in the source directory is read as a scan, the largest contours
inside the scan are cropped and each crop is written as a PNG file to the
destination directory.

Usage:
    python extract_photos.py source_dir=scans/ destination_dir=tmp/
"""

import cv2
import sys
import os
import subprocess

# number of photos that are expected on every scan
number_of_photos_in_scan = 4

# Percentage of the bounding box height/width that is trimmed away in total.
# The trimmed box stays centred, so half of it is removed on each side.
crop_height = 4.0  # offset in percent
crop_width = 4.0  # offset in percent


def _shrink_rect(pos_x, pos_y, width, height):
    """Return the bounding box reduced by the configured crop percentages.

    The result is a tuple (x, y, width, height) of integers. The reduced box
    is centred inside the box that was passed in.
    """
    cropped_width = int(width - (width / 100.0 * crop_width))
    cropped_pos_x = int(pos_x + ((width - cropped_width) / 2.0))
    cropped_height = int(height - (height / 100.0 * crop_height))
    cropped_pos_y = int(pos_y + ((height - cropped_height) / 2.0))
    return cropped_pos_x, cropped_pos_y, cropped_width, cropped_height


def auto_crop_scan(src_path, output_dir):
    """Crop the photos found in one scan and store them as PNG files.

    The scan is converted to grayscale, thresholded and searched for
    contours. The contour with the largest area is skipped and the next
    `number_of_photos_in_scan` largest contours are cropped. Each crop is
    written to `output_dir` as "<scan name>-<index>.png".

    Args:
        src_path: path of the scan image.
        output_dir: directory that receives the cropped images.

    Raises:
        Exception: if the scan cannot be read or converted, if fewer than
            `number_of_photos_in_scan + 1` contours are found, if a crop is
            empty or if a crop cannot be written.
    """
    # read the input image
    img_src = cv2.imread(src_path)
    if img_src is None:
        raise Exception("unable to read image " + str(src_path))

    # convert the image to grayscale
    img_gray = cv2.cvtColor(img_src, cv2.COLOR_BGR2GRAY)
    if img_gray is None:
        raise Exception("unable to convert image to grayscale")

    # apply threshold on the gray image to create a binary image
    thresh = cv2.threshold(img_gray, 127, 255, 0)[1]

    # find the contours
    contours = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)[0]

    # compute every contour area only once and keep it next to its contour
    measured = [(cv2.contourArea(cnt), cnt) for cnt in contours]
    areas = sorted((area for area, cnt in measured), reverse=True)

    # the largest contour plus one contour per photo are required
    if len(areas) < (number_of_photos_in_scan + 1):
        raise Exception("unable to find all photos in scan")

    outer = areas[0]
    inter_min = areas[number_of_photos_in_scan]

    scan_name = os.path.splitext(os.path.basename(os.path.normpath(src_path)))[0]

    index = 0  # used for exported file name
    for area, cnt in measured:
        # skip the largest contour and everything below the photo areas
        if not (inter_min <= area < outer):
            continue

        # compute the bounding rectangle of the contour and trim its border
        pos_x, pos_y, width, height = _shrink_rect(*cv2.boundingRect(cnt))

        img_crop = img_src[pos_y : pos_y + height, pos_x : pos_x + width]
        # slicing never returns None, an unusable crop shows up as empty array
        if img_crop.size == 0:
            raise Exception("unable to crop image")

        export_file_path = os.path.join(
            output_dir, scan_name + "-" + str(index) + ".png"
        )
        # imwrite reports a failed write through its return value
        if not cv2.imwrite(export_file_path, img_crop):
            raise Exception("unable to write image " + str(export_file_path))
        index = index + 1


def _parse_directories(arguments):
    """Return (source_dir, destination_dir) read from key=value arguments.

    Only the first "=" separates key and value, so a path may contain "=".
    Arguments without "=" or with an empty value are ignored. A directory
    that was not given is returned as None.
    """
    source_dir_path = None
    destination_dir_path = None
    for argument in arguments:
        key, separator, value = argument.partition("=")
        if not separator or not value:
            continue
        if key.startswith("destination_dir"):
            destination_dir_path = value
        if key.startswith("source_dir"):
            source_dir_path = value
    return source_dir_path, destination_dir_path


def main():
    """Extract the photos of every scan in the source directory.

    Reads source_dir=... and destination_dir=... from the command line and
    runs auto_crop_scan() for every file in the source directory. The
    process always ends via sys.exit(): 0 if every scan was processed, -1 if
    the arguments are incomplete or at least one scan failed.
    """
    print("starting ...")

    source_dir_path, destination_dir_path = _parse_directories(sys.argv)

    if (destination_dir_path is None) or (source_dir_path is None):
        print("Unable to parse config!")
        print("Example usage:")
        print(
            "    python extract_photos.py source_dir=scans/ destination_dir=tmp/"
        )
        sys.exit(-1)

    failed_extractions = 0

    for path in os.listdir(source_dir_path):
        scan_path = os.path.join(source_dir_path, path)
        if not os.path.isfile(scan_path):
            continue

        print("Found scan: " + str(scan_path), end=" ")
        sys.stdout.flush()
        try:
            auto_crop_scan(scan_path, destination_dir_path)
            print(" → Successfully extracted photos from scan")
        except Exception as e:
            # one bad scan must not stop the remaining scans
            print(str(e))
            failed_extractions = failed_extractions + 1

    if failed_extractions == 0:
        print("\nExiting without error")
        sys.exit(0)
    else:
        print("Unable to extract " + str(failed_extractions) + " scans.")
        print("\nExiting with error")
        sys.exit(-1)


if __name__ == "__main__":
    main()