2023-07-23 13:29:44 +02:00
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
""" Author: Hendrik Schutter, mail@hendrikschutter.com
"""
2023-07-23 11:57:38 +02:00
import cv2
import sys
2023-07-23 13:29:44 +02:00
import os
2023-07-25 19:49:32 +02:00
import subprocess
2023-07-23 13:29:44 +02:00
2023-07-25 20:46:24 +02:00
# Number of photos expected on every scanned page. auto_crop_scan requires at
# least this many contours plus one (the largest contour is skipped) and uses
# the area of the contour at this rank as the lower bound for extraction.
number_of_photos_in_scan = 4
2023-07-25 19:49:32 +02:00
2023-07-25 20:59:12 +02:00
# Percentage of a photo's bounding-box height that auto_crop_scan trims away
# in total; the remaining region is centred, so half is cut from each edge.
crop_height = 4.0 # offset in percent
# Same as crop_height, applied to the bounding-box width.
crop_width = 4.0 # offset in percent
2023-07-25 20:26:32 +02:00
2023-07-23 13:29:44 +02:00
def auto_crop_scan(src_path, output_dir):
    """Extract the individual photos contained in one scanned page.

    The scan is converted to grayscale, binarised with a fixed threshold of
    127 and searched for contours.  The contour with the largest area is
    skipped (presumably the outline of the whole scan); every other contour
    whose area is at least as large as the one ranked
    ``number_of_photos_in_scan`` is treated as a photo.  Each photo's
    bounding rectangle is shrunk by ``crop_width`` / ``crop_height`` percent
    (kept centred) and written to ``output_dir`` as
    ``<scan name>-<index>.png``.

    Args:
        src_path: Path of the scan image to process.
        output_dir: Directory that receives the cropped PNG files.

    Raises:
        Exception: If the scan cannot be read, fewer than
            ``number_of_photos_in_scan + 1`` contours are found, a crop
            turns out empty, or a cropped image cannot be written.
    """
    # cv2.imread reports failure by returning None instead of raising.
    img_src = cv2.imread(src_path)
    if img_src is None:
        raise Exception("unable to read image " + str(src_path))

    img_gray = cv2.cvtColor(img_src, cv2.COLOR_BGR2GRAY)
    if img_gray is None:
        raise Exception("unable to convert image to grayscale")

    # Binarise the grayscale image so that contours can be detected.
    _, thresh = cv2.threshold(img_gray, 127, 255, cv2.THRESH_BINARY)
    contours, _hierarchy = cv2.findContours(
        thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE
    )

    # Compute every contour area once; it is needed for ranking and filtering.
    contour_areas = [cv2.contourArea(cnt) for cnt in contours]
    ranked_areas = sorted(contour_areas, reverse=True)
    if len(ranked_areas) < number_of_photos_in_scan + 1:
        raise Exception("unable to find all photos in scan")

    outer = ranked_areas[0]
    inner_min = ranked_areas[number_of_photos_in_scan]

    scan_name = os.path.splitext(os.path.basename(os.path.normpath(src_path)))[0]
    index = 0  # used for exported file name
    for cnt, area in zip(contours, contour_areas):
        if not (inner_min <= area < outer):
            continue

        pos_x, pos_y, width, height = cv2.boundingRect(cnt)

        # Shrink the rectangle by the configured percentage and re-centre it
        # so that the same margin is removed on opposite edges.
        cropped_width = int(width - (width / 100.0 * crop_width))
        cropped_pos_x = int(pos_x + ((width - cropped_width) / 2.0))
        cropped_height = int(height - (height / 100.0 * crop_height))
        cropped_pos_y = int(pos_y + ((height - cropped_height) / 2.0))

        img_crop = img_src[
            cropped_pos_y : cropped_pos_y + cropped_height,
            cropped_pos_x : cropped_pos_x + cropped_width,
        ]
        # Slicing never yields None; an unusable crop shows up as an empty array.
        if img_crop is None or img_crop.size == 0:
            raise Exception("unable to crop image")

        export_file_path = os.path.join(output_dir, f"{scan_name}-{index}.png")
        # cv2.imwrite returns False (without raising) when the file cannot be
        # written, e.g. because output_dir does not exist.
        if not cv2.imwrite(export_file_path, img_crop):
            raise Exception("unable to write image " + str(export_file_path))
        index += 1
2023-07-23 13:29:44 +02:00
def main():
    """Command-line entry point: extract the photos from every scan in a directory.

    Expects two ``key=value`` command-line arguments, ``source_dir=<dir>``
    and ``destination_dir=<dir>``.  Every regular file found directly in the
    source directory is passed to ``auto_crop_scan``; failures are printed
    and counted, and processing continues with the next scan.

    Exits the process with status 0 when all scans were processed, and with
    status -1 when an argument is missing or at least one scan failed.
    """
    print("starting ...")

    destination_dir_path = None
    source_dir_path = None
    # Skip argv[0] (the script name) and split on the first "=" only, so that
    # paths which themselves contain "=" are kept intact.
    for argument in sys.argv[1:]:
        key, separator, value = argument.partition("=")
        if not separator:
            continue
        if key == "destination_dir":
            destination_dir_path = value
        elif key == "source_dir":
            source_dir_path = value

    if not destination_dir_path or not source_dir_path:
        print("Unable to parse config!")
        print("Example usage:")
        print("python extract_photos.py source_dir=scans/ destination_dir=tmp/")
        sys.exit(-1)

    # Make sure the output directory exists; writing into a missing directory
    # would otherwise fail for every extracted photo.
    os.makedirs(destination_dir_path, exist_ok=True)

    failed_extractions = 0
    for entry in os.listdir(source_dir_path):
        scan_path = os.path.join(source_dir_path, entry)
        if not os.path.isfile(scan_path):
            continue

        # The result of the extraction is appended to this same output line.
        print("Found scan: " + str(scan_path), end=" ", flush=True)
        try:
            auto_crop_scan(scan_path, destination_dir_path)
        except Exception as e:
            # Top-level boundary: report the failure and keep going.
            print(str(e))
            failed_extractions += 1
        else:
            print("→ Successfully extracted photos from scan")

    if failed_extractions == 0:
        print("\nExiting without error")
        sys.exit(0)

    print("Unable to extract " + str(failed_extractions) + " scans.")
    print("\nExiting with error")
    sys.exit(-1)
2023-07-23 13:29:44 +02:00
# Run the command-line entry point only when executed as a script, not when
# the module is imported.
if __name__ == "__main__":
    main()