#!/bin/bash
#####################################################################
##                                                                 ##
##    imageSorting v0.6                                            ##
##    this script can be used to sort recovered images             ##
##                                                                 ##
#####################################################################

#####################################################################
##                                                                 ##
## General:                                                        ##
##  This Script is free; You can do whatever you want with it,     ##
##  except the following:                                          ##
##    - You're not allowed to publish it under any license.        ##
##    - You're not allowed to remove these lines.                  ##
##    - You're not allowed to remove or modify the author line.    ##
## Disclaimer of Warranty and Liability:                           ##
##  THIS SCRIPT IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTY  ##
##  OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, WITHOUT  ##
##  LIMITATION, WARRANTIES THAT THE SCRIPT IS FREE OF DEFECTS,     ##
##  MERCHANTABLE, FIT FOR A PARTICULAR PURPOSE OR NON-INFRINGING.  ##
##  THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE       ##
##  SCRIPT IS WITH YOU.                                            ##
##  SHOULD THIS SCRIPT PROVE DEFECTIVE IN ANY RESPECT, YOU         ##
##  (NOT THE INITIAL DEVELOPER OR ANY OTHER CONTRIBUTOR) ASSUME    ##
##  THE COST OF ANY NECESSARY SERVICING, REPAIR OR CORRECTION.     ##
##                                                                 ##
##    Author: Jean Michel Bruenn <jean.bruenn@jeanbruenn.info>     ##
##    Date:  31th December 2008                                    ##
##                                                                 ##
#####################################################################

###
# ToDo
###
#    - Someone should review everything I am doing here, e.g. the
#      sed rules; maybe there are better (faster) ways to do the
#      same thing.
#    - Make use of "local" within functions.
#    - Print a short summary at the end of script execution showing
#      how much time each slot took and how much work it did
#    - Make it possible to add more than one directory as arg
#      for -f
#    - Make the echo output more useful
#    - Make the comments more useful
#    - Add more Fileformats

###
# Changelog
###
# 0.6
#    - Removed ImageMagick's "convert" calls and replaced them with
#      cp -ra (resulting in faster, safer and better processing).
#      To read the whole story, look at
#      http://jeanbruenn.info/new/2011/07/23/imagesorting-0-6/
#    - The IMG_YEAR Validation was incorrect, resulting in some
#      weird directory names. fixed.
#    - Instead of giving 0000 as year, when the year is unknown
#      it's using "unknown" now.
#    - Removed the creation of "corrupted" directory, no convert
#      no corrupted directory needed anymore :)
#
# 0.5
#    - Removed the on-the-fly copy-detection. causing too much
#      trouble.
#    - Added automatic detection of -p if it is not set, instead of
#      just using the default value. It uses a minimum of 2 if
#      nothing was given and only one or two cpus/cores are detected,
#      and up to 255 depending on the number of available cpus or
#      cores. That way the default value should always be safe.
#    - Added a function to clean some strings by removing garbage
#      (like whitespace) from the start, the end and within the string
#    - Apps detected using "which"
#    - Added -auto-orient and -despeckle as args to convert
#
# 0.4
#    - Rewritten some functions
#    - Removed the cp* stuff to /tmp; we lose too much time by doing
#      so.
#    - Enhanced the exif stuff.
#    - Added "thumbnails" directory for sorting.
#    - Added "command-line" options
#    - Added on-the-fly copy-detection
#    - Added SED rule to remove whitespaces from directory and file-
#      names
#    - Added simple lock-mechanism
#
# 0.3
#    - Added parallelization (thx to TheBonsai for the skeleton
#      script)
#    - Rewrote script to use functions.
#
# 0.2
#    - Complete rewrite
#
# 0.1
#    - Initial release
###

###
# Variables
###

# Pattern (matched case-insensitively by find -iregex) selecting the
# files to sort by their extension.
PATT='.*\.\(png\|jpg\|gif\|bmp\|svg\|tiff\|tif\|jpeg\|eps\|psd\)';

# Paths of the external tools. "command -v" is the shell's own lookup: it
# needs no extra program and prints nothing when a tool is missing.
# imsCheck tests these paths before any work starts.
GREP=$(command -v grep);
SED=$(command -v sed);
IDEN=$(command -v identify);
CUT=$(command -v cut);
EXIF=$(command -v exiftool);
if [ ! -x "$EXIF" ]; then
  # exiftool is not in PATH: fall back to a manually unpacked copy.
  EXIF="/home/beate/Image-ExifTool-8.61/exiftool";
fi

#####################################################################
##                                                                 ##
##    DO NOT CHANGE ANYTHING BELOW THIS LINE                       ##
##        ...unless you know what you are doing of course.         ##
##                                                                 ##
#####################################################################

###
# cleanVariable : removes blanks (spaces and tabs) from the beginning and
#             the end of a string.
# Arguments:  the text to clean; several arguments are joined with
#             single spaces first
# Result:     stored in the global $VAR, nothing is printed
#
# maybe i will add more useful stuff later, like removing unreadable
# chars
###
function cleanVariable() {

  # "$*" joins the arguments with the first character of IFS; pin it to a
  # space so the result does not depend on the caller's IFS.
  local IFS=' '

  # printf instead of echo: echo would take values such as "-n" or "-e"
  # as options and swallow them. [[:blank:]] is the portable spelling of
  # "space or tab" inside a bracket expression.
  VAR=$(printf '%s\n' "$*" | sed 's/^[[:blank:]]*//;s/[[:blank:]]*$//');

}

###
# imsCheck : tests whether needed tools exist
# Globals:   GREP, SED, IDEN, CUT, EXIF (read) - paths of the tools
#            DEBUG (read) - when 1, reports where each tool was found
# Returns:   normally when every tool is usable; otherwise prints which
#            tool is missing and exits the script with status 1
###
imsCheck() {

  local -a names=(grep sed identify cut exiftool)
  local -a paths=("$GREP" "$SED" "$IDEN" "$CUT" "$EXIF")
  local idx

  for ((idx = 0; idx < ${#names[@]}; idx++)); do

    # The path must be quoted and tested for emptiness: a failed lookup
    # leaves the variable empty, and an unquoted "[ -e $VAR ]" then
    # collapses to "[ -e ]", which is true.
    if [ -n "${paths[idx]}" ] && [ -x "${paths[idx]}" ] \
        && [ ! -d "${paths[idx]}" ]; then
      if [ "${DEBUG:-0}" -eq 1 ] ; then
        echo "++ ${names[idx]} found in ${paths[idx]}";
      fi
    else
      echo "+ ${names[idx]} not found. exiting...";
      # a missing tool is an error, so do not report success
      exit 1
    fi

  done

}

###
# imsMain : the scheduler. It runs imsProcess for every entry of FILES,
#           keeping at most IMSTHREADS background jobs ("slots") busy,
#           and returns once all of them have finished.
# Globals:  FILES, NUMFILES, IMSTHREADS, DEBUG (read)
#           SLOT (written) - SLOT[i] is the PID of the job running in
#           slot i, or -1 while the slot is free
###
imsMain() {

   local threads=${IMSTHREADS:-1}
   local n=0
   local i

   imsCheck

   # Without at least one slot the dispatch loop could never hand out a
   # file and would run forever; use a single slot for unusable values.
   if ! [[ $threads =~ ^[0-9]+$ ]] || ((10#$threads < 1)); then
      threads=1
   fi
   threads=$((10#$threads))

   # preinitialize the slots
   SLOT=()
   for ((i = 0; i < threads; i++)); do
      SLOT[i]=-1
   done

   while ((n < NUMFILES)); do
      if [ "${DEBUG:-0}" -eq 1 ]; then
         echo "DEBUG: n = $n" >&2
      fi

      # find free slots and use them
      for ((i = 0; i < threads && n < NUMFILES; i++)); do
         if ((SLOT[i] == -1)); then
            imsProcess "Slot $i":"${FILES[n]}" &
            SLOT[i]=$!
            n=$((n + 1))
            echo "+ Slot $i taken" >&2
         fi
      done

      # everything has been handed out; the running jobs are reaped below
      ((n < NUMFILES)) || break

      # give the running jobs some time before polling them
      sleep 0.5

      # find terminated slots and tag them 'free'
      for ((i = 0; i < threads; i++)); do
         # Only probe real PIDs: for a free slot "kill -0 -1" would
         # address every process we are allowed to signal.
         if ((SLOT[i] != -1)) && ! kill -0 "${SLOT[i]}" >/dev/null 2>&1; then
            echo "+ Slot $i released" >&2
            SLOT[i]=-1
         fi
      done
   done

   # wait for remaining slots to run out; "wait" blocks until the job is
   # gone instead of polling it in a busy loop
   for ((i = 0; i < threads; i++)); do
      if ((SLOT[i] != -1)); then
         wait "${SLOT[i]}" 2>/dev/null
         echo "+ Slot $i released" >&2
         SLOT[i]=-1
      fi
   done

}

###
# imsCheckData : this function is obtaining some information of a picture
#             like height and width, which is essential for the sorting
#             procedure. It then creates the target directory and picks
#             a file name that is still free in it.
# Arguments:  $1 - path of the picture
# Globals:    IDEN, EXIF, SED, TODIR (read)
#             IMGDATA, H, W, IMG_FORMAT, IMG_YEAR, IMG_TYPE, STRIPPED,
#             S, E, I, VAR (written, intermediate results)
#             DESTINATION (written) - $TODIR/<year>/<size>/<format>
#             NEWFILENAME (written) - name to store the picture under
###
imsCheckData() {

  local image=$1
  local ph pw rest tag dir suffix

  # Instead of calling identify 5 times or more, we call it once:
  # HEIGHT:WIDTH:PAGEHEIGHT:PAGEWIDTH:FORMAT
  IMGDATA=$("$IDEN" -format "%h:%w:%H:%W:%m" "$image")

  # split the five fields; anything after the fifth colon ends up in rest
  IFS=: read -r H W ph pw IMG_FORMAT rest <<<"$IMGDATA"

  # fall back to the page geometry when the image geometry is missing
  H=${H:-$ph}
  W=${W:-$pw}

  # Year: prefer the EXIF recording date, else the file modification
  # date. exiftool prints "Tag Name : YYYY:MM:DD ..."; the sed rules keep
  # what lies between the ": " separator and the next colon.
  for tag in DateTimeOriginal FileModifyDate; do
    IMG_YEAR=$("$EXIF" "-$tag" "$image" | "$SED" -r 's|.+: ||' | "$SED" -r 's|:.+||');
    cleanVariable "$IMG_YEAR"
    IMG_YEAR="$VAR";
    if [[ $IMG_YEAR =~ ^[0-9]{4}$ ]]; then
      break
    fi
  done

  # Only four digits are accepted (the value becomes a directory name);
  # 0000 counts as "no year".
  if ! [[ $IMG_YEAR =~ ^[0-9]{4}$ ]] || [[ "$IMG_YEAR" = "0000" ]]; then
    IMG_YEAR="unknown";
  fi

  # Let's make sure we got the file format
  if [ -z "$IMG_FORMAT" ]; then
    IMG_FORMAT=$(file -b -- "$image");
  fi

  # Keep the first word only and drop all digits.
  # NOTE(review): the digits presumably stem from multi-frame images,
  # where identify repeats the format string without a separator -
  # confirm.
  read -r IMG_FORMAT rest <<<"$IMG_FORMAT"
  IMG_FORMAT=${IMG_FORMAT//[0-9]/}

  # Size class. Both sides decide first; when they fall into different
  # classes the width decides. A side of exactly 160 belongs to the
  # lower class.
  if ! [[ $W =~ ^[0-9]+$ && $H =~ ^[0-9]+$ ]]; then
    IMG_TYPE="unknown";
  else
    # force base 10, a leading zero would otherwise mean octal
    W=$((10#$W))
    H=$((10#$H))
    if ((W > 800 && H > 800)); then
      IMG_TYPE="large";
    elif ((W > 400 && H > 400)); then
      IMG_TYPE="medium";
    elif ((W > 160 && H > 160)); then
      IMG_TYPE="small";
    elif ((W <= 160 && H <= 160)); then
      IMG_TYPE="thumb";
    elif ((W > 800)); then
      IMG_TYPE="large";
    elif ((W > 400)); then
      IMG_TYPE="medium";
    else
      IMG_TYPE="small";
    fi
  fi

  # we got now the needed data of the picture, let's create needed
  # directories and give back the $DESTINATION and $NEWFILENAME vars
  for dir in "$TODIR" \
             "$TODIR/$IMG_YEAR" \
             "$TODIR/$IMG_YEAR/$IMG_TYPE" \
             "$TODIR/$IMG_YEAR/$IMG_TYPE/$IMG_FORMAT"; do
    if [ ! -d "$dir" ]; then
      # report the directory only when it really was created
      if mkdir -p "$dir"; then
        echo "+ Created $dir";
      fi
    fi
  done

  DESTINATION="$TODIR/$IMG_YEAR/$IMG_TYPE/$IMG_FORMAT";
  STRIPPED=${image##*/}

  # Split at the LAST dot so that "a.b.jpg" keeps its full stem "a.b".
  if [[ $STRIPPED == *.* ]]; then
    S=${STRIPPED%.*}
    E=${STRIPPED##*.}
    suffix=".$E"
  else
    S=$STRIPPED
    E=""
    suffix=""
  fi

  NEWFILENAME="$S$suffix";
  if [ -e "$DESTINATION/$NEWFILENAME" ] ; then

    # file exists let's add a number to the filename
    # and check whether that file exists too (we set
    # this number higher until we found a not existing
    # filename.
    echo "+ Copy detected. Renaming...";

    I=1
    while [ -e "$DESTINATION/$S.$I$suffix" ]; do
      I=$((I + 1))
    done
    NEWFILENAME="$S.$I$suffix";
    echo "+ Renamed to: $NEWFILENAME";

  fi

}

###
# imsConvertImage : stores one file at its sorted place. The file is
#             copied unchanged with "cp -a"; it is no longer run through
#             ImageMagick's convert.
# Arguments:  $1 - source file, $2 - destination file
#             or, as before, one single "SOURCE:DESTINATION" argument,
#             which is split at its first colon
# Returns:    0 when the file was copied, 1 otherwise
###
imsConvertImage() {

  local src dst

  if [ $# -ge 2 ]; then
    src=$1
    dst=$2
  else
    # old calling convention; only safe while SOURCE has no colon
    src=${1%%:*}
    dst=${1#*:}
  fi

  # -a already implies a recursive, attribute preserving copy
  if cp -a -- "$src" "$dst"; then
    echo "+ Copied file $src to $dst.";
  else
    echo "+ Copying file $src to $dst failed." >&2
    return 1
  fi

}
###
# imsProcess : handles one file from start to finish; imsMain runs it in
#             the background, once per file.
# Arguments:  $1 - "LABEL:FILE", e.g. "Slot 3:/recovered/f001.jpg". The
#             label must not contain a colon, the file name may.
# Globals:    CURRFILE (written); DESTINATION and NEWFILENAME are read
#             after imsCheckData has set them
###
imsProcess() {

  echo "+ imsProcess(): got '$1'" >&2

  # Everything after the FIRST colon is the file name, so colons inside
  # the name survive. The label itself is not needed here.
  CURRFILE=${1#*:}

  # Now we need to read out information about the graphic and we need to
  # create additional folders if they don't exist yet
  imsCheckData "$CURRFILE"

  # Now as we got the data and $DESTINATION back we can copy the file
  # into the correct and sorted directory. Source and destination are
  # passed as separate arguments so that no path has to be split again.
  imsConvertImage "$CURRFILE" "$DESTINATION/$NEWFILENAME"

}

###
# Command line ..
###

# imsParseArgs : evaluates the command line options.
# Arguments: the arguments of the script ("$@")
# Globals:   FROMDIR, TODIR, IMSTHREADS, DEBUG (each written when the
#            matching option is given)
# Exits:     with 0 after printing the help (-h), with 1 on wrong usage
imsParseArgs() {

  # getopts remembers its position in OPTIND; start at the first argument
  local OPTIND=1
  local opt

  while getopts ":f:t:p:hd" opt; do
    case "$opt" in
      f)

        # -f was triggered, let's check whether the directory exists.
        if [ -d "$OPTARG" ]; then
          FROMDIR="$OPTARG"
        else
          echo "-f was triggered, with parameter: $OPTARG" >&2
          echo "but the directory $OPTARG does not exist - exit" >&2
          exit 1
        fi
        ;;

      t)

        # -t was triggered
        TODIR="$OPTARG"
        ;;

      p)

        # -p was triggered; it has to be a number of at least 1,
        # otherwise there would be no slot to run anything in
        if [[ $OPTARG =~ ^[0-9]+$ ]] && ((10#$OPTARG >= 1)); then
          IMSTHREADS=$((10#$OPTARG))
        else
          echo "-p was triggered, with parameter: $OPTARG" >&2
          echo "but $OPTARG is not a number greater than 0 - exit" >&2
          exit 1
        fi
        ;;

      h)

        echo "Usage: imageSorting.sh [-p X] -f [DIRECTORY] -t [DIRECTORY]"
        echo "Example: imageSorting.sh -p 2 -f /recovered -t /sorted"
        echo "this would sort all images from /recovered to /sorted with"
        echo "two parallel running threads."
        echo ""
        echo "Usage:"
        echo "  -f                DIRECTORY from where to get images"
        echo "  -t                DIRECTORY where to store images"
        echo "  -p                threads 2-12 is a safe choice"
        echo "  -d                will print a lot :)"
        echo "  -h                displays this help and exit"
        echo ""
        echo "See http://jeanbruenn.info if you liked this script"
        exit 0
        ;;

      d)
        DEBUG=1
        ;;

      \?)

        echo "Invalid option: -$OPTARG try -h for help" >&2
        exit 1
        ;;

      :)

        echo "Option -$OPTARG requires an argument." >&2
        exit 1
        ;;

    esac
  done

}

imsParseArgs "$@"

# DEBUG is only set by -d; everything else expects a number
if [ -z "$DEBUG" ] ; then
   DEBUG=0
fi

# Check whether all required settings have been set:
# (quoted, so that directories containing spaces are handled as well)
if [ -z "$FROMDIR" ] || [ -z "$TODIR" ]; then
  echo "The options -f and -t are required! exit"
  exit 1
fi

if [ -z "$IMSTHREADS" ]; then
    # threads wasn't set, let's try autodetection: one thread for every
    # "processor" entry of /proc/cpuinfo. grep prints nothing when the
    # file cannot be read.
    PROCS=$(grep -c '^processor' /proc/cpuinfo 2>/dev/null);

    # never go below two threads, also when the detection failed
    if ! [[ $PROCS =~ ^[0-9]+$ ]] || (( PROCS < 2 )); then
      IMSTHREADS=2
    else
      IMSTHREADS=$PROCS
    fi
fi

###
# Start everything
###

clear

echo " ============================================================================== "
echo "     Image Sorting Utility       "
echo " ============================================================================== "
echo " : started                   "

# Let's make sure that no copy of this script is running at the same
# time - Otherwise it will probably result in many broken images.
# mkdir either creates the directory or fails, so it serves as the lock.
LOCKDIR=/var/lock/ImageSorting
if mkdir "$LOCKDIR"; then
  # From here on the lock is ours: release it however the script ends
  # (normal end, "exit" inside a function, Ctrl-C), so that a later run
  # is not blocked by a stale lock.
  trap 'rm -rf -- "$LOCKDIR"' EXIT
  trap 'exit 1' HUP INT TERM
  if [ "${DEBUG:-0}" -eq 1 ] ; then
    echo "+ locking succeeded" >&2
  fi
else
  echo "+ locking failed (imageSorting already running?) - exit" >&2
  exit 1
fi

# Let's get a filelist to work with. The names are read NUL-separated,
# which keeps every possible file name intact (even one with a newline).
echo "+ getting filelist"
FILES=()
while IFS= read -r -d ''; do
  FILES[${#FILES[@]}]="$REPLY";
done < <(find "$FROMDIR" -type f -iregex "$PATT" -print0)
NUMFILES=${#FILES[@]}

# Got the filelist - Let's start
imsMain

# Done - Finish up:
rm -rf -- "$LOCKDIR"
trap - EXIT

if [ "${DEBUG:-0}" -eq 1 ] ; then
  echo "++ removed lock"
fi

echo " =============================== "
echo "     Image Sorting Utility       "
echo " =============================== "
echo " : finished                      "

