#! /bin/sh
# Asterisk voicemail attachment conversion script
# Revision history :
# 22/11/2010 - V1.0 - Creation by N. Bernaerts
# 07/02/2012 - V1.1 - Add handling of mails without attachment (thanks to Paul Thompson)
# 01/05/2012 - V1.2 - Use mktemp, pushd & popd
# 08/05/2012 - V1.3 - Change mp3 compression to CBR to solve some smartphone compatibility (thanks to Luca Mancino)
# 01/08/2012 - V1.4 - Add PATH definition to avoid any problem (thanks to Christopher Wolff)
# 01/06/2013 - V1.4G - Modified by DeltaEnd to provide integration with Google STT
# 01/10/2013 - V1.4G1 - Made script more robust with more error checking and retry attempts
# 01/11/2013 - V1.5G - Script now enhances the original MP3 script by fixing the pager/cell message bug that prevented short voicemail notifications from being sent.  Also changed some lines around to try to help with Pi users.
# 05/09/2014 - V1.6 - Added Nexiwave support, if Nexiwave username/passwd is passed into the script.
 
# Usage: <this script> [nexiwave_username nexiwave_password] < mail
#
# Reads one complete e-mail from stdin.  If the mail is multipart and carries
# an audio attachment in its third part, the attachment is decoded, transcribed
# (Nexiwave when both credentials are given, otherwise the local Google STT
# helper), re-encoded as MP3 and the rewritten mail is handed to `sendmail -t`.
# Mails without a boundary, mails whose first part mentions "plain", and mails
# whose conversion fails are forwarded to sendmail unmodified.
#
# Written for plain POSIX sh (no pushd/popd, [[ ]], (( )) or echo -e) so that
# it behaves the same whichever shell /bin/sh points to.

# set PATH
PATH="/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin"

# Optional Nexiwave credentials; Nexiwave is used only when both are non-empty.
nexiwave_username="${1:-}"
nexiwave_password="${2:-}"

# Directory we were started from, and the scratch directory (set below).
start_dir=$PWD
work_dir=

# Leave and delete the scratch directory.  Installed as EXIT trap so that it
# runs on every exit path.
cleanup() {
  cd "$start_dir" 2>/dev/null || cd /
  [ -n "$work_dir" ] && rm -rf -- "$work_dir"
}

# Run the Google STT helper on message.flac and print the part after the
# first ':' of the second line of its output (or of the only line, if the
# helper printed just one).
google_transcribe() {
  /usr/local/sbin/speech-recog-cli.pl message.flac | head -n 2 | tail -n 1 | cut -f 2 -d ":"
}

# Transcribe stream.part3.wav and append the result to output.txt.
# Reads nexiwave_username / nexiwave_password; writes message.flac, output.txt.
write_transcript() {
  if [ -n "$nexiwave_username" ] && [ -n "$nexiwave_password" ]; then
    # Nexiwave transcribes the full audio.
    # NOTE(review): transcriptFormat is not set anywhere in this script; it is
    # taken from the environment when present and is empty otherwise.
    # NOTE(review): the credentials are embedded in the URL, so they are
    # visible in the process list and are not URL-encoded.
    text=$(wget -q --max-redirect=500 --post-file stream.part3.wav \
      --header="Content-Type: audio/vnd.wav" -O - \
      "https://api.nexiwave.com/SpeechIndexing/file/storage/$nexiwave_username:$nexiwave_password/recording/?response=application/raw-transcript&transcriptFormat=${transcriptFormat:-}&auto-redirect=true")
    [ -n "$text" ] || text="(empty)"
  else
    # Encode only the first 15 seconds; flac fails when the recording is
    # shorter than that, in which case the whole recording is encoded.
    if ! flac --best -f --sample-rate=8000 --totally-silent --until=0:15.00 stream.part3.wav -o message.flac; then
      flac --best -f --sample-rate=8000 --totally-silent stream.part3.wav -o message.flac
    fi

    # printf instead of `echo -e`, which is not portable across sh
    # implementations.
    printf '\n\tTranscription (First 15 Seconds):\n\n' > output.txt
    text=$(google_transcribe)

    # A result that mentions message.flac is treated as a failed attempt:
    # retry up to 3 more times.
    retries=0
    while [ "$retries" -lt 3 ]; do
      case $text in
        *message.flac*) ;;
        *) break ;;
      esac
      text=$(google_transcribe)
      retries=$((retries + 1))
    done
  fi

  # If the transcription failed, use a generic failure message.
  # ("Openning" is the literal text being searched for; do not correct it.)
  case $text in
    *Openning*) text='We were unable to transcribe this message.  Please listen to it instead.' ;;
  esac

  # Collapse whitespace runs into single spaces and trim both ends.  This is
  # what the former unquoted `echo $TEXT` produced, but without exposing the
  # transcript to pathname expansion.
  flat=$(printf '%s' "$text" | tr -s ' \t\n' '   ')
  flat=${flat# }
  flat=${flat% }
  printf '%s\n' "$flat" >> output.txt
}

# Build stream.new from the split parts, replacing the WAV attachment in
# stream.part3 by an MP3 and appending the transcript to the mail body.
# Returns non-zero when the attachment cannot be decoded or encoded, so the
# caller can fall back to the original mail.
build_converted_mail() {
  [ -s stream.part3 ] || return 1

  # Split the attachment part:
  # stream.part3.wav.head   - first 6 lines (boundary line + attachment headers)
  # stream.part3.wav.base64 - remaining lines (base64 encoded WAV)
  # NOTE(review): assumes the attachment header is exactly 6 lines long.
  head -n 6 stream.part3 > stream.part3.wav.head
  tail -n +7 stream.part3 > stream.part3.wav.base64

  # convert the base64 file to a wav file
  dos2unix -o stream.part3.wav.base64
  base64 -di stream.part3.wav.base64 > stream.part3.wav
  [ -s stream.part3.wav ] || return 1

  write_transcript

  # convert wav file to mp3 file
  # -b 64 is a constant bitrate (CBR), giving better compatibility on
  # smartphones than VBR (see V1.3 in the revision history)
  lame -hm m -b 64 stream.part3.wav stream.part3.mp3 || return 1

  # convert back mp3 to base64 file
  base64 stream.part3.mp3 > stream.part3.mp3.base64 || return 1

  # generate the new mp3 attachment header
  # change Type: audio/x-wav to Type: audio/mpeg
  # change name="msg----.wav" to name="msg----.mp3" (dot escaped so that only
  # a literal ".wav" is rewritten)
  sed -e 's/x-wav/mpeg/g' -e 's/\.wav/.mp3/g' stream.part3.wav.head > stream.part3.mp3.head

  # generate first part of mail, converting it to LF only; the transcript is
  # appended to the body part first.  stream.part is absent when the
  # boundary declaration is on the very first line of the mail.
  cat output.txt >> stream.part2
  : > stream.new
  for piece in stream.part stream.part1 stream.part2 stream.part3.mp3.head; do
    [ -f "$piece" ] && cat "$piece" >> stream.new
  done
  dos2unix -o stream.new

  # append base64 mp3 to mail body, keeping CRLF
  unix2dos -o stream.part3.mp3.base64
  cat stream.part3.mp3.base64 >> stream.new

  # append end of mail (two blank lines + footer), converted to LF only
  printf '\n\n' > stream.tmp
  [ -f stream.part4 ] && cat stream.part4 >> stream.tmp
  dos2unix -o stream.tmp
  # This append was missing: without it the footer (stream.part4, the closing
  # boundary) never reached the outgoing mail.
  cat stream.tmp >> stream.new
}

# create a temporary directory and cd to it; if that is impossible, still
# deliver the mail by passing stdin straight to sendmail
work_dir=$(mktemp -d) || work_dir=
if [ -z "$work_dir" ] || ! cd "$work_dir"; then
  printf '%s\n' "${0##*/}: no scratch directory, forwarding mail unmodified" >&2
  [ -n "$work_dir" ] && rm -rf -- "$work_dir"
  exec sendmail -t
fi
trap cleanup EXIT
trap 'exit 1' HUP INT TERM

# dump the stream to a temporary file
cat > stream.org

# get the boundary: value of the first boundary= parameter, with or without
# surrounding double quotes
boundary=$(sed -n 's/.*boundary="\{0,1\}\([^";]*\).*/\1/p' stream.org | head -n 1 | tr -d '\r')

# cut the file into parts; a new part starts at every line containing the
# boundary (including the header line that declares it)
# stream.part  - header before the boundary declaration
# stream.part1 - header from the boundary declaration on
# stream.part2 - body of the message
# stream.part3 - attachment in base64 (WAV file)
# stream.part4 - footer of the message
# The boundary is matched as a literal string (index), not as a regex, and is
# handed to awk through the environment so no character in it is interpreted.
if [ -n "$boundary" ]; then
  MAIL_BOUNDARY=$boundary awk '
    BEGIN { marker = ENVIRON["MAIL_BOUNDARY"] }
    index($0, marker) { part++ }
    { print > ("stream.part" part) }
  ' stream.org
fi

if [ -z "$boundary" ] || { [ -f stream.part1 ] && grep -q 'plain' stream.part1; }; then
  # mail has no audio attachment (plain text): send the original stream
  cat stream.org > stream.new
elif ! build_converted_mail; then
  # conversion failed: send the original mail rather than a broken one
  printf '%s\n' "${0##*/}: attachment conversion failed, forwarding mail unmodified" >&2
  cat stream.org > stream.new
fi

# send the mail thru sendmail; its status becomes the script's exit status.
# The temporary directory is removed by the EXIT trap.
sendmail -t < stream.new