#!/usr/bin/env bash
# Pipeline: collect per-stream URL lists (*.Urls.Txt), scrape metadata,
# download each stream, convert it to 16 kHz mono WAV, run Kaldi
# speech-to-text, then remove the temporary files.
# Depends on sibling scripts: MetadataScraper.Bsh, GraKaldiClient.sh,
# PrepareTranscription.Bsh (semantics assumed from their names — confirm).

shopt -s nullglob  # unmatched globs expand to nothing, not the literal pattern

echo "Getting streams IDs ..."
PATTERN='{"contentId":ID_PATTERN,"width":1200,"height":1920}'
#IDS=$(wget -e robots=off -q -O - "icm.tv.pionier.net.pl"|grep OnMaterialClick|grep "$ID.Urls.Txt
echo ""
# NOTE(review): the fetch loop above is commented out (and truncated), but its
# closing 'done' was left live, which aborted the whole script with
# "syntax error near unexpected token `done'". Commented out to match; restore
# it together with the loop header when re-enabling the fetch.
#done
echo "Got streams URLs ..."

# Drop empty *.Urls.Txt files (streams that yielded no usable URL).
# The original parsed 'ls -la' and matched the size column with grep "0 ",
# which also deleted files whose size merely ENDS in 0 (e.g. 120 bytes);
# 'test -s' (non-empty) is exact.
for F in *.Urls.Txt; do
  [ -s "$F" ] || rm -- "$F"
done
echo "useless ids and jsons removed"

# A stream ID is the filename up to the first dot: 1234.Urls.Txt -> 1234.
IDS=()
for F in *.Urls.Txt; do
  IDS+=("${F%%.*}")
done
echo "Clean ids list will be used. "

echo "Getting streams metadatas ..."
for ID in "${IDS[@]}"; do
  bash MetadataScraper.Bsh "$ID"
done
echo "Got streams metadatas ..."

echo "Getting streams as MP4s and converting mp4s to wavs ..."
# TODO if U need fetch all streams then remove all '#' in this section/for
for ID in "${IDS[@]}"; do
  I=0
  # Only the first URL of each list is fetched; the session ids embedded in
  # later URLs expire before we reach them (see note in the Kaldi section).
  # This assignment was commented out in the original, leaving $URL unset
  # when lynx ran — restored to match the Kaldi loop below.
  URL=$(head -n 1 "$ID.Urls.Txt")
  # for URL in $(cat "$ID.Urls.Txt"); do
  #   I=$((I + 1))
  lynx --dump "$URL" > "$ID.$I.mp4"
  # 16 kHz, mono, 16-bit PCM — the input format Kaldi expects.
  ffmpeg -i "$ID.$I.mp4" -acodec pcm_s16le -ac 1 -ar 16000 "$ID.$I.wav"
  # echo "$URL" > "$ID.usedUrl.txt"
  rm -- "$ID.$I.mp4"
  # done
done
echo "Got streams as MP4s and converted mp4s to wavs ..."

echo "Getting Speech2Txt by Kaldi ..."
# as above, those loops are needed to avoid url/session id expired exception.
for ID in "${IDS[@]}"; do
  I=0
  URL=$(head -n 1 "$ID.Urls.Txt")
  # for URL in $(cat "$ID.Urls.Txt"); do
  #   I=$((I + 1))
  bash GraKaldiClient.sh "$ID.$I"
  rm -- "$ID.$I.wav"
  bash PrepareTranscription.Bsh "$ID.$I" > "$ID.text"
  rm -- "$ID.$I.TextGrid" "$ID.$I.xml"
  # done
done

echo "removing all temporary data."
rm -f -- *.Urls.Txt
echo "All done, check directory listing: "