#!/bin/bash
 

#d=1			# debug (uncomment to keep intermediate files)

# Number formatting used for segment file names.
pf="%08d\n"                   # printf format: 8 digits, zero padded
z=$(printf "$pf" 0)           # the number 0 rendered in that format
max=${z//0/9}                 # largest number representable in that format
live=0

# A URL in the first position means the target file name was forgotten.
case $1 in
  *"://"*)
    echo
    echo "  ERROR: A target file must preceede the URL."
    echo
    exit 0
    ;;
esac

# Target file name (first parameter); every character outside
# A-Z a-z 0-9 . _ - is replaced by an underscore.
f=${1//[^A-Za-z0-9._-]/_}


# Name of this script: full base name, its extension and its stem.
bn=$(basename -- "$0")
ext=${bn##*.}
fn=${bn%.*}


# Create the helper script.
# It lets the user finish muxing by hand after an aborted download; further
# lines (variable assignments and the actual function calls) are appended to
# it later by the download loop.
# important: define the executable temporary script filename ($hs)
g="0"; # temp-filename head
h="-ffmpeg"; # temp-filename tail
hs=$g$f"-"$fn$h
hsA=$g$f"A-"$fn$h

# The file is written through ONE quoted redirection so that an unusual
# script name (whitespace, glob characters) can neither split the target nor
# cause an "ambiguous redirect". The generated text itself is unchanged.
{
  # Part 1: $hs and $f are expanded now, while generating.
  cat <<EOF
#!/bin/bash
#
# This file ($hs) was generated automatically and is
# solely intended to manually finish media muxing in case you aborted
# the download process. It should delete itself upon completion but
# if muxing has already completed, you can safely remove it.

function ffrun-$f {
EOF
  # Part 2: literal text, expanded only when the helper itself runs.
  cat <<'EOF'
ffmpeg -f concat -i $f-ffmpeg.cat -err_detect ignore_err -c copy $f.$x -y 2>&1
}
EOF
  printf 'i=%s\n' "$f"
  cat <<'EOF'
function cleanup {
if [ -z ${d+x} ] || [ $d"x" == "x" ];
then
    if [ -f $i$z ]; then rm $i$z > /dev/null 2>&1; fi
    if [ -f $i-ffmpeg.cat ];
    then
	for a in $(cat $i-ffmpeg.cat);
	do
	  if [ $a == "file" ];
	  then continue
	  else
	    c="'"
	    b=$(echo ${a/$c/})
	    b=$(echo ${b/%$c/})
	    if [ $b != $f ] || [ $live == "mp4" ] || [ $live == 2 ];
	    then
	      rm "$b" > /dev/null 2>&1
	    fi
	  fi
	done
  fi
  if [ -f $i-ffmpeg.cat ]; then rm $i-ffmpeg.cat 2>&1; fi
  rm $0 2>&1
fi
}

EOF
} > "$hs"

# and give it the flag to be executable by the same user
chmod u+x "$hs"


#######################################
# Abort the script.
# Removes leftover files matching a glob derived from $fn, $g and $h (only
# regular files whose name contains "yt_streamcat" and that are neither this
# script's base name nor $0) and then terminates the script.
# Globals:   fn, g, h, bn (read)
# Arguments: none
# Returns:   never returns; exits with status 0
# NOTE(review): status 0 is kept for backward compatibility although every
# visible caller is an error path.
#######################################
function quit {
  local f_ a
  f_=${fn//[^$g]/*}$h
  # safety belt for a risky operation !!!
  # we do not remove anything if f_ starts with * , that is fn was defined
  # problematic !!!
  if [ "${f_:0:1}" != "*" ]; then
    # Unquoted on purpose: $f_ is a glob pattern that must be expanded.
    for a in $f_; do
      # only remove files with names that contain the basename of this file
      # but are not the same
      if [[ -f "$a" && "$a" == *"yt_streamcat"* && "$a" != "$bn" && "$a" != "$0" ]]; then
        rm -- "$a" > /dev/null 2>&1
      fi
    done
  fi
  # Always terminate. The exit used to live inside the guard above, so for
  # nearly every script name quit simply returned and the script carried on
  # after a fatal error.
  exit 0
}


#######################################
# Tell the user about the generated helper script.
# Prints nothing unless $hs names an existing regular file.
# Globals:   hs (read)
# Outputs:   notice on stdout
#######################################
function msg {
  # Quoted test: an empty $hs used to collapse into `[ -f ]`, which is true,
  # so the notice was printed without any helper script existing.
  if [[ -f "$hs" ]]; then
    printf '%s\n' \
      "" \
      "  IMPORTANT:" \
      "    If you abort the download process (by hitting <Ctrl-C> for instance)," \
      "    you can finish media muxing quickly with the temporary script that has" \
      "    just been created for the current download:" \
      "        ./$hs" \
      "    Notice: it is not the same that's running right now and will delete itself" \
      "    once the task is completed." \
      ""
  fi
}


# Print the command line help; reads $0 and $max.
function usage {
  printf '%s\n' \
    "  Script to download and concatenate YouTube stream fragments into one file." \
    "  Requires the binary program wget (and preferably ffmpeg for live streams)" \
    "  to be available and executable by the user." \
    "" \
    "  USAGE:" \
    "  $0 <target file> <stream base URL> [start] [end] <2nd stream URL>" \
    "" \
    "	target file	must not exist (mandatory)" \
    "	stream base URL of a suitable YT media source (mandatory between quotes)" \
    "	start		number of starting segment (optional, default: 1)" \
    "	end		number of ending segment (optional, default: $max)" \
    "	2nd stream URL	to get the audio as a secondary stream (optional)" \
    ""
}


#######################################
# cleanup of temp files
# For the primary ($f) and secondary ($fA) target, deletes every file listed
# in "<target>-ffmpeg.cat" (the target itself only when $x is "aac" or
# "mp4"), then the list file, and finally the helper scripts $hs and $hsA.
# Does nothing when the debug flag $d is set to a non-empty value.
# Globals:   d, f, fA, x, hs, hsA (read)
# Arguments: none
# Returns:   0
#######################################
function cleanup {
  # local: the old version overwrote the caller's loop counter $i
  local i b line

  # `[ $d == "" ]` was a syntax error for an empty $d, which skipped the
  # cleanup; treat unset and empty alike.
  if [[ -n "${d-}" ]]; then
    return 0
  fi

  # The old `for i in [ $f $fA ]` also iterated over the literal brackets.
  for i in "$f" "$fA"; do
    [[ -n "$i" ]] || continue
    [[ -f "$i-ffmpeg.cat" ]] || continue

    # Entries look like:  file 'name'
    while IFS= read -r line || [[ -n "$line" ]]; do
      [[ "$line" == "file "* ]] || continue
      b=${line#file }
      b=${b#\'}
      b=${b%\'}
      [[ -n "$b" ]] || continue
      if [[ "$b" != "$i" || "$x" == "aac" || "$x" == "mp4" ]]; then
        rm -f -- "$b"
      fi
    done < "$i-ffmpeg.cat"

    rm -f -- "$i-ffmpeg.cat"
  done

  if [[ -n "$hs" && -f "$hs" ]]; then rm -f -- "$hs"; fi
  if [[ -n "$hsA" && -f "$hsA" ]]; then rm -f -- "$hsA"; fi
  return 0
}


# Validate the target file name: it must be given, must not look like a URL,
# and neither it nor one of its possible muxed outputs may already exist.
# `[[ ]]` is required for the "://" test: inside `[ ]` the right-hand side is
# not treated as a pattern, so the old check could never match.
if [[ -z "$f" || "$f" == *"://"* ]]; then
  usage
  echo "  ERROR: You must specify a target file."
  quit
elif [[ -f "$f" || -f "$f.mp4" || -f "$f.webm" || -f "$f.aac" ]]; then
  echo ""
  echo "  WARNING: Target file \"${f}[.mp4|.webm|.aac]\" exists! Will not overwrite."
  echo "  Move it or specify a different target file name."
  echo ""
  quit
fi


# Collect and validate the stream URLs.
#   urls[0] / urlV : primary stream (2nd parameter, mandatory)
#   urls[1] / urlA : secondary stream (5th parameter, optional)
# Also derives:
#   live : set to 1 when a URL mentions "yt_live_broadcast"
#   sq   : how a sequence number is attached to the primary URL
#          ("&sq=" for query style, "/sq/" for path style)
declare -a urls
urls[0]="$2"
urls[1]="$5"

for url in "${urls[@]}"; do

  # Reject a missing primary URL, and any https URL that is not a googlevideo
  # playback URL. The operands of the "https://" test used to be swapped
  # (pattern on the left, quoted URL on the right), so it could never match.
  if [[ "$url" == "$2" && -z "$url" ]] \
    || [[ "$url" == "https://"* && "$url" != *"googlevideo.com/videoplayback"* ]]; then
    usage
    echo "  ERROR: No valid YouTube stream base URL was given."
    quit
  else
    if [[ "$url" == *"yt_live_broadcast"* ]]; then
      live=1
    fi
    if [[ "$url" == "$2" ]]; then urlV=$url; fi
    # Only define urlA when a secondary URL was really given; an empty 5th
    # parameter used to set urlA to "", which later code took for "present".
    if [[ -n "$5" && "$url" == "$5" ]]; then urlA=$url; fi

    if [[ "$url" == "$2" ]]; then
      if [[ "$url" == *"source=yt_otf"* || "$url" == *"source=yt_live_broadcast"* ]]; then
        sq="&sq="
      elif [[ "$url" == *"source/yt_otf"* || "$url" == *"source/yt_live_broadcast"* ]]; then
        sq="/sq/"
      else
        echo
        echo
        echo "  ERROR: This script can only be used on YouTube media URLs which point to"
        echo "         no contiguous media file -- the URL's \"source\" parameter can have the"
        echo "         value \"yt_otf\" or \"yt_live_broadcast\" (but NOT \"youtube\")."
        quit
      fi
    fi
  fi

done



echo ""



# Find out where this script really lives, following symbolic links.
# https://stackoverflow.com/questions/59895/how-to-get-the-source-directory-of-a-bash-script-from-within-the-script-itself
SCRIPT="${BASH_SOURCE[0]}"
while [[ -L "$SCRIPT" ]]; do # keep resolving until $SCRIPT is no longer a symlink
  TARGET=$(readlink "$SCRIPT")
  case $TARGET in
    /*)
      echo "  Script '$SCRIPT' is an absolute symlink to '$TARGET'"
      SCRIPT=$TARGET
      ;;
    *)
      DIR=$(dirname "$SCRIPT")
      echo "  Script '$SCRIPT' is a relative symlink to '$TARGET' (relative to '$DIR')"
      # a relative link is resolved against the directory holding the link
      SCRIPT="$DIR/$TARGET"
      ;;
  esac
done
echo "  Script is '$SCRIPT'"
RDIR=$(dirname "$SCRIPT")
DIR=$(cd -P "$RDIR" >/dev/null 2>&1 && pwd)
if [[ "$DIR" != "$RDIR" ]]; then
  echo "  DIR '$RDIR' resolves to '$DIR'"
fi
echo "  DIR is '$DIR'"


echo ""
echo "  Target file is: '$f'"


# Sequence range. Both values are validated BEFORE any arithmetic is done on
# them, and are read as base 10 so that leading zeros ("08") are accepted.
re='^[0-9]+$'

# define the starting sequence number (default and minimum: 1)
start=$3
if [[ -z "$start" ]]; then
  start=1
elif ! [[ "$start" =~ $re ]]; then
  echo "  The tird parameter must be the number of the seqence to start with."
  start=1
fi
start=$((10#$start))
if (( start == 0 )); then start=1; fi


# define the ending sequence number (default: $max, the largest number the
# segment numbering format can hold); a value below the start is taken as a
# count and added to the start
end=$4
if [[ -z "$end" ]]; then end=$max; fi
if [[ "$end" =~ $re ]]; then
  end=$((10#$end))
  if (( end < start )); then end=$((start + end)); fi
else
  echo "  The fourth parameter must be the number of the seqence to end with."
  end=$max
fi


# secondary url
#urlA=$5
#if [ $dur"x" == "x" ]; then dur=5; fi
#if ! [[ $dur =~ $re ]] || [ $dur -lt 5 ];
#then
#  echo "  The fifth parameter must be the number of seconds (at least 5) defining each segment's duration."
#  dur=5
#fi

echo "  Start segment: $start"
echo "  End segment: $end"
#echo "  Segment duration: "$dur


msg

echo "  Starting download process now..."

# download the initial (zero) segment which is the media file header
# always needed
# All URLs and file names are quoted: URLs contain glob characters such as
# "?" and must reach wget as exactly one, unmodified argument.
url=$urlV
if [[ "$live" == 0 ]]; then url="${urlV}${sq}0"; fi

# First a download into "<target>header" under a soft file-size limit of 2
# blocks that applies to the subshell only ...
(ulimit -S -f 2; wget "$url" -O "${f}header") > /dev/null 2>&1
ulimit -S -f unlimited

# ... then the real zero segment.
wget "$url" -O "$f$z" > /dev/null 2>&1

# wget exit status 8: the server issued an error response
if [[ $? -eq 8 ]]; then error=1; fi


if [[ "$error" == "1" ]]; then
  # retry with the bare base URL
  if wget "$urlV" -O "$f$z" > /dev/null 2>&1; then
    live=1
  else
    if [[ -f "$f$z" && ! -s "$f$z" ]]; then
      rm -- "$f$z" 2>&1
      echo
      echo "  ERROR: No media could be found at the given URL at this time."
      echo "         (It may be caused by a temporary (YT) network problem.)"
    fi
    quit
  fi
fi

# Decide the download mode from the last character of the primary URL.
#   live=2  the URL ends with the marker character: try to get the "live"
#           segments of a live broadcast
#   live=3  no marker: the streaming has already stopped, or older segments
#           of a live stream are wanted (not all live streams allow seeking
#           backward); the source is then treated as if it were yt_otf

# the character to add manually (and look out for at the end) of the url
y="_"
# index of the url's last character
urlc=$(( ${#urlV} - 1 ))
case "${urlV:$urlc:1}" in
  "$y") live=2 ;;
  *) live=3 ;;
esac


# secondary stream: fetch its zero segment into "<target>A<zero number>"
# The test is on a NON-EMPTY urlA; `${urlA+x}` also accepted a variable that
# was set to the empty string and then ran wget without a usable URL.
if [[ -n "${urlA-}" ]]; then
  fA="${f}A"
  if [[ "$live" == 0 ]]; then
    wget "${urlA}${sq}0" -O "$fA$z" > /dev/null 2>&1
  else
    wget "$urlA" -O "$fA$z" > /dev/null 2>&1
  fi
fi


# Choose the file to read stream metadata from: the size-limited "header"
# download when it exists and is non-empty, otherwise the secondary stream's
# zero segment, and if that file does not exist the primary zero segment.
if [[ -f $f"header" && -s $f"header" ]];
then header=$f"header"
else header=$fA$z
fi
if ! [ -f $header ]; then header=$f$z; fi
# read current segment number and duration from file header
if [ -f $header ];
then
  # from here on $header holds the file CONTENT: NUL bytes are dropped, then
  # every character that is not a letter, a digit or "-"
  header=$(tr -d '\0' <$header)
  header=${header//[^A-Za-z0-9-]/}
  # onum: what remains after cutting everything up to "First-Frame-Time-Us"
  # and everything from "First-Frame-Uncertainty-Us" onwards
  onum=$(echo ${header/#*First-Frame-Time-Us/})
  onum=$(echo ${onum/%First-Frame-Uncertainty-Us*/})
#  onum=$(echo ${header/#*Finalized-Sequence-Number/})
#  onum=$(echo ${onum/%Finalized-Media-End-Timestamp-Us*/})
  # odur: what remains after cutting everything up to "Target-Duration-Us"
  # and everything from the next "First" onwards
  odur=$(echo ${header/#*Target-Duration-Us/})
  odur=$(echo ${odur/%First*/})
  # $header is reused below as a flag ("new" = the range was moved)
  header=""
  # a start beyond onum is moved to onum-1; the end is moved along so that
  # the distance between start and end stays the same
  if [[ $onum =~ $re && "$start" -gt "$onum" ]]; # && [ $live == 2 ];
  then
    header="new"
    end=$(($end - $start))
    start=$((onum-1))
    #if [ $start -gt $end ];
    #then
      end=$((start + $end))
    #fi
  fi
  # collect the characters of odur up to (not including) its first "0"
  # NOTE(review): a value such as "10000000" yields "1" -- confirm intended
  for (( i=0; i<${#odur}; i++ )); do
    if [ ${odur:$i:1} == 0 ];
    then break
    else ndur=$ndur${odur:$i:1}
    fi
  done
  dur=$((ndur))
  # report what was found; nothing is printed when dur evaluates to 0
  if [ $dur != 0 ];
  then
    echo "  Latest segment number: "$onum
    echo "  Segment duration: "$dur" sec"
  if [ "$header" == "new" ];
    then
    echo
    echo "  Start segment was out of reach, now set to: "$start
    echo "  End segment set to: "$end
  fi
    echo
  fi

fi

# the size-limited download is not needed any more
if [ -f $f"header" ]; then rm $f"header"; fi


# initiate the target file(s): the downloaded zero segment becomes the start
# of the primary target and, when a secondary stream is in use, of its
# target as well. It is moved normally, and copied when the debug flag $d is
# set so that the numbered zero segment stays around.
# Names are quoted and an undefined secondary target is skipped; before,
# `mv $fA$z $fA` collapsed into a one-argument mv whose error was hidden.
for _target in "$f" "$fA"; do
  [[ -n "$_target" ]] || continue
  if [[ -z "${d+x}" ]]; then
    mv -- "$_target$z" "$_target" > /dev/null 2>&1
  else
    cp -- "$_target$z" "$_target" > /dev/null 2>&1
  fi
done
unset _target



# ffmpeg cannot handle a header-only file, so start with an empty file-list
# (a single empty line)
printf '\n' > $f-ffmpeg.cat


# if wget is localized, we cannot extract data from its non-English response,
# so set language to English temporarily; the old value is kept in OLANG
OLANG=$LANG
LANG=en_US.UTF-8
export LANG


if [ $live != 0 ]; then li=$i; fi


# state used by the download loop
sdur=$dur    # segment duration as determined before the loop
pl=0         # length of the previous segment
vminus=0     # numbering correction, primary stream
vAminus=0    # numbering correction, secondary stream

# get the file segments
# Main download loop. For every sequence number i from $start to $end
# (inclusive) the inner loop walks over both entries of urls[] (primary = $2,
# secondary = $5): the segment is probed with "wget --spider", downloaded,
# and recorded in the matching "<target>-ffmpeg.cat" list.
# State shared between passes: stalled, quit_loop, m, x, xA, xB, acu, fl,
# num, pl, dur, vminus, vAminus.
for (( i=$start; i<$(expr $[end+1]); i++ ))
do

  # set further down when the spider request fails
  if [ "$quit_loop" == "1" ]; then break; fi

  for url in "${urls[@]}"
  do

    # Stall handling: $stalled holds the media type ($m) of a stream whose
    # segment came back empty on an earlier pass.
    # NOTE(review): while both $stalled and $m are unset the comparison is
    # true as well; that case just falls through to "unset stalled".
    if [ "$stalled" == "$m" ];
    then
      if [ "$stalled" == "video" ];
      then
	# repeat the same sequence number on the next outer pass
	i=$((i-1)); unset stalled; break
	#url=$urlV
	#vAminus=$((vAminus+1))
      else
	#if [ "$stalled" == "video" ];
	#then
	  #url=$urlV
	  #vminus=$((vminus+1))
	#fi
	if [ "$stalled" == "audio" ];
	then
	  url=$urlA
	  vAminus=$((vAminus+1))
	fi
      fi
      unset stalled
    fi

    # still flagged (the flag did not match $m above): re-tag it with the
    # current media type and leave the inner loop
    if [ ! -z ${stalled+x} ]; then stalled=$m; break; fi

#  for (( j=$i; j<$(expr $[$i+1]); j++ ))
#  do
    # n: number used for the segment's file name = i minus the correction
    # counter of the stream being handled; an empty URL, or one that is
    # neither $2 nor $5, ends the inner loop
    n=$i; #echo $n  $vminus $vAminus
    if [ "$url" == "$2" ];
    then
      if [ "$url" != "" ];
      then n=$((n - $vminus))
      # NOTE(review): the second break can never be reached
      else break; break;
      fi
    else
      if [[ "$url" == "$5" && "$url" != "" ]];
      then n=$((n - $vAminus))
      else break
      fi
    fi


    # the sequence number is appended to the URL unless live is 2 and the
    # start differs from $onum
    if [ $live != 2 ] || [ "$start" == "$onum" ];
    then
      #n=$i
      segment=$url$sq$i
    else
      #n=$i
      segment=$url
    fi


    l=0


    # spider-check if upcoming segment length is non-zero
    # ($l first receives wget's complete response text)
    l=$(wget $segment --spider --server-response -O - 2>&1)
    if [ $? == 0 ]; 
    then

      # wget executed properly

      # read file container type
      # NOTE(review): $t is not assigned anywhere in the visible code, so
      # this branch appears to run on every pass -- confirm
      if [ -z ${t+x} ];
      then
	# the type is taken from the "[video/...]" / "[audio/...]" part of the
	# response: x for the primary stream, xA for the secondary one
	if [[ $l == *"[video/"* ]];
	then
	  m="video"
	  if [ "$url" == "$2" ];
	  then
	    x=$(echo ${l/#*\[video\//})
	    x=$(echo ${x/%\]*/})
	  else
	    xA=$(echo ${l/#*\[video\//})
	    xA=$(echo ${xA/%\]*/})
	  fi
	else if [[ $l == *"[audio/"* ]];
	then
	  m="audio"
	  if [ "$url" == "$2" ];
	  then
	    x=$(echo ${l/#*\[audio\//})
	    x=$(echo ${x/%\]*/})
	  else
	    xA=$(echo ${l/#*\[audio\//})
	    xA=$(echo ${xA/%\]*/})
	  fi
	  # an audio stream reported as mp4 is given the extension aac
	  if [ "$url" == "$2" ] && [ "$x" == "mp4" ]; then x="aac"; fi
	  if [ "$xA" == "mp4" ]; then xA="aac"; fi
	fi
	# once only (guarded by $acu): as soon as xA is known, append the line
	# that muxes the secondary stream to the helper script
	if [ -z ${acu+x} ] && ! [ -z ${xA+x} ];
	then
	  acu=1
	  echo 'xA="'$xA'"; if [ -f $fA ]; then ffmpeg -f concat -i $fA-ffmpeg.cat -err_detect ignore_err -c copy $fA.$xA -y 2>&1; i=$fA; cleanup; fi; pkill yt_streamcat' >> $hs
	fi
	# closes the "else if" opened above
	fi
      fi



      # pass required variables and instructions to clean up to the helper script
      # (primary stream only, on the pass where i equals $start)
      if [ "$url" == "$2" ] && [ -f $hs ] && [ $i == $start ];
      then echo 'x="'$x'"; d="'$d'"; z="'$z'"; f="'$f'"; fA="'$fA'"; live='$live'; ffrun-$f; cleanup' >> $hs
      fi


      # read file length
      # (the word following "Length: " in the response, or 0 if absent)
      if [[ $l == *"Length: "* ]];
      then
	l=$(echo ${l/#*Length: /})
	l=$(echo ${l/% */})
      else
	l=0
      fi

      # if length is zero, we may have passed the end of the stream (?)
      if [ $l == 0 ];
	then
	  break
      fi


    else #wget failed
      # stop the outer loop at the top of its next pass
      quit_loop=1
      break
    fi




      # output file: primary target, or the secondary one for the $5 URL
      out=$f
      if [ "$url" == "$5" ]; then out=$fA; fi


      # download a segment && [ $live != 2 ] && [ "$x" != "mp4" ] && [ "$x" != "aac" ]  && [ "$xA" != "mp4" ] && [ "$xA" != "aac" ]
      # without the debug flag, a video stream of type webm is appended
      # straight to the target; everything else is also kept as a separate
      # numbered file
      if [ -z ${d+x} ] && [[ ( "$m" == "video" && "$x" == "webm" ) ]];# || ( $live != 2 ) ]];# || ( [ "$m" == "audio" ] && [ "xB" != "aac" ] ) ]] ;##[
      then
	wget $segment -q -O - >> $out
      else
        # prepare to save segments into separate files with formatted numbering
	num=$(printf $pf $n)

	wget $segment -q -O - | tee $out$num -a $out > /dev/null 2>&1

	# needed for ffmpeg processing
	# attach initial header to each segment
	# (as ffmpeg cannot handle headerless raw data)
	  #cat $f$z $f$num > $f$num"_" 2>&1
	  #mv $f$num"_" $f$num 2>&1
	  # echo file \'`echo $f$num`\' >> $f-ffmpeg.cat

	# result: ffmpeg would in the end produce 
	# playable but faulty media files this way

      fi

      # fl: running size of the output file; initialised once from the
      # first field of the "ls" output
      if [ -z ${fl+x} ];
      then
	fl=$(ls -Ss1pq --block-size=1 $out)
	fl=$(echo ${fl/% */})
      fi



    # check if current segment file size is non-zero
    # NOTE(review): $num is only assigned in the numbered-file branch above;
    # after a direct append it keeps its earlier value (or is empty)
    if [[ -s $out$num ]];
    then
	  # NOTE(review): xB is assigned a few lines below, so this test sees
	  # the value left by the previous pass
	  if [ "$xB" != "webm" ];# "#[ $live == 2 ];# || [ $live == 3 ];#[ $live == "mp4" ] || 
	  then
	    if [[ ! ( $l =~ $re ) && ! ( "$url" == "$2" && "$x" == "webm" ) ]]; # if wget could not see data length
	    then		  # "ls" the file and read it from the output
	      l=$(ls -Ss1pq --block-size=1 $out$num)
	      l=$(echo ${l/% */})
	    fi
	    # add the numbered segment to the ffmpeg list
	    echo file \'`echo $out$num`\' >> $out-ffmpeg.cat
	  else
	    # the list is replaced by a single entry: the output file itself
	    echo file \'`echo $out`\' > $out-ffmpeg.cat
	  fi

      # xB: container type of the stream handled in this pass
      if [ "$url" == "$2" ];
      then xB=$x;
      else
	if [ "$url" == "$5" ]; then xB=$xA; fi
      fi

      if [ ! -z ${d+x} ]; # display where the processing stands
      then echo "    $m/$xB file: $out$num / $end  length: $l"
      else echo "    $m/$xB segment $i / $end  length: $l"
      fi

      fl=$((fl + $l))
    else
      # the numbered segment file is missing or empty
      #if [ $i == $start ] || [ $i == $(expr $[start+1]) ];
      if [ $live -ge 2 ];
      then # zero length first segment, could not download this time
	if [[ $url == *"yt_live_broadcast"* ]];
	then
	  #dur=0
	  #l=0

	  # everything except a primary webm stream is flagged as stalled
	  if ! [[ "$url" == "$2" && "$x" == "webm" ]];
	  then
	    echo "    yt_live_broadcast stalled"
	    stalled=$m
	  else
	    unset stalled
	    if [ ! -z ${d+x} ];
	    then
	      echo "    $m/$x file: $out$num  length: $l"
	    else
	      # segment length = absolute difference between the current
	      # size of the output file and the running total $fl
	      l=$(ls -Ss1pq --block-size=1 $out)
	      l=$(echo ${l/% */})
	      if [ "$fl" -gt "$l" ];
	      then
		l=$((fl - $l))
	      else
		l=$((l - $fl))
	      fi
	      fl=$((fl + $l))
	      echo "    $m/$x segment $i / $end  length: $l"
	    fi
	  fi

	  # drop the empty numbered file (never the primary target itself)
	  if [ -f $out$num ] && [ $out$num != $f ];
	  then
	    rm $out$num 2>&1
	  fi
	  #if [ "$url" == "$2" ];
	  #then vminus=$((vminus+1))
	  #else
	    #if [ "$url" == "$5" ];
	    #then vAminus=$((vAminus+1));
	    #fi
	  #fi

	  # a stalled stream skips the waiting logic below
	  if [ "$stalled" == "$m" ];
	  then
	    #i=$((i-1))
	    continue
	  fi

	else
	  # not a live broadcast URL: remove the temporary files and abort
	  cleanup
	  quit
	fi
      else
	break; # otherwise we are done
      fi
    fi

	# pacing: for mp4/aac while live is 2, sleep after the primary stream
	# ($dur seconds if dur > 2, one second if dur is exactly 2, not at
	# all otherwise), then restore dur from sdur
	if [ $x == "mp4" ] || [ $x == "aac" ];# [ $live == "mp4" ];
	then
	  if [[ $live == 2 ]];#"$stalled" == '1' || 
	  then
	    #if [ $pl -gt 0 ] && [ $pl == $l ];
	    #then
	      #i=$((i-1)); dur=1;fi;#continue
	    #else
	      # we need to wait at the end of the most current stream
	      if [ "$url" == "$2" ];
	      then 
		if [ "$dur" -gt "1" ];
		then
		  if [ "$dur" -gt "2" ];
		  then sleep $((dur))
		  else sleep 1
		  fi
		fi
		dur=$sdur
		# previous length = current length for the next cycle
		pl=$l
	      else sleep 0
	      fi
	    #fi
	  fi
	fi
#  done

  done

done


# write back the original language setting
export LANG=$OLANG


# attach the contiguous raw stream to the header
# this may be a playable file already
#if [ ! -z ${d+x} ];
#then
#  cat $f >> $f$z"_" 2>&1
#  mv $f$z"_" $f 2>&1
#else
#  cat $f >> $f$z 2>&1
#  mv $f$z $f 2>&1
#fi


# prepare the assembled file for ffmpeg
# With a known container type other than aac/mp4 and live not equal to 2,
# the list consists of the assembled target only. In every other case a
# non-empty zero segment is renamed to the number of the start segment,
# unless such a non-empty file already exists.
if [[ -n "$x" && "$live" != 2 && "$x" != "aac" && "$x" != "mp4" ]]; then
  printf "file '%s'\n" "$f" > "$f-ffmpeg.cat"
else
  first_segment="$f$(printf "$pf" "$start")"
  if [[ -s "$f$z" && ! -s "$first_segment" ]]; then
    mv "$f$z" "$first_segment"
  fi
fi


#######################################
# Mux the downloaded data with ffmpeg's concat demuxer.
# The primary list "$f-ffmpeg.cat" is written to "$f.$x"; when a secondary
# target and its list exist, "$fA-ffmpeg.cat" is written to "$fA.$xA".
# Globals:   f, x, fA, xA (read)
# Outputs:   ffmpeg's messages (stderr merged into stdout)
# Returns:   0 if every ffmpeg run succeeded, otherwise the status of the
#            last failing run. The old version returned the status of its
#            trailing `if`, i.e. 0 whenever there was no secondary stream,
#            so the caller cleaned up even after a failed mux.
#######################################
function ffrun {
  local rc=0

  ffmpeg -f concat -i "$f-ffmpeg.cat" -err_detect ignore_err -c copy "$f.$x" -y 2>&1 || rc=$?

  if [[ -n "$fA" && -f "$fA" && -f "$fA-ffmpeg.cat" ]]; then
    ffmpeg -f concat -i "$fA-ffmpeg.cat" -err_detect ignore_err -c copy "$fA.$xA" -y 2>&1 || rc=$?
  fi
  # command line for live streaming (does not seem to work)
  # ffmpeg -i $manifest_file -c copy $f.ts

  return "$rc"
}

# Final step: mux with ffmpeg (only really needed for live streams) and
# remove the temporary files once ffrun reports success.
if ffrun; then
  cleanup
fi
