#!/bin/bash

# Site root; every fetched path is appended to this URL.
baseurl=http://www.rocklinux.net/typo3
PATH="$PATH:/usr/local/bin"

# Start from an empty working directory. Abort if it cannot be created or
# entered: everything below writes relative to the current directory and
# the script later steps up with "cd ../.." before moving and deleting
# directory trees, so continuing from the wrong place would be destructive.
rm -rf exported
mkdir exported || exit 1
cd exported || exit 1

# urlsubst.sed collects the sed rules that map rewritten URLs to local files.
: > urlsubst.sed

# todo.txt is the work queue: one "<referrer> <path>" pair per line.
printf '%s\n' 'init index.html' 'init error.html' > todo.txt

# Escape a literal string so it can be used as PATTERN in a sed
# "s,PATTERN,REPLACEMENT,g" rule (basic regular expressions, "," delimiter).
# Without this, characters such as ".", "*", "[" or "," inside a URL would
# be interpreted by sed instead of being matched literally.
# Arguments: $1 - literal text
# Outputs:   the escaped text on stdout
sed_quote_pattern() {
	local raw="$1" quoted="" ch i
	for (( i = 0; i < ${#raw}; i++ )); do
		ch="${raw:i:1}"
		case "$ch" in
			'\' | '.' | '*' | '[' | ']' | '^' | '$' | ',')
				quoted+="\\$ch" ;;
			*)
				quoted+="$ch" ;;
		esac
	done
	printf '%s\n' "$quoted"
}

# Print the candidate link targets found in an HTML file, one per line.
# Takes the values of src/href/lowsrc/background attributes, strips double
# quotes and "#fragment" suffixes, and drops targets that contain a "/." or
# ".." component before the query string or that start with ".", "/", "\"
# or "&". Absolute URLs are NOT filtered here.
# Arguments: $1 - path of the HTML file
extract_links() {
	tr ' \t<>' '\n\n\n\n' < "$1" |
	grep -Ei '^(src|href|lowsrc|background)=' | tr -d '"' |
	cut -f2- -d= | cut -f1 -d'#' | sort -u |
	grep -Ev '^[^\?]*[\./]\.' | grep '^[^\./&]'
}

# Breadth-first crawl: each pass fetches everything queued in todo.txt and
# queues newly discovered links in todo.new, which becomes the next pass's
# todo.txt. The loop ends when a pass discovers nothing new.
while [ -s todo.txt ]
do
	: > todo.new
	# y = page that referenced the item (or "init"), x = path to fetch.
	while read -r y x
	do
		printf 'Fetching [%s] %s ... ' "$y" "$x"
		[ "${x%/*}" != "$x" ] && mkdir -p "${x%/*}"
		# Forget the previous item's headers so a failed fetch is never
		# judged by a stale 404/Location line.
		: > http.txt
		if [ -z "${x##*\?*&type=333&*}" ]; then
			# Query URL carrying "&type=333&": stored as news.rdf.
			printf 'rewrite.\nFetching [%s] news.rdf ... ' "$y"
			if [ -e "news.rdf" ]; then
				echo "dup."
				continue
			fi
			echo "s,$(sed_quote_pattern "$x"),news.rdf,g" >> urlsubst.sed
			curl -s -D http.txt -o "news.rdf" "$baseurl/$x"
			x="news.rdf"
		elif [ -z "${x##*\?*}" ]; then
			# Any other query URL: stored under the md5 of the URL text.
			md5="$( printf '%s\n' "$x" | md5sum | cut -f1 -d' ' ).html"
			printf 'rewrite.\nFetching [%s] %s ... ' "$y" "$md5"
			if [ -e "$md5" ]; then
				echo "dup."
				continue
			fi
			echo "s,$(sed_quote_pattern "$x"),$md5,g" >> urlsubst.sed
			curl -s -D http.txt -o "$md5" "$baseurl/$x"
			x="$md5"
		elif [ -e "$x" ]; then
			echo "dup."
			continue
		elif [ -f "../rocklinux.org/$x" ]; then
			# Reuse the copy from the existing tree instead of downloading.
			ln "../rocklinux.org/$x" "$x"
			echo "Linked" > http.txt
		else
			curl -s -D http.txt -o "$x" "$baseurl/$x"
		fi
		if grep -iq '^HTTP/[^ ]* 404' http.txt
		then
			rm -f "$x" 2> /dev/null
			rmdir -p "${x%/*}" 2> /dev/null
			echo "not found."
		elif grep -iq '^location: ' http.txt
		then
			# Replace the response body with a static redirect stub.
			loc="$( grep -i '^location: ' http.txt | \
			        cut -f2- -d' ' | tr -d '\r' )"
			cat > "$x" << EOT
<html><head>
<meta http-equiv="refresh" content="0; URL=$loc">
</head><body>
Click <a href="$loc">here</a> if you are not redirected automatically.
</body></html>
EOT
			echo "redirected to $loc."
		elif [ -s "$x" ]
		then
			if [ -z "${x%%*.html}" ]
			then
				# Queue every local link of this page that is not on disk yet.
				subs=0
				while read -r fn
				do
					[ -z "${fn##*://*}" ] && continue
					[ -z "${fn##mailto:*}" ] && continue
					if ! [ -e "$fn" ]; then
						echo "$x $fn" >> todo.new
						(( subs++ ))
					fi
				done < <( extract_links "$x" )
				echo "exported (found $subs new items)."
			else
				echo "exported."
			fi
		else
			# Nothing (or an empty file) was stored; still terminate the
			# log line so the next "Fetching" entry starts on its own line.
			echo "failed (no data)."
		fi
	done < todo.txt
	sort -u < todo.new > todo.txt
done | tee export.log

# Make openPic() calls reference showpic.php on the original server
# (presumably because showpic.php itself is not part of the export).
cat >> urlsubst.sed << EOT
s,openPic('showpic.php,openPic('$baseurl/showpic.php,g
EOT

# Run every rule in urlsubst.sed over each *.html file in the current
# directory (top level only), replacing each file in place.
# NOTE(review): rules are applied in the order they were recorded; if one
# rewritten URL is a prefix of another, the shorter rule may fire first --
# confirm whether that can occur with the URLs of this site.
# Returns: 0 if every page was rewritten, 1 if sed or mv failed for any page.
apply_url_substitutions() {
	local page status=0
	for page in *.html; do
		# An unmatched glob stays literal; skip it instead of handing a
		# nonexistent "*.html" to sed and mv.
		[ -f "$page" ] || continue
		if sed -f urlsubst.sed < "$page" > "$page.new"; then
			mv -- "$page.new" "$page" || status=1
		else
			# Keep the original page rather than overwriting it with the
			# partial output of a failed sed run.
			rm -f -- "$page.new"
			status=1
		fi
	done
	return "$status"
}

apply_url_substitutions

# Serve error.html for every 404 response.
printf '%s\n' 'ErrorDocument 404 /error.html' > .htaccess

# Put a <base href="/"> line in front of the error page: build the new
# version in error.new, then move it over error.html.
{
	printf '%s\n' '<base href="/">'
	cat error.html
} > error.new
mv error.new error.html

# Remove the crawl bookkeeping files from the current directory, then
# replace ../../rocklinux.org with the finished export (../../typo3/exported).
# The old tree is only deleted once the new one is in place; when the
# export is missing or cannot be moved, the old tree is kept (or restored).
# Note: like the statements it replaces, this changes the shell's working
# directory to ../.. .
# Returns: 0 on success, 1 if the export could not be published.
publish_export() {
	rm -f todo.txt todo.new http.txt urlsubst.sed

	cd ../.. || return 1
	if [ ! -d typo3/exported ]; then
		echo "publish: typo3/exported not found; rocklinux.org left untouched." >&2
		return 1
	fi

	# Drop a leftover backup first so the rename below cannot end up
	# moving the live tree *into* an existing rocklinux.old directory.
	rm -rf rocklinux.old
	if [ -e rocklinux.org ] || [ -L rocklinux.org ]; then
		mv rocklinux.org rocklinux.old || return 1
	fi
	if ! mv typo3/exported rocklinux.org; then
		echo "publish: could not move typo3/exported into place; restoring old tree." >&2
		if [ -e rocklinux.old ] || [ -L rocklinux.old ]; then
			mv rocklinux.old rocklinux.org
		fi
		return 1
	fi
	rm -rf rocklinux.old
}

publish_export

