Cào Đọc báo trực tuyến

Trông ngứa mắt, nghịch tí chơi. Tranh thủ ôn lại tí bài cào bới… 😛

#!/bin/sh
# Download every page image of an issue published on docbao.dec.vn.
#
# Usage: sh <this-script> ISSUE_URL
#
# Steps:
#   1. Prompt for a new directory name, create it and change into it.
#   2. Fetch ISSUE_URL and extract "<page id>_<page number>" pairs from the
#      <option> elements of the page into index.txt.
#   3. Download each page image as <page number>.jpg.
#   4. Optionally write index.html with one link per downloaded page.
#
# Requires: wget, grep, sed, tr.
# Exit status: 0 on success, 1 on failure, 2 on wrong usage.

set -u

# Base URL of the image endpoint; a page id and "&type=d" are appended to it.
readonly imgpage='http://docbao.dec.vn/show_image.aspx?pid='

if [ "$#" -lt 1 ] || [ -z "$1" ]; then
	printf 'Usage: %s URL\n' "${0##*/}" >&2
	exit 2
fi
url=$1

echo 'New directory name:'
newdir=
read -r newdir
if [ -z "$newdir" ]; then
	echo 'Directory name must not be empty.' >&2
	exit 1
fi
if [ -d "$newdir" ]; then
	echo "'$newdir' already existed. Please try another name." >&2
	exit 1
fi
mkdir -- "$newdir" || exit 1

# Remember where we started so the new directory can be removed on failure.
startdir=$(pwd) || exit 1
cd -- "$newdir" || exit 1

# Remove the freshly created directory after a failure. The script has
# already changed into it, so return to the starting directory first;
# otherwise a relative $newdir would be resolved inside itself and nothing
# would be deleted.
discard_newdir() {
	cd -- "$startdir" && rm -rf -- "${newdir:?}"
}

if ! wget -O source.html "$url"; then
	echo 'There is something wrong. Please try again.' >&2
	discard_newdir
	exit 1
fi

# Page 1 is the pre-selected <option>; record it as "<id>_1".
# Carriage returns are stripped so they do not end up in ids or file names.
grep -E 'selected="selected" value=".*">1<' source.html |
	sed -E 's#^.* selected="selected" value="([a-f0-9]+)">1</option>#\1_1#g' |
	tr -d '\r' > index.txt

# No first page found means the URL was not a usable issue page.
if [ ! -s index.txt ]; then
	echo 'There is something wrong. Please try again.' >&2
	discard_newdir
	exit 1
fi

# Remaining pages: every non-selected <option value="<id>"><number></option>.
grep -E '<option value="[a-f0-9]+">[0-9]' source.html |
	sed -E 's#^.*value="(.*)">(.*)</option>#\1_\2#g' |
	tr -d '\r' >> index.txt

rm -f source.html

# Each index line is "<id>_<page>"; split on the last underscore.
# read uses the default IFS on purpose so surrounding whitespace is trimmed.
while read -r entry; do
	[ -n "$entry" ] || continue
	pid=${entry%_*}
	page=${entry##*_}
	# Best effort: report a failed page but keep downloading the others.
	# stdin is detached so wget can never consume the index being read.
	wget -O "${page}.jpg" "${imgpage}${pid}&type=d" < /dev/null ||
		printf 'Failed to download page %s\n' "$page" >&2
done < index.txt

printf 'Do you want to make an HTML index file?\nYes (y) - No (n)\n'
mkindex=
read -r mkindex

if [ "$mkindex" = 'y' ]; then
	sed -E 's#^.*_(.*)$#<li><a href="\1.jpg">Trang \1</a></li>#g' index.txt > index.html
fi

echo 'Done!'