|
近日一位同事来访，谈到他的一个消遣就是去交大 BBS 的鹊桥版欣赏美女照片。我说既然如此，何不编一个脚本，把鹊桥版上最近若干帖子里的照片集中到一个网页上来看，并且给每张照片附上出处的超链接，以便查看详细信息？
于是有了下面这个脚本，输出文件是 index.htm（可选的第一个参数是要抓取的版面页数，默认为 3）。
-------------------------------
#!/bin/sh
#
# Collect the photos posted in recent threads of the SJTU BBS "LoveBridge"
# board into one gallery page.
#
# Usage:  <script> [pages]
#   pages   number of board index pages to walk, starting from the newest
#           one and following the "previous page" link (default: 3)
#
# Output: index.htm in the current directory; every picture is wrapped in a
#         link to the thread it was found in.
# Working files, all in the current directory: myaddress.txt, mytmp.htm,
#         mypictmp.txt, mydata.txt, and mydatatmp.txt (removed at the end).
# Requires: wget, sed, awk, sort.

set -u

myloops=${1:-3}
case $myloops in
  *[!0-9]*)
    printf 'usage: %s [pages]   (pages must be a non-negative integer)\n' \
      "$0" >&2
    exit 2
    ;;
esac

myurl='http://bbs.sjtu.edu.cn/bbstdoc,board,LoveBridge.html'

# ---------------------------------------------------------------------------
# Pass 1: walk the board index pages and collect thread URLs.
# ---------------------------------------------------------------------------
: > myaddress.txt
page=1
while [ "$page" -le "$myloops" ]; do
  if ! wget -q -O mytmp.htm "$myurl"; then
    printf 'warning: cannot fetch index page %s\n' "$myurl" >&2
    break
  fi

  # Stage 1 puts every "<tr><td>" table row on its own line.
  # Stage 2 cuts each line down to the bbstcon...html link, prefixes the
  #         host, and drops the first line of the stream.
  # Stage 3 breaks after any ".html>" that is still present.
  # Stage 4 keeps only lines that actually contain a ".html" link.
  # The sed programs depend on the board's markup and are kept as they were.
  sed -n '/<tr><td>/{s/<tr><td>/\
<tr><td>/g;p;}' mytmp.htm |
    sed -n \
      -e 's/^.*<a href=bbstcon/bbstcon/' \
      -e 's/\.html>.*$/.html/' \
      -e 's/^/http:\/\/bbs.sjtu.edu.cn\//' \
      -e '2,$p' |
    sed -e 's/\.html>/\.html\
/' |
    sed -n '/\.html/p' >> myaddress.txt

  # Follow the "previous page" link (first line containing the link text).
  # NOTE(review): the literal below only matches if this script is stored in
  # the same byte encoding as the fetched page - confirm.
  myurl=$(sed -n '/上一页/{s/>上一页.*$//; s/^<a href=//; s/^/http:\/\/bbs\.sjtu\.edu\.cn\//; p; q; }' mytmp.htm)

  # No older page to go to: stop instead of calling wget without a URL.
  [ -n "$myurl" ] || break
  page=$((page + 1))
done

# ---------------------------------------------------------------------------
# Pass 2: fetch every thread and record "<picture-url> <thread-url>" pairs.
# ---------------------------------------------------------------------------
# Start from an empty pair file so a leftover from an aborted run cannot leak
# into the result, and so sort/rm below always have a file to work on.
: > mydatatmp.txt
while IFS= read -r myurl; do
  [ -n "$myurl" ] || continue

  # stdin is myaddress.txt inside this loop; keep wget away from it.
  if ! wget -q -O mytmp.htm "$myurl" < /dev/null; then
    printf 'warning: cannot fetch thread %s\n' "$myurl" >&2
    continue
  fi

  # Put each <IMG> tag at the start of its own line, then print only the URL
  # of board-hosted JPEG attachments. This finds several pictures on one
  # line and never carries surrounding text into the output.
  sed 's/<IMG SRC=/\
<IMG SRC=/g' mytmp.htm |
    sed -n 's/^<IMG SRC="\(http:\/\/bbs\.sjtu\.edu\.cn\/file\/LoveBridge\/[0-9]*\.[Jj][Pp][Gg]\)" ALT=.*$/\1/p' \
      > mypictmp.txt

  # The thread URL is handed to awk as data, never as a printf format.
  awk -v src="$myurl" '{ print $0, src }' mypictmp.txt >> mydatatmp.txt
done < myaddress.txt

# ---------------------------------------------------------------------------
# Pass 3: drop duplicate pictures (keyed on the picture URL) and render HTML.
# ---------------------------------------------------------------------------
sort -k 1,1 -u mydatatmp.txt > mydata.txt
rm -f mydatatmp.txt

awk '
  BEGIN {
    print "<html>"
    print "<body>"
  }
  {
    # $1 = picture URL, $2 = URL of the thread it came from
    printf("<a href=%s> ", $2)
    printf("<IMG SRC=\"%s\" ></a> <br>\n", $1)
  }
  END {
    print "</body>"
    print "</html>"
  }
' mydata.txt > index.htm
|