2012-10-27 08:39:27 Morris
[JAVA] 備份此部落格文章的代碼
demo 影片
備份此新聞台的 JAVA 代碼,當然你可以直接使用網路現有的網站打包軟體,但我也嘗試寫寫如何備份自己的文章,我後來使用 JAVA 的功能去完成。
不過這個有幾個缺點,雖然我是屬於登入的狀態,但是 JAVA 去做連線的時候,上鎖或者是限定好友的文章是無法被備份的,然後被備份的新聞台人氣會暴升。
我是將多餘的頁面訊息砍掉,只保留文章的內文(html格式)而已,並不確定結果是否與網路現有的軟體相同。
總之自己寫寫也蠻不錯玩的。
import java.net.*;
import java.io.*;
import java.util.Scanner;
/**
 * Backs up the articles of the "zerojudge" blog hosted on mypaper.pchome.com.tw.
 *
 * <p>{@link #main} walks the listing pages, {@link #readPage} collects the article links found on
 * one listing page, and {@link #read} downloads one article and stores only its body markup as
 * {@code <title>.html} in the working directory. Downloads are staged in the scratch files
 * {@code URLpage.html} and {@code URLtmp.html}, which are overwritten on every call.
 */
public class ReadWebPage {
    private static final String SITE_ROOT = "http://mypaper.pchome.com.tw";
    private static final String BLOG_PATH = "/zerojudge";
    private static final String POST_PREFIX = BLOG_PATH + "/post";
    /** Charset used both to decode the downloaded pages and to encode the saved articles. */
    private static final String ENCODING = "UTF-8";
    /** Characters replaced in file names: path separators and characters some file systems reject. */
    private static final String ILLEGAL_FILE_CHARS = "\\/:*?\"<>|";
    private static final int LAST_PAGE = 70;
    private static final int CHUNK_SIZE = 4096;

    /**
     * Visits the blog front page followed by the listing pages {@code P1} to {@code P70}.
     *
     * @param arg ignored
     */
    public static void main(String[] arg) throws Exception {
        for (int i = 0; i <= LAST_PAGE; i++) {
            String listingUrl = SITE_ROOT + BLOG_PATH;
            if (i > 0) {
                listingUrl += "/P" + i;
            }
            readPage(listingUrl);
        }
    }

    /**
     * Downloads one listing page and backs up every article it links to.
     *
     * <p>Only the lines between the first {@code class="blog"} line and the following
     * {@code <div id="page">} line are scanned. A page without the {@code class="blog"} marker is
     * skipped silently. I/O failures are reported on standard error and do not propagate.
     *
     * @param strURL URL of the listing page
     */
    public static void readPage(String strURL) {
        try {
            // Read the web page as a byte stream into the scratch file.
            download(strURL, "URLpage.html", "running page");
            try (Scanner fin = new Scanner(new FileInputStream("URLpage.html"), ENCODING)) {
                if (!skipToLineContaining(fin, "class=\"blog\"")) {
                    return;
                }
                while (fin.hasNextLine()) {
                    String line = fin.nextLine();
                    if (line.contains("<div id=\"page\">")) {
                        break;
                    }
                    String postPath = extractPostPath(line);
                    if (postPath != null) {
                        System.out.println(SITE_ROOT + postPath);
                        read(SITE_ROOT + postPath);
                    }
                }
            }
            System.out.println("page Done");
        } catch (IOException e) {
            System.err.println("Failed to process page " + strURL);
            e.printStackTrace();
        }
    }

    /**
     * Downloads one article and saves its body markup as {@code <title>.html}.
     *
     * <p>The title is taken from the {@code content} attribute on the first line containing
     * {@code name="keywords"}; the body is every line after the first {@code innertext brk_h} line
     * up to (not including) the next {@code ArticleMapTitle} line. The body is written as UTF-8,
     * one source line per output line. Articles without a usable title are skipped with a note on
     * standard error. I/O failures are reported on standard error and do not propagate.
     *
     * @param strURL URL of the article
     */
    public static void read(String strURL) {
        try {
            download(strURL, "URLtmp.html", "running article");
            try (Scanner fin = new Scanner(new FileInputStream("URLtmp.html"), ENCODING)) {
                String title = findTitle(fin);
                if (title == null || title.isEmpty()) {
                    System.err.println("Skipped (no title found): " + strURL);
                    return;
                }
                String fileTitleName = toFileName(title);
                try (BufferedWriter fout = new BufferedWriter(new OutputStreamWriter(
                        new FileOutputStream(fileTitleName + ".html", false), ENCODING))) {
                    copyArticleBody(fin, fout);
                }
                System.out.println(fileTitleName + " Done");
            }
        } catch (IOException e) {
            System.err.println("Failed to back up article " + strURL);
            e.printStackTrace();
        }
    }

    /** Copies the resource at {@code strURL} into {@code fileName}, printing {@code startMessage} once both ends are open. */
    private static void download(String strURL, String fileName, String startMessage)
            throws IOException {
        URL pageUrl = new URL(strURL);
        byte[] chunk = new byte[CHUNK_SIZE];
        try (InputStream in = new BufferedInputStream(pageUrl.openStream());
                OutputStream out = new BufferedOutputStream(new FileOutputStream(fileName, false))) {
            System.out.println(startMessage);
            int count;
            while ((count = in.read(chunk, 0, chunk.length)) != -1) {
                out.write(chunk, 0, count); // write to the file
            }
        }
    }

    /** Consumes lines up to and including the first one containing {@code marker}; returns whether it was found. */
    private static boolean skipToLineContaining(Scanner fin, String marker) {
        while (fin.hasNextLine()) {
            if (fin.nextLine().contains(marker)) {
                return true;
            }
        }
        return false;
    }

    /** Returns the {@code /zerojudge/post...} path on {@code line} when it is quote-terminated and ends in a digit, else {@code null}. */
    private static String extractPostPath(String line) {
        int pos = line.indexOf(POST_PREFIX);
        if (pos < 0) {
            return null;
        }
        int npos = line.indexOf('"', pos + 1);
        if (npos < 0) {
            // No closing quote on this line: indexing with npos - 1 would throw.
            return null;
        }
        if (!Character.isDigit(line.charAt(npos - 1))) {
            return null;
        }
        return line.substring(pos, npos);
    }

    /** Reads up to the first {@code name="keywords"} line and returns its {@code content} value, or {@code null} if unavailable. */
    private static String findTitle(Scanner fin) {
        while (fin.hasNextLine()) {
            String line = fin.nextLine();
            if (!line.contains("name=\"keywords\"")) {
                continue;
            }
            int pos = line.indexOf("content=");
            if (pos < 0) {
                return null;
            }
            int start = pos + 9; // skip past content= and the opening quote
            if (start > line.length()) {
                return null;
            }
            int end = line.indexOf('"', start);
            if (end < 0) {
                // No closing quote: fall back to dropping the last two characters of the line.
                end = Math.max(start, line.length() - 2);
            }
            return line.substring(start, end);
        }
        return null;
    }

    /** Replaces control characters and characters that are unsafe in file names with {@code '_'}. */
    private static String toFileName(String title) {
        StringBuilder safe = new StringBuilder(title.length());
        for (int i = 0; i < title.length(); i++) {
            char c = title.charAt(i);
            boolean illegal = c < ' ' || ILLEGAL_FILE_CHARS.indexOf(c) >= 0;
            safe.append(illegal ? '_' : c);
        }
        return safe.toString();
    }

    /** Writes the lines between the {@code innertext brk_h} line and the {@code ArticleMapTitle} line, one per output line. */
    private static void copyArticleBody(Scanner fin, BufferedWriter fout) throws IOException {
        boolean inBody = false;
        while (fin.hasNextLine()) {
            String line = fin.nextLine();
            if (!inBody) {
                inBody = line.contains("innertext brk_h");
                continue;
            }
            if (line.contains("ArticleMapTitle")) {
                break;
            }
            fout.write(line);
            // nextLine() strips the terminator; restore it so multi-line markup keeps its layout.
            fout.newLine();
        }
    }
}
備份此新聞台的 JAVA 代碼,當然你可以直接使用網路現有的網站打包軟體,但我也嘗試寫寫如何備份自己的文章,我後來使用 JAVA 的功能去完成。
不過這個有幾個缺點,雖然我是屬於登入的狀態,但是 JAVA 去做連線的時候,上鎖或者是限定好友的文章是無法被備份的,然後被備份的新聞台人氣會暴升。
我是將多餘的頁面訊息砍掉,只保留文章的內文(html格式)而已,並不確定結果是否與網路現有的軟體相同。
總之自己寫寫也蠻不錯玩的。
import java.net.*;
import java.io.*;
import java.util.Scanner;
/**
 * Backs up the articles of the "zerojudge" blog hosted on mypaper.pchome.com.tw.
 *
 * <p>{@link #main} walks the listing pages, {@link #readPage} collects the article links found on
 * one listing page, and {@link #read} downloads one article and stores only its body markup as
 * {@code <title>.html} in the working directory. Downloads are staged in the scratch files
 * {@code URLpage.html} and {@code URLtmp.html}, which are overwritten on every call.
 */
public class ReadWebPage {
    private static final String SITE_ROOT = "http://mypaper.pchome.com.tw";
    private static final String BLOG_PATH = "/zerojudge";
    private static final String POST_PREFIX = BLOG_PATH + "/post";
    /** Charset used both to decode the downloaded pages and to encode the saved articles. */
    private static final String ENCODING = "UTF-8";
    /** Characters replaced in file names: path separators and characters some file systems reject. */
    private static final String ILLEGAL_FILE_CHARS = "\\/:*?\"<>|";
    private static final int LAST_PAGE = 70;
    private static final int CHUNK_SIZE = 4096;

    /**
     * Visits the blog front page followed by the listing pages {@code P1} to {@code P70}.
     *
     * @param arg ignored
     */
    public static void main(String[] arg) throws Exception {
        for (int i = 0; i <= LAST_PAGE; i++) {
            String listingUrl = SITE_ROOT + BLOG_PATH;
            if (i > 0) {
                listingUrl += "/P" + i;
            }
            readPage(listingUrl);
        }
    }

    /**
     * Downloads one listing page and backs up every article it links to.
     *
     * <p>Only the lines between the first {@code class="blog"} line and the following
     * {@code <div id="page">} line are scanned. A page without the {@code class="blog"} marker is
     * skipped silently. I/O failures are reported on standard error and do not propagate.
     *
     * @param strURL URL of the listing page
     */
    public static void readPage(String strURL) {
        try {
            // Read the web page as a byte stream into the scratch file.
            download(strURL, "URLpage.html", "running page");
            try (Scanner fin = new Scanner(new FileInputStream("URLpage.html"), ENCODING)) {
                if (!skipToLineContaining(fin, "class=\"blog\"")) {
                    return;
                }
                while (fin.hasNextLine()) {
                    String line = fin.nextLine();
                    if (line.contains("<div id=\"page\">")) {
                        break;
                    }
                    String postPath = extractPostPath(line);
                    if (postPath != null) {
                        System.out.println(SITE_ROOT + postPath);
                        read(SITE_ROOT + postPath);
                    }
                }
            }
            System.out.println("page Done");
        } catch (IOException e) {
            System.err.println("Failed to process page " + strURL);
            e.printStackTrace();
        }
    }

    /**
     * Downloads one article and saves its body markup as {@code <title>.html}.
     *
     * <p>The title is taken from the {@code content} attribute on the first line containing
     * {@code name="keywords"}; the body is every line after the first {@code innertext brk_h} line
     * up to (not including) the next {@code ArticleMapTitle} line. The body is written as UTF-8,
     * one source line per output line. Articles without a usable title are skipped with a note on
     * standard error. I/O failures are reported on standard error and do not propagate.
     *
     * @param strURL URL of the article
     */
    public static void read(String strURL) {
        try {
            download(strURL, "URLtmp.html", "running article");
            try (Scanner fin = new Scanner(new FileInputStream("URLtmp.html"), ENCODING)) {
                String title = findTitle(fin);
                if (title == null || title.isEmpty()) {
                    System.err.println("Skipped (no title found): " + strURL);
                    return;
                }
                String fileTitleName = toFileName(title);
                try (BufferedWriter fout = new BufferedWriter(new OutputStreamWriter(
                        new FileOutputStream(fileTitleName + ".html", false), ENCODING))) {
                    copyArticleBody(fin, fout);
                }
                System.out.println(fileTitleName + " Done");
            }
        } catch (IOException e) {
            System.err.println("Failed to back up article " + strURL);
            e.printStackTrace();
        }
    }

    /** Copies the resource at {@code strURL} into {@code fileName}, printing {@code startMessage} once both ends are open. */
    private static void download(String strURL, String fileName, String startMessage)
            throws IOException {
        URL pageUrl = new URL(strURL);
        byte[] chunk = new byte[CHUNK_SIZE];
        try (InputStream in = new BufferedInputStream(pageUrl.openStream());
                OutputStream out = new BufferedOutputStream(new FileOutputStream(fileName, false))) {
            System.out.println(startMessage);
            int count;
            while ((count = in.read(chunk, 0, chunk.length)) != -1) {
                out.write(chunk, 0, count); // write to the file
            }
        }
    }

    /** Consumes lines up to and including the first one containing {@code marker}; returns whether it was found. */
    private static boolean skipToLineContaining(Scanner fin, String marker) {
        while (fin.hasNextLine()) {
            if (fin.nextLine().contains(marker)) {
                return true;
            }
        }
        return false;
    }

    /** Returns the {@code /zerojudge/post...} path on {@code line} when it is quote-terminated and ends in a digit, else {@code null}. */
    private static String extractPostPath(String line) {
        int pos = line.indexOf(POST_PREFIX);
        if (pos < 0) {
            return null;
        }
        int npos = line.indexOf('"', pos + 1);
        if (npos < 0) {
            // No closing quote on this line: indexing with npos - 1 would throw.
            return null;
        }
        if (!Character.isDigit(line.charAt(npos - 1))) {
            return null;
        }
        return line.substring(pos, npos);
    }

    /** Reads up to the first {@code name="keywords"} line and returns its {@code content} value, or {@code null} if unavailable. */
    private static String findTitle(Scanner fin) {
        while (fin.hasNextLine()) {
            String line = fin.nextLine();
            if (!line.contains("name=\"keywords\"")) {
                continue;
            }
            int pos = line.indexOf("content=");
            if (pos < 0) {
                return null;
            }
            int start = pos + 9; // skip past content= and the opening quote
            if (start > line.length()) {
                return null;
            }
            int end = line.indexOf('"', start);
            if (end < 0) {
                // No closing quote: fall back to dropping the last two characters of the line.
                end = Math.max(start, line.length() - 2);
            }
            return line.substring(start, end);
        }
        return null;
    }

    /** Replaces control characters and characters that are unsafe in file names with {@code '_'}. */
    private static String toFileName(String title) {
        StringBuilder safe = new StringBuilder(title.length());
        for (int i = 0; i < title.length(); i++) {
            char c = title.charAt(i);
            boolean illegal = c < ' ' || ILLEGAL_FILE_CHARS.indexOf(c) >= 0;
            safe.append(illegal ? '_' : c);
        }
        return safe.toString();
    }

    /** Writes the lines between the {@code innertext brk_h} line and the {@code ArticleMapTitle} line, one per output line. */
    private static void copyArticleBody(Scanner fin, BufferedWriter fout) throws IOException {
        boolean inBody = false;
        while (fin.hasNextLine()) {
            String line = fin.nextLine();
            if (!inBody) {
                inBody = line.contains("innertext brk_h");
                continue;
            }
            if (line.contains("ArticleMapTitle")) {
                break;
            }
            fout.write(line);
            // nextLine() strips the terminator; restore it so multi-line markup keeps its layout.
            fout.newLine();
        }
    }
}
上一篇:[記錄] 打包 JAR 檔案
下一篇:[JAVA][模仿失敗作] 隨機