package ;
import ;
import ;
import ;
import ;
import ;
import ;
import ;
import ;
import ;
import ;
import ;
import ;
/**
* NetEase Loan ("WangYiDai") crawl manager.
* <p>
* Crawls a paged listing through {@link #crawlOnePage(UrlPojo)}, turns each
* page's JSON into tab-separated text through {@link #parserOnePage(String)},
* and accumulates the text of all pages in {@link #processWangYiDai}.
* All state is held in public static fields.
* <p>
* NOTE(review): in this copy of the file several statements have no visible
* receiver or method name (they start directly with a parenthesis). The
* comments below describe only what the remaining text shows and mark the
* rest as assumptions to confirm against the original source.
*
* @author tsj-pc
*
*/
public class WangYiDaiCrawlManager {
// Shared crawler instance used for every page request.
public static HttpClientCrawlerImpl httpClientCrawlerImpl = new HttpClientCrawlerImpl();
// Keys looked up for each item, in output column order.
public static String[] column_key = { "platName", "locationAreaName",
"locationCityName", "platUrl" };
/**
* Fetches one page by delegating to {@code httpClientCrawlerImpl.crawl4Post}.
*
* @param urlPojo request description passed straight to the crawler
* @return whatever {@code crawl4Post} returns; the caller null-checks it
*/
private static CrawlResultPojo crawlOnePage(UrlPojo urlPojo) {
CrawlResultPojo resultPojo = httpClientCrawlerImpl.crawl4Post(urlPojo);
return resultPojo;
}
// Running total of items seen by parserOnePage, across all calls.
public static int item_count = 0;
/**
* Converts one page of JSON into text, one line per item.
* <p>
* Iterates the array derived from the {@code "list"} entry and, for each
* element, walks {@link #column_key}. Increments {@link #item_count} once
* per element.
*
* @param jsonStr JSON text of one crawled page
* @return presumably the accumulated {@code stringBuilder} content - the
*         returned expression is incomplete in this view; confirm
*/
public static String parserOnePage(String jsonStr) {
// Parse the page JSON.
// NOTE(review): the parse call's name is not visible here - confirm.
JSONObject jsonObj = (jsonStr);
// Re-parse the "list" entry (via its string form) as a JSON array.
JSONArray jsonArray = (("list")
.toString());
StringBuilder stringBuilder = new StringBuilder();
for (Object json : jsonArray) {
JSONObject itemJson = (JSONObject) json;
for (String column : column_key) {
// Presumably appends this column's value plus a tab separator to
// stringBuilder; the call target is not visible - confirm.
((column) + "\t");
}
// Presumably terminates the item's line in stringBuilder - confirm.
("\n");
item_count++;
}
return ();
}
/**
* Crawls pages 1..{@code max_page_number} of {@code url}, parses each page
* and collects the resulting text; stops early when the duplicate check
* inside the loop fires.
*
* @param url             address used to build the {@code UrlPojo}
* @param max_page_number highest page number to request (inclusive)
* @param filePath        passed with the collected text and {@code "utf-8"}
*                        to the final call - presumably the output file; confirm
*/
public static void processWangYiDai(String url, int max_page_number,
String filePath) {
// Accumulates the parsed text of every crawled page.
StringBuilder all_items = new StringBuilder();
UrlPojo urlPojo = new UrlPojo(url);
Map<String, Object> parasMap = new HashMap<String, Object>();
// Number of pages that returned a non-null result and were parsed.
int have_download_page_count = 0;
// Presumably remembers pages already seen, for the duplicate check below.
Set<String> uniqSet = new HashSet<String>();
for (int pageNumber = 1; pageNumber <= max_page_number; pageNumber++) {
// Request parameters for this page; presumably stored in parasMap and
// then attached to urlPojo - receivers are not visible here; confirm.
("currPage", pageNumber);
("params", "");
("sort", 0);
(parasMap);
CrawlResultPojo resultPojo = crawlOnePage(urlPojo);
// Duplicate check: the first branch reports completion and leaves the
// loop; the else branch presumably records the page in uniqSet - confirm.
// NOTE(review): resultPojo is null-checked only further down; if this
// condition dereferences it, that check comes too late - confirm.
if ((())) {
("If you encounter duplication, it means that the crawling has been completed!");
break;
} else {
(());
}
if (resultPojo != null) {
// Presumably the page body taken from resultPojo - confirm.
String content = ();
String page_items = parserOnePage(content);
all_items.append(page_items);
have_download_page_count++;
}
}
// Summary messages, then a call taking (filePath, text, "utf-8");
// presumably console output and a file write - targets not visible; confirm.
("all items size---" + item_count);
("Already downloaded ---" + have_download_page_count);
(filePath, all_items.toString(), "utf-8");
("save successfully~");
}
/**
* Entry point: runs {@link #processWangYiDai} with a fixed URL, a limit of
* 100 pages and a fixed output file name.
*
* @param args not used
*/
public static void main(String[] args) {
// NOTE(review): the URL has no scheme or host in this view - confirm.
String url = "/front_select-plat";
int max_page_number = 100;
String fileName = "NetEase Loan_Dataset.txt";
processWangYiDai(url, max_page_number, fileName);
("done!");
}
}