-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathGPlayCrawler.java
95 lines (87 loc) · 3.45 KB
/
GPlayCrawler.java
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
package com.ghq.crawler.process;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import com.firebase.client.Firebase;
import com.firebase.client.Logger;
/**
 * Crawls Google Play "top selling" game listing pages with jsoup and uploads
 * the extracted records to a Firebase location.
 *
 * <p>Each listing card is turned into a {@code Map<String, String>} holding
 * title, URLs, images, rank, developer, price and rating, and appended to
 * {@link #games}; the card's description text is appended to
 * {@link #descriptions}. Both lists are static and are mutated without any
 * synchronization.
 */
public class GPlayCrawler {
    /** Prefix prepended to the relative hrefs found on the listing page. */
    private static final String PLAY_BASE_URL = "https://play.google.com";
    /** Timeout passed to jsoup for fetching a listing page, in milliseconds. */
    private static final int FETCH_TIMEOUT_MILLIS = 10 * 1000;
    /** Literal separator between the rank number and the title in the card heading. */
    private static final String RANK_SEPARATOR = ". ";
    /** Literal separator between the CSS property name and its value in the rating style. */
    private static final String STYLE_SEPARATOR = ": ";

    // The raw element type is kept on purpose: the field is package-visible and
    // narrowing it would change the type seen by other classes in this package.
    static List<Map> games = new ArrayList<Map>();
    static List<String> descriptions = new ArrayList<String>();

    /**
     * Fetches one listing page and appends every parseable card to
     * {@link #games} and {@link #descriptions}, printing each record to stdout.
     *
     * <p>If the page cannot be fetched, the error is reported on stderr and the
     * method returns without adding anything. Cards that lack the details,
     * rating or cover-image element are skipped instead of aborting the crawl.
     *
     * @param link absolute URL of the listing page to crawl
     */
    public static void crawl(String link) {
        Document doc;
        try {
            doc = Jsoup.connect(link).timeout(FETCH_TIMEOUT_MILLIS).get();
        } catch (IOException e) {
            // Without a document there is nothing to parse; continuing would
            // only dereference null.
            System.err.println("Failed to fetch " + link + ": " + e);
            e.printStackTrace();
            return;
        }

        Elements cards = doc.getElementsByClass("card-content");
        for (Element card : cards) {
            // first() yields null for an empty selection, unlike get(0) which throws.
            Element details = card.select("div.details").first();
            Element rating = card.select("div.reason-set").first();
            Element image = card.select("img.cover-image").first();
            if (details == null || rating == null || image == null) {
                System.err.println("Skipping card with unexpected markup on " + link);
                continue;
            }

            String url = PLAY_BASE_URL + details.select("a.title").attr("href");
            String title = details.select("a.title").attr("title");
            String rank = extractRank(details.select("h2").text());
            String dev = details.select("a.subtitle").text();
            String devUrl = PLAY_BASE_URL + details.select("a.subtitle").attr("href");
            String price = details.select("span.display-price").text();
            String description = details.select("div.description").text();

            String ratingString = rating.select("div.tiny-star").attr("aria-label");
            String ratingValue = extractRatingValue(
                    rating.select("div.current-rating").attr("style"));

            String imageLarge = image.attr("data-cover-large");
            String imageSmall = image.attr("data-cover-small");

            System.out.println("URL : " + url);
            System.out.println("Name : " + title);
            System.out.println("Rank : " + rank);
            System.out.println("Developer : " + dev + ", " + devUrl);
            System.out.println("Price : " + price);
            System.out.println(description);
            System.out.println("Rating : " + ratingValue + ratingString);
            System.out.println("Image : " + imageSmall);
            System.out.println();

            descriptions.add(description);
            games.add(toMaps(title, url, imageLarge, imageSmall, rank, dev, devUrl,
                    price, ratingString, ratingValue));
        }
    }

    /**
     * Returns the part of the heading text that precedes the first literal
     * {@code ". "}, or the whole text when that separator is absent.
     *
     * <p>A plain substring search is used because {@code String.split} treats
     * its argument as a regular expression, where {@code .} matches any
     * character.
     */
    private static String extractRank(String headingText) {
        int separatorIndex = headingText.indexOf(RANK_SEPARATOR);
        return separatorIndex < 0 ? headingText : headingText.substring(0, separatorIndex);
    }

    /**
     * Returns the text between the first and second literal {@code ": "} of the
     * style attribute with every {@code ;} removed, or an empty string when the
     * attribute contains no {@code ": "}.
     */
    private static String extractRatingValue(String style) {
        int start = style.indexOf(STYLE_SEPARATOR);
        if (start < 0) {
            return "";
        }
        start += STYLE_SEPARATOR.length();
        // Stop at the next separator so the result matches element [1] of a split.
        int end = style.indexOf(STYLE_SEPARATOR, start);
        String value = end < 0 ? style.substring(start) : style.substring(start, end);
        return value.replace(";", "");
    }

    /**
     * Packs the fields extracted from one card into a map keyed by field name.
     *
     * @return a new mutable map with the keys {@code title}, {@code url},
     *         {@code imageLarge}, {@code imageSmall}, {@code rank}, {@code dev},
     *         {@code devUrl}, {@code price}, {@code ratingString} and
     *         {@code ratingValue}
     */
    private static Map<String, String> toMaps(String title, String url, String imageLarge,
            String imageSmall, String rank, String dev, String devUrl,
            String price, String ratingString,
            String ratingValue) {
        Map<String, String> map = new HashMap<String, String>();
        map.put("title", title);
        map.put("url", url);
        map.put("imageLarge", imageLarge);
        map.put("imageSmall", imageSmall);
        map.put("rank", rank);
        map.put("dev", dev);
        map.put("devUrl", devUrl);
        map.put("price", price);
        map.put("ratingString", ratingString);
        map.put("ratingValue", ratingValue);
        return map;
    }

    /**
     * Crawls the free and paid top-selling game listings, writes the collected
     * records under the {@code games} and {@code descriptions} children of the
     * Firebase reference, then keeps the main thread alive until interrupted.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        crawl("https://play.google.com/store/apps/category/GAME/collection/topselling_free");
        crawl("https://play.google.com/store/apps/category/GAME/collection/topselling_paid");

        Firebase.getDefaultConfig().setLogLevel(Logger.Level.DEBUG);
        Firebase fb = new Firebase("https://sweltering-inferno-7806.firebaseio.com/");
        System.out.println(games);

        // Writing denormalized data
        fb.child("games").setValue(games);
        fb.child("descriptions").setValue(descriptions);

        // Keep the thread alive until the operation completes.
        // NOTE(review): the writes presumably finish asynchronously; a completion
        // listener would allow a clean exit instead of waiting indefinitely -
        // confirm against the Firebase client API.
        try {
            while (true) {
                Thread.sleep(10000); // Sleep 10 seconds
            }
        } catch (InterruptedException e) {
            // Honor the interruption: restore the flag and stop waiting rather
            // than swallowing it and looping forever.
            Thread.currentThread().interrupt();
        }
    }
}