-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathBigQuery-Syntex.sql
121 lines (113 loc) · 4.81 KB
/
BigQuery-Syntex.sql
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
T1 - [githubarchive:month.201706]
T2 - [github_repos.languages]
T3 - [github_repos.commits]
Time length is 2 weeks from 2017-06-11 to 2017-06-25
To study the collaoration patterns in GitHub, 17 programming languages were selected:
C, C#, C++, Clojure, Go, Haskell, Java, Javascript, Julia, OCaml,
PHP, Python, R, Ruby, Rust, Scala, Scheme.
--------------------------------------------------------------------------------------------------
1 - 17 languages, all events
-------------------------------------------------------------------------------------------------------
SELECT T2.language.name as Language, Count (distinct T1.repo.name) as repo_num, count (distinct T3.author.name) as Developer, count (distinct T3.commit) as Commits
FROM (SELECT * FROM [githubarchive:month.201706]
WHERE created_at>='2017-06-11 00:00'
and created_at<='2017-06-25 00:00') T1
join [[bigquery-public-data:github_repos.commits] T3
ON T3.committer.date = T1.created_at
JOIN (SELECT * FROM [bigquery-public-data:github_repos.languages]
WHERE language.name ='C'
or language.name = 'C#'
or language.name = 'C++'
or language.name = 'Clojure'
or language.name = 'Go'
or language.name = 'Haskell'
or language.name = 'Java'
or language.name = 'JavaScript'
or language.name = 'Julia'
or language.name = 'OCaml'
or language.name = 'PHP'
or language.name = 'Python'
or language.name = 'R'
or language.name = 'Ruby'
or language.name = 'Rust'
or language.name = 'Scala'
or language.name = 'Scheme') T2
ON T2.repo_name = T1.repo.name
WHERE T2.language.name is not null
GROUP BY 1
Order by 2 desc
Limit 20
-----------------------------------------------------------------------------
2 - 17 languages, 7 events
-------------------------------------------------------------------------------
SELECT T2.language.name as Language, Count (distinct T1.repo.url) as repo_num, count (distinct T3.author.name) as Developer, count (distinct T3.commit) as Commits
FROM (
SELECT * FROM [githubarchive:month.201706]
WHERE created_at>='2017-06-11 00:00' and created_at<='2017-06-25 00:00'
and type ='CommitCommentEvent'
OR type = 'PushEvent'
OR type = 'IssueCommentEvent'
OR type = 'PullRequestEvent'
OR type = 'PullRequestReviewCommentEvent'
OR type = 'IssuesEvent'
or type = 'createEvent') T1
join [[bigquery-public-data:github_repos.commits] T3
ON T3.committer.date = T1.created_at
JOIN (SELECT * FROM [bigquery-public-data:github_repos.languages]
WHERE language.name ='C'
or language.name = 'C#'
or language.name = 'C++'
or language.name = 'Clojure'
or language.name = 'Go'
or language.name = 'Haskell'
or language.name = 'Java'
or language.name = 'JavaScript'
or language.name = 'Julia'
or language.name = 'OCaml'
or language.name = 'PHP'
or language.name = 'Python'
or language.name = 'R'
or language.name = 'Ruby'
or language.name = 'Rust'
or language.name = 'Scala'
or language.name = 'Scheme') T2
ON T2.repo_name = T1.repo.name
WHERE T2.language.name is not null
GROUP BY 1
Order by 2 desc
Limit 20
--------------------------------------------------------------
3 - all languages, 7 events
-------------------------------------------------------------------
SELECT T2.language.name as Language, Count (distinct T1.repo.name) as repo_num, count (distinct T3.author.name) as Developer, count (distinct T3.commit) as Commits
FROM (
SELECT * FROM [githubarchive:month.201706]
WHERE created_at>='2017-06-11 00:00' and created_at<='2017-06-25 00:00'
and type ='CommitCommentEvent'
OR type = 'PushEvent'
OR type = 'IssueCommentEvent'
OR type = 'PullRequestEvent'
OR type = 'PullRequestReviewCommentEvent'
OR type = 'IssuesEvent'
or type = 'createEvent') T1
join [[bigquery-public-data:github_repos.commits] T3
ON T3.committer.date = T1.created_at
JOIN [bigquery-public-data:github_repos.languages] T2
ON T2.repo_name = T1.repo.name
WHERE T2.language.name is not null
GROUP BY 1
Order by 2 desc
------------------------------------------------------------------
4 - all languages, all events
-----------------------------------------------------------------
SELECT T2.language.name as Language, Count (distinct T1.repo.name) as repo_num, count (distinct T3.author.name) as Developer, count (distinct T3.commit) as Commits
FROM (SELECT * FROM [githubarchive:month.201706]
WHERE created_at>='2017-06-11 00:00'
and created_at<='2017-06-25 00:00') T1
join [[bigquery-public-data:github_repos.commits] T3
ON T3.committer.date = T1.created_at
JOIN [bigquery-public-data:github_repos.languages] T2
ON T2.repo_name = T1.repo.name
WHERE T2.language.name is not null
GROUP BY 1
Order by 2 desc