migrateissues.py (forked from arthur-debert/google-code-issues-migrator)
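"""Migrate issues from a Google Code project to a GitHub repository.

Downloads the project's issue list as CSV, scrapes each issue's detail page
for its description, comments, labels, and status, and recreates the issue
through the GitHub API (github2 library).

Example invocation (all names and tokens are placeholders):

    python migrateissues.py -g my-gcode-project -u my-github-user \
        -t MY_API_TOKEN -p my-github-user/my-repo
"""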
import csv
import logging
import datetime
import re
from StringIO import StringIO
import httplib2
from github2.client import Github
from BeautifulSoup import BeautifulSoup
# Populated from the command-line options in __main__.
options = None

logging.basicConfig(level=logging.DEBUG)
# Tooltip texts that Google Code uses for each issue status.
g_statusre = '^(%s)$' % '|'.join([
    'Issue has not had initial review yet',
    'Problem reproduced / Need acknowledged',
    'Work on this issue has begun',
    'Waiting on feedback or additional information',
    'Developer made source code changes, QA should verify',
    'QA has verified that the fix worked',
    'This was not a valid issue report',
    'Unable to reproduce the issue',
    'This report duplicates an existing issue',
    'We decided to not take action on this issue',
    'The requested non-coding task was completed',
])

def get_url_content(url):
    # Fetch a URL, using a local ".cache" directory for HTTP caching.
    h = httplib2.Http(".cache")
    resp, content = h.request(url, "GET")
    return content

class IssueComment(object):

    def __init__(self, date, author, body):
        self.created_at = date
        self.body_raw = body
        self.author = author
        self.user = options.github_user_name

    @property
    def body(self):
        # Prefix each comment with its original author and date, since all
        # comments are posted to GitHub under the migrating user's account.
        return ("%s - %s\n%s" % (self.author, self.created_at, self.body_raw)).encode('utf-8')

    def __repr__(self):
        # body is already a UTF-8 encoded str; encoding it again would fail
        # on non-ASCII input under Python 2.
        return self.body

class Issue(object):

    def __init__(self, issue_line):
        # Map each CSV column (ID, Type, Status, ...) to a lowercase attribute.
        for k, v in issue_line.items():
            setattr(self, k.lower(), v)
        logging.info("Issue #%s: %s" % (self.id, self.summary))
        self.get_original_data()

    def parse_date(self, node):
        datenode = node.find(attrs={'class': 'date'})
        datestring = datenode['title']
        try:
            return datetime.datetime.strptime(datestring, '%a %b %d %H:%M:%S %Y')
        except ValueError:
            # If the date can't be parsed, fall back to the current time.
            return datetime.datetime.now()

    def get_user(self, node):
        authornode = node.find(attrs={'class': 'author'})
        userhrefnode = authornode.find(attrs={'href': re.compile('^/u/')})
        return userhrefnode.string

    def get_body(self, node):
        return unicode(node.find('pre').renderContents(), 'utf-8', 'replace')

    def get_labels(self, soup):
        self.labels = []
        self.milestones = []  # Milestones are a form of label in Google Code.
        for node in soup.findAll(attrs={'class': 'label'}):
            label = unicode(re.sub('</?b>', '', node.renderContents()))
            if re.match('^Milestone-', label):
                self.milestones.append(re.sub('^Milestone-', '', label))
            else:
                self.labels.append(label)

    def get_status(self, soup):
        # The status appears in a span whose tooltip matches one of the
        # known Google Code status descriptions (g_statusre).
        node = soup.find(name='span', attrs={'title': re.compile(g_statusre)})
        self.status = unicode(node.string)
        self.labels.append("Status-%s" % self.status)

    def get_original_data(self):
        logging.info("GET %s" % self.original_url)
        content = get_url_content(self.original_url)
        soup = BeautifulSoup(content)
        descriptionnode = soup.find(attrs={'class': "cursor_off vt issuedescription"})
        descriptionstring = unicode(descriptionnode.find('pre').renderContents(), 'utf-8', 'replace')
        self.body = unicode("%s<br />Original link: %s" % (descriptionstring, self.original_url))
        self.created_at = self.parse_date(descriptionnode)
        comments = []
        for node in soup.findAll(attrs={'class': "cursor_off vt issuecomment"}):
            try:
                date = self.parse_date(node)
                author = self.get_user(node)
                body = self.get_body(node)
                # Skip the placeholder text Google Code inserts for bare
                # status changes.
                if not re.match('^\\n<i>\(No comment was entered for this change\.\)</i>\\n$', body):
                    comments.append(IssueComment(date, author, body))
            except Exception:
                logging.exception("Failed to parse a comment on issue #%s" % self.id)
        self.comments = comments
        logging.info('got comments %s' % len(comments))
        self.get_labels(soup)
        logging.info('got labels %s' % len(self.labels))
        logging.info('got milestones %s' % len(self.milestones))
        self.get_status(soup)

    @property
    def original_url(self):
        gcode_base_url = "http://code.google.com/p/%s/" % options.google_project_name
        return "%sissues/detail?id=%s" % (gcode_base_url, self.id)

    def __repr__(self):
        # Encode explicitly: __repr__ must return a str under Python 2.
        return ("%s - %s" % (self.id, self.summary)).encode('utf-8')

def download_issues():
    # The CSV export includes every issue (can=1) with the listed columns.
    url = "http://code.google.com/p/" + options.google_project_name + "/issues/csv?can=1&q=&colspec=ID%20Type%20Status%20Priority%20Milestone%20Owner%20Summary"
    logging.info('Downloading %s' % url)
    content = get_url_content(url)
    return StringIO(content)

def post_to_github(issue, sync_comments=True):
    logging.info('posting %s', issue)
    github = Github(username=options.github_user_name, api_token=options.github_api_token, requests_per_second=1)
    # Any of these Google Code statuses maps to a closed GitHub issue.
    if issue.status.lower() in "invalid closed fixed wontfix verified worksforme duplicate done".split():
        issue.status = 'closed'
    else:
        issue.status = 'open'
    try:
        git_issue = github.issues.show(options.github_project, int(issue.id))
        logging.warn("skipping issue: %s" % issue)
    except RuntimeError:
        # github2 raises RuntimeError when the issue does not exist yet.
        logging.info("will post issue: %s" % issue)
        git_issue = github.issues.open(options.github_project,
                                       title=issue.summary,
                                       body=issue.body)
        if issue.status == 'closed':
            github.issues.close(options.github_project, git_issue.number)
    if sync_comments is False:
        return git_issue
    old_comments = github.issues.comments(options.github_project, git_issue.number)
    for i, comment in enumerate(issue.comments):
        exists = False
        for old_c in old_comments:
            # Issue status changes have empty bodies in Google Code;
            # exclude those when checking whether a comment already exists.
            if old_c.body and old_c.body == comment.body:
                exists = True
                logging.info("Found comment there, skipping")
                break
        if not exists:
            try:
                github.issues.comment(options.github_project, git_issue.number, comment.body)
            except Exception:
                logging.exception("Failed to post comment %s for issue %s" % (i, issue))
    return git_issue

def process_issues(issues_csv, sync_comments=True):
    reader = csv.DictReader(issues_csv)
    issues = [Issue(issue_line) for issue_line in reader]
    for issue in issues:
        post_to_github(issue, sync_comments)

if __name__ == "__main__":
    import optparse

    usage = "usage: %prog [options]"
    parser = optparse.OptionParser(usage)
    parser.add_option('-g', '--google-project-name', action="store", dest="google_project_name", help="The project name (from the URL) from Google Code.")
    parser.add_option('-t', '--github-api-token', action="store", dest="github_api_token", help="Your GitHub API token.")
    parser.add_option('-u', '--github-user-name', action="store", dest="github_user_name", help="The GitHub username.")
    parser.add_option('-p', '--github-project', action="store", dest="github_project", help="The GitHub project name: user-name/project-name.")
    # optparse defaults to sys.argv[1:]; passing sys.argv itself would leave
    # the script name in the positional arguments.
    options, args = parser.parse_args()
    try:
        issues_data = download_issues()
        process_issues(issues_data)
    except Exception:
        parser.print_help()
        raise