-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathfigures.py
117 lines (104 loc) · 3.76 KB
/
figures.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
#!/usr/bin/python
# -*- coding: utf-8 -*-
# Copyright (C) 2012, Leonardo Leite
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
from __future__ import unicode_literals
import re
import sys
import urllib2
import urllib
import os
import random
from BeautifulSoup import BeautifulSoup
BASE_URL = 'http://%s.wikipedia.org/wiki/%s'
ROCK_FOLDER = 'resources/rock/'
LANGUAGE_PT = 'pt'
LANGUAGE_EN = 'en'
import glob
def get_image(word, wiki=False):
"""Acha uma imagem para uma palavra
word: palavra em português
wiki: se wiki=True e imagem não existe nas pasta img, então baixa figura da wikipedia
return caminho para a imagem; None se palavra não tem imagem (ou se não achar na wikipedia)
"""
folder = "resources/img/"
word = word.replace(" ", "_")
list = glob.glob("%s%s.*" % (folder, word))
if list:
return list[0]
else:
if wiki:
try:
img_url = from_wikipedia(word)
except:
return None
if img_url:
img_url = unicode(img_url)
path = folder + word + _ext(img_url)
download(img_url, path)
return path
return None
def _ext(path):
"""Returns the extension of the file (including the dot)
path: it can be local or an URL, it doesn't matter
"""
regex = '.*(\.[a-zA-Z]{3})'
res = re.match(regex, path)
if res:
return res.group(1)
else:
return ''
def download(img_url, path):
"""Downloads an image in the path
path: the absolute path to the image, including the image name
"""
# todo extensão da imagem
print "downloading " + img_url
request = urllib2.Request(img_url, headers={'User-Agent' : "Magic Browser"})
data = urllib2.urlopen(request).read()
f = open(path, 'w')
f.write(data)
f.close()
def from_wikipedia(word):
"""Returns the link of a figure in the verbet of 'word' (a string)
For example, passing word='cavalo' it is expected to get a horse figure
If no figure is found, None is returned
By default the words are searched in the Wikipedia in Portuguse; the second try is the English Wikipedia
To change the language, set the language.LANGUAGE variable
"""
try:
url = BASE_URL % (LANGUAGE_PT, word)
request = urllib2.Request(url, headers={'User-Agent' : "Magic Browser"})
site = urllib2.urlopen(request)
except:
print '%s does not exist in the wikipedia-%s' % (word, LANGUAGE_PT)
try:
url = BASE_URL % (LANGUAGE_EN, word)
request = urllib2.Request(url, headers={'User-Agent' : "Magic Browser"})
site = urllib2.urlopen(request)
except:
print '%s does not exist in the wikipedia-%s' % (word, LANGUAGE_EN)
return None
soup = BeautifulSoup(site)
img_src = soup.find("a", { "class" : "image" }).findChild('img').get('src')
return 'http://' + img_src.replace('//', '')
def rock():
"""Pega uma imagem da pasta rock aleatoriamente"""
figures = os.listdir(ROCK_FOLDER)
random.shuffle(figures)
return ROCK_FOLDER + figures[0]
if __name__ == "__main__":
# print get_image(sys.argv[1], True)
print rock()