# ask_reddit.py
# License: MIT License
# This sample code shows how to use the Reddit API to fetch posts from AskReddit.
# We fetch the posts and their comments, then use the OpenAI API to generate
# a consensus answer to the question asked in each post. We then post the
# answer to Twitter, append it to a Google Sheet, and send it out by email.
# The code is scheduled to run twice daily.
# You can try this out in the spreadsheet here:
# https://docs.google.com/spreadsheets/d/1YXXBW_B6DeKVJQHGjowf1AJr9s6_yYItPcvDlChf9YM/edit#gid=0
import requests
import neptyne as nt
import praw
from openai import OpenAI
from datetime import datetime, timezone
from requests_oauthlib import OAuth1

USER_NAME = "DouweOsinga"
USER_AGENT = (f"AnswerBot by /u/{USER_NAME} "
              "https://docs.google.com/spreadsheets/d/1YXXBW_B6DeKVJQHGjowf1AJr9s6_yYItPcvDlChf9YM")

def post_to_twitter(tweet):
    """Post a tweet via the Twitter v2 API.

    Make sure to use the consumer_key (also known as the API key),
    not the client_id.
    """
    url = "https://api.twitter.com/2/tweets"
    auth = OAuth1(nt.get_secret("CONSUMER_KEY"),
                  nt.get_secret("CONSUMER_KEY_SECRET"),
                  nt.get_secret("ACCESS_TOKEN"),
                  nt.get_secret("ACCESS_TOKEN_SECRET"))
    payload = {"text": tweet}
    return requests.post(
        auth=auth, url=url, json=payload, headers={"Content-Type": "application/json"}
    )

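# Usage sketch (hypothetical tweet text): post_to_twitter returns the raw
# requests.Response; the v2 tweets endpoint answers 201 on success, so a
# caller could check, e.g.:
#
#   resp = post_to_twitter("Testing the AnswerBot")
#   if resp.status_code != 201:
#       print("Tweet failed:", resp.status_code, resp.text)
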
def call_ai(prompt):
    # No key needed; Neptyne provides one:
    client = OpenAI()
    completion = client.chat.completions.create(
        model="gpt-4o",
        messages=[
            {
                "role": "user",
                "content": prompt,
            },
        ],
    )
    return completion.choices[0].message.content

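# Usage sketch (hypothetical prompt): call_ai takes a plain-text prompt and
# returns the model's reply as a string, e.g.:
#
#   reply = call_ai("Summarize in one sentence: why is the sky blue?")
#   print(reply)
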
def get_reddit_posts(subreddit_name: str, limit: int = 10):
    reddit = praw.Reddit(
        client_id=nt.get_secret("REDDIT"),
        client_secret=nt.get_secret("REDDIT_SECRET"),
        user_agent=USER_AGENT,
        username=USER_NAME,
    )
    subreddit = reddit.subreddit(subreddit_name)
    posts = subreddit.hot(limit=limit)
    results = []
    for post in posts:
        # Only answer questions that already have real traction:
        if post.score >= 3000:
            comments = post.comments.list()
            formatted_date = datetime.fromtimestamp(
                post.created_utc, tz=timezone.utc).strftime("%Y-%m-%d")
            results.append({
                'title': post.title,
                'url': post.url,
                'score': post.score,
                'date': formatted_date,
                # Deleted accounts come back as None; guard against that:
                'author': post.author.name if post.author else "[deleted]",
                'comments': [comment.body for comment in comments
                             if isinstance(comment, praw.models.Comment)]
            })
    return results

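# Each returned post is a plain dict, roughly shaped like this (illustrative
# values, not real data):
#
#   {'title': 'What is the best way to spend a summer?',
#    'url': 'https://www.reddit.com/r/AskReddit/comments/abc123/...',
#    'score': 4512,
#    'date': '2024-01-31',
#    'author': 'some_redditor',
#    'comments': ['Travel somewhere new.', 'Just relax.', ...]}
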
def process_post(post):
    asker = post['author']
    discussion = "\n".join(post['comments'])[:3500]
    question = post['title']
    formatted_date = post['date'][:10]
    url = post['url']
    score = post['score']
    prompt = [
        "Given this threaded discussion:\n\n",
        discussion,
        "\n\nFormulate an answer to this question:",
        question,
        "\nfrom ",
        asker,
        "\n\n"]
    prompt += [
        "Try to get to a consensus answer in 60 words. Some questions ask for personal replies, ",
        "like: how did you spend your summer? Others ask for a general answer, like: what is the ",
        "best way to spend a summer. In the first case, return an overview of the answers given. ",
        "In the second case, return a summary. Formulate the answer as an actual answer to the ",
        "question that makes sense to somebody who doesn't know there was a discussion. ",
        "Don't use markdown."
    ]
    consensus = call_ai("".join(prompt))
    return formatted_date, url, asker, score, question, consensus

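# The returned tuple matches the sheet columns B..G used by update_sheet
# below: (date, url, asker, score, question, consensus). For example
# (illustrative values):
#
#   ('2024-01-31', 'https://www.reddit.com/r/AskReddit/comments/abc123/...',
#    'some_redditor', 4512, 'What is the best way to spend a summer?',
#    'Most commenters agree that ...')
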
def update_sheet(posts):
    new_rows = []
    # B5:G is a Neptyne spreadsheet range exposed as a Python value; each row
    # is (date, url, asker, score, question, answer), so row[1] is the URL.
    rows = {
        row[1]: row
        for row in B5:G
    }
    for post in posts:
        url = post['url']
        if url not in rows:
            new_row = process_post(post)
            new_rows.append(new_row)
            rows[new_row[1]] = new_row
    # Assigning to B5 spills the sorted rows back into the sheet, newest first.
    B5 = sorted(rows.values(), key=lambda row: row[0], reverse=True)
    return new_rows

def generate_tweet(answer, url):
    # Twitter counts every URL as a fixed-length t.co link, so reserve 24
    # characters for it plus 2 for the separating newline.
    budget = 280 - 24 - 2
    if len(answer) > budget - 30:
        print(f"Answer too long ({len(answer)}), reducing to {budget - 30}")
        prompt = f"Summarize this answer to {budget - 20} characters:" + answer
        answer = call_ai(prompt)
        print(f"Now {len(answer)}")
    if len(answer) > budget:
        # Hard truncation as a last resort if the model overshoots:
        answer = answer[:budget - 3] + "..."
    tweet = answer + "\n" + url
    return tweet

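# Usage sketch (hypothetical values): the answer part is kept within the
# 280 - 24 - 2 = 254 character budget, so the result fits in one tweet.
#
#   tweet = generate_tweet("Most commenters agree: travel light.",
#                          "https://redd.it/abc123")
#   post_to_twitter(tweet)
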
@nt.daily(hour=1, minute=1, timezone="America/New_York")
@nt.daily(hour=13, minute=1, timezone="America/New_York")
async def keep_updated():
    print("Running...")
    posts = get_reddit_posts("AskReddit", limit=25)
    new_rows = update_sheet(posts)
    paras = []
    for date, url, poster, score, question, answer in new_rows:
        paras.append(f"{poster}: {question}")
        paras.append(answer)
        paras.append(url)
        paras.append('')
    if paras:
        nt.email.send("[email protected]", f"{len(new_rows)} New Answers",
                      "\n".join(paras))
    for date, url, poster, score, question, answer in new_rows:
        tweet = generate_tweet(answer, url)
        post_to_twitter(tweet)
    print("Processed", len(new_rows), "rows")
    return new_rows
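
# keep_updated normally fires twice daily via the @nt.daily decorators above.
# For a one-off test run, one could invoke the coroutine directly; this is a
# minimal sketch assuming the Neptyne environment (secrets, spreadsheet) is
# available when it runs.
if __name__ == "__main__":
    import asyncio
    asyncio.run(keep_updated())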