Skip to content

Commit

Permalink
Refactor, harden, and bring up to date
Browse files Browse the repository at this point in the history
- use Neo4j driver version 5
- refactor and simplify here and there
- feedback for an unknown project is no longer registered under project `apoc`; it is now ignored
- security: don't allow arbitrary request fields to become database properties; filter them against a whitelist (all currently used fields are whitelisted)
- security: throttle requests -- identical requests coming in the same minute result in a `403` HTTP response 
- surface user journey in listing api.

Depends on 

neo4j-labs/neo4j-labs.github.io#3
neo4j-documentation/docs-ui#197
  • Loading branch information
stefano-ottolenghi authored Nov 3, 2023
2 parents 4abc992 + 9e1d074 commit 0f9cf1d
Show file tree
Hide file tree
Showing 3 changed files with 113 additions and 83 deletions.
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
node_modules
.idea
.serverless
.serverless
__pycache__
192 changes: 111 additions & 81 deletions handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,10 @@
import json
import logging
from urllib import parse

import boto3
import flask
from dateutil import parser
from neo4j import GraphDatabase
from retrying import retry

ssmc = boto3.client('ssm')
app = flask.Flask('feedback form')
Expand All @@ -29,60 +27,69 @@ def str2bool(v):
return v.lower() in ("yes", "true", "t", "1")


host_port = get_ssm_param('com.neo4j.labs.feedback.dbhostport')
user = get_ssm_param('com.neo4j.labs.feedback.dbuser')
password = get_ssm_param('com.neo4j.labs.feedback.dbpassword')

db_driver = GraphDatabase.driver(f"neo4j+s://{host_port}", auth=(user, password))

post_feedback_query = """
MATCH (project:Project {name: $project})
MERGE (page:Page {uri: $page})
MERGE (page)-[:PROJECT]->(project)
CREATE (feedback:Feedback)
SET feedback += $params, feedback.timestamp = datetime()
CREATE (page)-[:HAS_FEEDBACK]->(feedback)
"""
# `dbhostport` contains host:port, but lacks protocol. It is an Aura instance, so it is neo4j+s
HOST = 'neo4j+s://' + get_ssm_param('com.neo4j.labs.feedback.dbhostport')
USER = get_ssm_param('com.neo4j.labs.feedback.dbuser')
PASSWORD = get_ssm_param('com.neo4j.labs.feedback.dbpassword')

driver = GraphDatabase.driver(HOST, auth=(USER, PASSWORD))

@retry(stop_max_attempt_number=5, wait_random_max=1000)
def post_feedback(params):
with db_driver.session() as session:
result = session.run(post_feedback_query, params)
print(result.consume().counters)
return True


def determine_project(page):
if "/docs/labs/neo4j-streams" in page:
def determine_project(params):
if "project" in params.keys():
return params["project"]
if "/docs/labs/neo4j-streams" in params["url"]:
return "neo4j-streams"
if "grandstack.io" in page:
if "grandstack.io" in params["url"]:
return "GRANDstack"
return "apoc"
return ""


def feedback(request, context):
print("request:", request, "context:", context)
logger.info("request:", request, "context:", context)

form_data = parse.parse_qsl(request["body"])
headers = request["headers"]

params = {key: value for key, value in form_data}
fields_whitelist = [
'project', 'url', 'identity', 'gid', 'uetsid', 'helpful',
'moreInformation', 'reason', 'userJourney'
]

page = params["url"]
params["helpful"] = str2bool(params["helpful"])
params = {key: value for key, value in form_data if key in fields_whitelist}

headers = request["headers"]
project = determine_project(params)
params["helpful"] = str2bool(params["helpful"])
params["userAgent"] = headers.get("User-Agent")
params["referer"] = headers.get("Referer")

if "project" in params:
project = params["project"]
else:
project = determine_project(page)

print(page, params)
logger.info(f'Project `{project}`, query parameters: {params}')

result, _, _ = driver.execute_query("""
MATCH (feedback:Feedback)
WHERE feedback.url = $url AND feedback.helpful = $params.helpful AND
feedback.userAgent = $params.userAgent AND
datetime.truncate('minute', feedback.timestamp) = datetime.truncate('minute')
RETURN feedback
""", project=project, url=params['url'], params=params,
database_='neo4j')
if len(result) > 0:
logger.info('Duplicate request within same minute')
logger.info(result)
return {
"statusCode": 403
}

post_feedback({"params": params, "page": page, "project": project})
_, summary, _ = driver.execute_query("""
MATCH (project:Project {name: $project})
MERGE (page:Page {uri: $url})
MERGE (page)-[:PROJECT]->(project)
CREATE (feedback:Feedback)
SET feedback += $params, feedback.timestamp = datetime()
CREATE (page)-[:HAS_FEEDBACK]->(feedback)
""", project=project, url=params['url'], params=params,
database_='neo4j')
logger.info(f'Feedback stored: {summary.counters}')

return {
"statusCode": 200,
Expand All @@ -95,8 +102,13 @@ def feedback(request, context):


def feedback_api(event, context):
path_parameters = event.get("pathParameters")
'''headers = event.get('headers')
if headers.get('X-Neo-Feedback') == None: # some secrecy
return {
"statusCode": 403
}'''

path_parameters = event.get("pathParameters")
if not path_parameters:
return {
"statusCode": 404
Expand All @@ -110,24 +122,26 @@ def feedback_api(event, context):
else:
now = datetime.datetime.now().replace(day=1)

logger.info(f"Retrieving feedback for {now}")
params = {"year": now.year, "month": now.month, "project": project}

logger.info(f"Retrieving feedback for {params}")

with db_driver.session() as session:
params = {"year": now.year, "month": now.month, "project": project}
result = session.run("""
MATCH (feedback:Feedback)<-[:HAS_FEEDBACK]-(page)-[:PROJECT]->(:Project {name: $project})
WHERE datetime({year:$year, month:$month+1}) > feedback.timestamp >= datetime({year:$year, month:$month })
result, _, _ = driver.execute_query("""
MATCH (feedback:Feedback)<-[:HAS_FEEDBACK]-(page:Page)-[:PROJECT]->(:Project {name: $project})
WHERE datetime({year:$year, month:$month+1}) > feedback.timestamp >= datetime({year:$year, month:$month})
RETURN feedback, page
ORDER BY feedback.timestamp DESC
""", params)

rows = [{"helpful": row["feedback"]["helpful"],
"information": row["feedback"]["moreInformation"],
"reason": row["feedback"]["reason"],
"uri": row["page"]["uri"],
"date": row["feedback"]["timestamp"].to_native().strftime("%d %b %Y")
}
for row in result]
""", params, database_='neo4j')
rows = [
{
"helpful": row["feedback"]["helpful"],
"information": row["feedback"]["moreInformation"],
"reason": row["feedback"]["reason"],
"userJourney": prettify_journey(row["feedback"]["userJourney"]),
"uri": row["page"]["uri"],
"date": row["feedback"]["timestamp"].to_native().strftime("%d %b %Y")
}
for row in result]

response = {
"statusCode": 200,
Expand All @@ -141,6 +155,22 @@ def feedback_api(event, context):
return response


def prettify_journey(journey):
    """Render a JSON-encoded user journey as a multi-line, indented trail.

    Args:
        journey: JSON string encoding a list of steps, where each step is an
            object with a ``title`` and a numeric ``landTime``; or ``None``.

    Returns:
        ``None`` when ``journey`` is ``None``. Otherwise a string with one
        line per step, each terminated by a newline. Every step after the
        first is prefixed with ``'↳ '``, preceded by one space per level
        beyond the second step. Every step except the last is annotated with
        ``(<delta>s)``, where ``<delta>`` is the next step's ``landTime``
        minus this step's ``landTime``.

    Raises:
        json.JSONDecodeError: if ``journey`` is not valid JSON.
        KeyError: if a step lacks ``title`` or ``landTime``.
    """
    if journey is None:
        return journey

    steps = json.loads(journey)
    last_index = len(steps) - 1
    lines = []
    for i, step in enumerate(steps):
        # The first step has no arrow; later steps are indented progressively.
        prefix = ' ' * (i - 1) + '↳ ' if i > 0 else ''
        # NOTE(review): the 's' suffix presumes landTime is in seconds -- confirm.
        elapsed = ''
        if i < last_index:
            elapsed = '(' + str(steps[i + 1]['landTime'] - step['landTime']) + 's) '
        lines.append(prefix + elapsed + step['title'] + '\n')

    return ''.join(lines)

def page_api(event, context):
logger.info(f"event: {event}, context: {context}")
path_parameters = event.get("pathParameters")
Expand All @@ -154,23 +184,23 @@ def page_api(event, context):
page = base64.b64decode(encoded_page).decode("utf-8")

logger.info(f"page: {page}")
with db_driver.session() as session:
params = {"page": page}
result = session.run("""
MATCH (page {uri: $page})

result, _, _ = driver.execute_query("""
MATCH (page:Page {uri: $page})
RETURN page, [(page)-[:HAS_FEEDBACK]->(feedback) | feedback] AS feedback
""", params)

rows = [{"uri": row["page"]["uri"],
"feedback": [{
"helpful": entry["helpful"],
"information": entry["moreInformation"],
"reason": entry["reason"],
"date": entry["timestamp"].to_native().strftime("%d %b %Y")
}
for entry in row["feedback"]
]}
for row in result]
""", page=page, database_='neo4j')
rows = [
{
"uri": row["page"]["uri"],
"feedback": [{
"helpful": entry["helpful"],
"information": entry["moreInformation"],
"reason": entry["reason"],
"date": entry["timestamp"].to_native().strftime("%d %b %Y")
}
for entry in row["feedback"]]
}
for row in result]

response = {
"statusCode": 200,
Expand All @@ -194,8 +224,7 @@ def fire_api(event, context):

project = path_parameters.get("project").replace("@graphapps-", "@graphapps/")

with db_driver.session() as session:
result = session.run("""
result, _, _ = driver.execute_query("""
MATCH (project:Project {name: $project})<-[:PROJECT]-(page:Page)-[:HAS_FEEDBACK]->(feedback)
WITH page, collect(feedback) AS allFeedback
WITH page,
Expand All @@ -214,14 +243,15 @@ def fire_api(event, context):
1+(1/n*z*z) AS under
RETURN page, notHelpful, helpful, (left-right) / under AS unhelpfulness
ORDER BY unhelpfulness desc
""", {"project": project})

rows = [{"uri": row["page"]["uri"],
"helpful": row["helpful"],
"notHelpful": row["notHelpful"],
"unhelpfulness": row["unhelpfulness"]
}
for row in result]
""", project=project, database_='neo4j')
rows = [
{
"uri": row["page"]["uri"],
"helpful": row["helpful"],
"notHelpful": row["notHelpful"],
"unhelpfulness": row["unhelpfulness"]
}
for row in result]

response = {
"statusCode": 200,
Expand Down
1 change: 0 additions & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@ MarkupSafe==2.1.3
neo4j
python-dateutil==2.8.2
pytz==2023.3.post1
retrying
s3transfer==0.7.0
six==1.16.0
urllib3==2.0.7
Expand Down

0 comments on commit 0f9cf1d

Please sign in to comment.