diff --git a/PhotoScraper/RPI_SIS_PhotoScraper.py b/PhotoScraper/RPI_SIS_PhotoScraper.py
index c442e16..2760137 100644
--- a/PhotoScraper/RPI_SIS_PhotoScraper.py
+++ b/PhotoScraper/RPI_SIS_PhotoScraper.py
@@ -1,34 +1,53 @@
+import sys
import json
import time
import imghdr
import argparse
-import getpass, requests, os, re
+import getpass
+import requests
+import os
from pathlib import Path
from selenium.webdriver.support.ui import Select
from selenium.common.exceptions import NoSuchElementException
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
-from selenium.webdriver.support.ui import WebDriverWait
-from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.keys import Keys
from datetime import datetime
##################################################################
# a few optional command line argument variables
-parser = argparse.ArgumentParser(description='RPI SIS Photo and Registration Scraper')
-parser.add_argument('--credentials_file', type=str, default="",
- help='a file containing the user RIN and PIN')
-parser.add_argument('--term_file', type=str, default="",
- help='a file containing the term')
-parser.add_argument('--crn_file', type=str, default="",
- help='a file containing the crns of desired courses')
-parser.add_argument('--headless', default=False, action="store_true",
- help='run program without visual display')
-parser.add_argument('--no_photos', default=False, action="store_true",
- help='dont save photos')
-parser.add_argument('--run_forever', default=False, action="store_true",
- help='keep clicking to avoid auto-timeout')
+parser = argparse.ArgumentParser(description="RPI SIS Photo and Registration Scraper")
+parser.add_argument(
+ "--credentials_file",
+ type=str,
+ default="",
+ help="a file containing the user RIN and PIN",
+)
+parser.add_argument(
+ "--term_file", type=str, default="", help="a file containing the term"
+)
+parser.add_argument(
+ "--crn_file",
+ type=str,
+ default="",
+ help="a file containing the crns of desired courses",
+)
+parser.add_argument(
+ "--headless",
+ default=False,
+ action="store_true",
+ help="run program without visual display",
+)
+parser.add_argument(
+ "--no_photos", default=False, action="store_true", help="dont save photos"
+)
+parser.add_argument(
+ "--run_forever",
+ default=False,
+ action="store_true",
+ help="keep clicking to avoid auto-timeout",
+)
args = parser.parse_args()
@@ -37,6 +56,7 @@
# Workaround for if pyopenssl is installed and we want weak keys
try:
from urllib3.contrib import pyopenssl
+
pyopenssl.extract_from_urllib3()
except ImportError:
pass
@@ -46,79 +66,51 @@
# Login to SIS
def login():
chrome_options = Options()
- # read credentials from (optional) file
- if len(args.credentials_file)>0 and os.path.isfile(args.credentials_file):
- with open(str(args.credentials_file),'r') as f:
- rin_id = f.readline().strip()
- pin_id = f.readline().strip()
- else:
- rin_id = input("RIN: ")
- pin_id = getpass.getpass("PIN: ")
# By default we launch the display and allow visual debugging
if args.headless:
chrome_options.add_argument("--headless")
chrome_options.add_argument("--no-sandbox")
- # Just setting the default ciphers (for this session) to be weak DES/SHA for SIS compatibility
- # Be careful about navigating to any other sites...
- requests.packages.urllib3.util.ssl_.DEFAULT_CIPHERS = 'DES-CBC3-SHA:AES128-SHA:'+requests.packages.urllib3.util.ssl_.DEFAULT_CIPHERS
+ # read credentials from (optional) file
+ if len(args.credentials_file) > 0 and os.path.isfile(args.credentials_file):
+ with open(str(args.credentials_file), "r") as f:
+ rcsid = f.readline().strip()
+ passwd = f.readline().strip()
+ else:
+ rcsid = input("RCS ID: ")
+ passwd = getpass.getpass("Password: ")
+
+ print("Setting up selenium", file=sys.stderr)
driver = webdriver.Chrome(options=chrome_options)
+ driver.implicitly_wait(10)
# open SIS
- driver.get('https://sis.rpi.edu/')
+ driver.get("https://sis.rpi.edu")
- count = 0
- while True:
- count += 1
- # Types in username & password in login page
- try:
- username_box = driver.find_element_by_id('username')
- break
- except:
- if count > 10:
- print ("ERROR: couldn't find username box")
- exit(0)
- # slight delay to allow page to load
- print ("wait a little longer for the page to load")
- time.sleep(1)
- username_box.send_keys(rin_id)
try:
- password_box = driver.find_element_by_id('password')
- except:
- print ("ERROR: couldn't find password box")
- exit(0)
-
- password_box.send_keys(pin_id)
-
- time.sleep(2)
-
- try:
- # click login button
- login_button = driver.find_element_by_name("submit")
- except:
- print ("ERROR: couldn't find submit button")
- exit(0)
-
- print ("now we can click login button")
- login_button.click()
-
- time.sleep(2)
-
- while True:
-
- time.sleep(3)
-
- if "Rensselaer Self-Service Information System" in driver.page_source:
- print("success -- made it past duo page")
- break
- else:
- print("please complete duo authentication")
-
- print ("Continuing with processing...")
- success = True
-
- return driver, success
+ print("Logging in", file=sys.stderr)
+ driver.find_element(By.CSS_SELECTOR, 'button[type="submit"]').click()
+ driver.find_element(By.ID, "username").send_keys(rcsid)
+ driver.find_element(By.ID, "password").send_keys(passwd)
+ driver.find_element(By.CSS_SELECTOR, 'button[type="submit"]').click()
+
+ if len(driver.find_elements(By.CSS_SELECTOR, "p.output--error")) > 0:
+ print("Incorrect password", file=sys.stderr)
+ return driver, False
+
+ print("2FA: enter code on your phone", file=sys.stderr)
+ driver.implicitly_wait(25)
+ driver.find_element(By.ID, "dont-trust-browser-button").click()
+ driver.implicitly_wait(10)
+ driver.find_element(
+ By.CSS_SELECTOR, 'a[href="/rss/twbkwbis.P_GenMenu?name=bmenu.P_MainMnu"]'
+ )
+
+ return driver, True
+ except NoSuchElementException:
+ print("Failed to login", file=sys.stderr)
+ return driver, False
##################################################################
@@ -126,18 +118,18 @@ def login():
def selectTerm(driver):
# read term from (optional) file
term = ""
- if len(str(args.term_file))>0 and os.path.isfile(str(args.term_file)):
- with open(str(args.term_file),'r') as f:
+ if len(str(args.term_file)) > 0 and os.path.isfile(str(args.term_file)):
+ with open(str(args.term_file), "r") as f:
term = f.readline().strip()
# click Instructors & Advisors Menu
- driver.find_element_by_link_text('Instructor & Advisor Menu').click()
+ driver.find_element(By.LINK_TEXT, "Instructor & Advisor Menu").click()
# click Select a Semester or Summer Session
- driver.find_element_by_link_text('Select a Semester or Summer Session').click()
+ driver.find_element(By.LINK_TEXT, "Select a Semester or Summer Session").click()
# grab the list of available terms
- select_term = Select(driver.find_element_by_name('term'))
+ select_term = Select(driver.find_element(By.NAME, "term"))
options_term = select_term.options
# loop (querying live user) until we have a valid term
@@ -147,7 +139,7 @@ def selectTerm(driver):
if term == "":
print("Here are the available Sessions/Terms:")
for index in range(len(options_term)):
- print("[{0}] {1}".format(index,options_term[index].text))
+ print("[{0}] {1}".format(index, options_term[index].text))
term = input("Select a term ( or Exit to terminate ): ")
# if user does not wish to continue, exit
if term.lower() == "exit":
@@ -165,7 +157,7 @@ def selectTerm(driver):
term = ""
# click submit button
- driver.find_element_by_xpath("//input[@value='Submit']").click()
+ driver.find_element(By.XPATH, "//input[@value='Submit']").click()
return term, True
@@ -176,17 +168,17 @@ def saveImagesToFolder(term, class_list):
if len(class_list) == 0:
return
- course_crn = class_list[0]['course_crn']
- course_prefix = class_list[0]['course_prefix']
- course_name = class_list[0]['course_name']
- course_section = class_list[0]['course_section']
- course_number = class_list[0]['course_number']
+ course_crn = class_list[0]["course_crn"]
+ course_prefix = class_list[0]["course_prefix"]
+ course_name = class_list[0]["course_name"]
+ course_section = class_list[0]["course_section"]
+ course_number = class_list[0]["course_number"]
- course_folder_name = "{}-{}-{}".format(course_prefix,course_number,course_section)
+ course_folder_name = "{}-{}-{}".format(course_prefix, course_number, course_section)
# make term (month year) into month-year
term_elements = term.split()
- folder_term = term_elements[0]+"-"+term_elements[1]
+ folder_term = term_elements[0] + "-" + term_elements[1]
# get path and create path if not already existed
path = Path(folder_term, course_folder_name)
@@ -197,30 +189,30 @@ def saveImagesToFolder(term, class_list):
# loops through the class list of dictionaries of student info
for i in range(len(class_list)):
obj = {}
- obj['full_name'] = class_list[i]['name']
- obj['first_name'] = class_list[i]['first_name']
- obj['middle_name'] = class_list[i]['middle_name']
- obj['last_name'] = class_list[i]['last_name']
- obj['degrees'] = class_list[i]['degrees']
+ obj["full_name"] = class_list[i]["name"]
+ obj["first_name"] = class_list[i]["first_name"]
+ obj["middle_name"] = class_list[i]["middle_name"]
+ obj["last_name"] = class_list[i]["last_name"]
+ obj["degrees"] = class_list[i]["degrees"]
- obj['rin'] = class_list[i]['rin']
+ obj["rin"] = class_list[i]["rin"]
if "rcs" not in class_list[i]:
- obj['rcs'] = ""
+ obj["rcs"] = ""
print(f"Warning: no RCS for {class_list[i]['name']}")
else:
- obj['rcs'] = class_list[i]['rcs']
+ obj["rcs"] = class_list[i]["rcs"]
if "email" not in class_list[i]:
- obj['email'] = ""
+ obj["email"] = ""
print(f"Warning: no email for {class_list[i]['name']}")
else:
- obj['email'] = class_list[i]['email']
+ obj["email"] = class_list[i]["email"]
- obj['course_crn'] = class_list[i]['course_crn']
- obj['course_prefix'] = class_list[i]['course_prefix']
- obj['course_name'] = class_list[i]['course_name']
- obj['course_section'] = class_list[i]['course_section']
- obj['course_number'] = class_list[i]['course_number']
- obj['term'] = class_list[i]['term']
+ obj["course_crn"] = class_list[i]["course_crn"]
+ obj["course_prefix"] = class_list[i]["course_prefix"]
+ obj["course_name"] = class_list[i]["course_name"]
+ obj["course_section"] = class_list[i]["course_section"]
+ obj["course_number"] = class_list[i]["course_number"]
+ obj["term"] = class_list[i]["term"]
jsonfile.append(obj)
for k in class_list[i].keys():
@@ -231,10 +223,10 @@ def saveImagesToFolder(term, class_list):
first_name = name_str[0]
last_name = name_str[1]
rcs_id = "error-{}-{}".format(first_name, last_name)
- print ("NAMESTR="+name_str)
- print ("first_name="+first_name)
- print ("last_name="+last_name)
- print ("rcs_id"+rcs_id)
+ print("NAMESTR=" + name_str)
+ print("first_name=" + first_name)
+ print("last_name=" + last_name)
+ print("rcs_id" + rcs_id)
# if there is an email address, assign letters before "@rpi.edu" to rcs_id
if k == "email":
rcs_id = class_list[i].get(k)[:-8]
@@ -249,18 +241,18 @@ def saveImagesToFolder(term, class_list):
continue
r = requests.get(img_url)
- #Deduce the extension, build the output path
- img_format = imghdr.what(None,r.content).lower()
+ # Deduce the extension, build the output path
+ img_format = imghdr.what(None, r.content).lower()
img_name = rcs_id + "." + img_format
filepath = path / img_name
- #Actually write the file. We could skip the context manager and just use Image.save(filepath)
- with open(str(filepath),'wb') as f:
+ # Actually write the file. We could skip the context manager and just use Image.save(filepath)
+ with open(str(filepath), "wb") as f:
f.write(r.content)
print("Saved photo for student rcs {}".format(rcs_id))
- term_string = term.replace(' ','_')
- filename = "all_json/"+term_string+"_"+course_crn+".json"
+ term_string = term.replace(" ", "_")
+ filename = "all_json/" + term_string + "_" + course_crn + ".json"
with open(filename, "w") as f:
json.dump(jsonfile, f, indent=4, sort_keys=True)
@@ -273,49 +265,54 @@ def getStudentInfoFromCourse(driver, term):
class_list = []
# click Summary Class List & Electronic Warning System (EWS)
- driver.find_element_by_link_text('Summary Class List & Electronic Warning System (EWS)').click()
+ driver.find_element(
+ By.LINK_TEXT, "Summary Class List & Electronic Warning System (EWS)"
+ ).click()
try:
- current_record = driver.find_element_by_partial_link_text('Current Record Set')
- #print ("'Current Record Set' label found")
+ current_record = driver.find_element(By.PARTIAL_LINK_TEXT, "Current Record Set")
+ # print ("'Current Record Set' label found")
try:
- first = driver.find_element_by_link_text('Current Record Set: 1 - 200')
- print ("1-200 found")
- getStudentInfoFromCourseHelper(driver,term, class_list)
- print ("1-200 finished")
+ first = driver.find_element(
+ By.PARTIAL_LINK_TEXT, "Current Record Set: 1 - 200"
+ )
+ print("1-200 found")
+ getStudentInfoFromCourseHelper(driver, term, class_list)
+ print("1-200 finished")
try:
- second = driver.find_element_by_partial_link_text('201 -')
- print ("201-?? found")
+ second = driver.find_element(By.PARTIAL_LINK_TEXT, "201 -")
+ print("201-?? found")
second.click()
- getStudentInfoFromCourseHelper(driver,term, class_list)
+ getStudentInfoFromCourseHelper(driver, term, class_list)
driver.back()
- print ("201-?? finished")
+ print("201-?? finished")
except:
- print ("ERROR IN CURRENT RECORD COUNTING -- SECOND")
+ print("ERROR IN CURRENT RECORD COUNTING -- SECOND")
return 0
except:
- print ("ERROR IN CURRENT RECORD COUNTING -- FIRST")
+ print("ERROR IN CURRENT RECORD COUNTING -- FIRST")
return 0
except:
- #print ("'Current Record Set' label not found")
- getStudentInfoFromCourseHelper(driver,term, class_list)
+ # print ("'Current Record Set' label not found")
+ getStudentInfoFromCourseHelper(driver, term, class_list)
driver.back()
driver.back()
if class_list == 0:
- print ("Warning: this class size is 0")
+ print("Warning: this class size is 0")
else:
# Use the info collected and save the image with rcs id for term/course in current directory
saveImagesToFolder(term, class_list)
+
##################################################################
-def addMajor(majors,degree,text):
+def addMajor(majors, degree, text):
majors.append(degree + " / " + text)
return majors
-def addConcentrationToLastMajor(majors,text):
+def addConcentrationToLastMajor(majors, text):
majors[-1] = majors[-1] + " / " + text
return majors
@@ -325,44 +322,58 @@ def addConcentrationToLastMajor(majors,text):
def getStudentInfoFromCourseHelper(driver, term, class_list):
# check if class is size 0
- if len(driver.find_elements_by_class_name('errortext')) == 1:
+ if len(driver.find_elements(By.CLASS_NAME, "errortext")) == 1:
print("Error: Class size is 0!")
return 0
- COURSENAMESTRING = driver.find_elements_by_class_name('datadisplaytable')[0].find_element_by_tag_name('tbody').find_elements_by_tag_name('tr')[0].find_elements_by_tag_name('th')[0].text
+ COURSENAMESTRING = (
+ driver.find_elements(By.CLASS_NAME, "datadisplaytable")[0]
+ .find_element(By.TAG_NAME, "tbody")
+ .find_elements(By.TAG_NAME, "tr")[0]
+ .find_elements(By.TAG_NAME, "th")[0]
+ .text
+ )
- COURSENAMESTRING_split = COURSENAMESTRING.split(' - ')
+ COURSENAMESTRING_split = COURSENAMESTRING.split(" - ")
if len(COURSENAMESTRING_split) < 2:
- print ("ERROR: course name formatting bug")
+ print("ERROR: course name formatting bug")
return 0
COURSENAME = ""
- for index in range(len(COURSENAMESTRING_split)-1):
+ for index in range(len(COURSENAMESTRING_split) - 1):
if index > 0:
COURSENAME = COURSENAME + " - "
COURSENAME = COURSENAME + COURSENAMESTRING_split[index]
- print ("COURSE NAME IS "+COURSENAME)
+ print("COURSE NAME IS " + COURSENAME)
if len(COURSENAMESTRING_split[-1]) != 12:
- print ("ERROR: course prefix / code bug")
+ print("ERROR: course prefix / code bug")
return 0
COURSEPREFIX = COURSENAMESTRING_split[-1][0:4]
COURSENUMBER = COURSENAMESTRING_split[-1][5:9]
COURSESECTION = COURSENAMESTRING_split[-1][10:]
- CRNSTRING = driver.find_elements_by_class_name('datadisplaytable')[0].find_element_by_tag_name('tbody').find_elements_by_tag_name('tr')
+ CRNSTRING = (
+ driver.find_elements(By.CLASS_NAME, "datadisplaytable")[0]
+ .find_element(By.TAG_NAME, "tbody")
+ .find_elements(By.TAG_NAME, "tr")
+ )
CRNSTRING = str(CRNSTRING[1].text)
if CRNSTRING[0:5] == "CRN: ":
CRNSTRING = CRNSTRING[5:]
else:
- print ("ERROR: could not find CRN")
+ print("ERROR: could not find CRN")
return 0
# find link for pic
- student_list = driver.find_elements_by_class_name('datadisplaytable')[2].find_element_by_tag_name('tbody').find_elements_by_tag_name('tr')
+ student_list = (
+ driver.find_elements(By.CLASS_NAME, "datadisplaytable")[2]
+ .find_element(By.TAG_NAME, "tbody")
+ .find_elements(By.TAG_NAME, "tr")
+ )
# find which column is the "Student Name" column, since it isn't always the same column number
- student_headers = student_list[0].find_elements_by_tag_name('th')
+ student_headers = student_list[0].find_elements(By.TAG_NAME, "th")
stu_col = -1
id_col = -1
for i in range(len(student_headers)):
@@ -372,29 +383,33 @@ def getStudentInfoFromCourseHelper(driver, term, class_list):
id_col = i
if stu_col < 0:
- print("Error: Could not find a column labeled \"Student Name\"!")
+ print('Error: Could not find a column labeled "Student Name"!')
return 0
if id_col < 0:
- print("Error: Could not find a column labeled \"ID\"!")
+ print('Error: Could not find a column labeled "ID"!')
return 0
# NOTE: uncomment this line to help with debugging
- #print("Student column: " + str(stu_col))
+ # print("Student column: " + str(stu_col))
# loop through list of students to get image, name, and email
# all info collected from for loop (img url, name, email) put into dict
for s in range(1, len(student_list)):
student_record = {}
- student = driver.find_elements_by_class_name('datadisplaytable')[2].find_element_by_tag_name('tbody').find_elements_by_tag_name('tr')[s]
+ student = (
+ driver.find_elements(By.CLASS_NAME, "datadisplaytable")[2]
+ .find_element(By.TAG_NAME, "tbody")
+ .find_elements(By.TAG_NAME, "tr")[s]
+ )
# NOTE: uncomment these line to help with debugging
- #print('Row Number: ' + str(s))
- #print('Row Length: ' + str(len(student.find_elements_by_tag_name('td'))))
- #print('Cell Value: ' + student.find_elements_by_tag_name('td')[stu_col].text)
+ # print('Row Number: ' + str(s))
+ # print('Row Length: ' + str(len(student.find_elements(By.TAG_NAME, 'td'))))
+ # print('Cell Value: ' + student.find_elements(By.TAG_NAME, 'td')[stu_col].text)
- full_name_cell = student.find_elements_by_tag_name('td')[stu_col].text
- full_name_cell_split = full_name_cell.split(', ')
+ full_name_cell = student.find_elements(By.TAG_NAME, "td")[stu_col].text
+ full_name_cell_split = full_name_cell.split(", ")
# format of the full name appears to be one of:
# Smith, John X. (with middle initial)
# Smith, John (with no middle name/initial)
@@ -404,14 +419,20 @@ def getStudentInfoFromCourseHelper(driver, term, class_list):
first_name = full_name_cell_split[1]
middle_name = ""
first_name_length = len(first_name)
- if first_name_length > 3 and first_name[first_name_length-3] == ' ' and first_name[first_name_length-1] == '.':
- middle_name = first_name[first_name_length-2:]
- first_name = first_name[0:first_name_length-3]
+ if (
+ first_name_length > 3
+ and first_name[first_name_length - 3] == " "
+ and first_name[first_name_length - 1] == "."
+ ):
+ middle_name = first_name[first_name_length - 2 :]
+ first_name = first_name[0 : first_name_length - 3]
- id_rin = student.find_elements_by_tag_name('td')[id_col].text
+ id_rin = student.find_elements(By.TAG_NAME, "td")[id_col].text
try:
- student.find_elements_by_tag_name('td')[stu_col].find_element_by_class_name('fieldmediumtext').click()
+ student.find_elements(By.TAG_NAME, "td")[stu_col].find_element(
+ By.CLASS_NAME, "fieldmediumtext"
+ ).click()
except:
input()
raise
@@ -420,48 +441,58 @@ def getStudentInfoFromCourseHelper(driver, term, class_list):
driver.get(img_url)
# image, initalize to empty string
- student_record['img url'] = ""
- image_arr = driver.find_elements_by_tag_name('img')
+ student_record["img url"] = ""
+ image_arr = driver.find_elements(By.TAG_NAME, "img")
- #do search through all tags for first non-header-layout tag
- #have to skip 2 more tags because they are transparent images
+ # do search through all tags for first non-header-layout tag
+ # have to skip 2 more tags because they are transparent images
for i in range(len(image_arr)):
- if image_arr[i].get_attribute('NAME') != "web_tab_corner_right":
- student_record['img url'] = image_arr[i+2].get_attribute('src')
- #Uncomment this line to print the image URLs we are attempting, useful for debugging
- #print("found non-match, +2 is " + student_record['img url'])
+ if image_arr[i].get_attribute("NAME") != "web_tab_corner_right":
+ student_record["img url"] = image_arr[i + 2].get_attribute("src")
+ # Uncomment this line to print the image URLs we are attempting, useful for debugging
+ # print("found non-match, +2 is " + student_record['img url'])
break
# name
- info_name = driver.find_elements_by_class_name('plaintable')[4].find_element_by_tag_name('tbody').find_element_by_tag_name('tr').find_elements_by_tag_name('td')[1].text
+ info_name = (
+ driver.find_elements(By.CLASS_NAME, "plaintable")[4]
+ .find_element(By.TAG_NAME, "tbody")
+ .find_element(By.TAG_NAME, "tr")
+ .find_elements(By.TAG_NAME, "td")[1]
+ .text
+ )
name = info_name[16:]
- student_record['name'] = name
- student_record['rin'] = id_rin
+ student_record["name"] = name
+ student_record["rin"] = id_rin
- student_record['first_name'] = first_name
- student_record['middle_name'] = middle_name
- student_record['last_name'] = last_name
+ student_record["first_name"] = first_name
+ student_record["middle_name"] = middle_name
+ student_record["last_name"] = last_name
- student_record['course_name'] = COURSENAME
- student_record['course_number'] = COURSENUMBER
- student_record['course_prefix'] = COURSEPREFIX
- student_record['course_crn'] = CRNSTRING
- student_record['course_section'] = COURSESECTION
+ student_record["course_name"] = COURSENAME
+ student_record["course_number"] = COURSENUMBER
+ student_record["course_prefix"] = COURSEPREFIX
+ student_record["course_crn"] = CRNSTRING
+ student_record["course_section"] = COURSESECTION
- student_record['term'] = term
+ student_record["term"] = term
- print("Gathering info for student: "+name)
+ print("Gathering info for student: " + name)
# email address
- driver.find_element_by_link_text('Student E-mail Address').click()
- if len(driver.find_elements_by_class_name('datadisplaytable')) == 1:
- emails = driver.find_element_by_class_name('datadisplaytable').find_element_by_tag_name('tbody').find_elements_by_tag_name('tr')
+ driver.find_element(By.LINK_TEXT, "Student E-mail Address").click()
+ if len(driver.find_elements(By.CLASS_NAME, "datadisplaytable")) == 1:
+ emails = (
+ driver.find_element(By.CLASS_NAME, "datadisplaytable")
+ .find_element(By.TAG_NAME, "tbody")
+ .find_elements(By.TAG_NAME, "tr")
+ )
for i in range(len(emails)):
if emails[i].text == "Campus Student Email Address":
- email = emails[i+1].find_element_by_tag_name('td').text
- student_record['email'] = email
- student_record['rcs'] = email[0:len(email)-8]
+ email = emails[i + 1].find_element(By.TAG_NAME, "td").text
+ student_record["email"] = email
+ student_record["rcs"] = email[0 : len(email) - 8]
break
driver.back()
@@ -469,28 +500,30 @@ def getStudentInfoFromCourseHelper(driver, term, class_list):
majors = []
# undergraduate major
- driver.find_element_by_link_text('Student Information').click()
- if len(driver.find_elements_by_class_name('datadisplaytable')) >= 1:
+ driver.find_element(By.LINK_TEXT, "Student Information").click()
+ if len(driver.find_elements(By.CLASS_NAME, "datadisplaytable")) >= 1:
- for table in driver.find_elements_by_class_name('datadisplaytable'):
- stuff = table.find_element_by_tag_name('tbody').find_elements_by_tag_name('tr')
+ for table in driver.find_elements(By.CLASS_NAME, "datadisplaytable"):
+ stuff = table.find_element(By.TAG_NAME, "tbody").find_elements(
+ By.TAG_NAME, "tr"
+ )
for i in range(len(stuff)):
if stuff[i].text == "Current Program":
- degree = stuff[i+1].text
+ degree = stuff[i + 1].text
if stuff[i].text[0:7] == "Major: ":
- majors = addMajor(majors,degree,stuff[i].text[7:])
+ majors = addMajor(majors, degree, stuff[i].text[7:])
if stuff[i].text[0:22] == "Major and Department: ":
- majors = addMajor(majors,degree,stuff[i].text[22:])
+ majors = addMajor(majors, degree, stuff[i].text[22:])
if stuff[i].text[0:21] == "Major Concentration: ":
- majors = addConcentrationToLastMajor(majors,stuff[i].text[21:])
+ majors = addConcentrationToLastMajor(majors, stuff[i].text[21:])
driver.back()
- student_record['degrees'] = []
+ student_record["degrees"] = []
for m in majors:
- #print ("MAJOR"+m)
- student_record['degrees'].append(m)
+ # print ("MAJOR"+m)
+ student_record["degrees"].append(m)
class_list.append(student_record)
driver.back()
@@ -498,17 +531,17 @@ def getStudentInfoFromCourseHelper(driver, term, class_list):
##################################################################
# Gets the info regarding each course of student images with their rcs id
-def wasteTimeClicking(driver,seconds):
+def wasteTimeClicking(driver, seconds):
counter = 0
while counter < seconds:
print("wasting time")
# click Instructors & Advisors Menu
- driver.find_element_by_link_text('Instructor & Advisor Menu').click()
+ driver.find_element(By.LINK_TEXT, "Instructor & Advisor Menu").click()
time.sleep(5)
# click Select a Semester or Summer Session
- driver.find_element_by_link_text('Select a Semester or Summer Session').click()
+ driver.find_element(By.LINK_TEXT, "Select a Semester or Summer Session").click()
time.sleep(55)
counter += 60
@@ -516,11 +549,11 @@ def wasteTimeClicking(driver,seconds):
##################################################################
# Gets the info regarding each course of student images with their rcs id
-def loopOverCourses(driver,term):
+def loopOverCourses(driver, term):
# read crns from (optional) file
crns = []
- if len(str(args.crn_file))>0 and os.path.isfile(str(args.crn_file)):
- with open(str(args.crn_file),'r') as f:
+ if len(str(args.crn_file)) > 0 and os.path.isfile(str(args.crn_file)):
+ with open(str(args.crn_file), "r") as f:
while True:
crn = f.readline().strip()
if len(crn) != 5:
@@ -528,38 +561,40 @@ def loopOverCourses(driver,term):
crns.append(crn)
# make a directory to hold the registration directories
- os.makedirs('all_json',exist_ok=True)
+ os.makedirs("all_json", exist_ok=True)
# click Course Information- Select a CRN
- driver.find_element_by_link_text('Course Information- Select a CRN').click()
+ driver.find_element(By.LINK_TEXT, "Course Information- Select a CRN").click()
# if there was at least one crn in the file
if crns:
- driver.find_element_by_link_text('Enter Section Identifier (CRN) Directly').click()
+ driver.find_element(
+ By.LINK_TEXT, "Enter Section Identifier (CRN) Directly"
+ ).click()
for crn in crns:
- print ("Begin processing CRN "+crn)
- crn_box = driver.find_element_by_name('CRN')
+ print("Begin processing CRN " + crn)
+ crn_box = driver.find_element(By.NAME, "CRN")
crn_box.clear()
crn_box.send_keys(crn)
crn_box.send_keys(Keys.TAB)
crn_box.send_keys(Keys.RETURN)
getStudentInfoFromCourse(driver, term)
- print ("Finished processing CRN "+crn)
+ print("Finished processing CRN " + crn)
return
# otherwise, query the user for which crns to scrape
else:
# check if there are any sections assigned for this term
- if len(driver.find_elements_by_class_name('warningtext')) == 1:
- print ("Error: No sections assigned for this term!")
+ if len(driver.find_elements(By.CLASS_NAME, "warningtext")) == 1:
+ print("Error: No sections assigned for this term!")
return
# iterate and ask if user wants images/names from this course
- select_course = Select(driver.find_element_by_name('crn'))
+ select_course = Select(driver.find_element(By.NAME, "crn"))
options_course = select_course.options
for index in range(len(options_course)):
- select_course = Select(driver.find_element_by_name('crn'))
+ select_course = Select(driver.find_element(By.NAME, "crn"))
options_course = select_course.options
course = options_course[index].text
@@ -573,47 +608,66 @@ def loopOverCourses(driver,term):
elif answer == "exit":
return
elif answer == "y":
- print ("Getting student pictures... (this could take a few seconds per student)")
+ print(
+ "Getting student pictures... (this could take a few seconds per student)"
+ )
select_course.select_by_index(index)
- driver.find_element_by_xpath("//input[@value='Submit']").click()
+ driver.find_element(By.XPATH, "//input[@value='Submit']").click()
getStudentInfoFromCourse(driver, term)
break
else:
print("Invalid answer! Try again!")
-##################################################################
-if __name__ == "__main__":
- try:
- driver, success = login()
-
- # if login is valid with correct User ID or PIN, continue the program by collecting data
- if success:
-
- while True:
-
- # Get the term to use to save images
- term, success = selectTerm(driver)
+# Returns the link text of every section link found by a CSCI class search. Assumes SIS main page is open
+def get_csci_crns(driver):
+ driver.find_element(By.LINK_TEXT, "SITE MAP").click()
+ driver.find_element(By.LINK_TEXT, "Class Search").click()
- if success:
- loopOverCourses(driver,term)
+ driver.find_element(
+ By.XPATH, "/html/body/div[3]/form/table/tbody/tr/td/select/option[2]"
+ ).click()
+ driver.find_element(By.CSS_SELECTOR, 'input[type="submit"][value="Submit"]').click()
+ driver.find_element(By.CSS_SELECTOR, 'option[value="CSCI"]').click()
+ driver.find_element(
+ By.CSS_SELECTOR, 'input[type="submit"][value="Section Search"]'
+ ).click()
+ return [
+ crn_link.text
+ for crn_link in driver.find_elements(
+ By.CSS_SELECTOR, 'a[href^="/rss/bwckschd.p_disp_listcrse"]'
+ )
+ ]
- sttime = datetime.now().strftime('%Y%m%d %H:%M:%S')
- with open("last_completed_run.txt", 'a') as logfile:
- logfile.write(sttime + ' completed scrape\n')
- if not args.run_forever:
- print ("--------------------\nlets NOT run forever\n--------------------")
- break
-
- # wait a number of hours before doing it all again
- num_hours = 1
- wasteTimeClicking(driver,60*60*num_hours)
- print ("----------------\nLETS RUN FOREVER\n----------------")
+##################################################################
+if __name__ == "__main__":
+ driver, success = login()
- finally:
- # ends the program
- try:
- driver.quit()
- except:
- pass #If we got an exception in login(), driver will not exist in this scope
+    # Scrape only after a successful login; the browser is closed on every path.
+    try:
+        if success:
+            print(get_csci_crns(driver))
+    finally:
+        driver.quit()
+ # while True:
+ # # Get the term to use to save images
+ # term, success = selectTerm(driver)
+
+ # if success:
+ # loopOverCourses(driver, term)
+
+ # sttime = datetime.now().strftime("%Y%m%d %H:%M:%S")
+ # with open("last_completed_run.txt", "a") as logfile:
+ # logfile.write(sttime + " completed scrape\n")
+
+ # if not args.run_forever:
+ # print(
+ # "--------------------\nlets NOT run forever\n--------------------"
+ # )
+ # break
+
+ # # wait a number of hours before doing it all again
+ # num_hours = 1
+ # wasteTimeClicking(driver, 60 * 60 * num_hours)
+ # print("----------------\nLETS RUN FOREVER\n----------------")