From 4418c4a63accb45ca28743e2bd7f0b7c494cfc64 Mon Sep 17 00:00:00 2001
From: bpoullio <bpoullio@uccs.edu>
Date: Fri, 20 Sep 2019 16:24:25 -0600
Subject: [PATCH] updated canvas data sync, added script to pull exams by
 course per semester

---
 .../python/pullexams_bycourse-README.txt      |  22 ++
 .../python/pullexams_bycourse.py              | 272 +++++++++++++++
 .../python/canvas_data_sync-README.txt        |  10 +-
 .../python/canvas_data_sync.py                | 328 ++++++------------
 .../python/canvasfuncs/candata.py             | 108 ++++++
 .../python/canvasfuncs/hmacsig.py             |  59 ++++
 6 files changed, 571 insertions(+), 228 deletions(-)
 create mode 100644 api/pull_course_quizzes/python/pullexams_bycourse-README.txt
 create mode 100755 api/pull_course_quizzes/python/pullexams_bycourse.py
 create mode 100644 canvas_data/sync_canvas_data/python/canvasfuncs/candata.py
 create mode 100644 canvas_data/sync_canvas_data/python/canvasfuncs/hmacsig.py
diff --git a/api/pull_course_quizzes/python/pullexams_bycourse-README.txt b/api/pull_course_quizzes/python/pullexams_bycourse-README.txt
new file mode 100644
index 0000000..444eb9f
--- /dev/null
+++ b/api/pull_course_quizzes/python/pullexams_bycourse-README.txt
@@ -0,0 +1,22 @@
+# pullexams_bycourse.py
+#
+# Usage: python3 pullexams_bycourse.py <environment> <semester_code>
+#
+# Outputs: CSV of exam info with course info to cross-reference
+#
+# Args: Requires a target (test or prod) and any amount of terms
+#       Note that terms must match the SIS ID for term in Canvas
+#       See: https://canvas.instructure.com/doc/api/enrollment_terms.html
+#
+# Outline: 1. Request and document all courses matching criteria specified
+#          2. Request and document all quiz info for courses from 1
+#          3. Check quiz due dates against current date to filter further
+#          4. Write remaining available quizzes to file
+#
+# General advice: * Most replacement should happen between <>
+#                 * When you see {} do not remove w/o removing matching .format
+#                 * Careful changing things, infinite loops are possible
+#
+# Author: Brandon Poulliot
+#
+# Works as of 9/20/19
diff --git a/api/pull_course_quizzes/python/pullexams_bycourse.py b/api/pull_course_quizzes/python/pullexams_bycourse.py
new file mode 100755
index 0000000..d81027e
--- /dev/null
+++ b/api/pull_course_quizzes/python/pullexams_bycourse.py
@@ -0,0 +1,272 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+#
+# pullexams_bycourse.py
+#
+# Usage: python3 pullexams_bycourse.py <environment> <semester_code>
+#
+# Outputs: CSV of exam info with course info to cross-reference
+#
+# Args: Requires a target (test or prod) and any amount of terms
+#       Note that terms must match the SIS ID for term in Canvas
+#       See: https://canvas.instructure.com/doc/api/enrollment_terms.html
+#
+# Outline: 1. Request and document all courses matching criteria specified
+#          2. Request and document all quiz info for courses from 1
+#          3. Check quiz due dates against current date to filter further
+#          4. Write remaining available quizzes to file
+#
+# General advice: * Most replacement should happen between <>
+#                 * When you see {} do not remove w/o removing matching .format
+#                 * Careful changing things, infinite loops are possible
+#
+# Author: Brandon Poulliot
+# 
+# Works as of 9/20/19
+
+# standard libraries
+from datetime import datetime
+import json
+from os.path import join
+from sys import argv, exit
+import csv
+import re
+
+# non-standard libraries
+import requests
+
+###############################################################################
+##############################   CHANGE THESE   ###############################
+###############################################################################
+
+# set datetime info -- change format to suit
+print('Starting exam pull now {:%Y-%m-%dT%H:%M:%S}'.format(
+      datetime.now()))
+
+# set a regex expression to verify your terms passed in -- see examples
+# also https://regexr.com/ can help building regex to match
+# MUST UNCOMMENT THESE TO VERIFY TERMS ARGS PASSED IN
+
+#term_regex = '^(SP|SU|FA)2[0-9]{3}$' # match format of SP2020 or FA2030
+                                     # case sensitive -- thru 2999
+
+#term_regex = '^2[0-9]{2}(1|4|7)$' # match 4-digit term starting w/ 2, middle
+                                  # digits are 0-9, last digit is 1, 4, or 7
+
+#term_regex = r'^2[0-9]{3}/(SP|SU|FA)$' # match year '/' term format thru 2999
+                                       # e.g., 2020/FA or 2031/SU thru 2999
+
+# Canvas and auth info
+canvas_domain = '<domain>{}.instructure.com'
+token = '<canvas_prod_token>'
+test_token = '<canvas_test_token>'
+
+# set the output path for quiz file
+prod_out = '<production_output_path>'
+test_out = '<test_output_path>'
+
+# change the name of the quiz file to suit
+quiz_fname = '<desired_quiz_filename>-{}.csv'
+quiz_headers = 'course_name,course_code,quiz_name,unlock_date,due_date,lock_date\n'
+
+# courses request parameters -- default: active w/ enrollments & not completed
+account = 1                 # subaccount ID
+per_page = 100              # results per page, most cases limit is 100
+do_enrollments = 'true'     # exclude courses w/o enrollments
+do_published = 'true'       # exclude unpublished courses
+do_completed = 'false'      # exclude completed courses
+do_term = 'sis_term_id:{}'  # search by semester -- use your SIS term ID
+# if using, uncomment here and in the params manifest inside the term loop
+#do_etype = ''        # teacher, student, ta, observer, or designer
+#do_blueprint = ''    # t/f only include blueprint parents
+#do_associated = ''   # t/f only include blueprint children
+#do_teachers =        # int list of teacher user IDs to filter by
+#do_subaccounts = ''  # int list of subaccount IDs to filter by
+#do_state = ''        # created, claimed, available, completed, deleted, all
+#do_search = ''       # partial course name, code, or full ID
+#do_include = ''      # list of includes, see API docs
+#do_sort = ''         # by course_name, sis_course_id, teacher, account_name
+#do_order = ''        # sort 'asc' or 'desc' order
+#do_filter = ''       # by course or teacher, see API docs
+
+
+###############################################################################
+##########################   DO NOT CHANGE THESE   ############################
+###############################################################################
+
+# separate the args
+args = []
+total_args = len(argv)
+
+i = 1
+while i < total_args:
+  print('Argument {}: {}'.format(i, argv[i]))
+  args.append(argv[i])
+  i += 1
+
+passed_args = len(args)
+
+if passed_args < 2:
+  print('''Not enough arguments supplied. \
+           Syntax is: python3 pullexams_bycourse.py [prod|test] [2xxx].''')
+  exit('invalid arguments')
+
+# set environment variables based on first arg
+if args[0] == 'prod':
+  target = 'prod'
+  env = ''
+  out_path = prod_out
+elif args[0] == 'test':
+  target = 'test'
+  env = '.test'
+  token = test_token
+  out_path = test_out
+else:
+  # will fail if no environment provided or not provided in correct order
+  target = None
+  print('Env arg invalid, exiting (should be prod/test), arg was: {}'.format(
+         args[0]))
+  exit('invalid argument')
+
+# set new quiz file path based on env out path and filename supplied
+quizf = join(out_path, quiz_fname)
+
+# set request header info
+headers = {'Authorization': 'Bearer {}'.format(token)}
+
+# set endpoint info
+base_domain = 'https://{}/api/v1/{}'.format(canvas_domain.format(env), '{}')
+course_uri = base_domain.format('accounts/{}/courses')
+quiz_uri = base_domain.format('courses/{}/quizzes')
+
+# semesters check
+terms = []
+i = 1
+while i < passed_args:
+  terms.append(args[i])
+  i += 1
+print('Terms provided: {}'.format(terms))
+
+for term in terms:
+
+  # double-check quiz count
+  iq = 0
+
+  # verify that the terms provided are valid using regex
+  # comment out if term_regex is not set above or utilized
+  try:
+    term_regex
+  except NameError:
+    print('Terms not being verified, proceeding...')
+  else:
+    verify_term = bool(re.match(term_regex, term))
+    if not verify_term:
+      print('Terms must be in {} format, please try again.'.format(term_regex))
+      exit('invalid term format')
+  
+  # storage arrays
+  courses_a = []  
+  quiz_a = []
+  quizzes_open = []
+
+  # params manifest, ensure all params specified are uncommented here too!
+  params = {
+    'with_enrollments': do_enrollments,
+    'published': do_published,
+    'completed': do_completed,
+    'enrollment_term_id': do_term.format(term)#,
+    #'enrollment_type[]': do_etype,
+    #'blueprint': do_blueprint,
+    #'blueprint_associated': do_associated,
+    #'by_teacher[]': do_teachers,
+    #'by_subaccounts': do_subaccounts,
+    #'state[]': do_state,
+    #'search_term': do_search,
+    #'include[]': do_include,
+    #'sort': do_sort,
+    #'order': do_order,
+    #'search_by': do_filter
+    }
+
+
+  # get course IDs w/ criteria spec'd above
+  # default: published, not completed, has enrollments
+  # per_page (set in the config section) is sent so the configured page
+  # size is actually used instead of the API default
+  pubcourse_r = requests.get(course_uri.format(account), headers=headers,
+                             params=dict(params, per_page=per_page),
+                             timeout=10)
+
+  ic = 0
+  while True:
+    # fail loudly on auth/HTTP errors instead of iterating an error payload
+    pubcourse_r.raise_for_status()
+    pubcourses = pubcourse_r.json()
+    courses_a.extend(pubcourses)
+    ic += len(pubcourses)
+    # Canvas can omit rel="last"; rel="next" is the reliable paging signal
+    if 'next' not in pubcourse_r.links:
+      break
+    # the next URL already carries the query string, so do not resend params
+    pubcourse_r = requests.get(pubcourse_r.links['next']['url'],
+                               headers=headers, timeout=10)
+  print('Course count for {}: {}'.format(term, ic))
+  print('Completed course manifest, pulling exams...')
+
+  # pull every quiz per course (10s timeout), keeping each quiz paired with
+  # the course it came from so rows are not all labelled with the last course
+  for course in courses_a:
+    quiz_r = requests.get(quiz_uri.format(course['id']), headers=headers,
+                          timeout=10)
+    while True:
+      quizzes = quiz_r.json()
+      # an error payload (e.g. unauthorized course) is a dict, not a list
+      if quiz_r.ok and isinstance(quizzes, list):
+        quiz_a.extend((course, quiz) for quiz in quizzes)
+      else:
+        print('Skipping quizzes for course {}: HTTP {}'.format(
+              course['id'], quiz_r.status_code))
+      # Canvas can omit rel="last"; rel="next" is the reliable paging signal
+      if 'next' not in quiz_r.links:
+        break
+      quiz_r = requests.get(quiz_r.links['next']['url'], headers=headers,
+                            timeout=10)
+
+  # availability check: a future due date wins, then a past unlock date,
+  # then a future lock date; quizzes with no dates at all are always kept
+  stamp = '%Y-%m-%dT%H:%M:%SZ'
+  # timestamps are parsed as UTC ('Z'); take "now" once for every branch
+  dt_check = datetime.utcnow()
+  for course, quiz in quiz_a:
+    due, unlock, lock = quiz['due_at'], quiz['unlock_at'], quiz['lock_at']
+    if due is not None:
+      available = datetime.strptime(due, stamp) > dt_check
+    elif unlock is not None:
+      available = datetime.strptime(unlock, stamp) < dt_check
+    elif lock is not None:
+      # a lock date already in the past means the quiz has closed
+      available = datetime.strptime(lock, stamp) > dt_check
+    else:
+      available = True
+    if available:
+      # one value per column in quiz_headers; missing values become empty
+      fields = (course['name'], course['sis_course_id'], quiz['title'],
+                unlock, due, lock)
+      # quote each field so commas in names cannot shift the CSV columns
+      row = ','.join('"{}"'.format(str(f or '').replace('"', '""'))
+                     for f in fields) + '\n'
+      quizzes_open.append(row)
+      iq += 1
+  print('Quizzes added to manifest for {}: {}'.format(term, iq))
+  print('Completed exams manifest, writing to file...')
+
+  # write the header plus one row per available quiz; the with-block closes
+  # the file on exit, so no explicit close() is needed
+  # utf-8 keeps non-ASCII course/quiz names from failing on platforms whose
+  # default encoding cannot represent them
+  with open(quizf.format(term), 'w', encoding='utf-8') as qfile:
+    qfile.write(quiz_headers)
+    qfile.writelines(quizzes_open)
+  qrow = len(quizzes_open)
+  print('Quizzes Written to File: {}'.format(qrow))
+
diff --git a/canvas_data/sync_canvas_data/python/canvas_data_sync-README.txt b/canvas_data/sync_canvas_data/python/canvas_data_sync-README.txt
index 915c3e0..23cf8de 100644
--- a/canvas_data/sync_canvas_data/python/canvas_data_sync-README.txt
+++ b/canvas_data/sync_canvas_data/python/canvas_data_sync-README.txt
@@ -1,4 +1,8 @@
-Words of WARNING: This will likely download a LOT of data to the output directory. Carefully consider if you have space (hundreds of GB most likely) to spare for the sync files as well as newly re-written files (e.g., data was added to "module_progression_fact" table, but not enough to warrant a whole new file) before they are removed.
+Words of WARNING: This will likely download a LOT of data to the output 
+directory. Carefully consider if you have space (hundreds of GB most likely) 
+to spare for the sync files as well as newly re-written files (e.g., data was 
+added to "module_progression_fact" table, but not enough to warrant a whole new
+file) before they are removed.
 
 # canvas_data_sync.py
 # Author: Brandon Poulliot
@@ -47,7 +51,7 @@ Words of WARNING: This will likely download a LOT of data to the output director
 #             Uses default python to write a JSON file with all info on
 #               the first 100 data dumps after dump ID 345
 #         
-# NOTES: + Working as of 3/4/19
+# NOTES: + Working as of 9/20/19
 #    + Left in "future-proofing" lines, do not uncomment until useful
-#    + Flat file extensions don't matter -- change at will
+#    + Flat file extensions don't matter -- change at will
 #    + Schema/dump extenstions - keep JSON for syntax marks in text editors
diff --git a/canvas_data/sync_canvas_data/python/canvas_data_sync.py b/canvas_data/sync_canvas_data/python/canvas_data_sync.py
index 5e18e32..1df5ea3 100644
--- a/canvas_data/sync_canvas_data/python/canvas_data_sync.py
+++ b/canvas_data/sync_canvas_data/python/canvas_data_sync.py
@@ -8,8 +8,6 @@
 #      * Can provide latest Canvas Data schema in file
 #      * Can provide more information on Canvas Data dumps in CSV file
 #
-# Requirements: only non-standard library is REQUESTS
-#
 # Script Map:
 #       1. Take in arguments and set API endpoint
 #       2. Create base-64-encoded HMAC-256 signature
@@ -23,9 +21,8 @@
 #       5. Remove the GZ archive files (options 3a and 3b)
 #       6. Remove files not present in most recent sync (option 3a)
 #
-# Arguments: 
-#      1. endpoint - required and positional, must come directly after
-#                script invocation
+# Arguments: 1. endpoint - required and positional, must come directly after
+#              script invocation
 #      2. -l (--limit) - optional, invoke using -l limit=#
 #                Only used with dump option, specifies a limit to
 #                the number of dump entries returned (default=50)
@@ -33,22 +30,21 @@
 #                Only used with dump option, specifies the ID of
 #                the dump to pull data after (i.e., ID > limit)
 #      4. -m (--method) - optional, invoke using
-#                -m (GET|DELETE|HEAD|OPTIONS|POST|PUT)
-#                Future-proofing this script when more methods
-#                become available (default=GET)
+#                 -m (GET|DELETE|HEAD|OPTIONS|POST|PUT)
+#                 Future-proofing this script when more methods
+#                 become available (default=GET)
 #
 # Usage: Call from shell/cmd with preferred version and arguments
-#    Examples:
-#          1. python3 canvas_data_sync.py sync -m GET
+#    Examples: 1. python3 canvas_data_sync.py sync -m GET
 #
-#             Uses python 3.x + GET method to sync all Canvas Data
+#           Uses python 3.x + GET method to sync all Canvas Data
 #
 #          2. python canvas_data_sync.py dump -l limit=100 -a after=345
 #
-#             Uses default python to write a JSON file with all info on
-#               the first 100 data dumps after dump ID 345
-#         
-# NOTES: + Working as of 3/4/19
+#           Uses default python to write a JSON file with all info on
+#           the first 100 data dumps after dump ID 345
+#
+# NOTES: + Working as of 9/20/19
 #    + Left in "future-proofing" lines, do not uncomment until useful
 #    + Flat file extensions don't matter -- change at will
 #    + Schema/dump extenstions - keep JSON for syntax marks in text editors
@@ -58,46 +54,48 @@
 ###############################################################################
 
 # standard modules
-import argparse
-import base64
 import gzip
-import hashlib
-import hmac
 import json
 import re
 import sys
 from datetime import datetime, timezone
 from os import listdir, remove
 from os.path import getsize, isfile, join
-from urllib import parse
 
 # non-standard modules
 import requests
+from canvasfuncs import hmacsig, candata
 
 ###############################################################################
 ################# User-Declared Variables -- CHANGE THESE! ####################
 ###############################################################################
 
 # set local timezone abbreviation to differentiate
-local_timezone = '<your timezone abbreviation>'
+local_timezone = '<TZ>'
+params = ''
 
 # generate local timestamp (LT) for filenames
 dt_lt = datetime.now().strftime('%m-%d-%Y_%H%M{}'.format(local_timezone))
-# generate UTC timestamp for HMAC-256 signature
-dt_now = datetime.now(timezone.utc).strftime('%a, %d %b %Y %H:%M:%S GMT')
 
-# output dir *MUST* have trailing slash
-# Example: /home/canvas/data-dumps/
-out_dir = '<C:/your/output/path/>'
+# output dir *MUST* have trailing slash followed by curly braces
+# Example: /home/canvas/data-dumps/{}
+out_dir = '</path/to/data/goes/here/{}>'
+schema_out = join(out_dir,'schema/')
 # your Canvas Data API key -- do NOT use secret here
-api_key = '<Canvas Data API key>'
+api_key = '<Canvas Data API Key>'
 # now use your Canvas Data API secret!
-cdata_secret = '<Canvas Data API secret>'
+cdata_secret = '<Canvas Data API Secret>'
 # filenames -- change as appropriate
-schema_fname = '{}-canvas-data-schema.json'.format(dt_lt)
-dump_fname = '{}-canvas_data_dumps.json'.format(dt_lt)
-# set file extension for flat files -- recommend using default of blank
-fext = ''
+schema_fname = 'canvasdata-schema-{}.json'
+dump_fname = '{}-canvasdata-dumps.json'.format(dt_lt)
+
+# schema notification settings
+body = join(schema_out, 'schema_notify')
+subj = 'Canvas Data Schema Changes {}'.format(dt_lt)
+msg = '''New schema version {} for Canvas Data. \n\
+      Please consult https://portal.inshosteddata.com/docs/api'''
+whonotify = '<optional notification email address>'
+
 
 # set block size for buffer as needed
 block_size = 8192
@@ -106,206 +104,85 @@
 ################# API Call Information Gathering Section ######################
 ###############################################################################
 
-# init variables for API call parameters
-raw_params = []
-params = ''
-
-# create argument parser to allow for command line arguments
-parser = argparse.ArgumentParser(description='''Separate Canvas Data API call
-                 components.''')
-
-# add arguments to parser, first is positional (must be 1st) and required
-parser.add_argument('endpoint',
-          help='''Specify the endpoint of your API call: dump, sync,
-          latest, or schema.''')
-parser.add_argument('-l', '--limit', 
-          help='''Syntax is "limit=#", specifies how many records to
-          return. Only works with dump.''')
-parser.add_argument('-a', '--after', 
-          help='''Syntax is "after=#", specifies to pull only data
-          after dump number provided. Only works with dump.''')
-# this one doesn't matter right now, only method available is GET
-parser.add_argument('-m', '--method', default='GET',
-          help='''Future-proofing for possible new methods for
-          Canvas Data API. Currently, only method is GET.''')
+# parse arguments
+args = candata.parse(sys.argv)
 
-# parse args from sys.argv into ParseResult object
-args = parser.parse_args()
-
-# check that limit, after, and method all meet syntax requirements
-if args.limit is not None:
-  limit_syntax = re.search('^limit\=\d+$', args.limit)
-  if limit_syntax is not None:
-    raw_params.append(args.limit)
-  
-if args.after is not None:
-  after_syntax = re.search('^after\=\d+$', args.after)
-  if after_syntax is not None:
-    raw_params.append(args.after)
-
-# check that the HTTP method is acceptable  
+# check that method is correct syntax
 method_syntax = re.search('^GET$', args.method)
-# Below for use only when more methods added to Canvas Data API
+
+# For use only when more methods added to Canvas Data API
 #method_syntax = re.search('^(GET|DELETE|HEAD|OPTIONS|POST|PUT)$',
 #              args.method)
 
 # if the method is wrong, the call won't work, exit
 if method_syntax is None:
-# Below for use only when more methods added to Canvas Data API
-#  print('''HTTP method is not valid, must be GET, DELETE, HEAD, OPTIONS,
-#       POST, or PUT. Exiting...''')
   print('HTTP method is not valid, must be GET. Exiting...')
   sys.exit('invalid method.')
 
-# check the endpoint argument and set the API call URL accordingly
-if args.endpoint.lower() == 'dump':
-  api_url = 'https://portal.inshosteddata.com/api/account/self/dump{}{}'
-elif args.endpoint.lower() == 'sync':
-  api_url = 'https://portal.inshosteddata.com/api/account/self/file/sync'
-elif args.endpoint.lower() == 'schema':
-  api_url = 'https://portal.inshosteddata.com/api/schema/latest'
-elif args.endpoint.lower() == 'latest':
-  api_url = 'https://portal.inshosteddata.com/api/account/self/file/latest'
-# TODO: Add byTable endpoint and args
-# elif args.endpoint.lower() == 'bytable':
-
-else:
-  print('''Invalid argument, must be "dump", "latest", "sync", or "schema".
-      Exiting...''')
-  sys.exit('invalid request')
-  
-# check if params set, sort alphabetically, join them, add to end of API call
-if args.endpoint.lower() == 'dump':
-  if len(raw_params) > 0:
-    raw_params.sort()
-    params = '&'.join(raw_params)
-    call_url = api_url.format('?', params)
-  # remove curly braces -- although this doesn't seem to matter...
-  else:
-    call_url = api_url.strip('{}')
-# if not using dump, don't add parameters
-else:
-  call_url = api_url
-  
-###############################################################################
-###################### HMAC Signature Building Section ########################
-###############################################################################
-
-# break the call into components to build HMAC-256 signature
-call_info = list(parse.urlparse(call_url))
+#  print('''HTTP method is not valid, must be GET, DELETE, HEAD, OPTIONS,
+#       POST, or PUT. Exiting...''')
 
-# set components for HMAC-256 signature
-reqOpts = {
-  'method' : args.method.upper(),
-  'host' : call_info[1],
-  # intentionally blank
-  'content_type' : '',
-  # intentionally blank
-  'content_md5' : '',
-  'path' : call_info[2],
-  'parameters' : params,
-  'req_timestamp' : dt_now,
-  'api_secret' : cdata_secret
-  }
+cdata_uri = 'https://portal.inshosteddata.com/api/{}'
+ep_all = ['dump', 'sync', 'schema', 'latest']
 
-#!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!#
-############### DO NOT CHANGE ANYTHING IN THIS SUBSECTION #####################
-#!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!#
+# check the endpoint argument
+endpoints = [args.endpoint.lower()]
 
-# build a bytes message by joining the signature components
-message = bytes('\n'.join(str(x) for x in reqOpts.values()), 'utf-8')
-# change the Canvas Data API secret to bytes
-api_secb = bytes(reqOpts['api_secret'], 'utf-8')
+if len([e for e in endpoints if e not in ep_all]) > 0:
+  print('''Invalid argument, must be "dump", "latest", "sync", or "schema". \
+        Exiting...''')
+  sys.exit('invalid request')
 
-# create an SHA-256 hashed HMAC object, then base 64 encode it
-signed_msg = base64.b64encode(hmac.new(api_secb, message,
-                     digestmod=hashlib.sha256).digest())
-# must be 'decoded'to utf-8 to get rid of byte marks (^,.,^)
-signature = signed_msg.decode('utf-8')
+# add a schema file to sync
+if 'sync' in endpoints:
+  endpoints.append('schema')
 
-# build auth headers from Canvas Data API key, HMAC-256 sig, and timestamp
-auth_headers = { 'Authorization' : 'HMACAuth {}:{}'.format(api_key, signature),
-         'Date' : '{}'.format(dt_now) }
+# TODO: Add byTable endpoint and args
 
 ###############################################################################
 ############################# API Call Generation #############################
 ###############################################################################
 
 # start the API call
-print('Starting Canvas Data {} request...\nTimestamp: {}\n'.format(args.endpoint.lower(), dt_lt))
-start_call = requests.get(call_url, headers=auth_headers)
-call_response = start_call.json()
-
-# initialize loop variables
-fname = ''
-file_path = ''
-flat_file = ''
-dl_url = ''
-sync_files = []
-
-###############################################################################
-########################## CData SYNC Section #################################
-###############################################################################
+for call in endpoints:
+  print('Starting Canvas Data {} request...\nTimestamp: {}\n'.format(
+    call, dt_lt))
+
+# not terribly useful unless schema changes, writes schema to a file
+if 'schema' in endpoints:
+  call_url = cdata_uri.format('schema/latest')
+  reqOpts = hmacsig.HMACopts(call_url, args.method, params, cdata_secret)
+  auth_headers = hmacsig.HMACsig(reqOpts, api_key)
+  start_call = requests.get(call_url, headers=auth_headers)
+  call_response = start_call.json()
+  file_path = join(schema_out, schema_fname.format(call_response['version'].replace('.', '-')))
+  if not isfile(file_path):
+    old_schema = listdir(schema_out)
+    with open(file_path, 'w+') as schema_file:
+      call_json = json.dump(call_response, schema_file, indent=4)
+      schema_file.close()
+    for schema in old_schema:
+      remove(join(schema_out, schema))
+    dt_complete = datetime.now().strftime('%m-%d-%Y_%H%M{}'.format(local_timezone))
+    msg_detail = msg.format(call_response['version'])
+    print('Schema file written, check output directory.\n Completed: {}'.format(dt_complete))
+    schema_notify = candata.notify(subj, body, msg_detail, whonotify)
 
 # main purpose of script -- syncs Canvas Data API files to output dir
-if args.endpoint.lower() == 'sync':
+if 'sync' in endpoints:
+  sync_files = []
+  call_url = cdata_uri.format('account/self/file/sync')
+  reqOpts = hmacsig.HMACopts(call_url, args.method, params, cdata_secret)
+  auth_headers = hmacsig.HMACsig(reqOpts, api_key)
+  start_call = requests.get(call_url, headers=auth_headers)
+  call_response = start_call.json()
   # get filename and download path for each table
   table_manifest = call_response['files']
-  for table in table_manifest:
-    fname = table['filename']
-    file_path = join(out_dir, fname)
-    flat_fname = fname.split('.')[0]
-    print(fname)
-    # add extension if desired, makes no difference but you do you
-    flat_file = file_path.split('.')[0] + fext
-    dl_url = table['url']
-    dl_file = requests.get(dl_url)
-    # is the file a full table or part of a table?
-    print('Partial table? {}'.format(table['partial']))
-    if isfile(file_path):
-      remove(file_path)
-      print('Local file fragment removed: {}'.format(file_path))
-    # delete any zero-length mishap files
-    if isfile(flat_file) and getsize(flat_file) == 0:
-      remove(flat_file)
-    # check if the flat file exists, if not, download it
-    # note that this will skip incomplete files more than 0 KB
-    if isfile(flat_file) and getsize(flat_file) > 0:
-      print('Skipping file: {} -- already exists.\n'.format(flat_fname))
-      sync_files.append(flat_fname)
-      continue
-    # if the thousand other scenarios aren't true, let's write the file!
-    else:
-      with open(file_path, 'wb') as sync:
-        sync.write(dl_file.content)
-        sync.close()
-      print('Downloaded file: {}'.format(fname))
-      # open gz file and dump contents into flat file block by block
-      with gzip.open(file_path, 'rb') as zipped, \
-      open(flat_file, 'wb') as unzipped:
-        while True:
-          block = zipped.read(block_size)
-          if not block:
-            break
-          else:
-            unzipped.write(block)
-        unzipped.write(block)
-        # must explicitly close both files before further manipulation
-        unzipped.close()
-        zipped.close()
-      print('Unzipped file: {}\n'.format(flat_file))
-      # after all is said and done, remove the GZ file
-      remove(file_path)
-      # add the downloaded file to a sync list
-      sync_files.append(flat_fname)
+  sync_files = candata.tablesync(table_manifest, out_dir, block_size)
   # catalog existing files -- to remove unnecessary files later (for sync)
   x_files = listdir(out_dir)
   # create a deletion manifest via list comprehension
-  del_manifest = [f for f in x_files + sync_files if f not in sync_files]
-  del_paths = []
-  # add the output path to file names in deletion manifest
-  for file in del_manifest:
-    del_paths.append(join(out_dir, file))
+  del_manifest = [f for f in x_files if f not in sync_files]
   # remove each file in the deletion manifest to complete the sync
   for file in del_manifest:
     fpath = join(out_dir, file)
@@ -315,13 +192,16 @@
   dt_complete = datetime.now().strftime('%m-%d-%Y_%H%M{}'.format(local_timezone))
   print('Canvas Data synchronized.\nCompleted: {}'.format(dt_complete))
 
-###############################################################################
-########################## CData LATEST Section ###############################
-###############################################################################
-
 # download latest dump (i.e., tables from last 24 hrs), not needed w/ sync
-elif args.endpoint.lower() == 'latest':
+if 'latest' in endpoints:
+  call_url = cdata_uri.format('account/self/file/latest')
+  reqOpts = hmacsig.HMACopts(call_url, args.method, params, cdata_secret)
+  auth_headers = hmacsig.HMACsig(reqOpts, api_key)
+  start_call = requests.get(call_url, headers=auth_headers)
+  call_response = start_call.json()
   table_list = call_response['artifactsByTable']
+
+  # TODO: match with sync to utilize candata.tablesync function
   for table in table_list:
     fname = table_list[table]['files'][0]['filename']
     flat_fname = fname.split('.')[0]
@@ -338,7 +218,7 @@
       print('Skipping file: {} -- already exists.\n'.format(flat_fname))
       continue
     else:
-      with open(file_path, 'wb') as latest:
+      with open(file_path, 'wb+') as latest:
         latest.write(dl_file.content)
         latest.close()
       print('Downloaded file: {}'.format(fname))
@@ -358,25 +238,23 @@
   dt_complete = datetime.now().strftime('%m-%d-%Y_%H%M{}'.format(local_timezone))
   print('Canvas Data latest dump downloaded.\nCompleted: {}'.format(dt_complete))
 
-###############################################################################
-########################## CData INFO Section #################################
-###############################################################################
+# if you need more information on daily dumps
+if 'dump' in endpoints:
+  dump_uri = cdata_uri.format('account/self/dump{}{}')
+
+  if args.limit is not None and args.after is not None:
+    call_url = paramcheck(args.limit, args.after, dump_uri)
+  else:
+    call_url = dump_uri.strip('{}')
+  reqOpts = hmacsig.HMACopts(call_url, args.method, params, cdata_secret)
+  auth_headers = hmacsig.HMACsig(reqOpts, api_key)
+  start_call = requests.get(call_url, headers=auth_headers)
+  call_response = start_call.json()
 
-# if you need more information on daily dumps, writes a JSON file with info       
-elif args.endpoint.lower() == 'dump':
   fname = dump_fname
   file_path = join(out_dir, fname)
-  with open(file_path,'w') as dump_file:
+  with open(file_path,'w+') as dump_file:
     call_json = json.dump(call_response, dump_file, indent=4)
   dt_complete = datetime.now().strftime('%m-%d-%Y_%H%M{}'.format(local_timezone))
   print('''Dump info file written, check output directory.
       \nCompleted: {}'''.format(dt_complete))
-  
-# writes schema to a file, not terribly useful unless schema changes
-elif args.endpoint.lower() == 'schema':
-  fname = schema_fname
-  file_path = join(out_dir, fname)
-  with open(file_path, 'w') as schema_file:
-    call_json = json.dump(call_response, schema_file, indent=4)
-  dt_complete = datetime.now().strftime('%m-%d-%Y_%H%M{}'.format(local_timezone))
-  print('Schema file written, check output directory.\n Completed: {}'.format(dt_complete))
diff --git a/canvas_data/sync_canvas_data/python/canvasfuncs/candata.py b/canvas_data/sync_canvas_data/python/canvasfuncs/candata.py
new file mode 100644
index 0000000..f235f31
--- /dev/null
+++ b/canvas_data/sync_canvas_data/python/canvasfuncs/candata.py
@@ -0,0 +1,108 @@
+#!/usr/bin/python3
+# -*- coding: utf-8 -*-
+
+from subprocess import call
+import re
+from os.path import getsize, isfile, join
+import requests
+from os import listdir, remove
+import argparse
+import gzip
+
+def parse(argv=None):
+  """Parse Canvas Data CLI arguments from argv (sys.argv[1:] when None)."""
+  parser = argparse.ArgumentParser(description='''Separate Canvas Data API call
+                                               components.''')
+
+  # add arguments to parser, first is positional (must be 1st) and required
+  parser.add_argument('endpoint',
+                      help='''Specify the endpoint of your API call: dump,
+                           sync, latest, or schema.''')
+  parser.add_argument('-l', '--limit',
+                      help='''Syntax is "limit=#", specifies how many records
+                           to return. Only works with dump.''')
+  parser.add_argument('-a', '--after',
+                      help='''Syntax is "after=#", specifies to pull only data
+                      after dump number provided. Only works with dump.''')
+  # this one doesn't matter right now, only method available is GET
+  parser.add_argument('-m', '--method', default='GET',
+                      help='''Future-proofing for possible new methods for
+                           Canvas Data API. Currently, only method is GET.''')
+  # honor an explicit argv; argparse falls back to sys.argv[1:] when it is None
+  args = parser.parse_args(argv)
+  return args
+
+def notify(subj, body, msg, whonotify):
+  """Write msg to the file body, mail it with mutt, then delete the file."""
+  with open(body, 'w') as scratch:
+    scratch.write(msg)
+  # mutt reads the message text from the scratch file via shell redirection
+  call('mutt -s "{}" -- {} < {}'.format(subj, whonotify, body), shell=True)
+  remove(body)
+
+def paramcheck(limit, after, endpoint):
+  """Return endpoint with its two '{}' slots filled by '?' and the query.
+
+  limit/after are kept only when they look like 'limit=#'/'after=#';
+  endpoint is a format string such as '.../dump{}{}'.
+  """
+  raw_params = []
+  if limit is not None and re.search(r'^limit=\d+$', limit):
+    raw_params.append(limit)
+  if after is not None and re.search(r'^after=\d+$', after):
+    raw_params.append(after)
+  # sort so the query string always comes out in the same alphabetical order
+  raw_params.sort()
+  return endpoint.format('?' if raw_params else '', '&'.join(raw_params))
+
+def tablesync(table_manifest, out_dir, block_size):
+  """Download and gunzip every table file listed in a sync manifest.
+
+  table_manifest: iterable of dicts with 'filename', 'url' and 'partial' keys.
+  out_dir: directory that receives the unzipped flat files.
+  block_size: number of bytes to decompress per read.
+
+  Returns the list of flat file names (no extension) that belong to the sync.
+  """
+  sync_files = []
+  for table in table_manifest:
+    fname = table['filename']
+    file_path = join(out_dir, fname)
+    flat_fname = fname.split('.')[0]
+    print(fname)
+    # strip the extension from the name only; splitting the joined path would
+    # truncate at any '.' in out_dir (append '<file extension>' here if desired)
+    flat_file = join(out_dir, flat_fname)
+    # is the file a full table or part of a table?
+    print('Partial table? {}'.format(table['partial']))
+    if isfile(file_path):
+      remove(file_path)
+      print('Local file fragment removed: {}'.format(file_path))
+    # delete any zero-length mishap files
+    if isfile(flat_file) and getsize(flat_file) == 0:
+      remove(flat_file)
+    # note that this will skip incomplete files more than 0 KB
+    if isfile(flat_file):
+      print('Skipping file: {} -- already exists.\n'.format(flat_fname))
+      sync_files.append(flat_fname)
+      continue
+    # only download once we know the flat file is actually missing
+    dl_file = requests.get(table['url'])
+    with open(file_path, 'wb') as sync:
+      sync.write(dl_file.content)
+    print('Downloaded file: {}'.format(fname))
+    # open gz file and dump contents into flat file block by block
+    with gzip.open(file_path, 'rb') as zipped, \
+         open(flat_file, 'wb') as unzipped:
+      while True:
+        block = zipped.read(block_size)
+        if not block:
+          break
+        unzipped.write(block)
+    print('Unzipped file: {}\n'.format(flat_file))
+    # after all is said and done, remove the GZ file
+    remove(file_path)
+    # add the downloaded file to a sync list
+    sync_files.append(flat_fname)
+  return sync_files
+
diff --git a/canvas_data/sync_canvas_data/python/canvasfuncs/hmacsig.py b/canvas_data/sync_canvas_data/python/canvasfuncs/hmacsig.py
new file mode 100644
index 0000000..581f399
--- /dev/null
+++ b/canvas_data/sync_canvas_data/python/canvasfuncs/hmacsig.py
@@ -0,0 +1,59 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+
+import base64
+import hashlib
+import hmac
+from datetime import datetime, timezone
+from urllib import parse
+
+def HMACopts(call_url, method, params, cdata_secret):
+  """Collect the request components that are signed with HMAC-256.
+
+  call_url supplies 'host' and 'path'; method is upper-cased; params and
+  cdata_secret are stored as-is under 'parameters' and 'api_secret'.
+  Key order matters: HMACsig joins the dict values in insertion order.
+  """
+
+  # UTC timestamp; HMACsig also sends it as the Date header
+  stamp = datetime.now(timezone.utc).strftime('%a, %d %b %Y %H:%M:%S GMT')
+
+  # break the call into components to build HMAC-256 signature
+  url_parts = parse.urlparse(call_url)
+
+  # content_type and content_md5 are intentionally blank
+  reqOpts = dict(
+    method=method.upper(),
+    host=url_parts.netloc,
+    content_type='',
+    content_md5='',
+    path=url_parts.path,
+    parameters=params,
+    req_timestamp=stamp,
+    api_secret=cdata_secret,
+  )
+  return reqOpts
+
+def HMACsig(reqOpts, api_key):
+  """Return the Authorization and Date headers for a signed API request."""
+  #!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!#
+  ############### DO NOT CHANGE ANYTHING IN THIS SUBSECTION #####################
+  #!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!#
+
+  # join every reqOpts value (in dict order) with newlines into a bytes message
+  message = bytes('\n'.join(str(x) for x in reqOpts.values()), 'utf-8')
+  # change the Canvas Data API secret to bytes
+  api_secb = bytes(reqOpts['api_secret'], 'utf-8')
+
+  # create an SHA-256 hashed HMAC object, then base 64 encode it
+  signed_msg = base64.b64encode(hmac.new(api_secb, message,
+                       digestmod=hashlib.sha256).digest())
+  # decode the base64 bytes to a utf-8 str so it can be placed in a header
+  signature = signed_msg.decode('utf-8')
+
+  # build auth headers from Canvas Data API key, HMAC-256 sig, and timestamp
+  auth_headers = { 'Authorization' : 'HMACAuth {}:{}'.format(api_key, signature),
+                   'Date' : '{}'.format(reqOpts['req_timestamp']) }
+
+  return auth_headers
+