Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Adjust table with student comparisons #46

Merged
merged 2 commits into from
Feb 13, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 27 additions & 8 deletions data/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -198,25 +198,44 @@ def submissions_max_similarity_json(self):

def top_comparisons(self, rows):
    """Return the unique comparisons behind the most similar submissions.

    Ranks students by their highest ``max_similarity`` among
    ``self.valid_matched_submissions``, keeps the first ``rows`` of them,
    and looks up each such student's single most similar submission.
    The ``"matches"`` entries of the resulting comparison rows are merged
    with duplicates removed.

    Returns:
        A list of the merged match objects, ordered by their
        ``similarity`` attribute, highest first.
    """
    max_list = (
        self.valid_matched_submissions
        .values('student__id')
        .annotate(m=models.Max('max_similarity'))
        .order_by('-m')[:rows]
    )

    # One submission id per ranked student: that student's most similar submission.
    compared_list = self._comparisons_by_submission(
        self.valid_matched_submissions
        .filter(student__id=each['student__id'])
        .order_by('-max_similarity')
        .first().id
        for each in max_list
    )

    # Filter the comparisons such that only unique ones are maintained, while identical ones are removed.
    # Done using Python sets which cannot have duplicate values.
    unique_set = set()
    for comparison_row in compared_list:
        unique_set.update(comparison_row["matches"])

    return sorted(unique_set, key=lambda comparison: comparison.similarity, reverse=True)

def comparisons_for_student(self, student):
    """Return the unique comparisons found for one student's submissions.

    Collects the ids of the student's valid matched submissions (ordered
    by ``created``), resolves them through ``_comparisons_by_submission``
    and merges the ``"matches"`` entries of every returned row.

    Args:
        student: the student whose submissions are looked up.

    Returns:
        A set of the merged match objects (unordered, duplicates removed).
    """
    submission_ids = (
        self.valid_matched_submissions
        .filter(student=student)
        .order_by("created")
        .values_list("id", flat=True)
    )

    unique_set = set()
    # The loop variable is deliberately not called `student`, so the
    # parameter of the same name is never rebound inside the method.
    for comparison_row in self._comparisons_by_submission(submission_ids):
        unique_set.update(comparison_row["matches"])

    return unique_set

def _comparisons_by_submission(self, submissions):
comparisons = []
for s_id in submissions:
Expand Down
109 changes: 29 additions & 80 deletions data/tests.py
Original file line number Diff line number Diff line change
@@ -1,90 +1,39 @@
import logging
logging.disable(logging.CRITICAL)

from django.conf import settings
from django.test import TestCase
import random

from data.models import Course, Submission, Comparison, Exercise
from matcher import matcher
from radar.config import named_function
from data.models import Course, Submission, Comparison, Student
from aplus_client.django.models import ApiNamespace


# Test for exercise view table generation
class TestExerciseTable(TestCase):
    """Checks the comparison list that backs the exercise view table."""

    def test_run_ex_table(self):
        """top_comparisons must return all stored comparisons, highest similarity first."""
        site = ApiNamespace(600)
        site.save()

        course = Course(id=600, api_id=600, namespace_id=600)
        course.save()

        exercise = course.get_exercise("TestCourse")

        # Seeded generator: similarities still vary between pairs, but every
        # run uses the same values, so a failure can be reproduced.
        rng = random.Random(600)

        comparison_set = []

        for i in range(50):
            # Each object is saved before it is attached to the next one, so
            # no relation ever points at an unsaved row.
            student_a = Student(key=i+1000, course=course)
            student_b = Student(key=i+2000, course=course)
            student_a.save()
            student_b.save()

            submission_a = Submission(key=i+1000, exercise=exercise, student=student_a, matched=True)
            submission_b = Submission(key=i+2000, exercise=exercise, student=student_b, matched=True)
            submission_a.save()
            submission_b.save()

            comparison = Comparison(submission_a=submission_a, submission_b=submission_b, similarity=rng.random())
            comparison.save()

            comparison_set.append(comparison)

        sorted_comparison_set = sorted(set(comparison_set), key=lambda comparison: comparison.similarity, reverse=True)

        self.assertQuerySetEqual(sorted_comparison_set, exercise.top_comparisons(100))
90 changes: 27 additions & 63 deletions matcher/tests.py
Original file line number Diff line number Diff line change
@@ -1,76 +1,40 @@
import logging
import random
import string
import time
logging.disable(logging.CRITICAL)

from django.test import TestCase
from django.conf import settings
from django.utils.module_loading import import_string

match_algorithm = import_string(settings.MATCH_ALGORITHMS["jplag_ext"]["callable"])
from data.models import Student, Course, Submission
from matcher import tasks
from aplus_client.django.models import ApiNamespace

TOKENS1 = "ABCD, Testing"
TOKENS2 = "123123 Test"


# Test for matcher calls
class TestMatcher(TestCase):
    """Runs the exercise matching task against a minimal two-student course."""

    # Test matcher to see that both submissions end up among the exercise's
    # valid matched submissions after the matching task has run.
    def test_run_match_exercise(self):
        site = ApiNamespace(600)
        site.save()

        course = Course(id=600, api_id=600, namespace_id=600)
        course.save()

        exercise = course.get_exercise("TestCourse")

        # Students are saved before the submissions that reference them.
        student_a = Student(key=3000, course=course)
        student_b = Student(key=4000, course=course)
        student_a.save()
        student_b.save()

        submission_a = Submission(key=3000, exercise=exercise, student=student_a, matched=False, tokens=TOKENS1)
        submission_b = Submission(key=4000, exercise=exercise, student=student_b, matched=False, tokens=TOKENS2)
        submission_a.save()
        submission_b.save()

        exercise.touch_all_timestamps()

        # delay=False is passed so the task is not deferred — presumably it
        # then runs synchronously inside the test; confirm against matcher.tasks.
        tasks.match_exercise(exercise.pk, delay=False)

        # assertIn reports both operands on failure, unlike assertTrue(a in b).
        self.assertIn(submission_a, exercise.valid_matched_submissions)
        self.assertIn(submission_b, exercise.valid_matched_submissions)

class TestMatcherState(TestCase):
    """Attempt to cover as many failure states as possible when calling matcher.match with some submission object."""
    # Placeholder: no test methods yet.
    # TODO: Create more tests here
24 changes: 0 additions & 24 deletions review/templates/review/_comparisontable.html

This file was deleted.

11 changes: 11 additions & 0 deletions review/templates/review/_comparisontable_unique.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
{% load review %}
{# Renders `comparisons` as a grid of student cells, ten cells per table row. #}
<table class="comparison">
  <tr>
    {% for comparison in comparisons %}
      <td>{% student_td course comparison %}</td>
      {# Break the row after every tenth cell, but not after the final cell, so no empty trailing row is emitted. #}
      {% if forloop.counter|divisibleby:10 and not forloop.last %}
  </tr><tr>
      {% endif %}
    {% endfor %}
  </tr>
</table>
2 changes: 1 addition & 1 deletion review/templates/review/comparison.html
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@

<button type="button"><a href="{% url 'pair_view' course_key=course.key a_key=a.student.key b_key=b.student.key %}">See all comparisons for this pair of students</a></button>

{% include 'review/_comparisontable.html' %}
{% include 'review/_comparisontable_unique.html' %}

<div class="code-comparison"{% if reverse %} data-reverse{% endif %}>
<p>Similarity: <b>{{ comparison.similarity|percent }}</b></p>
Expand Down
2 changes: 1 addition & 1 deletion review/templates/review/exercise.html
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ <h4>Comparison pairs with highest similarity</h4>
<a href="{% url 'exercise' course_key=exercise.course.key exercise_key=exercise.key %}?rows=100" class="btn btn-default btn-xs">
Show 100 rows
</a>
{% include 'review/_comparisontable.html' %}
{% include 'review/_comparisontable_unique.html' %}

<pre id="js" class="well">
Waiting for Javascript...
Expand Down