apluslms · ihalaij1 · Feb 13, 2024 · Feb 8, 2024 · Feb 9, 2024
diff --git a/data/models.py b/data/models.py
@@ -198,25 +198,61 @@ def submissions_max_similarity_json(self):
 
     def top_comparisons(self, rows):
+        """
+        Return the unique comparisons of the top students, most similar first.
+
+        Up to ``rows`` students are selected by their highest ``max_similarity``
+        and, for each one, the submission with the highest ``max_similarity``
+        is looked up. The ``matches`` of those submissions are merged without
+        duplicates; the merged list is not truncated to ``rows``.
+        """
         max_list = (self.valid_matched_submissions
-                .values('student__id')
-                .annotate(m=models.Max('max_similarity'))
-                .order_by('-m')[:rows])
-        return self._comparisons_by_submission(
+                    .values('student__id')
+                    .annotate(m=models.Max('max_similarity'))
+                    .order_by('-m')[:rows])
+
+        compared_list = self._comparisons_by_submission(
             self.valid_matched_submissions
             .filter(student__id=each['student__id'])
             .order_by('-max_similarity')
             .first().id
             for each in max_list
         )
 
+        # Keep only unique comparisons: a set cannot hold duplicate values,
+        # so identical entries collapse into one.
+        unique_comparisons = set()
+        for comparison_row in compared_list:
+            unique_comparisons.update(comparison_row["matches"])
+
+        return sorted(
+            unique_comparisons,
+            key=lambda comparison: comparison.similarity,
+            reverse=True,
+        )
+
     def comparisons_for_student(self, student):
-        return self._comparisons_by_submission(
-            self.valid_matched_submissions\
-                .filter(student=student)\
-                .order_by("created")\
+        """
+        Return a set merging the ``matches`` found for ``student``'s submissions.
+
+        NOTE(review): a set has no defined iteration order, so whatever iterates
+        this result may see the entries in a varying order - confirm whether
+        callers need a sorted list here.
+        """
+        submission_rows = self._comparisons_by_submission(
+            self.valid_matched_submissions
+                .filter(student=student)
+                .order_by("created")
                 .values_list("id", flat=True)
         )
 
+        unique_comparisons = set()
+        # The loop name differs from the ``student`` argument on purpose, so
+        # the argument is not rebound while iterating.
+        for row in submission_rows:
+            unique_comparisons.update(row["matches"])
+
+        return unique_comparisons
+
     def _comparisons_by_submission(self, submissions):
         comparisons = []
         for s_id in submissions:

diff --git a/data/tests.py b/data/tests.py
@@ -1,90 +1,44 @@
-import logging
-logging.disable(logging.CRITICAL)
-
-from django.conf import settings
 from django.test import TestCase
+import random
 
-from data.models import Course, Submission, Comparison, Exercise
-from matcher import matcher
-from radar.config import named_function
+from data.models import Course, Submission, Comparison, Student
+from aplus_client.django.models import ApiNamespace
 
 
-TOKENS1 = "abcdefghi" # total 9, authored 4, longest 4
-TOKENS2 = "abcxxxxxxxxxfgxab" # total 17, authored 4, longest 4
-TEMPLATE = "abcdexxxx"
+# Test for exercise view table generation
+class TestExerciseTable(TestCase):
+    def test_run_ex_table(self):
+        """Check that top_comparisons() lists every comparison, most similar first."""
+        site = ApiNamespace(600)
+        site.save()
 
-class MatcherTestCase(TestCase):
+        course = Course(id=600, api_id=600, namespace_id=600)
+        course.save()
 
-    def test_algorithm(self):
-        for function_def in settings.MATCH_ALGORITHMS.values():
-            if function_def["callable"] is None:
-                continue
-            f = named_function(function_def["callable"])
-            a = TOKENS1
-            b = TOKENS2
-            ms = f(a, [ False ] * len(a), b, [ False ] * len(b), 2)
-            self.assertEqual(len(ms.store), 2)
-            self.assertEqual(ms.store[0].a, 0)
-            self.assertEqual(ms.store[0].b, 0)
-            self.assertEqual(ms.store[0].length, 3)
-            self.assertEqual(ms.store[1].a, 5)
-            self.assertEqual(ms.store[1].b, 12)
-            self.assertEqual(ms.store[1].length, 2)
+        exercise = course.get_exercise("TestCourse")
 
-    def test_submission(self):
-        self._create_test_course()
-        for submission in Submission.objects.filter(matched=False).order_by("student__key"):
-            matcher.match(submission)
-        s = Submission.objects.get(student__key="001")
-        self.assertEqual(s.authored_token_count, 9)
-        self.assertEqual(s.longest_authored_tile, 9)
-        s = Submission.objects.get(student__key="002")
-        self.assertEqual(s.authored_token_count, 17)
-        self.assertEqual(s.longest_authored_tile, 17)
-        self.assertEqual(Comparison.objects.all().count(), 3)
-        cts = Comparison.objects.filter(submission_b__isnull=True)
-        self.assertEqual(len(cts), 2)
-        self.assertAlmostEqual(cts[0].similarity, 0.0, 1)
-        self.assertAlmostEqual(cts[1].similarity, 0.0, 1)
-        self.assertEqual(cts[0].matches_json, "[]")
-        c = Comparison.objects.exclude(submission_b__isnull=True).first()
-        self.assertAlmostEqual(c.similarity, 9 / 26, 1)
-        self.assertEqual(c.matches_json, "[[0,0,3],[12,5,2]]")
+        comparison_set = []
+        # A seeded generator makes the similarities, and with them the expected
+        # order, identical on every run.
+        rng = random.Random(600)
 
-    def test_template(self):
-        self._create_test_course()
-        exercise = Exercise.objects.all().first()
-        exercise.template_tokens = TEMPLATE
-        exercise.save()
-        for submission in Submission.objects.filter(matched=False).order_by("student__key"):
-            matcher.match(submission)
-        s = Submission.objects.get(student__key="001")
-        self.assertEqual(s.authored_token_count, 4)
-        self.assertEqual(s.longest_authored_tile, 4)
-        s = Submission.objects.get(student__key="002")
-        self.assertEqual(s.authored_token_count, 10)
-        self.assertEqual(s.longest_authored_tile, 10, "Submission with tokens {} should have longest authored tile {}".format(s.tokens, 10))
-        self.assertEqual(Comparison.objects.all().count(), 3)
-        cts = Comparison.objects.filter(submission_b__isnull=True).order_by("submission_a")
-        self.assertEqual(len(cts), 2)
-        self.assertAlmostEqual(cts[0].similarity, 5 / 9, 1)
-        self.assertAlmostEqual(cts[1].similarity, 7 / 17, 1)
-        self.assertEqual(cts[0].matches_json, "[[0,0,5]]")
-        self.assertEqual(cts[1].matches_json, "[[0,0,3],[3,5,4]]")
-        c = Comparison.objects.exclude(submission_b__isnull=True).first()
-        self.assertAlmostEqual(c.similarity, 4 / 14, 1)
-        self.assertEqual(c.matches_json, "[[12,5,2]]")
+        for i in range(50):
+            # Each row is saved before the rows that refer to it are built.
+            student_a = Student(key=i+1000, course=course)
+            student_b = Student(key=i+2000, course=course)
+            student_a.save()
+            student_b.save()
 
-    def _create_test_course(self):
-        course = Course(key="test", name="Test", provider="filesystem", tokenizer="scala", minimum_match_tokens=2, api_id="0", namespace_id="0")
-        course.save()
-        exercise = course.get_exercise("1")
-        student1 = course.get_student("001")
-        student2 = course.get_student("002")
-        submissions = [
-            Submission(key="1", exercise=exercise, student=student1, tokens=TOKENS1, indexes_json="[]"),
-            Submission(key="2", exercise=exercise, student=student2, tokens=TOKENS2, indexes_json="[]"),
-        ]
-        for s in submissions:
-            s.save()
+            submission_a = Submission(key=i+1000, exercise=exercise, student=student_a, matched=True)
+            submission_b = Submission(key=i+2000, exercise=exercise, student=student_b, matched=True)
+            submission_a.save()
+            submission_b.save()
+
+            comparison = Comparison(submission_a=submission_a, submission_b=submission_b, similarity=rng.random())
+            comparison.save()
+            comparison_set.append(comparison)
+
+        sorted_comparison_set = sorted(comparison_set, key=lambda comparison: comparison.similarity, reverse=True)
 
+        # Both sides are plain lists, so a list comparison is all that is needed.
+        self.assertEqual(sorted_comparison_set, exercise.top_comparisons(100))
diff --git a/matcher/tests.py b/matcher/tests.py
@@ -1,76 +1,40 @@
-import logging
-import random
-import string
-import time
-logging.disable(logging.CRITICAL)
-
 from django.test import TestCase
-from django.conf import settings
-from django.utils.module_loading import import_string
-
-match_algorithm = import_string(settings.MATCH_ALGORITHMS["jplag_ext"]["callable"])
+from data.models import Student, Course, Submission
+from matcher import tasks
+from aplus_client.django.models import ApiNamespace
 
-def random_char():
-    return random.choice(string.printable)
+TOKENS1 = "ABCD, Testing"
+TOKENS2 = "123123 Test"
 
-def random_string(size):
-    return ''.join(random_char() for _ in range(size))
 
-def random_string_copy(string, copy_pr):
-    # note that copy_pr == 0 does not guarantee that the randomly drawn char does not happen to be equal to c
-    return ''.join((random_char() if copy_pr < random.random() else c) for c in string)
+# Test for matcher calls
+class TestMatcher(TestCase):
 
-def generate_data(a_size, b_size, similarity_p):
-    tokens_a = random_string(a_size)
-    tokens_b = random_string_copy(tokens_a, similarity_p)[:b_size]
-    return (tokens_a, len(tokens_a)*[False], tokens_b, len(tokens_b)*[False], 15)
+    # Run the matcher for one exercise and check both submissions end up among its valid matched submissions
+    def test_run_match_exercise(self):
+        site = ApiNamespace(600)
+        site.save()
 
+        course = Course(id=600, api_id=600, namespace_id=600)
+        course.save()
 
-class TestBenchmark(TestCase):
-    """For the match algorithm specified in the settings module, run benchmark tests with random data and assert that the amount of successful iterations is large enough"""
+        exercise = course.get_exercise("TestCourse")
 
-    def benchmark(self, match_args, min_iterations=10):
-        timeout_seconds = 0.5
-        iterations = 0
-        total_time = 0
-        while total_time < timeout_seconds:
-            start_time = time.perf_counter()
-            match_algorithm(*match_args)
-            end_time = time.perf_counter()
-            total_time += end_time - start_time
-            iterations += 1
-        self.assertGreater(iterations, min_iterations,
-                "Expected match algorithm {0!r} to compute its result at least {1} times in {2} seconds but it managed only {3} iterations before {2} second timeout."
-                .format(match_algorithm, min_iterations, timeout_seconds, iterations))
+        student_a = Student(key=3000, course=course)
+        student_b = Student(key=4000, course=course)
+        student_a.save()
+        student_b.save()
 
-    def test_a1_very_unlikely_equal_tiny(self):
-        self.benchmark(generate_data(100, 100, 0))
-    def test_a2_unlikely_equal_tiny(self):
-        self.benchmark(generate_data(100, 100, 0.25))
-    def test_a3_likely_equal_tiny(self):
-        self.benchmark(generate_data(100, 100, 0.75))
-    def test_a4_very_likely_equal_tiny(self):
-        self.benchmark(generate_data(100, 100, 1))
+        submission_a = Submission(key=3000, exercise=exercise, student=student_a, matched=False, tokens=TOKENS1)
+        submission_b = Submission(key=4000, exercise=exercise, student=student_b, matched=False, tokens=TOKENS2)
+        submission_a.save()
+        submission_b.save()
 
-    def test_b1_very_unlikely_equal_average(self):
-        self.benchmark(generate_data(500, 500, 0))
-    def test_b2_unlikely_equal_average(self):
-        self.benchmark(generate_data(500, 500, 0.25))
-    def test_b3_likely_equal_average(self):
-        self.benchmark(generate_data(500, 500, 0.75))
-    def test_b4_very_likely_equal_average(self):
-        self.benchmark(generate_data(500, 500, 1))
+        exercise.touch_all_timestamps()
 
-    def test_c1_very_unlikely_equal_large(self):
-        self.benchmark(generate_data(1000, 1000, 0))
-    def test_c2_unlikely_equal_large(self):
-        self.benchmark(generate_data(1000, 1000, 0.25))
-    def test_c3_likely_equal_large(self):
-        self.benchmark(generate_data(1000, 1000, 0.75))
-    def test_c4_very_likely_equal_large(self):
-        self.benchmark(generate_data(1000, 1000, 1))
+        tasks.match_exercise(exercise.pk, delay=False)
 
+        self.assertIn(submission_a, exercise.valid_matched_submissions)
+        self.assertIn(submission_b, exercise.valid_matched_submissions)
 
-class TestMatcherState(TestCase):
-    """Attempt to cover as many failure states as possible when calling matcher.match with some submission object."""
-    pass
+# TODO: Create more tests here
diff --git a/review/templates/review/_comparisontable.html b/review/templates/review/_comparisontable.html
diff --git a/review/templates/review/_comparisontable_unique.html b/review/templates/review/_comparisontable_unique.html
@@ -0,0 +1,12 @@
+{% load review %}
+{# Lays the comparisons out ten cells per table row; no row break after the last cell. #}
+<table class="comparison">
+	<tr>
+		{% for comparison in comparisons %}
+			<td>{% student_td course comparison %}</td>
+			{% if forloop.counter|divisibleby:10 and not forloop.last %}
+				</tr><tr>
+			{% endif %}
+		{% endfor %}
+	</tr>
+</table>
diff --git a/review/templates/review/comparison.html b/review/templates/review/comparison.html
@@ -17,7 +17,7 @@
 
 <button type="button"><a href="{% url 'pair_view' course_key=course.key a_key=a.student.key b_key=b.student.key %}">See all comparisons for this pair of students</a></button>
 
-{% include 'review/_comparisontable.html' %}
+{% include 'review/_comparisontable_unique.html' %}
 
 <div class="code-comparison"{% if reverse %} data-reverse{% endif %}>
 	<p>Similarity: <b>{{ comparison.similarity|percent }}</b></p>

diff --git a/review/templates/review/exercise.html b/review/templates/review/exercise.html
@@ -16,7 +16,7 @@ <h4>Comparison pairs with highest similarity</h4>
 <a href="{% url 'exercise' course_key=exercise.course.key exercise_key=exercise.key %}?rows=100" class="btn btn-default btn-xs"}">
     Show 100 rows
 </a>
-{% include 'review/_comparisontable.html' %}
+{% include 'review/_comparisontable_unique.html' %}
 
 <pre id="js" class="well">
 Waiting for Javascript...