From 4e9af62148ec072e542561a104c6f131b8319833 Mon Sep 17 00:00:00 2001
From: JakobEliasWagner
Date: Thu, 13 Jun 2024 01:20:31 +0200
Subject: [PATCH] add documentation

---
 src/continuiti/networks/attention/scaled_dot_product.py | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/src/continuiti/networks/attention/scaled_dot_product.py b/src/continuiti/networks/attention/scaled_dot_product.py
index 3385ad34..a0a308bd 100644
--- a/src/continuiti/networks/attention/scaled_dot_product.py
+++ b/src/continuiti/networks/attention/scaled_dot_product.py
@@ -10,6 +10,14 @@
 
 
 class ScaledDotProduct(Attention):
+    """Scaled dot product attention module.
+
+    This module is a wrapper for the torch implementation of the scaled dot product attention mechanism as described in
+    the paper "Attention Is All You Need" by Vaswani et al. (2017). This attention mechanism computes the attention
+    weights based on the dot product of the query and key matrices, scaled by the square root of the dimension of the
+    key vectors. The weights are then applied to the value vectors to obtain the final output.
+    """
+
     def __init__(self, dropout_p: float = 0.0):
         super().__init__(dropout_p)
 
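
For reference, a minimal sketch of the mechanism the new docstring describes,
assuming torch >= 2.0 for torch.nn.functional.scaled_dot_product_attention; the
tensor names and shapes below are illustrative assumptions for this sketch, not
identifiers from the patch:

    import math

    import torch

    # Illustrative shapes: (batch, heads, seq_len, head_dim); names are
    # assumptions for this sketch only.
    query = torch.randn(2, 4, 8, 16)
    key = torch.randn(2, 4, 8, 16)
    value = torch.randn(2, 4, 8, 16)

    # Dot product of queries and keys, scaled by sqrt(head_dim), softmaxed
    # over the key axis, then applied to the values.
    scores = query @ key.transpose(-2, -1) / math.sqrt(query.size(-1))
    manual = torch.softmax(scores, dim=-1) @ value

    # The torch built-in that the module wraps computes the same quantity
    # when dropout_p=0.0 and no attention mask is given.
    builtin = torch.nn.functional.scaled_dot_product_attention(query, key, value)

    assert torch.allclose(manual, builtin, atol=1e-5)

The sqrt(head_dim) scaling keeps the dot products from growing with the key
dimension, which would otherwise push the softmax into regions with very small
gradients, as discussed in Vaswani et al. (2017).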