diff --git a/README.md b/README.md
index ffa4135..2810e01 100644
--- a/README.md
+++ b/README.md
@@ -178,6 +178,38 @@ asreview data dedup synergy:van_de_schoot_2018 -o van_de_schoot_2018_dedup.csv
 Removed 104 records from dataset with 6189 records.
 ```
 
+### Add missing DOIs
+
+Add missing DOIs to a dataset. The tool looks up each title via the [Crossref API](https://www.crossref.org/) and therefore requires a column with titles in the dataset. The output file contains the original dataset with the retrieved DOIs added; records for which no DOI was found are left empty.
+
+```bash
+asreview data doi MY_DATASET.csv
+```
+
+Export the dataset with added DOIs to a file (`output.csv`):
+
+```bash
+asreview data doi MY_DATASET.csv -o output.csv
+```
+
+By default, the tool waits 750 milliseconds between requests and accepts a DOI only if the title returned by Crossref matches the original title with a similarity of at least 0.95. Both values can be adjusted; `--strict_similarity` enables a stricter title comparison and `--verbose` prints details for each record.
+
+```bash
+asreview data doi MY_DATASET.csv -o output.csv --delay 1000 --threshold 0.9 --strict_similarity --verbose
+```
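+
+The same functionality is available from Python. The snippet below is a minimal sketch of programmatic use (a hypothetical example, not part of the command line interface; it assumes `asreview>=1.1` and this extension are installed):
+
+```python
+from asreview import load_data
+
+from asreviewcontrib.datatools.doi import find_dois
+
+# find_dois fills the 'doi' column of the data object in place
+asdata = load_data("MY_DATASET.csv")
+find_dois(asdata, delay=750, similarity=0.95)
+asdata.to_file("output.csv")
+```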
 
 ### Data Vstack (Experimental)
 
@@ -186,7 +218,7 @@ Vertical stacking: combine as many datasets in the same file format as you want into a single file.
 
 ❗ Vstack is an experimental feature. We would love to hear your feedback. Please keep in mind that this feature can change in the future.
 
-Stack several datasets on top of each other: 
+Stack several datasets on top of each other:
 ```
 asreview data vstack output.csv MY_DATASET_1.csv MY_DATASET_2.csv MY_DATASET_3.csv
 ```
@@ -206,7 +238,7 @@ Compose is where datasets containing records with different labels (or no
 labels) can be assembled into a single dataset.
 
 ❗ Compose is an experimental feature. We would love to hear your feedback.
-Please keep in mind that this feature can change in the future. 
+Please keep in mind that this feature can change in the future.
 
 Overview of possible input files and corresponding properties, use at least
 one of the following arguments:
@@ -231,7 +263,7 @@ case of conflicts, use the `--conflict_resolve`/`-c` flag. This is set to
 | Resolve method | Action in case of conflict                                                               |
 |----------------|------------------------------------------------------------------------------------------|
 | `keep_one`     | Keep one label, using `--hierarchy` to determine which label to keep                      |
-| `keep_all`     | Keep conflicting records as duplicates in the composed dataset (ignoring `--hierarchy`)   |
+| `keep_all`     | Keep conflicting records as duplicates in the composed dataset (ignoring `--hierarchy`)  |
 | `abort`        | Abort                                                                                      |
 
 
diff --git a/asreviewcontrib/datatools/doi.py b/asreviewcontrib/datatools/doi.py
new file mode 100644
index 0000000..e50ab27
--- /dev/null
+++ b/asreviewcontrib/datatools/doi.py
@@ -0,0 +1,173 @@
+from __future__ import annotations
+
+import re
+from difflib import SequenceMatcher
+from random import random
+from time import sleep
+from typing import Any
+from urllib.parse import quote
+
+import ftfy
+import pandas as pd
+import requests
+from asreview import ASReviewData
+from requests.exceptions import ConnectTimeout
+from requests.exceptions import HTTPError
+from tqdm import tqdm
+
+_SPACES_REGEX = re.compile(r'\s+')
+_SYMBOLS_REGEX = re.compile(r'[^ \w\d\-_]')
+_SEQ_MATCHER = SequenceMatcher()
+
+
+def _fetch_doi(
+    title: str,
+    authors: None | str = None,
+    verbose: bool = False,
+    ) -> None | dict[str, Any]:
+    # https://www.crossref.org/documentation/retrieve-metadata/xml-api/retrieving-dois-by-title/
+    # percent-encode the title so spaces and reserved characters are safe in the URL
+    title_query = quote(title, safe='')
+
+    if authors is None:
+        url = f"https://api.crossref.org/works?rows=1&query.title={title_query}" \
+            "&select=title,DOI"
+    else:
+        url = f"https://api.crossref.org/works?rows=1&query.title={title_query}" \
+            "&select=title,DOI,author" \
+            f"&query.bibliographic={quote(authors, safe='')}"
+
+    try:
+        response = requests.get(url, timeout=30)
+        response.raise_for_status()
+
+    except ConnectTimeout as e:
+        if verbose:
+            tqdm.write(f'Timeout for {title}. Wait for 30s and try again.\n{e}')
+
+        raise e
+
+    except HTTPError:
+        if authors is None:
+            if verbose:
+                tqdm.write(f'Could not fetch DOI for {title}')
+
+            return None
+
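+        # the author-filtered query failed; retry once with a title-only
+        # query before giving up on this record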
+        url = f"https://api.crossref.org/works?rows=1&query.title={title_query}" \
+            "&select=title,DOI"
+
+        try:
+            response = requests.get(url, timeout=30)
+            response.raise_for_status()
+
+        except ConnectTimeout as e:
+            if verbose:
+                tqdm.write(f'Timeout for {title}. Wait for 30s and try again.\n{e}')
+
+            raise e
+
+        except HTTPError:
+            if verbose:
+                tqdm.write(f'Could not fetch DOI for {title}')
+
+            return None
+
+    return response.json()
+
+
+def _confirm_doi_title(
+    title: str,
+    title_from_api: str,
+    data: dict[str, Any],
+    similarity: float,
+    strict_similarity: bool,
+    verbose: bool,
+    ) -> None | str:
+    clean_title = _SYMBOLS_REGEX.sub('', title.lower())
+    clean_title = _SPACES_REGEX.sub(' ', clean_title)
+
+    clean_title_from_api = _SYMBOLS_REGEX.sub('', title_from_api.lower())
+    clean_title_from_api = _SPACES_REGEX.sub(' ', clean_title_from_api)
+
+    _SEQ_MATCHER.set_seq1(clean_title)
+    _SEQ_MATCHER.set_seq2(clean_title_from_api)
+
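+    # real_quick_ratio() and quick_ratio() are cheap upper bounds on ratio(),
+    # so clear non-matches are rejected early; the exact (slower) ratio() is
+    # only computed when strict_similarity is requested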
+    if _SEQ_MATCHER.real_quick_ratio() > similarity and \
+            _SEQ_MATCHER.quick_ratio() > similarity and \
+            (not strict_similarity or _SEQ_MATCHER.ratio() > similarity):
+
+        doi = data['message']['items'][0]['DOI']
+
+        if verbose:
+            tqdm.write(f'DOI found for {title}: {doi}')
+
+        return doi
+
+    if verbose:
+        tqdm.write(f'No DOI found for {title}')
+
+    return None
+
+
+def find_dois(
+    asdata: ASReviewData,
+    delay: int = 750,
+    similarity: float = 0.95,
+    strict_similarity: bool = False,
+    verbose: bool = False) -> None:
+    titles = asdata.df['title'].apply(ftfy.fix_text).str.strip()
+
+    if 'authors' in asdata.df.columns:
+        authors = asdata.df['authors'].apply(ftfy.fix_text).str.strip()
+    else:
+        authors = None
+
+    delay_in_seconds = delay / 1000
+    dois = []
+
+    for i, title in enumerate(tqdm(titles, desc="Finding DOIs")):
+        if authors is not None:
+            data = _fetch_doi(title, authors.iloc[i], verbose)
+        else:
+            data = _fetch_doi(title, None, verbose)
+
+        if data is None:
+            dois.append(None)
+            continue
+
+        try:
+            title_from_api = ftfy.fix_text(data['message']['items'][0]['title'][0])
+
+        except (KeyError, IndexError):
+            if verbose:
+                tqdm.write(f'No DOI found for {title}')
+
+            dois.append(None)
+            continue
+
+        doi = _confirm_doi_title(
+            title,
+            title_from_api,
+            data,
+            similarity,
+            strict_similarity,
+            verbose,
+        )
+
+        dois.append(doi)
+
+        # sleep for delay_in_seconds + random jitter to avoid overloading the API
+        sleep(delay_in_seconds + random())
+
+    # if a 'doi' column already exists, merge the dois, giving preference to the old ones
+    if 'doi' in asdata.df.columns:
+        asdata.df['doi'] = asdata.df['doi'].combine_first(
+            pd.Series(dois, index=asdata.df.index))
+    else:
+        asdata.df['doi'] = dois
diff --git a/asreviewcontrib/datatools/entrypoint.py b/asreviewcontrib/datatools/entrypoint.py
index 647bc6a..f3759d2 100644
--- a/asreviewcontrib/datatools/entrypoint.py
+++ b/asreviewcontrib/datatools/entrypoint.py
@@ -10,6 +10,7 @@
 from asreviewcontrib.datatools.convert import convert
 from asreviewcontrib.datatools.describe import _parse_arguments_describe
 from asreviewcontrib.datatools.describe import describe
+from asreviewcontrib.datatools.doi import find_dois
 from asreviewcontrib.datatools.sample import _parse_arguments_sample
 from asreviewcontrib.datatools.sample import sample
 from asreviewcontrib.datatools.snowball import _parse_arguments_snowball
@@ -17,7 +18,7 @@
 from asreviewcontrib.datatools.stack import _parse_arguments_vstack
 from asreviewcontrib.datatools.stack import vstack
 
-DATATOOLS = ["describe", "dedup", "convert", "compose", "vstack", "snowball", "sample"]
+DATATOOLS = ["describe", "dedup", "doi", "convert", "compose", "vstack", "snowball", "sample"]
 
 
 class DataEntryPoint(BaseEntryPoint):
@@ -89,6 +90,77 @@
                     f"Found {n_dup} duplicates in dataset with"
                     f" {initial_length} records."
                 )
+        if argv[0] == "doi":
+            doi_parser = argparse.ArgumentParser(prog="asreview data doi")
+            doi_parser.add_argument(
+                "input_path", type=str, help="The file path of the dataset."
+            )
+            doi_parser.add_argument(
+                "--output_path",
+                "-o",
+                default=None,
+                type=str,
+                help="The file path of the output dataset.",
+            )
+            doi_parser.add_argument(
+                "--delay",
+                default=750,
+                type=int,
+                help="Delay between requests in milliseconds. Default: 750.",
+            )
+            doi_parser.add_argument(
+                "--threshold",
+                default=0.95,
+                type=float,
+                help="Similarity threshold for matching titles. Default: 0.95.",
+            )
+            doi_parser.add_argument(
+                "--strict_similarity",
+                action='store_true',
+                help="Use a stricter similarity check when matching titles.",
+            )
+            doi_parser.add_argument(
+                "--verbose",
+                action='store_true',
+                help="Print verbose output.",
+            )
+
+            args_doi = doi_parser.parse_args(argv[1:])
+
+            # read data into an ASReview data object
+            asdata = load_data(args_doi.input_path)
+
+            if 'doi' in asdata.df.columns:
+                previous_dois = len(asdata.df) - asdata.df['doi'].isna().sum()
+                print(f"Dataset already contains DOIs for {previous_dois} entries. "
+                      "Adding missing DOIs.")
+
+            else:
+                print("Dataset does not contain DOIs. Adding DOIs.")
+                previous_dois = 0
+
+            find_dois(
+                asdata,
+                args_doi.delay,
+                args_doi.threshold,
+                args_doi.strict_similarity,
+                args_doi.verbose,
+            )
+
+            added_dois = len(asdata.df) - asdata.df['doi'].isna().sum() - previous_dois
+
+            if args_doi.output_path:
+                asdata.to_file(args_doi.output_path)
+                print(
+                    f"Added DOIs for {added_dois} records in dataset with"
+                    f" {len(asdata.df)} records."
+                )
+            else:
+                print(
+                    f"Found DOIs for {added_dois} records in dataset with"
+                    f" {len(asdata.df)} records."
+                )
+
         if argv[0] == "compose":
             args_compose_parser = _parse_arguments_compose()
             args_compose = args_compose_parser.parse_args(argv[1:])
diff --git a/pyproject.toml b/pyproject.toml
index 0034c41..f76b995 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -14,7 +14,7 @@ classifiers = [
     "Programming Language :: Python :: 3.11"
 ]
 license = {text = "MIT License"}
-dependencies = ["asreview>=1.1,<2", "pandas", "pyalex"]
+dependencies = ["asreview>=1.1,<2", "ftfy", "pandas", "pyalex", "requests", "tqdm"]
 dynamic = ["version"]
 requires-python = ">=3.8"
 