From 01b96851e4f80838f67ef5dc6c2b17c96da42087 Mon Sep 17 00:00:00 2001
From: Mufaddal Naguthanawala <m.nguthana@hotmail.com>
Date: Tue, 1 Oct 2024 18:48:52 -0400
Subject: [PATCH 01/22] update curated papers list

---
 src/bioregistry/curation/curated_papers.csv | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/src/bioregistry/curation/curated_papers.csv b/src/bioregistry/curation/curated_papers.csv
index 66b3363c9..160d9ebbc 100644
--- a/src/bioregistry/curation/curated_papers.csv
+++ b/src/bioregistry/curation/curated_papers.csv
@@ -1 +1,8 @@
-pmid, relevant, relevancy_type, notes
\ No newline at end of file
+pmid,relevant,relevancy_type,notes
+39104285,1,new_provider,“Provider for UniProt IDs, issue with multiple URI formats depending on plant species”
+39074139,1,new_prefix,"Uses PDP and RNAcentral IDs to create RNA-ligand interaction page but unsure if creating “new” identifiers or rehashing existing ones"
+39014503,1,no_website
+39047988,0,irrelevant_other
+39115390,0,irrelevant_other
+39095357,0,irrelevant_other
+39084442,0,not_identifiers_resource

From 1824a51f441f8f1439c54c33b10d11332e5dad81 Mon Sep 17 00:00:00 2001
From: Mufaddal Naguthanawala <m.nguthana@hotmail.com>
Date: Mon, 7 Oct 2024 11:04:11 -0400
Subject: [PATCH 02/22] update curated papers list with papers identified in
 the Aug 9 batch. Curate new prefix for PEPhub. Curate new provider for PDB

---
 src/bioregistry/curation/curated_papers.csv | 27 +++++++++++++++------
 src/bioregistry/data/bioregistry.json       | 22 +++++++++++++++++
 2 files changed, 42 insertions(+), 7 deletions(-)

diff --git a/src/bioregistry/curation/curated_papers.csv b/src/bioregistry/curation/curated_papers.csv
index 160d9ebbc..e76545c25 100644
--- a/src/bioregistry/curation/curated_papers.csv
+++ b/src/bioregistry/curation/curated_papers.csv
@@ -1,8 +1,21 @@
 pmid,relevant,relevancy_type,notes
-39104285,1,new_provider,“Provider for UniProt IDs, issue with multiple URI formats depending on plant species”
-39074139,1,new_prefix,"Uses PDP and RNAcentral IDs to create RNA-ligand interaction page but unsure if creating “new” identifiers or rehashing existing ones"
-39014503,1,no_website
-39047988,0,irrelevant_other
-39115390,0,irrelevant_other
-39095357,0,irrelevant_other
-39084442,0,not_identifiers_resource
+39104285,1,new_provider,"Provider for UniProt IDs, issue with curation due to multiple URI formats depending on plant species"
+39074139,1,new_provider,"Resolver for PDB IDs"
+39014503,0,no_website,
+39047988,0,irrelevant_other,
+39115390,0,irrelevant_other,
+39095357,0,irrelevant_other,
+39084442,0,not_identifiers_resource,
+38991851,0,new_prefix,"identifiers for sharing, retrieving, and validating sample metadata."
+38991828,0,irrelevant_other,
+39049520,0,not_identifiers_resource,
+39104826,1,existing,"Already present in the bioregistry as a provider for mesh prefix"
+39050757,0,irrelevant_other,
+39064021,0,irrelevant_other,
+39028894,0,not_identifiers_resource,
+39044201,0,not_identifiers,"Potential resource for rare diseases identifiers, but not identifier information"
+39088253,0,irrelevant_other,
+39119155,0,irrelevant_other,
+39005357,0,irrelevant_other,
+39044130,0,irrelevant_other,
+39010878,0,irrelevant_other,
\ No newline at end of file
diff --git a/src/bioregistry/data/bioregistry.json b/src/bioregistry/data/bioregistry.json
index 4deb6abe7..8c260542e 100644
--- a/src/bioregistry/data/bioregistry.json
+++ b/src/bioregistry/data/bioregistry.json
@@ -85731,6 +85731,15 @@
       "prefix": "pdbj",
       "uri_format": "http://service.pdbj.org/mine/Detail?PDBID=$1&PAGEID=Summary"
     },
+    "providers": [
+      {
+        "code": "furna",
+        "description": "FURNA (Functions of RNAs) is a database of ligand-RNA interactions and Gene Ontology annotations for RNAs in the Protein Data Bank (PDB).",
+        "homepage": "https://seq2fun.dcmb.med.umich.edu/furna/",
+        "name": "furna",
+        "uri_format": "https://seq2fun.dcmb.med.umich.edu/furna/pdb.cgi?pdbid=$1"
+      }
+    ],
     "publications": [
       {
         "doi": "10.1002/pro.4211",
@@ -86670,6 +86679,19 @@
       "orcid": "0000-0003-4423-4370"
     }
   },
+  "pephub": {
+    "contact": {
+      "email": "nsheffield@virginia.edu",
+      "name": "Nathan Sheffield",
+      "orcid": "0000-0001-5643-4068"
+    },
+    "description": "PEPhub is a database, web interface, and API for sharing, retrieving, and validating sample metadata. PEPhub uses Portable Encapsulated Projects (PEP) biological metadata standard to store, edit, and access PEPs in one place.",
+    "example": "gse185244",
+    "homepage": "https://pephub.databio.org/",
+    "name": "PEPhub",
+    "pattern": "^gse[0-9]{6}$",
+    "uri_format": "https://pephub.databio.org/geo/$1"
+  },
   "peptideatlas": {
     "biocontext": {
       "prefix": "PEPTIDEATLAS"

From c3967ac558ffd4fc77ac2b8a635bba3e90cc7a40 Mon Sep 17 00:00:00 2001
From: Mufaddal Naguthanawala <m.nguthana@hotmail.com>
Date: Mon, 7 Oct 2024 15:34:17 -0400
Subject: [PATCH 03/22] Add contributor information, update regex pattern and
 examples for pephub

---
 src/bioregistry/data/bioregistry.json | 15 ++++++++++++---
 1 file changed, 12 insertions(+), 3 deletions(-)

diff --git a/src/bioregistry/data/bioregistry.json b/src/bioregistry/data/bioregistry.json
index 8c260542e..450474eda 100644
--- a/src/bioregistry/data/bioregistry.json
+++ b/src/bioregistry/data/bioregistry.json
@@ -86685,12 +86685,21 @@
       "name": "Nathan Sheffield",
       "orcid": "0000-0001-5643-4068"
     },
+    "contributor": {
+      "email": "m.naguthana@hotmail.com",
+      "github": "nagutm",
+      "name": "Mufaddal Naguthanawala",
+      "orcid": "0009-0009-5240-7463"
+    },
     "description": "PEPhub is a database, web interface, and API for sharing, retrieving, and validating sample metadata. PEPhub uses Portable Encapsulated Projects (PEP) biological metadata standard to store, edit, and access PEPs in one place.",
-    "example": "gse185244",
+    "example": [
+      "geo",
+      "geo/gse185244"
+    ],
     "homepage": "https://pephub.databio.org/",
     "name": "PEPhub",
-    "pattern": "^gse[0-9]{6}$",
-    "uri_format": "https://pephub.databio.org/geo/$1"
+    "pattern": "^[A-Za-z0-9_\\-]+(/[A-Za-z0-9_\\-]+)?$",
+    "uri_format": "https://pephub.databio.org/$1"
   },
   "peptideatlas": {
     "biocontext": {

From 129f37040a4852d9f7e72416fbd9d978f949566e Mon Sep 17 00:00:00 2001
From: Mufaddal Naguthanawala <m.nguthana@hotmail.com>
Date: Mon, 7 Oct 2024 15:54:20 -0400
Subject: [PATCH 04/22] add example_extras to pephub

---
 src/bioregistry/data/bioregistry.json | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/src/bioregistry/data/bioregistry.json b/src/bioregistry/data/bioregistry.json
index 450474eda..1eac9d3f4 100644
--- a/src/bioregistry/data/bioregistry.json
+++ b/src/bioregistry/data/bioregistry.json
@@ -86692,9 +86692,11 @@
       "orcid": "0009-0009-5240-7463"
     },
     "description": "PEPhub is a database, web interface, and API for sharing, retrieving, and validating sample metadata. PEPhub uses Portable Encapsulated Projects (PEP) biological metadata standard to store, edit, and access PEPs in one place.",
-    "example": [
-      "geo",
-      "geo/gse185244"
+    "example": "geo",
+    "example_extras": [
+      "geo/gse185244",
+      "bedbase/gse198944",
+      "databio/encode_batch_1"
     ],
     "homepage": "https://pephub.databio.org/",
     "name": "PEPhub",

From 17a349084fdaf6b0b6144aa0c6534a8c93ba011e Mon Sep 17 00:00:00 2001
From: Mufaddal Naguthanawala <m.nguthana@hotmail.com>
Date: Mon, 7 Oct 2024 18:47:35 -0400
Subject: [PATCH 05/22] amend 'relevancy_type' and 'relevant' data for two
 entries in curated papers

---
 src/bioregistry/curation/curated_papers.csv | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/bioregistry/curation/curated_papers.csv b/src/bioregistry/curation/curated_papers.csv
index e76545c25..4856a262c 100644
--- a/src/bioregistry/curation/curated_papers.csv
+++ b/src/bioregistry/curation/curated_papers.csv
@@ -6,14 +6,14 @@ pmid,relevant,relevancy_type,notes
 39115390,0,irrelevant_other,
 39095357,0,irrelevant_other,
 39084442,0,not_identifiers_resource,
-38991851,0,new_prefix,"identifiers for sharing, retrieving, and validating sample metadata."
+38991851,1,new_prefix,"identifiers for sharing, retrieving, and validating sample metadata."
 38991828,0,irrelevant_other,
 39049520,0,not_identifiers_resource,
 39104826,1,existing,"Already present in the bioregistry as a provider for mesh prefix"
 39050757,0,irrelevant_other,
 39064021,0,irrelevant_other,
 39028894,0,not_identifiers_resource,
-39044201,0,not_identifiers,"Potential resource for rare diseases identifiers, but not identifier information"
+39044201,0,not_identifiers_resource,"Potential resource for rare diseases identifiers, but not identifier information"
 39088253,0,irrelevant_other,
 39119155,0,irrelevant_other,
 39005357,0,irrelevant_other,

From 4ab43a2058dffe683879833bdeeab6c228afe91b Mon Sep 17 00:00:00 2001
From: Mufaddal Naguthanawala <m.nguthana@hotmail.com>
Date: Tue, 8 Oct 2024 12:30:06 -0400
Subject: [PATCH 06/22] remove PEPhub as a prefix

---
 src/bioregistry/curation/curated_papers.csv |  2 +-
 src/bioregistry/data/bioregistry.json       | 24 ---------------------
 2 files changed, 1 insertion(+), 25 deletions(-)

diff --git a/src/bioregistry/curation/curated_papers.csv b/src/bioregistry/curation/curated_papers.csv
index 4856a262c..6442a293f 100644
--- a/src/bioregistry/curation/curated_papers.csv
+++ b/src/bioregistry/curation/curated_papers.csv
@@ -6,7 +6,7 @@ pmid,relevant,relevancy_type,notes
 39115390,0,irrelevant_other,
 39095357,0,irrelevant_other,
 39084442,0,not_identifiers_resource,
-38991851,1,new_prefix,"identifiers for sharing, retrieving, and validating sample metadata."
+38991851,1,unclear,"identifiers for sharing, retrieving, and validating sample metadata. Unclear if this should be curated as a prefix, provider or a separate registry"
 38991828,0,irrelevant_other,
 39049520,0,not_identifiers_resource,
 39104826,1,existing,"Already present in the bioregistry as a provider for mesh prefix"
diff --git a/src/bioregistry/data/bioregistry.json b/src/bioregistry/data/bioregistry.json
index 1eac9d3f4..a45ce5dba 100644
--- a/src/bioregistry/data/bioregistry.json
+++ b/src/bioregistry/data/bioregistry.json
@@ -86679,30 +86679,6 @@
       "orcid": "0000-0003-4423-4370"
     }
   },
-  "pephub": {
-    "contact": {
-      "email": "nsheffield@virginia.edu",
-      "name": "Nathan Sheffield",
-      "orcid": "0000-0001-5643-4068"
-    },
-    "contributor": {
-      "email": "m.naguthana@hotmail.com",
-      "github": "nagutm",
-      "name": "Mufaddal Naguthanawala",
-      "orcid": "0009-0009-5240-7463"
-    },
-    "description": "PEPhub is a database, web interface, and API for sharing, retrieving, and validating sample metadata. PEPhub uses Portable Encapsulated Projects (PEP) biological metadata standard to store, edit, and access PEPs in one place.",
-    "example": "geo",
-    "example_extras": [
-      "geo/gse185244",
-      "bedbase/gse198944",
-      "databio/encode_batch_1"
-    ],
-    "homepage": "https://pephub.databio.org/",
-    "name": "PEPhub",
-    "pattern": "^[A-Za-z0-9_\\-]+(/[A-Za-z0-9_\\-]+)?$",
-    "uri_format": "https://pephub.databio.org/$1"
-  },
   "peptideatlas": {
     "biocontext": {
       "prefix": "PEPTIDEATLAS"

From 65f1d8926f243a9aaeecdcaa0f1fa430669af983 Mon Sep 17 00:00:00 2001
From: Mufaddal Naguthanawala <m.nguthana@hotmail.com>
Date: Wed, 9 Oct 2024 09:26:00 -0400
Subject: [PATCH 07/22] update curated_papers with orcid and date

---
 src/bioregistry/curation/curated_papers.csv | 42 ++++++++++-----------
 1 file changed, 21 insertions(+), 21 deletions(-)

diff --git a/src/bioregistry/curation/curated_papers.csv b/src/bioregistry/curation/curated_papers.csv
index 6442a293f..0fa0ca9a9 100644
--- a/src/bioregistry/curation/curated_papers.csv
+++ b/src/bioregistry/curation/curated_papers.csv
@@ -1,21 +1,21 @@
-pmid,relevant,relevancy_type,notes
-39104285,1,new_provider,"Provider for UniProt IDs, issue with curation due to multiple URI formats depending on plant species"
-39074139,1,new_provider,"Resolver for PDB IDs"
-39014503,0,no_website,
-39047988,0,irrelevant_other,
-39115390,0,irrelevant_other,
-39095357,0,irrelevant_other,
-39084442,0,not_identifiers_resource,
-38991851,1,unclear,"identifiers for sharing, retrieving, and validating sample metadata. Unclear if this should be curated as a prefix, provider or a separate registry"
-38991828,0,irrelevant_other,
-39049520,0,not_identifiers_resource,
-39104826,1,existing,"Already present in the bioregistry as a provider for mesh prefix"
-39050757,0,irrelevant_other,
-39064021,0,irrelevant_other,
-39028894,0,not_identifiers_resource,
-39044201,0,not_identifiers_resource,"Potential resource for rare diseases identifiers, but not identifier information"
-39088253,0,irrelevant_other,
-39119155,0,irrelevant_other,
-39005357,0,irrelevant_other,
-39044130,0,irrelevant_other,
-39010878,0,irrelevant_other,
\ No newline at end of file
+pmid,relevant,relevancy_type,notes,orcid,date
+39104285,1,new_provider,"Provider for UniProt IDs, issue with curation due to multiple URI formats depending on plant species",0009-0009-5240-7463,09/24/2024
+39074139,1,new_provider,"Resolver for PDB IDs",0009-0009-5240-7463,09/24/2024
+39014503,0,no_website,,0009-0009-5240-7463,09/25/2024
+39047988,0,irrelevant_other,,0009-0009-5240-7463,09/25/2024
+39115390,0,irrelevant_other,,0009-0009-5240-7463,09/26/2024
+39095357,0,irrelevant_other,,0009-0009-5240-7463,09/26/2024
+39084442,0,not_identifiers_resource,,0009-0009-5240-7463,09/27/2024
+38991851,1,unclear,"identifiers for sharing, retrieving, and validating sample metadata. Unclear if this should be curated as a prefix, provider or a separate registry",0009-0009-5240-7463,09/28/2024
+38991828,0,irrelevant_other,,0009-0009-5240-7463,09/28/2024
+39049520,0,not_identifiers_resource,,0009-0009-5240-7463,09/30/2024
+39104826,1,existing,"Already present in the bioregistry as a provider for mesh prefix",0009-0009-5240-7463,10/1/2024
+39050757,0,irrelevant_other,,0009-0009-5240-7463,10/1/2024
+39064021,0,irrelevant_other,,0009-0009-5240-7463,10/1/2024
+39028894,0,not_identifiers_resource,,0009-0009-5240-7463,10/4/2024
+39044201,0,not_identifiers_resource,"Potential resource for rare diseases identifiers, but not identifier information",0009-0009-5240-7463,10/4/2024
+39088253,0,irrelevant_other,,0009-0009-5240-7463,10/5/2024
+39119155,0,irrelevant_other,,0009-0009-5240-7463,10/5/2024
+39005357,0,irrelevant_other,,0009-0009-5240-7463,10/5/2024
+39044130,0,irrelevant_other,,0009-0009-5240-7463,10/5/2024
+39010878,0,irrelevant_other,,0009-0009-5240-7463,10/5/2024
\ No newline at end of file

From 31a6106f8215e38b2ec33ad9d87c62d39db2ad6f Mon Sep 17 00:00:00 2001
From: Mufaddal Naguthanawala <m.nguthana@hotmail.com>
Date: Fri, 11 Oct 2024 13:44:24 -0400
Subject: [PATCH 08/22] add unit test for validating input in curated_papers
 file.

---
 src/bioregistry/constants.py                | 12 +++++
 src/bioregistry/curation/curated_papers.csv | 21 --------
 src/bioregistry/data/curated_papers.txt     | 21 ++++++++
 tests/test_curated_papers.py                | 57 +++++++++++++++++++++
 4 files changed, 90 insertions(+), 21 deletions(-)
 delete mode 100644 src/bioregistry/curation/curated_papers.csv
 create mode 100644 src/bioregistry/data/curated_papers.txt
 create mode 100644 tests/test_curated_papers.py

diff --git a/src/bioregistry/constants.py b/src/bioregistry/constants.py
index 29b75926c..5f4b7f7d5 100644
--- a/src/bioregistry/constants.py
+++ b/src/bioregistry/constants.py
@@ -19,6 +19,7 @@
     "MISMATCH_PATH",
     "BIOREGISTRY_MODULE",
     "RAW_DIRECTORY",
+    "CURATED_PAPERS_PATH"
 ]
 
 PYDANTIC_1 = importlib.metadata.version("pydantic").startswith("1.")
@@ -33,6 +34,17 @@
 COLLECTIONS_PATH = DATA_DIRECTORY / "collections.json"
 MISMATCH_PATH = DATA_DIRECTORY / "mismatch.json"
 CONTEXTS_PATH = DATA_DIRECTORY / "contexts.json"
+CURATED_PAPERS_PATH = DATA_DIRECTORY / "curated_papers.txt"
+CURATED_PAPERS_RELEVANCY_VOCAB = [
+    "new_prefix", 
+    "new_provider", 
+    "new_publication", 
+    "not_identifiers_resource", 
+    "no_website", 
+    "existing", 
+    "unclear", 
+    "irrelevant_other"
+]
 
 BIOREGISTRY_MODULE = pystow.module("bioregistry")
 
diff --git a/src/bioregistry/curation/curated_papers.csv b/src/bioregistry/curation/curated_papers.csv
deleted file mode 100644
index 0fa0ca9a9..000000000
--- a/src/bioregistry/curation/curated_papers.csv
+++ /dev/null
@@ -1,21 +0,0 @@
-pmid,relevant,relevancy_type,notes,orcid,date
-39104285,1,new_provider,"Provider for UniProt IDs, issue with curation due to multiple URI formats depending on plant species",0009-0009-5240-7463,09/24/2024
-39074139,1,new_provider,"Resolver for PDB IDs",0009-0009-5240-7463,09/24/2024
-39014503,0,no_website,,0009-0009-5240-7463,09/25/2024
-39047988,0,irrelevant_other,,0009-0009-5240-7463,09/25/2024
-39115390,0,irrelevant_other,,0009-0009-5240-7463,09/26/2024
-39095357,0,irrelevant_other,,0009-0009-5240-7463,09/26/2024
-39084442,0,not_identifiers_resource,,0009-0009-5240-7463,09/27/2024
-38991851,1,unclear,"identifiers for sharing, retrieving, and validating sample metadata. Unclear if this should be curated as a prefix, provider or a separate registry",0009-0009-5240-7463,09/28/2024
-38991828,0,irrelevant_other,,0009-0009-5240-7463,09/28/2024
-39049520,0,not_identifiers_resource,,0009-0009-5240-7463,09/30/2024
-39104826,1,existing,"Already present in the bioregistry as a provider for mesh prefix",0009-0009-5240-7463,10/1/2024
-39050757,0,irrelevant_other,,0009-0009-5240-7463,10/1/2024
-39064021,0,irrelevant_other,,0009-0009-5240-7463,10/1/2024
-39028894,0,not_identifiers_resource,,0009-0009-5240-7463,10/4/2024
-39044201,0,not_identifiers_resource,"Potential resource for rare diseases identifiers, but not identifier information",0009-0009-5240-7463,10/4/2024
-39088253,0,irrelevant_other,,0009-0009-5240-7463,10/5/2024
-39119155,0,irrelevant_other,,0009-0009-5240-7463,10/5/2024
-39005357,0,irrelevant_other,,0009-0009-5240-7463,10/5/2024
-39044130,0,irrelevant_other,,0009-0009-5240-7463,10/5/2024
-39010878,0,irrelevant_other,,0009-0009-5240-7463,10/5/2024
\ No newline at end of file
diff --git a/src/bioregistry/data/curated_papers.txt b/src/bioregistry/data/curated_papers.txt
new file mode 100644
index 000000000..fbba71d80
--- /dev/null
+++ b/src/bioregistry/data/curated_papers.txt
@@ -0,0 +1,21 @@
+pmid	relevant	relevancy_type	notes	orcid   date_curated    
+39104285	1	new_provider	"Provider for UniProt IDs, issue with curation due to multiple URI formats depending on plant species"	0009-0009-5240-7463	2024-09-24
+39074139	1	new_provider	Resolver for PDB IDs	0009-0009-5240-7463	2024-09-24
+39014503	0	no_website		0009-0009-5240-7463	2024-09-25
+39047988	0	irrelevant_other		0009-0009-5240-7463	2024-09-25
+39115390	0	irrelevant_other		0009-0009-5240-7463	2024-09-26
+39095357	0	irrelevant_other		0009-0009-5240-7463	2024-09-26
+39084442	0	not_identifiers_resource		0009-0009-5240-7463	2024-09-27
+38991851	1	unclear	"identifiers for sharing, retrieving, and validating sample metadata. Unclear if this should be curated as a prefix, provider or a separate registry"	0009-0009-5240-7463	2024-09-28
+38991828	0	irrelevant_other		0009-0009-5240-7463	2024-09-28
+39049520	0	not_identifiers_resource		0009-0009-5240-7463	2024-09-30
+39104826	1	existing	Already present in the bioregistry as a provider for mesh prefix	0009-0009-5240-7463	2024-10-01
+39050757	0	irrelevant_other		0009-0009-5240-7463	2024-10-01
+39064021	0	irrelevant_other		0009-0009-5240-7463	2024-10-01
+39028894	0	not_identifiers_resource		0009-0009-5240-7463	2024-10-04
+39044201	0	not_identifiers_resource	Potential resource for rare diseases identifiers, but not identifier information	0009-0009-5240-7463	2024-10-04
+39088253	0	irrelevant_other		0009-0009-5240-7463	2024-10-05
+39119155	0	irrelevant_other		0009-0009-5240-7463	2024-10-05
+39005357	0	irrelevant_other		0009-0009-5240-7463	2024-10-05
+39044130	0	irrelevant_other		0009-0009-5240-7463	2024-10-05
+39010878	0	irrelevant_other		0009-0009-5240-7463	2024-10-05
diff --git a/tests/test_curated_papers.py b/tests/test_curated_papers.py
new file mode 100644
index 000000000..da4fd3934
--- /dev/null
+++ b/tests/test_curated_papers.py
@@ -0,0 +1,57 @@
+# -*- coding: utf-8 -*-
+
+"""Test for checking the integrity of the curated_papers TSV file."""
+
+import csv
+from datetime import datetime
+import re
+import unittest
+
+from bioregistry.constants import CURATED_PAPERS_PATH, ORCID_PATTERN, CURATED_PAPERS_RELEVANCY_VOCAB
+
+
+class TestTSV(unittest.TestCase):
+    """Tests for curated_papers tsv file"""
+
+    def setUp(self):
+        """Set up the test case."""
+        self.tsv_file_path = CURATED_PAPERS_PATH
+        self.relevancy_vocab = CURATED_PAPERS_RELEVANCY_VOCAB
+        self.orcid_pattern = re.compile(ORCID_PATTERN)
+    
+    def validate_row(self, row):
+        """Validates a single row from the TSV file"""
+        
+        # Validate required fields
+        required_fields = ["pmid", "relevant", "relevancy_type", "orcid", "date_curated"]
+        for field in required_fields:
+            self.assertIn(field, row)
+
+        # Validate pmid is an integer
+        self.assertTrue(row["pmid"].isdigit())
+
+        # Validate relevant is 0 or 1
+        self.assertIn(row["relevant"], ["0", "1"])
+        
+        # Validate relevancy_type is in relevancy_vocab
+        self.assertIn(row["relevancy_type"], self.relevancy_vocab)
+        
+        # Validate orcid against oricd_pattern
+        self.assertTrue(self.orcid_pattern.match(row["orcid"]))
+        
+        # Validate date_curated format
+        try:
+            datetime.strptime(row["date_curated"], "%Y-%m-%d")
+        except ValueError:
+            self.fail(f"Date_curated should follow format YYYY-MM-DD")
+        
+    def test_tsv_file(self):
+        """Tests all rows in TSV file are valid"""
+        with open(self.tsv_file_path, mode='r') as tsv_file:
+            tsv_reader = csv.DictReader(tsv_file, delimiter='\t')
+            for row in tsv_reader:
+                with self.subTest(row=row):
+                    self.validate_row(row)
+
+if __name__ == '__main__':
+    unittest.main()
\ No newline at end of file

From a2f731bcba445a8c118ca2bc05e35d3b7f5e966a Mon Sep 17 00:00:00 2001
From: Mufaddal Naguthanawala <m.nguthana@hotmail.com>
Date: Fri, 11 Oct 2024 14:39:22 -0400
Subject: [PATCH 09/22] fix style issues

---
 src/bioregistry/constants.py            | 17 ++++++------
 src/bioregistry/data/curated_papers.txt |  2 +-
 tests/test_curated_papers.py            | 36 ++++++++++++++-----------
 3 files changed, 29 insertions(+), 26 deletions(-)

diff --git a/src/bioregistry/constants.py b/src/bioregistry/constants.py
index 5f4b7f7d5..ab14b2957 100644
--- a/src/bioregistry/constants.py
+++ b/src/bioregistry/constants.py
@@ -19,7 +19,6 @@
     "MISMATCH_PATH",
     "BIOREGISTRY_MODULE",
     "RAW_DIRECTORY",
-    "CURATED_PAPERS_PATH"
 ]
 
 PYDANTIC_1 = importlib.metadata.version("pydantic").startswith("1.")
@@ -36,14 +35,14 @@
 CONTEXTS_PATH = DATA_DIRECTORY / "contexts.json"
 CURATED_PAPERS_PATH = DATA_DIRECTORY / "curated_papers.txt"
 CURATED_PAPERS_RELEVANCY_VOCAB = [
-    "new_prefix", 
-    "new_provider", 
-    "new_publication", 
-    "not_identifiers_resource", 
-    "no_website", 
-    "existing", 
-    "unclear", 
-    "irrelevant_other"
+    "new_prefix",
+    "new_provider",
+    "new_publication",
+    "not_identifiers_resource",
+    "no_website",
+    "existing",
+    "unclear",
+    "irrelevant_other",
 ]
 
 BIOREGISTRY_MODULE = pystow.module("bioregistry")
diff --git a/src/bioregistry/data/curated_papers.txt b/src/bioregistry/data/curated_papers.txt
index fbba71d80..5455d7050 100644
--- a/src/bioregistry/data/curated_papers.txt
+++ b/src/bioregistry/data/curated_papers.txt
@@ -1,4 +1,4 @@
-pmid	relevant	relevancy_type	notes	orcid   date_curated    
+pmid	relevant	relevancy_type	notes	orcid	date_curated
 39104285	1	new_provider	"Provider for UniProt IDs, issue with curation due to multiple URI formats depending on plant species"	0009-0009-5240-7463	2024-09-24
 39074139	1	new_provider	Resolver for PDB IDs	0009-0009-5240-7463	2024-09-24
 39014503	0	no_website		0009-0009-5240-7463	2024-09-25
diff --git a/tests/test_curated_papers.py b/tests/test_curated_papers.py
index da4fd3934..434487909 100644
--- a/tests/test_curated_papers.py
+++ b/tests/test_curated_papers.py
@@ -3,25 +3,28 @@
 """Test for checking the integrity of the curated_papers TSV file."""
 
 import csv
-from datetime import datetime
 import re
 import unittest
+from datetime import datetime
 
-from bioregistry.constants import CURATED_PAPERS_PATH, ORCID_PATTERN, CURATED_PAPERS_RELEVANCY_VOCAB
+from bioregistry.constants import (
+    CURATED_PAPERS_PATH,
+    CURATED_PAPERS_RELEVANCY_VOCAB,
+    ORCID_PATTERN,
+)
 
 
 class TestTSV(unittest.TestCase):
-    """Tests for curated_papers tsv file"""
+    """Tests for curated_papers tsv file."""
 
     def setUp(self):
         """Set up the test case."""
         self.tsv_file_path = CURATED_PAPERS_PATH
         self.relevancy_vocab = CURATED_PAPERS_RELEVANCY_VOCAB
         self.orcid_pattern = re.compile(ORCID_PATTERN)
-    
+
     def validate_row(self, row):
-        """Validates a single row from the TSV file"""
-        
+        """Validate a single row from the TSV file."""
         # Validate required fields
         required_fields = ["pmid", "relevant", "relevancy_type", "orcid", "date_curated"]
         for field in required_fields:
@@ -32,26 +35,27 @@ def validate_row(self, row):
 
         # Validate relevant is 0 or 1
         self.assertIn(row["relevant"], ["0", "1"])
-        
+
         # Validate relevancy_type is in relevancy_vocab
         self.assertIn(row["relevancy_type"], self.relevancy_vocab)
-        
+
         # Validate orcid against oricd_pattern
         self.assertTrue(self.orcid_pattern.match(row["orcid"]))
-        
+
         # Validate date_curated format
         try:
             datetime.strptime(row["date_curated"], "%Y-%m-%d")
         except ValueError:
-            self.fail(f"Date_curated should follow format YYYY-MM-DD")
-        
+            self.fail("Date_curated should follow format YYYY-MM-DD")
+
     def test_tsv_file(self):
-        """Tests all rows in TSV file are valid"""
-        with open(self.tsv_file_path, mode='r') as tsv_file:
-            tsv_reader = csv.DictReader(tsv_file, delimiter='\t')
+        """Tests all rows in TSV file are valid."""
+        with open(self.tsv_file_path, mode="r") as tsv_file:
+            tsv_reader = csv.DictReader(tsv_file, delimiter="\t")
             for row in tsv_reader:
                 with self.subTest(row=row):
                     self.validate_row(row)
 
-if __name__ == '__main__':
-    unittest.main()
\ No newline at end of file
+
+if __name__ == "__main__":
+    unittest.main()

From c74c63afa5a363df7ade5fdc817c442de67fa118 Mon Sep 17 00:00:00 2001
From: Charles Tapley Hoyt <cthoyt@gmail.com>
Date: Sat, 12 Oct 2024 15:01:18 +0200
Subject: [PATCH 10/22] Refactor

---
 src/bioregistry/constants.py            |  2 +-
 src/bioregistry/data/curated_papers.tsv | 21 ++++++++++
 src/bioregistry/data/curated_papers.txt | 21 ----------
 tests/test_curated_papers.py            | 51 ++++++++++++++++---------
 4 files changed, 54 insertions(+), 41 deletions(-)
 create mode 100644 src/bioregistry/data/curated_papers.tsv
 delete mode 100644 src/bioregistry/data/curated_papers.txt

diff --git a/src/bioregistry/constants.py b/src/bioregistry/constants.py
index ab14b2957..22d8515e0 100644
--- a/src/bioregistry/constants.py
+++ b/src/bioregistry/constants.py
@@ -33,7 +33,7 @@
 COLLECTIONS_PATH = DATA_DIRECTORY / "collections.json"
 MISMATCH_PATH = DATA_DIRECTORY / "mismatch.json"
 CONTEXTS_PATH = DATA_DIRECTORY / "contexts.json"
-CURATED_PAPERS_PATH = DATA_DIRECTORY / "curated_papers.txt"
+CURATED_PAPERS_PATH = DATA_DIRECTORY / "curated_papers.tsv"
 CURATED_PAPERS_RELEVANCY_VOCAB = [
     "new_prefix",
     "new_provider",
diff --git a/src/bioregistry/data/curated_papers.tsv b/src/bioregistry/data/curated_papers.tsv
new file mode 100644
index 000000000..fae1446ac
--- /dev/null
+++ b/src/bioregistry/data/curated_papers.tsv
@@ -0,0 +1,21 @@
+pmid	relevant	relevancy_type	prefix	notes	pr_added	orcid	date_curated
+39104285	1	new_provider		Provider for UniProt IDs, issue with curation due to multiple URI formats depending on plant species	1193	0009-0009-5240-7463	2024-09-24
+39074139	1	new_provider		Resolver for PDB IDs		0009-0009-5240-7463	2024-09-24
+39014503	0	no_website				0009-0009-5240-7463	2024-09-25
+39047988	0	irrelevant_other				0009-0009-5240-7463	2024-09-25
+39115390	0	irrelevant_other				0009-0009-5240-7463	2024-09-26
+39095357	0	irrelevant_other				0009-0009-5240-7463	2024-09-26
+39084442	0	not_identifiers_resource				0009-0009-5240-7463	2024-09-27
+38991851	1	unclear		identifiers for sharing, retrieving, and validating sample metadata. Unclear if this should be curated as a prefix, provider or a separate registry		0009-0009-5240-7463	2024-09-28
+38991828	0	irrelevant_other				0009-0009-5240-7463	2024-09-28
+39049520	0	not_identifiers_resource				0009-0009-5240-7463	2024-09-30
+39104826	1	existing		Already present in the bioregistry as a provider for mesh prefix		0009-0009-5240-7463	2024-10-01
+39050757	0	irrelevant_other				0009-0009-5240-7463	2024-10-01
+39064021	0	irrelevant_other				0009-0009-5240-7463	2024-10-01
+39028894	0	not_identifiers_resource				0009-0009-5240-7463	2024-10-04
+39044201	0	not_identifiers_resource		Potential resource for rare diseases identifiers, but not identifier information		0009-0009-5240-7463	2024-10-04
+39088253	0	irrelevant_other				0009-0009-5240-7463	2024-10-05
+39119155	0	irrelevant_other				0009-0009-5240-7463	2024-10-05
+39005357	0	irrelevant_other				0009-0009-5240-7463	2024-10-05
+39044130	0	irrelevant_other				0009-0009-5240-7463	2024-10-05
+39010878	0	irrelevant_other				0009-0009-5240-7463	2024-10-05
diff --git a/src/bioregistry/data/curated_papers.txt b/src/bioregistry/data/curated_papers.txt
deleted file mode 100644
index 5455d7050..000000000
--- a/src/bioregistry/data/curated_papers.txt
+++ /dev/null
@@ -1,21 +0,0 @@
-pmid	relevant	relevancy_type	notes	orcid	date_curated
-39104285	1	new_provider	"Provider for UniProt IDs, issue with curation due to multiple URI formats depending on plant species"	0009-0009-5240-7463	2024-09-24
-39074139	1	new_provider	Resolver for PDB IDs	0009-0009-5240-7463	2024-09-24
-39014503	0	no_website		0009-0009-5240-7463	2024-09-25
-39047988	0	irrelevant_other		0009-0009-5240-7463	2024-09-25
-39115390	0	irrelevant_other		0009-0009-5240-7463	2024-09-26
-39095357	0	irrelevant_other		0009-0009-5240-7463	2024-09-26
-39084442	0	not_identifiers_resource		0009-0009-5240-7463	2024-09-27
-38991851	1	unclear	"identifiers for sharing, retrieving, and validating sample metadata. Unclear if this should be curated as a prefix, provider or a separate registry"	0009-0009-5240-7463	2024-09-28
-38991828	0	irrelevant_other		0009-0009-5240-7463	2024-09-28
-39049520	0	not_identifiers_resource		0009-0009-5240-7463	2024-09-30
-39104826	1	existing	Already present in the bioregistry as a provider for mesh prefix	0009-0009-5240-7463	2024-10-01
-39050757	0	irrelevant_other		0009-0009-5240-7463	2024-10-01
-39064021	0	irrelevant_other		0009-0009-5240-7463	2024-10-01
-39028894	0	not_identifiers_resource		0009-0009-5240-7463	2024-10-04
-39044201	0	not_identifiers_resource	Potential resource for rare diseases identifiers, but not identifier information	0009-0009-5240-7463	2024-10-04
-39088253	0	irrelevant_other		0009-0009-5240-7463	2024-10-05
-39119155	0	irrelevant_other		0009-0009-5240-7463	2024-10-05
-39005357	0	irrelevant_other		0009-0009-5240-7463	2024-10-05
-39044130	0	irrelevant_other		0009-0009-5240-7463	2024-10-05
-39010878	0	irrelevant_other		0009-0009-5240-7463	2024-10-05
diff --git a/tests/test_curated_papers.py b/tests/test_curated_papers.py
index 434487909..11e8e0104 100644
--- a/tests/test_curated_papers.py
+++ b/tests/test_curated_papers.py
@@ -3,59 +3,72 @@
 """Test for checking the integrity of the curated_papers TSV file."""
 
 import csv
-import re
 import unittest
 from datetime import datetime
 
+import bioregistry
 from bioregistry.constants import (
     CURATED_PAPERS_PATH,
     CURATED_PAPERS_RELEVANCY_VOCAB,
     ORCID_PATTERN,
 )
 
+required_fields = [
+    "pmid",
+    "relevant",
+    "relevancy_type",
+    "orcid",
+    "date_curated",
+    "notes",
+    "pr_added",  # links back to the PR where curations were done
+]
+
 
 class TestTSV(unittest.TestCase):
     """Tests for curated_papers tsv file."""
 
-    def setUp(self):
-        """Set up the test case."""
-        self.tsv_file_path = CURATED_PAPERS_PATH
-        self.relevancy_vocab = CURATED_PAPERS_RELEVANCY_VOCAB
-        self.orcid_pattern = re.compile(ORCID_PATTERN)
-
     def validate_row(self, row):
         """Validate a single row from the TSV file."""
         # Validate required fields
-        required_fields = ["pmid", "relevant", "relevancy_type", "orcid", "date_curated"]
+
         for field in required_fields:
             self.assertIn(field, row)
 
-        # Validate pmid is an integer
-        self.assertTrue(row["pmid"].isdigit())
+        self.assertTrue(row["pmid"].isdigit(), msg="PubMed identifier should be an integer")
+        self.assertTrue(row["pr_added"].isdigit(), msg="Pull Request should be an integer")
 
         # Validate relevant is 0 or 1
         self.assertIn(row["relevant"], ["0", "1"])
 
+        if row["relevant"] == "1":
+            prefix = row["prefix"]
+            self.assertIsNotNone(prefix, msg="prefix should be set for all relevant entries")
+            self.assertNotEqual("", prefix, msg="prefix should not be empty for relevant entries")
+            self.assertEqual(
+                bioregistry.normalize_prefix(prefix),
+                prefix,
+                msg="prefix should be standardized for relevant entries",
+            )
+
         # Validate relevancy_type is in relevancy_vocab
-        self.assertIn(row["relevancy_type"], self.relevancy_vocab)
+        self.assertIn(row["relevancy_type"], CURATED_PAPERS_RELEVANCY_VOCAB)
 
-        # Validate orcid against oricd_pattern
-        self.assertTrue(self.orcid_pattern.match(row["orcid"]))
+        self.assertRegex(row["orcid"], ORCID_PATTERN)
+
+        self.assertFalse(row["notes"].startswith('"'))
+        self.assertFalse(row["notes"].endswith('"'))
 
         # Validate date_curated format
         try:
             datetime.strptime(row["date_curated"], "%Y-%m-%d")
         except ValueError:
-            self.fail("Date_curated should follow format YYYY-MM-DD")
+            self.fail("date_curated should follow format YYYY-MM-DD")
 
     def test_tsv_file(self):
         """Tests all rows in TSV file are valid."""
-        with open(self.tsv_file_path, mode="r") as tsv_file:
+        with CURATED_PAPERS_PATH.open() as tsv_file:
             tsv_reader = csv.DictReader(tsv_file, delimiter="\t")
             for row in tsv_reader:
+                print(row)
                 with self.subTest(row=row):
                     self.validate_row(row)
-
-
-if __name__ == "__main__":
-    unittest.main()

From 0d7a8bf20add4bc6539ec24634b0459ad20e74a7 Mon Sep 17 00:00:00 2001
From: Charles Tapley Hoyt <cthoyt@gmail.com>
Date: Sat, 12 Oct 2024 15:17:05 +0200
Subject: [PATCH 11/22] Add example full rows

---
 src/bioregistry/data/curated_papers.tsv | 6 +++---
 tests/test_curated_papers.py            | 9 ++++-----
 2 files changed, 7 insertions(+), 8 deletions(-)

diff --git a/src/bioregistry/data/curated_papers.tsv b/src/bioregistry/data/curated_papers.tsv
index fae1446ac..5c5c4875f 100644
--- a/src/bioregistry/data/curated_papers.tsv
+++ b/src/bioregistry/data/curated_papers.tsv
@@ -1,7 +1,7 @@
 pmid	relevant	relevancy_type	prefix	notes	pr_added	orcid	date_curated
-39104285	1	new_provider		Provider for UniProt IDs, issue with curation due to multiple URI formats depending on plant species	1193	0009-0009-5240-7463	2024-09-24
-39074139	1	new_provider		Resolver for PDB IDs		0009-0009-5240-7463	2024-09-24
-39014503	0	no_website				0009-0009-5240-7463	2024-09-25
+39104285	1	new_provider	uniprot	Provider for UniProt IDs, issue with curation due to multiple URI formats depending on plant species	1193	0009-0009-5240-7463	2024-09-24
+39074139	1	new_provider	pdb	Resolver for PDB IDs	1193	0009-0009-5240-7463	2024-09-24
+39014503	0	no_website			1193	0009-0009-5240-7463	2024-09-25
 39047988	0	irrelevant_other				0009-0009-5240-7463	2024-09-25
 39115390	0	irrelevant_other				0009-0009-5240-7463	2024-09-26
 39095357	0	irrelevant_other				0009-0009-5240-7463	2024-09-26
diff --git a/tests/test_curated_papers.py b/tests/test_curated_papers.py
index 11e8e0104..436203da3 100644
--- a/tests/test_curated_papers.py
+++ b/tests/test_curated_papers.py
@@ -67,8 +67,7 @@ def validate_row(self, row):
     def test_tsv_file(self):
         """Tests all rows in TSV file are valid."""
         with CURATED_PAPERS_PATH.open() as tsv_file:
-            tsv_reader = csv.DictReader(tsv_file, delimiter="\t")
-            for row in tsv_reader:
-                print(row)
-                with self.subTest(row=row):
-                    self.validate_row(row)
+            reader = csv.DictReader(tsv_file, delimiter="\t")
+            for row, data in enumerate(reader, start=1):
+                with self.subTest(row=row, data=data):
+                    self.validate_row(data)

From 3587530ed715f207dd9eea909758246cd5a0abbb Mon Sep 17 00:00:00 2001
From: Charles Tapley Hoyt <cthoyt@gmail.com>
Date: Sat, 12 Oct 2024 15:26:48 +0200
Subject: [PATCH 12/22] Centralize code into reusable module

---
 src/bioregistry/constants.py           | 10 ---------
 src/bioregistry/curation/literature.py | 31 ++++++++++++++++++++++++++
 tests/test_curated_papers.py           | 27 +++++++---------------
 3 files changed, 39 insertions(+), 29 deletions(-)
 create mode 100644 src/bioregistry/curation/literature.py

diff --git a/src/bioregistry/constants.py b/src/bioregistry/constants.py
index 22d8515e0..6041ab787 100644
--- a/src/bioregistry/constants.py
+++ b/src/bioregistry/constants.py
@@ -34,16 +34,6 @@
 MISMATCH_PATH = DATA_DIRECTORY / "mismatch.json"
 CONTEXTS_PATH = DATA_DIRECTORY / "contexts.json"
 CURATED_PAPERS_PATH = DATA_DIRECTORY / "curated_papers.tsv"
-CURATED_PAPERS_RELEVANCY_VOCAB = [
-    "new_prefix",
-    "new_provider",
-    "new_publication",
-    "not_identifiers_resource",
-    "no_website",
-    "existing",
-    "unclear",
-    "irrelevant_other",
-]
 
 BIOREGISTRY_MODULE = pystow.module("bioregistry")
 
diff --git a/src/bioregistry/curation/literature.py b/src/bioregistry/curation/literature.py
new file mode 100644
index 000000000..858b68e40
--- /dev/null
+++ b/src/bioregistry/curation/literature.py
@@ -0,0 +1,31 @@
+"""Utilities for working with the data produced by the semi-automated curation workflow."""
+
+import enum
+
+__all__ = [
+    "CurationRelevance",
+    "COLUMNS",
+]
+
+COLUMNS = [
+    "pmid",
+    "relevant",
+    "relevancy_type",
+    "orcid",
+    "date_curated",
+    "notes",
+    "pr_added",  # links back to the PR where curations were done
+]
+
+
+class CurationRelevance(str, enum.Enum):
+    """An enumeration for curation relevance."""
+
+    new_prefix = enum.auto()
+    new_provider = enum.auto()
+    new_publication = enum.auto()
+    not_identifiers_resource = enum.auto()
+    no_website = enum.auto()
+    existing = enum.auto()
+    unclear = enum.auto()
+    irrelevant_other = enum.auto()
diff --git a/tests/test_curated_papers.py b/tests/test_curated_papers.py
index 436203da3..72dec545f 100644
--- a/tests/test_curated_papers.py
+++ b/tests/test_curated_papers.py
@@ -7,31 +7,20 @@
 from datetime import datetime
 
 import bioregistry
-from bioregistry.constants import (
-    CURATED_PAPERS_PATH,
-    CURATED_PAPERS_RELEVANCY_VOCAB,
-    ORCID_PATTERN,
-)
-
-required_fields = [
-    "pmid",
-    "relevant",
-    "relevancy_type",
-    "orcid",
-    "date_curated",
-    "notes",
-    "pr_added",  # links back to the PR where curations were done
-]
+from bioregistry.constants import CURATED_PAPERS_PATH, ORCID_PATTERN
+from bioregistry.curation.literature import CurationRelevance, COLUMNS
 
 
 class TestTSV(unittest.TestCase):
     """Tests for curated_papers tsv file."""
 
+    def setUp(self):
+        """Set up the test case."""
+        self.relevancy_types = {r.name for r in CurationRelevance}
+
     def validate_row(self, row):
         """Validate a single row from the TSV file."""
-        # Validate required fields
-
-        for field in required_fields:
+        for field in COLUMNS:
             self.assertIn(field, row)
 
         self.assertTrue(row["pmid"].isdigit(), msg="PubMed identifier should be an integer")
@@ -51,7 +40,7 @@ def validate_row(self, row):
             )
 
         # Validate relevancy_type is in relevancy_vocab
-        self.assertIn(row["relevancy_type"], CURATED_PAPERS_RELEVANCY_VOCAB)
+        self.assertIn(row["relevancy_type"], self.relevancy_types)
 
         self.assertRegex(row["orcid"], ORCID_PATTERN)
 

From b3e6c0ae7a2c2c102ee24040d53f497b4d09604c Mon Sep 17 00:00:00 2001
From: Charles Tapley Hoyt <cthoyt@gmail.com>
Date: Sat, 12 Oct 2024 15:31:24 +0200
Subject: [PATCH 13/22] Add TODO

---
 src/bioregistry/curation/literature.py | 12 ++++++++++++
 tests/test_curated_papers.py           |  2 +-
 2 files changed, 13 insertions(+), 1 deletion(-)

diff --git a/src/bioregistry/curation/literature.py b/src/bioregistry/curation/literature.py
index 858b68e40..6ecf6ebaf 100644
--- a/src/bioregistry/curation/literature.py
+++ b/src/bioregistry/curation/literature.py
@@ -2,6 +2,8 @@
 
 import enum
 
+import click
+
 __all__ = [
     "CurationRelevance",
     "COLUMNS",
@@ -29,3 +31,13 @@ class CurationRelevance(str, enum.Enum):
     existing = enum.auto()
     unclear = enum.auto()
     irrelevant_other = enum.auto()
+
+
+@click.command()
+def main():
+    """Import data from the literature curation into the Bioregistry."""
+    raise NotImplementedError
+
+
+if __name__ == "__main__":
+    main()
diff --git a/tests/test_curated_papers.py b/tests/test_curated_papers.py
index 72dec545f..55a55707d 100644
--- a/tests/test_curated_papers.py
+++ b/tests/test_curated_papers.py
@@ -8,7 +8,7 @@
 
 import bioregistry
 from bioregistry.constants import CURATED_PAPERS_PATH, ORCID_PATTERN
-from bioregistry.curation.literature import CurationRelevance, COLUMNS
+from bioregistry.curation.literature import COLUMNS, CurationRelevance
 
 
 class TestTSV(unittest.TestCase):

From 7bcf7e93afdf49f2ef1c0c1136f1d92535ef96a3 Mon Sep 17 00:00:00 2001
From: Charles Tapley Hoyt <cthoyt@gmail.com>
Date: Sat, 12 Oct 2024 15:35:38 +0200
Subject: [PATCH 14/22] Add documentation

---
 docs/source/curation.rst               | 3 +++
 docs/source/index.rst                  | 1 +
 src/bioregistry/curation/literature.py | 1 +
 3 files changed, 5 insertions(+)
 create mode 100644 docs/source/curation.rst

diff --git a/docs/source/curation.rst b/docs/source/curation.rst
new file mode 100644
index 000000000..0006caff3
--- /dev/null
+++ b/docs/source/curation.rst
@@ -0,0 +1,3 @@
+Curation
+========
+.. automodapi:: bioregistry.curation.literature
diff --git a/docs/source/index.rst b/docs/source/index.rst
index 79b226c5a..f780a1d93 100644
--- a/docs/source/index.rst
+++ b/docs/source/index.rst
@@ -57,6 +57,7 @@ To install in development mode, use the following:
    cli
    pandas
    deployment
+   curation
 
 Indices and Tables
 ------------------
diff --git a/src/bioregistry/curation/literature.py b/src/bioregistry/curation/literature.py
index 6ecf6ebaf..10fb0e227 100644
--- a/src/bioregistry/curation/literature.py
+++ b/src/bioregistry/curation/literature.py
@@ -23,6 +23,7 @@
 class CurationRelevance(str, enum.Enum):
     """An enumeration for curation relevance."""
 
+    #: A resource for new primary identifiers
     new_prefix = enum.auto()
     new_provider = enum.auto()
     new_publication = enum.auto()

From 7e4cc4d23446af260b7c85196f60e5286be37e40 Mon Sep 17 00:00:00 2001
From: Charles Tapley Hoyt <cthoyt@gmail.com>
Date: Sat, 12 Oct 2024 15:39:23 +0200
Subject: [PATCH 15/22] Update curation.rst

---
 docs/source/curation.rst | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/docs/source/curation.rst b/docs/source/curation.rst
index 0006caff3..0cb3940a1 100644
--- a/docs/source/curation.rst
+++ b/docs/source/curation.rst
@@ -1,3 +1,11 @@
 Curation
 ========
+There are several curation workflows implemented in :mod:`bioregistry.curation`.
+
+Bulk Import
+-----------
+.. automodapi:: bioregistry.curation.bulk_import
+
+Semi-automated Literature Curation
+----------------------------------
 .. automodapi:: bioregistry.curation.literature

From 50ec62b47c3ccb2f44ffbedea3d636ddbf6d2a39 Mon Sep 17 00:00:00 2001
From: Mufaddal Naguthanawala <m.nguthana@hotmail.com>
Date: Tue, 15 Oct 2024 15:04:40 -0400
Subject: [PATCH 16/22] remove prefix column from TSV file and simplify
 relevancy check

---
 src/bioregistry/data/curated_papers.tsv | 42 ++++++++++++-------------
 tests/test_curated_papers.py            |  7 ++++-
 2 files changed, 27 insertions(+), 22 deletions(-)

diff --git a/src/bioregistry/data/curated_papers.tsv b/src/bioregistry/data/curated_papers.tsv
index 5c5c4875f..6388d4ecd 100644
--- a/src/bioregistry/data/curated_papers.tsv
+++ b/src/bioregistry/data/curated_papers.tsv
@@ -1,21 +1,21 @@
-pmid	relevant	relevancy_type	prefix	notes	pr_added	orcid	date_curated
-39104285	1	new_provider	uniprot	Provider for UniProt IDs, issue with curation due to multiple URI formats depending on plant species	1193	0009-0009-5240-7463	2024-09-24
-39074139	1	new_provider	pdb	Resolver for PDB IDs	1193	0009-0009-5240-7463	2024-09-24
-39014503	0	no_website			1193	0009-0009-5240-7463	2024-09-25
-39047988	0	irrelevant_other				0009-0009-5240-7463	2024-09-25
-39115390	0	irrelevant_other				0009-0009-5240-7463	2024-09-26
-39095357	0	irrelevant_other				0009-0009-5240-7463	2024-09-26
-39084442	0	not_identifiers_resource				0009-0009-5240-7463	2024-09-27
-38991851	1	unclear		identifiers for sharing, retrieving, and validating sample metadata. Unclear if this should be curated as a prefix, provider or a separate registry		0009-0009-5240-7463	2024-09-28
-38991828	0	irrelevant_other				0009-0009-5240-7463	2024-09-28
-39049520	0	not_identifiers_resource				0009-0009-5240-7463	2024-09-30
-39104826	1	existing		Already present in the bioregistry as a provider for mesh prefix		0009-0009-5240-7463	2024-10-01
-39050757	0	irrelevant_other				0009-0009-5240-7463	2024-10-01
-39064021	0	irrelevant_other				0009-0009-5240-7463	2024-10-01
-39028894	0	not_identifiers_resource				0009-0009-5240-7463	2024-10-04
-39044201	0	not_identifiers_resource		Potential resource for rare diseases identifiers, but not identifier information		0009-0009-5240-7463	2024-10-04
-39088253	0	irrelevant_other				0009-0009-5240-7463	2024-10-05
-39119155	0	irrelevant_other				0009-0009-5240-7463	2024-10-05
-39005357	0	irrelevant_other				0009-0009-5240-7463	2024-10-05
-39044130	0	irrelevant_other				0009-0009-5240-7463	2024-10-05
-39010878	0	irrelevant_other				0009-0009-5240-7463	2024-10-05
+pmid	relevant	relevancy_type	notes	pr_added	orcid	date_curated
+39104285	1	new_provider	Provider for UniProt IDs, issue with curation due to multiple URI formats depending on plant species	1193	0009-0009-5240-7463	2024-09-24
+39074139	1	new_provider	Resolver for PDB IDs	1193	0009-0009-5240-7463	2024-09-24
+39014503	0	no_website		1193	0009-0009-5240-7463	2024-09-25
+39047988	0	irrelevant_other			0009-0009-5240-7463	2024-09-25
+39115390	0	irrelevant_other			0009-0009-5240-7463	2024-09-26
+39095357	0	irrelevant_other			0009-0009-5240-7463	2024-09-26
+39084442	0	not_identifiers_resource			0009-0009-5240-7463	2024-09-27
+38991851	1	unclear	identifiers for sharing, retrieving, and validating sample metadata. Unclear if this should be curated as a prefix, provider or a separate registry		0009-0009-5240-7463	2024-09-28
+38991828	0	irrelevant_other			0009-0009-5240-7463	2024-09-28
+39049520	0	not_identifiers_resource			0009-0009-5240-7463	2024-09-30
+39104826	1	existing	Already present in the bioregistry as a provider for mesh prefix		0009-0009-5240-7463	2024-10-01
+39050757	0	irrelevant_other			0009-0009-5240-7463	2024-10-01
+39064021	0	irrelevant_other			0009-0009-5240-7463	2024-10-01
+39028894	0	not_identifiers_resource			0009-0009-5240-7463	2024-10-04
+39044201	0	not_identifiers_resource	Potential resource for rare diseases identifiers, but not identifier information		0009-0009-5240-7463	2024-10-04
+39088253	0	irrelevant_other			0009-0009-5240-7463	2024-10-05
+39119155	0	irrelevant_other			0009-0009-5240-7463	2024-10-05
+39005357	0	irrelevant_other			0009-0009-5240-7463	2024-10-05
+39044130	0	irrelevant_other			0009-0009-5240-7463	2024-10-05
+39010878	0	irrelevant_other			0009-0009-5240-7463	2024-10-05
diff --git a/tests/test_curated_papers.py b/tests/test_curated_papers.py
index 55a55707d..7d095e22c 100644
--- a/tests/test_curated_papers.py
+++ b/tests/test_curated_papers.py
@@ -6,7 +6,7 @@
 import unittest
 from datetime import datetime
 
-import bioregistry
+# import bioregistry
 from bioregistry.constants import CURATED_PAPERS_PATH, ORCID_PATTERN
 from bioregistry.curation.literature import COLUMNS, CurationRelevance
 
@@ -29,6 +29,10 @@ def validate_row(self, row):
         # Validate relevant is 0 or 1
         self.assertIn(row["relevant"], ["0", "1"])
 
+        """
+        Commenting out this check for now. This can be re-implemented if a need
+        for it arises in the future
+
         if row["relevant"] == "1":
             prefix = row["prefix"]
             self.assertIsNotNone(prefix, msg="prefix should be set for all relevant entries")
@@ -38,6 +42,7 @@ def validate_row(self, row):
                 prefix,
                 msg="prefix should be standardized for relevant entries",
             )
+        """
 
         # Validate relevancy_type is in relevancy_vocab
         self.assertIn(row["relevancy_type"], self.relevancy_types)

From 7816e3b939759b567ec9929d50ebb0a85e6b8c59 Mon Sep 17 00:00:00 2001
From: Mufaddal Naguthanawala <m.nguthana@hotmail.com>
Date: Thu, 17 Oct 2024 00:00:01 -0400
Subject: [PATCH 17/22] Add docstrings for CurationRelevance and re-order TSV
 file

---
 src/bioregistry/curation/literature.py  |  9 +++++-
 src/bioregistry/data/curated_papers.tsv | 42 ++++++++++++-------------
 2 files changed, 29 insertions(+), 22 deletions(-)

diff --git a/src/bioregistry/curation/literature.py b/src/bioregistry/curation/literature.py
index 10fb0e227..17478b8b7 100644
--- a/src/bioregistry/curation/literature.py
+++ b/src/bioregistry/curation/literature.py
@@ -13,10 +13,10 @@
     "pmid",
     "relevant",
     "relevancy_type",
+    "pr_added",  # links back to the PR where curations were done
     "orcid",
     "date_curated",
     "notes",
-    "pr_added",  # links back to the PR where curations were done
 ]
 
 
@@ -25,12 +25,19 @@ class CurationRelevance(str, enum.Enum):
 
     #: A resource for new primary identifiers
     new_prefix = enum.auto()
+    #: A resolver for existing identifiers
     new_provider = enum.auto()
+    #: A new publication for an existing prefix
     new_publication = enum.auto()
+    #: A database, but not for identifier information
     not_identifiers_resource = enum.auto()
+    #: Paper suggestive of a new database, but no link to website provided
     no_website = enum.auto()
+    #: An existing entry in the bioregistry
     existing = enum.auto()
+    #: Not clear how to curate in the bioregistry, follow up discussion required
     unclear = enum.auto()
+    #: Completely unrelated information
     irrelevant_other = enum.auto()
 
 
diff --git a/src/bioregistry/data/curated_papers.tsv b/src/bioregistry/data/curated_papers.tsv
index 6388d4ecd..f956d1d87 100644
--- a/src/bioregistry/data/curated_papers.tsv
+++ b/src/bioregistry/data/curated_papers.tsv
@@ -1,21 +1,21 @@
-pmid	relevant	relevancy_type	notes	pr_added	orcid	date_curated
-39104285	1	new_provider	Provider for UniProt IDs, issue with curation due to multiple URI formats depending on plant species	1193	0009-0009-5240-7463	2024-09-24
-39074139	1	new_provider	Resolver for PDB IDs	1193	0009-0009-5240-7463	2024-09-24
-39014503	0	no_website		1193	0009-0009-5240-7463	2024-09-25
-39047988	0	irrelevant_other			0009-0009-5240-7463	2024-09-25
-39115390	0	irrelevant_other			0009-0009-5240-7463	2024-09-26
-39095357	0	irrelevant_other			0009-0009-5240-7463	2024-09-26
-39084442	0	not_identifiers_resource			0009-0009-5240-7463	2024-09-27
-38991851	1	unclear	identifiers for sharing, retrieving, and validating sample metadata. Unclear if this should be curated as a prefix, provider or a separate registry		0009-0009-5240-7463	2024-09-28
-38991828	0	irrelevant_other			0009-0009-5240-7463	2024-09-28
-39049520	0	not_identifiers_resource			0009-0009-5240-7463	2024-09-30
-39104826	1	existing	Already present in the bioregistry as a provider for mesh prefix		0009-0009-5240-7463	2024-10-01
-39050757	0	irrelevant_other			0009-0009-5240-7463	2024-10-01
-39064021	0	irrelevant_other			0009-0009-5240-7463	2024-10-01
-39028894	0	not_identifiers_resource			0009-0009-5240-7463	2024-10-04
-39044201	0	not_identifiers_resource	Potential resource for rare diseases identifiers, but not identifier information		0009-0009-5240-7463	2024-10-04
-39088253	0	irrelevant_other			0009-0009-5240-7463	2024-10-05
-39119155	0	irrelevant_other			0009-0009-5240-7463	2024-10-05
-39005357	0	irrelevant_other			0009-0009-5240-7463	2024-10-05
-39044130	0	irrelevant_other			0009-0009-5240-7463	2024-10-05
-39010878	0	irrelevant_other			0009-0009-5240-7463	2024-10-05
+pmid	relevant	relevancy_type	pr_added	orcid	date_curated	notes
+39104285	1	new_provider	1193	0009-0009-5240-7463	2024-09-24	Provider for UniProt IDs, issue with curation due to multiple URI formats depending on plant species
+39074139	1	new_provider	1193	0009-0009-5240-7463	2024-09-24	Resolver for PDB IDs
+39014503	0	no_website		0009-0009-5240-7463	2024-09-25
+39047988	0	irrelevant_other		0009-0009-5240-7463	2024-09-25
+39115390	0	irrelevant_other		0009-0009-5240-7463	2024-09-26
+39095357	0	irrelevant_other		0009-0009-5240-7463	2024-09-26
+39084442	0	not_identifiers_resource		0009-0009-5240-7463	2024-09-27
+38991851	1	unclear	1194	0009-0009-5240-7463	2024-09-28	identifiers for sharing, retrieving, and validating sample metadata. Unclear if this should be curated as a prefix, provider or a separate registry
+38991828	0	irrelevant_other		0009-0009-5240-7463	2024-09-28
+39049520	0	not_identifiers_resource		0009-0009-5240-7463	2024-09-30
+39104826	1	existing		0009-0009-5240-7463	2024-10-01	Already present in the bioregistry as a provider for mesh prefix
+39050757	0	irrelevant_other		0009-0009-5240-7463	2024-10-01
+39064021	0	irrelevant_other		0009-0009-5240-7463	2024-10-01
+39028894	0	not_identifiers_resource		0009-0009-5240-7463	2024-10-04
+39044201	0	not_identifiers_resource		0009-0009-5240-7463	2024-10-04	Potential resource for rare diseases identifiers, but not identifier information
+39088253	0	irrelevant_other		0009-0009-5240-7463	2024-10-05
+39119155	0	irrelevant_other		0009-0009-5240-7463	2024-10-05
+39005357	0	irrelevant_other		0009-0009-5240-7463	2024-10-05
+39044130	0	irrelevant_other		0009-0009-5240-7463	2024-10-05
+39010878	0	irrelevant_other		0009-0009-5240-7463	2024-10-05

From 462f7f86feb8b7559dd16f933dffffee33d71564 Mon Sep 17 00:00:00 2001
From: Mufaddal Naguthanawala <m.nguthana@hotmail.com>
Date: Thu, 17 Oct 2024 13:03:54 -0400
Subject: [PATCH 18/22] Handle empty inputs for notes and pr_added fields

---
 src/bioregistry/curation/literature.py  |  4 +--
 src/bioregistry/data/curated_papers.tsv | 42 ++++++++++++-------------
 tests/test_curated_papers.py            | 11 +++++--
 3 files changed, 31 insertions(+), 26 deletions(-)

diff --git a/src/bioregistry/curation/literature.py b/src/bioregistry/curation/literature.py
index 17478b8b7..19a99a9b9 100644
--- a/src/bioregistry/curation/literature.py
+++ b/src/bioregistry/curation/literature.py
@@ -12,10 +12,10 @@
 COLUMNS = [
     "pmid",
     "relevant",
-    "relevancy_type",
-    "pr_added",  # links back to the PR where curations were done
     "orcid",
     "date_curated",
+    "relevancy_type",
+    "pr_added",  # links back to the PR where curations were done
     "notes",
 ]
 
diff --git a/src/bioregistry/data/curated_papers.tsv b/src/bioregistry/data/curated_papers.tsv
index f956d1d87..68b964311 100644
--- a/src/bioregistry/data/curated_papers.tsv
+++ b/src/bioregistry/data/curated_papers.tsv
@@ -1,21 +1,21 @@
-pmid	relevant	relevancy_type	pr_added	orcid	date_curated	notes
-39104285	1	new_provider	1193	0009-0009-5240-7463	2024-09-24	Provider for UniProt IDs, issue with curation due to multiple URI formats depending on plant species
-39074139	1	new_provider	1193	0009-0009-5240-7463	2024-09-24	Resolver for PDB IDs
-39014503	0	no_website		0009-0009-5240-7463	2024-09-25
-39047988	0	irrelevant_other		0009-0009-5240-7463	2024-09-25
-39115390	0	irrelevant_other		0009-0009-5240-7463	2024-09-26
-39095357	0	irrelevant_other		0009-0009-5240-7463	2024-09-26
-39084442	0	not_identifiers_resource		0009-0009-5240-7463	2024-09-27
-38991851	1	unclear	1194	0009-0009-5240-7463	2024-09-28	identifiers for sharing, retrieving, and validating sample metadata. Unclear if this should be curated as a prefix, provider or a separate registry
-38991828	0	irrelevant_other		0009-0009-5240-7463	2024-09-28
-39049520	0	not_identifiers_resource		0009-0009-5240-7463	2024-09-30
-39104826	1	existing		0009-0009-5240-7463	2024-10-01	Already present in the bioregistry as a provider for mesh prefix
-39050757	0	irrelevant_other		0009-0009-5240-7463	2024-10-01
-39064021	0	irrelevant_other		0009-0009-5240-7463	2024-10-01
-39028894	0	not_identifiers_resource		0009-0009-5240-7463	2024-10-04
-39044201	0	not_identifiers_resource		0009-0009-5240-7463	2024-10-04	Potential resource for rare diseases identifiers, but not identifier information
-39088253	0	irrelevant_other		0009-0009-5240-7463	2024-10-05
-39119155	0	irrelevant_other		0009-0009-5240-7463	2024-10-05
-39005357	0	irrelevant_other		0009-0009-5240-7463	2024-10-05
-39044130	0	irrelevant_other		0009-0009-5240-7463	2024-10-05
-39010878	0	irrelevant_other		0009-0009-5240-7463	2024-10-05
+pmid	relevant	orcid	date_curated	relevancy_type	pr_added	notes
+39104285	1	0009-0009-5240-7463	2024-09-24	new_provider	1193	Provider for UniProt IDs, issue with curation due to multiple URI formats depending on plant species
+39074139	1	0009-0009-5240-7463	2024-09-24	new_provider	1193	Resolver for PDB IDs
+39014503	0	0009-0009-5240-7463	2024-09-25	no_website		
+39047988	0	0009-0009-5240-7463	2024-09-25	irrelevant_other		
+39115390	0	0009-0009-5240-7463	2024-09-26	irrelevant_other		
+39095357	0	0009-0009-5240-7463	2024-09-26	irrelevant_other		
+39084442	0	0009-0009-5240-7463	2024-09-27	not_identifiers_resource		
+38991851	1	0009-0009-5240-7463	2024-09-28	unclear		identifiers for sharing, retrieving, and validating sample metadata. Unclear if this should be curated as a prefix, provider or a separate registry
+38991828	0	0009-0009-5240-7463	2024-09-28	irrelevant_other		
+39049520	0	0009-0009-5240-7463	2024-09-30	not_identifiers_resource		
+39104826	1	0009-0009-5240-7463	2024-10-01	existing		Already present in the bioregistry as a provider for mesh prefix
+39050757	0	0009-0009-5240-7463	2024-10-01	irrelevant_other		
+39064021	0	0009-0009-5240-7463	2024-10-01	irrelevant_other		
+39028894	0	0009-0009-5240-7463	2024-10-04	not_identifiers_resource		
+39044201	0	0009-0009-5240-7463	2024-10-04	not_identifiers_resource		Potential resource for rare diseases identifiers, but not identifier information
+39088253	0	0009-0009-5240-7463	2024-10-05	irrelevant_other		
+39119155	0	0009-0009-5240-7463	2024-10-05	irrelevant_other		
+39005357	0	0009-0009-5240-7463	2024-10-05	irrelevant_other		
+39044130	0	0009-0009-5240-7463	2024-10-05	irrelevant_other		
+39010878	0	0009-0009-5240-7463	2024-10-05	irrelevant_other		
diff --git a/tests/test_curated_papers.py b/tests/test_curated_papers.py
index 7d095e22c..93bf6a824 100644
--- a/tests/test_curated_papers.py
+++ b/tests/test_curated_papers.py
@@ -24,7 +24,10 @@ def validate_row(self, row):
             self.assertIn(field, row)
 
         self.assertTrue(row["pmid"].isdigit(), msg="PubMed identifier should be an integer")
-        self.assertTrue(row["pr_added"].isdigit(), msg="Pull Request should be an integer")
+
+        # Allow pr_added to be empty
+        if row["pr_added"]:
+            self.assertTrue(row["pr_added"].isdigit(), msg="Pull Request should be an integer")
 
         # Validate relevant is 0 or 1
         self.assertIn(row["relevant"], ["0", "1"])
@@ -49,8 +52,10 @@ def validate_row(self, row):
 
         self.assertRegex(row["orcid"], ORCID_PATTERN)
 
-        self.assertFalse(row["notes"].startswith('"'))
-        self.assertFalse(row["notes"].endswith('"'))
+        # Handle None values values for notes
+        if row["notes"] is not None:
+            self.assertFalse(row["notes"].startswith('"'))
+            self.assertFalse(row["notes"].endswith('"'))
 
         # Validate date_curated format
         try:

From 4948f4623c1fc4d1d64b65fb134acede371e0fea Mon Sep 17 00:00:00 2001
From: "Benjamin M. Gyori" <ben.gyori@gmail.com>
Date: Thu, 17 Oct 2024 14:40:23 -0400
Subject: [PATCH 19/22] Fix typo

---
 tests/test_curated_papers.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/test_curated_papers.py b/tests/test_curated_papers.py
index 93bf6a824..ab515d532 100644
--- a/tests/test_curated_papers.py
+++ b/tests/test_curated_papers.py
@@ -52,7 +52,7 @@ def validate_row(self, row):
 
         self.assertRegex(row["orcid"], ORCID_PATTERN)
 
-        # Handle None values values for notes
+        # Handle None values for notes
         if row["notes"] is not None:
             self.assertFalse(row["notes"].startswith('"'))
             self.assertFalse(row["notes"].endswith('"'))

From 0169e361d4a4abe5e048d3ab38a3e805e1a836a2 Mon Sep 17 00:00:00 2001
From: Mufaddal Naguthanawala <m.nguthana@hotmail.com>
Date: Sat, 19 Oct 2024 15:14:14 -0400
Subject: [PATCH 20/22] add publication info for FURNA

---
 src/bioregistry/data/bioregistry.json | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/src/bioregistry/data/bioregistry.json b/src/bioregistry/data/bioregistry.json
index 1fa35f2ba..b47bf18a9 100644
--- a/src/bioregistry/data/bioregistry.json
+++ b/src/bioregistry/data/bioregistry.json
@@ -85850,6 +85850,15 @@
         "description": "FURNA (Functions of RNAs) is a database of ligand-RNA interactions and Gene Ontology annotations for RNAs in the Protein Data Bank (PDB).",
         "homepage": "https://seq2fun.dcmb.med.umich.edu/furna/",
         "name": "furna",
+        "publications": [
+          {
+            "doi": "10.1371/journal.pbio.3002476",
+            "pmc": "PMC11309384",
+            "pubmed": "39074139",
+            "title": "FURNA: A database for functional annotations of RNA structures",
+            "year": 2024
+          }
+        ],
         "uri_format": "https://seq2fun.dcmb.med.umich.edu/furna/pdb.cgi?pdbid=$1"
       }
     ],

From 98eb707ec35c36222fbcf6fe5d7e56b8658c78c5 Mon Sep 17 00:00:00 2001
From: Charles Tapley Hoyt <cthoyt@gmail.com>
Date: Sat, 19 Oct 2024 22:23:23 +0200
Subject: [PATCH 21/22] Update test_curated_papers.py

---
 tests/test_curated_papers.py | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/tests/test_curated_papers.py b/tests/test_curated_papers.py
index ab515d532..f646df47e 100644
--- a/tests/test_curated_papers.py
+++ b/tests/test_curated_papers.py
@@ -1,12 +1,9 @@
-# -*- coding: utf-8 -*-
-
 """Test for checking the integrity of the curated_papers TSV file."""
 
 import csv
 import unittest
 from datetime import datetime
 
-# import bioregistry
 from bioregistry.constants import CURATED_PAPERS_PATH, ORCID_PATTERN
 from bioregistry.curation.literature import COLUMNS, CurationRelevance
 

From 269204ba2dadde2be0bc7a2e45445717117b771f Mon Sep 17 00:00:00 2001
From: Charles Tapley Hoyt <cthoyt@gmail.com>
Date: Sat, 19 Oct 2024 22:23:46 +0200
Subject: [PATCH 22/22] Update literature.py

---
 src/bioregistry/curation/literature.py | 12 ------------
 1 file changed, 12 deletions(-)

diff --git a/src/bioregistry/curation/literature.py b/src/bioregistry/curation/literature.py
index 19a99a9b9..04d6c13a2 100644
--- a/src/bioregistry/curation/literature.py
+++ b/src/bioregistry/curation/literature.py
@@ -2,8 +2,6 @@
 
 import enum
 
-import click
-
 __all__ = [
     "CurationRelevance",
     "COLUMNS",
@@ -39,13 +37,3 @@ class CurationRelevance(str, enum.Enum):
     unclear = enum.auto()
     #: Completely unrelated information
     irrelevant_other = enum.auto()
-
-
-@click.command()
-def main():
-    """Import data from the literature curation into the Bioregistry."""
-    raise NotImplementedError
-
-
-if __name__ == "__main__":
-    main()