-
Notifications
You must be signed in to change notification settings - Fork 3
/
content.js
194 lines (153 loc) · 7.1 KB
/
content.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
// Capture the page URL once, when this script is evaluated. The mouseup
// listener below passes the fragment-free form to handleSelection together
// with the selected text and up to 100 characters that precede it.
let windowHref = window.location.href;
// Remove any trailing #fragment: keep only the part before the first '#'.
let windowHrefSplit = windowHref.split('#');
let windowHrefNoHash = windowHrefSplit[0];
// On every mouseup, forward the current text selection to handleSelection
// together with up to 100 characters of the text that precedes it inside the
// parent of the selection's anchor node.
document.addEventListener('mouseup', function(event) {
  // If the mouseup event is happening on the 'X' button, just return
  if (event.target.id === 'closeDrawer') {
    return;
  }

  const selection = window.getSelection();
  const selectedText = selection ? selection.toString() : '';
  // Nothing selected, or the selection is not anchored inside a parent node.
  if (!selectedText || !selection.anchorNode || !selection.anchorNode.parentNode) {
    return;
  }
  const parentNode = selection.anchorNode.parentNode;

  // Preferred path: measure the text between the start of the parent and the
  // real start of the selection. Searching the parent text for the selected
  // string would find its FIRST occurrence, which is the wrong position
  // whenever the selected text (e.g. a citation number) is repeated.
  let textBeforeSelection = null;
  if (selection.rangeCount > 0) {
    const selectedRange = selection.getRangeAt(0);
    if (parentNode.contains(selectedRange.startContainer)) {
      const leadingRange = document.createRange();
      leadingRange.selectNodeContents(parentNode);
      leadingRange.setEnd(selectedRange.startContainer, selectedRange.startOffset);
      textBeforeSelection = leadingRange.toString();
    }
  }

  // Fallback (selection starts outside the anchor's parent, e.g. a backwards
  // or multi-element selection): locate the selected string by search.
  // A miss (-1) yields an empty prefix.
  if (textBeforeSelection === null) {
    const parentText = parentNode.textContent || '';
    const startIndex = parentText.indexOf(selectedText);
    textBeforeSelection = parentText.substring(0, Math.max(0, startIndex));
  }

  // Keep only the last 100 characters before the selection.
  const precedingText = textBeforeSelection.slice(-100).trim();
  handleSelection(windowHrefNoHash, selectedText, precedingText);
});
// Start the pre-scrape as soon as the DOM is available: defer to
// DOMContentLoaded while the document is still loading, otherwise run now.
if (document.readyState === 'loading') {
  document.addEventListener('DOMContentLoaded', scrapeMethodsAndRefs);
} else {
  scrapeMethodsAndRefs();
}
// scrape methods and refs in advance
/**
 * Pre-scrapes the current article and caches the results in localStorage.
 *
 * - On URLs containing "nature.com": stores methods under 'eprMethods' and
 *   references under 'eprRefs'.
 * - On URLs containing "science.org": stores references under 'eprRefs'.
 * - When references were scraped, POSTs them as JSON to the `/prescrape`
 *   endpoint and stores the JSON reply under 'eprPrescrapedAbstracts'.
 *
 * Every failure (scraper error, network error, non-OK HTTP status, storage
 * error) is logged and swallowed, so the returned promise never rejects. The
 * function is invoked without a rejection handler, so a rejection would
 * otherwise surface as an unhandled promise rejection.
 *
 * @returns {Promise<void>} Resolves once scraping and the optional POST finish.
 */
async function scrapeMethodsAndRefs() {
  const pubmedAPIlink = "https://pubmed-api-q1u2.onrender.com";
  const windowHref = window.location.href;
  // remove any trailing #
  const windowHrefNoHash = windowHref.split('#')[0];
  let preScrapedRefs = null;

  try {
    // scrape the data
    if (windowHref.includes("nature.com")) {
      console.log("Pre scraping");
      // The two scrapes are independent, so run them concurrently.
      const [scrapedMethods, allRefs] = await Promise.all([
        NaturePreScrapeMethods(windowHrefNoHash),
        NaturePreScrapeRef(windowHrefNoHash),
      ]);
      // The methods scraper may yield a non-array on failure. Normalise it so
      // the stored value is always valid JSON ("[]"), never the unparseable
      // string "undefined".
      const allMethods = Array.isArray(scrapedMethods) ? scrapedMethods : [];
      console.log("PreScraped Methods:", allMethods);
      console.log("Prescraped Refs:", allRefs);
      preScrapedRefs = allRefs;
      localStorage.setItem('eprMethods', JSON.stringify(allMethods));
      localStorage.setItem('eprRefs', JSON.stringify(allRefs));
    } else if (windowHref.includes("science.org")) {
      console.log("Starting to Scrape in Science");
      const allRefs = await SciencePreScrapeRef(windowHrefNoHash);
      console.log("Prescraped Refs:", allRefs);
      preScrapedRefs = allRefs;
      localStorage.setItem('eprRefs', JSON.stringify(allRefs));
    }

    // Unsupported site: nothing to send.
    if (preScrapedRefs == null) {
      return;
    }

    // Send the references to the API as a JSON POST body.
    const response = await fetch(`${pubmedAPIlink}/prescrape`, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify(preScrapedRefs),
    });
    if (!response.ok) {
      throw new Error(`Prescrape request failed with HTTP ${response.status}`);
    }
    const data = await response.json();
    localStorage.setItem('eprPrescrapedAbstracts', JSON.stringify(data));
    console.log('prescraped abstracts', data);
  } catch (error) {
    console.error('Error:', error);
  }
}
// ---- Functions that pre-scrape Methods and Refs
// (Somehow they can't be moved into a separate file, possibly due to early loading?)
// scrape methods from a nature paper
/**
 * Fetches a page and extracts the sub-sections of its Methods section.
 *
 * The Methods section is located with `section[data-title="Methods"]`. Each
 * `h3` inside it starts a new entry, and the entry's content is the outerHTML
 * of every following sibling element up to (not including) the next `h3`,
 * joined with newlines.
 *
 * @param {string} url - Address of the article page to fetch.
 * @returns {Promise<Array<{title: string, content: string}>>} One entry per
 *   `h3`. An empty array when the page has no Methods section or when
 *   fetching/parsing fails (the failure is logged, never thrown).
 */
async function NaturePreScrapeMethods(url) {
  try {
    const response = await fetch(url);
    if (!response.ok) {
      throw new Error(`HTTP ${response.status} while fetching ${url}`);
    }
    const html = await response.text();
    const doc = new DOMParser().parseFromString(html, 'text/html');

    const methodsSection = doc.querySelector('section[data-title="Methods"]');
    // Not every article has a Methods section; that is not an error.
    if (!methodsSection) {
      return [];
    }

    return Array.from(methodsSection.querySelectorAll('h3'), (titleElement) => {
      const contentElements = [];
      // Collect sibling elements until the next sub-section heading.
      for (
        let sibling = titleElement.nextElementSibling;
        sibling && sibling.tagName !== 'H3';
        sibling = sibling.nextElementSibling
      ) {
        contentElements.push(sibling.outerHTML);
      }
      return {
        title: titleElement.textContent,
        content: contentElements.join('\n'),
      };
    });
  } catch (err) {
    console.log('In NaturePreScrapeMethods: Failed to fetch page: ', err);
    // Always hand callers an array so the result can be serialised safely.
    return [];
  }
}
// extract Ref from link
/**
 * Fetches a page and extracts its reference list.
 *
 * Every element matching `.c-article-references__item` becomes one entry.
 * Links are taken from anchors identified by their `data-track-action`
 * attribute; a missing anchor yields an empty string. An item without a
 * `.c-article-references__text` element gets an empty Title rather than
 * aborting the whole scrape.
 *
 * @param {string} url - Address of the article page to fetch.
 * @returns {Promise<Array<{Title: string, PubMedLink: string,
 *   GoogleScholarLink: string, CASLink: string, ArticleLink: string}>>}
 *   One entry per reference item, in document order.
 * @throws Propagates errors raised by fetch or while reading the response.
 */
async function NaturePreScrapeRef(url) {
  // Fetch HTML content from the given URL
  const response = await fetch(url);
  const html = await response.text();
  // Use DOMParser to convert the HTML string to a Document object
  const doc = new DOMParser().parseFromString(html, 'text/html');

  // href of the item's anchor with the given tracking action, or "" if absent.
  const linkFor = (item, trackAction) => {
    const anchor = item.querySelector(`a[data-track-action="${trackAction}"]`);
    return anchor ? anchor.href : "";
  };

  // Map each reference list item to an object with the extracted information
  return Array.from(doc.querySelectorAll('.c-article-references__item'), (item) => {
    const textElement = item.querySelector('.c-article-references__text');
    return {
      Title: textElement ? textElement.textContent : "",
      PubMedLink: linkFor(item, 'pubmed reference'),
      GoogleScholarLink: linkFor(item, 'google scholar reference'),
      CASLink: linkFor(item, 'cas reference'),
      ArticleLink: linkFor(item, 'article reference'),
    };
  });
}
// extract all Refs
/**
 * Fetches a page and extracts its reference list.
 *
 * Candidates are all `div[role="listitem"]` elements. That selector is
 * generic, so candidates without a `.citation-content` element are treated as
 * non-reference list items and skipped instead of aborting the whole scrape.
 * Title is the innerHTML of the citation content (markup preserved); a missing
 * link yields an empty string.
 *
 * @param {string} url - Address of the article page to fetch.
 * @returns {Promise<Array<{Title: string, ArticleLink: string,
 *   PubMedLink: string, CASLink: string, GoogleScholarLink: string}>>}
 *   One entry per list item that carries citation content, in document order.
 * @throws Propagates errors raised by fetch or while reading the response.
 */
async function SciencePreScrapeRef(url) {
  // Fetch HTML content from the given URL
  const response = await fetch(url);
  const html = await response.text();
  // Use DOMParser to convert the HTML string to a Document object
  const doc = new DOMParser().parseFromString(html, 'text/html');

  // href of the first element matching the selector inside item, or "".
  const linkFor = (item, selector) => {
    const anchor = item.querySelector(selector);
    return anchor ? anchor.href : "";
  };

  const references = [];
  for (const item of Array.from(doc.querySelectorAll('div[role="listitem"]'))) {
    const citation = item.querySelector('.citation-content');
    if (!citation) {
      // Not a reference entry (the list-item role is used for other lists too).
      continue;
    }
    references.push({
      Title: citation.innerHTML,
      ArticleLink: linkFor(item, '.core-xlink-crossref a'),
      PubMedLink: linkFor(item, '.core-xlink-pubmed a'),
      CASLink: "", // As there was no CAS link in the given format
      GoogleScholarLink: linkFor(item, '.core-xlink-google-scholar a'),
    });
  }
  return references;
}