-
-
Notifications
You must be signed in to change notification settings - Fork 29
/
Copy pathbackup.mjs
367 lines (322 loc) · 13 KB
/
backup.mjs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
import { execSync, spawn } from "child_process";
import { existsSync, mkdtempSync, rmSync, statSync } from "fs";
import { MongoClient, ObjectId } from "mongodb";
import os from "os";
import path from "path";
import net from "net";
// Expected arguments: first arg is project ID (5dbf805650b51914727e06c4) or URL (http://localhost:8080/app/lexicon/5dbf805650b51914727e06c4)
// Second arg is "qa" or "staging" to copy from staging, "live" or "prod" or "production" to copy from production
// NOTE: You must edit the context names below if they don't match the context names you have (see `kubectl config get-contexts` output)
// A MONGO_PASS env var must be available for the selected environment
// ===== EDIT THIS =====
// kubectl context names for each environment (compare with `kubectl config get-contexts` output)
const stagingContext = "dallas-rke";
const prodContext = "aws-rke";
// ===== END of EDIT THIS =====

// Environment used when no environment argument is passed on the command line
const defaultContext = stagingContext;
const defaultContextName = "staging";

/**
 * Rewrite a path that starts with a Windows drive letter (e.g. `D:\Temp\x`) as a UNC path
 * through the drive's administrative share (`\\localhost\D$\Temp\x`).
 * Works around a kubectl bug where Windows drive letters are interpreted as pod names by kubectl cp.
 * Paths that do not start with `<letter>:\` are returned unchanged.
 * NOTE(review): assumes the default administrative share exists for the drive — confirm for non-C: drives.
 * @param {string} dir - Local directory path.
 * @returns {string} The path in a form that contains no `<letter>:` prefix.
 */
function toKubectlSafePath(dir) {
  // A replacer function is used so the literal `$` in the share name needs no escaping
  return dir.replace(/^([A-Za-z]):\\/, (_match, drive) => `\\\\localhost\\${drive}$\\`);
}

// Create a temp dir reliably
const tempdir = toKubectlSafePath(mkdtempSync(path.join(os.tmpdir(), "lfbackup-")));

// State that cleanup() inspects to decide what has to be torn down; filled in as the script progresses
let portForwardProcess;
let localConn;
let remoteConn;
let remoteTarball = undefined;
let remotePodname = undefined;
/**
 * Best-effort teardown of everything the script may have created: the local temp directory,
 * the tarball left in the remote pod, both Mongo connections and the kubectl port-forward process.
 *
 * This function is registered as a process "exit" listener, and such listeners only get to run
 * synchronous code. Every step is therefore *started* before the first `await`; previously the
 * first awaited `close()` suspended the function and the port-forward process was never killed.
 * Each step swallows its own errors so that one failure cannot prevent the remaining steps.
 *
 * @returns {Promise<void>} Settles once both connection closes have settled; never rejects.
 */
async function cleanup() {
  try {
    if (existsSync(tempdir)) {
      console.warn(`Cleaning up temporary directory ${tempdir}...`);
      rmSync(tempdir, { recursive: true, force: true });
    }
  } catch (_) {}
  try {
    if (remotePodname && remoteTarball) {
      console.warn(`Cleaning up assets tarball from remote side...`);
      execSync(
        `kubectl --context="${context}" --namespace=languageforge exec -c app pod/${remotePodname} -- rm -f ${remoteTarball}`,
      );
    }
  } catch (_) {}
  // Kick off both closes without awaiting, so they are at least initiated when running as an exit listener
  const pendingCloses = [];
  for (const conn of [localConn, remoteConn]) {
    try {
      if (conn) pendingCloses.push(conn.close());
    } catch (_) {}
  }
  try {
    if (portForwardProcess) portForwardProcess.kill();
  } catch (_) {}
  // allSettled never rejects, so a failed close cannot surface as an unhandled rejection
  await Promise.allSettled(pendingCloses);
}
/**
 * Find a TCP port that is currently free on this machine.
 * Opens a throwaway server on port 0 (which makes the OS pick a free port), reads the port
 * that was assigned, and closes the server again before resolving.
 * @returns {Promise<number>} Resolves with the port number; rejects if the server emits "error"
 *   (previously such a failure left the promise pending forever).
 */
function randomFreePort() {
  return new Promise((resolve, reject) => {
    const server = net.createServer();
    server.once("error", reject);
    server.listen(0, () => {
      // Asking for port 0 makes Node automatically find a free port
      const { port } = server.address();
      server.close(() => resolve(port));
    });
  });
}
// Run cleanup() when the process exits and also when an uncaught exception is reported
for (const eventName of ["exit", "uncaughtExceptionMonitor"]) {
  process.on(eventName, cleanup);
}
/**
 * Run a shell command synchronously and return what it wrote to stdout.
 * @param {string} cmd - Command line handed to execSync.
 * @returns {string} The command's stdout as a string with trailing whitespace removed.
 */
function run(cmd) {
  const stdoutBuffer = execSync(cmd);
  const stdoutText = stdoutBuffer.toString();
  return stdoutText.trimEnd();
}
/**
 * List the names of the kubectl contexts configured on this machine.
 * Splits on both LF and CRLF line endings so that names carry no stray "\r" on Windows,
 * and drops blank lines so that empty output yields an empty list rather than [""].
 * @returns {string[]} Context names as printed by `kubectl config get-contexts -o name`.
 */
function getContexts() {
  const stdout = run("kubectl config get-contexts -o name");
  return stdout
    .split(/\r?\n/)
    .map((name) => name.trim())
    .filter((name) => name.length > 0);
}
/**
 * Check whether a path exists inside the remote app pod.
 * Sometimes the audio and/or pictures folders in assets are symlinks, and sometimes they're broken symlinks.
 * This returns true if the name is a real file/directory *or* a symlink with a valid target,
 * and false if it doesn't exist or is a broken symlink.
 *
 * The path is single-quoted for the remote shell so that spaces or shell metacharacters in it
 * cannot split or alter the readlink command. Characters that are special to the *local* shell
 * inside double quotes are not escaped here.
 *
 * @param {string} name - Absolute path inside the pod.
 * @returns {boolean} Whether the path resolves to something that exists.
 * @throws {Error} If the remote command prints anything other than "yes" or "no".
 */
function reallyExists(name) {
  const quotedName = `'${name.replaceAll("'", "'\\''")}'`;
  const result = run(
    `kubectl --context="${context}" --namespace=languageforge exec -c app pod/${remotePodname} -- sh -c "readlink -eq ${quotedName} >/dev/null && echo yes || echo no"`,
  );
  if (result === "yes") return true;
  if (result === "no") return false;
  throw new Error(`Unexpected result from readlink ${name}: ${result}`);
}
// Sanity check: both context names configured at the top of this file must be known to kubectl,
// regardless of which environment ends up being selected
const contexts = getContexts();
const requiredContexts = [
  ["Staging", stagingContext],
  ["Prod", prodContext],
];
for (const [label, wanted] of requiredContexts) {
  if (contexts.includes(wanted)) continue;
  console.warn(`${label} context not found. Tried`, wanted, "but did not find it in", contexts);
  console.warn("Might need to edit the top level of this file and try again");
  process.exit(1);
}
// Process args: argv[0] is node and argv[1] is this script, so the project ID/URL is argv[2]
const hasProjectArg = process.argv.length >= 3;
if (!hasProjectArg) {
  console.warn("Please pass project ID or URL as argument, e.g. node backup.mjs 5dbf805650b51914727e06c4");
  process.exit(2);
}
// The password for the remote Mongo instance is only read from the environment
const { MONGO_PASS: mongoPass } = process.env;
if (!mongoPass) {
  console.warn("Please provide a MongoDB password in the MONGO_PASS environment variable");
  process.exit(2);
}
/**
 * Extract a project ID from the first command-line argument.
 * - A value that does not parse as a URL is returned unchanged (it is taken to be the ID itself).
 * - For a URL whose path starts with /app/lexicon/, the path segment right after that prefix is returned.
 * - For any other URL, the first path segment made of 24 hex digits is returned; if there is none,
 *   the last non-empty path segment is returned (previously the whole pathname, leading slash
 *   included, was used).
 * @param {string} value - Project ID or project URL.
 * @returns {string} The best candidate for the project ID.
 */
function parseProjectId(value) {
  if (!URL.canParse(value)) return value;
  const { pathname } = new URL(value);
  const lexiconPrefix = "/app/lexicon/";
  if (pathname.startsWith(lexiconPrefix)) {
    return pathname.substring(lexiconPrefix.length).split("/")[0];
  }
  // Unrecognised URL shape: will probably fail, but worth a try
  const segments = pathname.split("/").filter(Boolean);
  const idLikeSegment = segments.find((segment) => /^[0-9a-f]{24}$/i.test(segment));
  return idLikeSegment ?? segments.at(-1) ?? pathname;
}
const arg = process.argv[2];
let projId = parseProjectId(arg);
// Pick the environment to copy from: the optional second CLI argument, otherwise the default
let context = defaultContext;
let contextName = defaultContextName;
if (process.argv.length > 3) {
  const env = process.argv[3];
  switch (env) {
    // Aliases for the staging environment
    case "qa":
    case "staging":
      context = stagingContext;
      contextName = "staging";
      break;
    // Aliases for the production environment
    case "live":
    case "prod":
    case "production":
      context = prodContext;
      contextName = "production";
      break;
    default:
      console.warn(`Unknown environment ${env}`);
      console.warn(`Valid values are qa, staging, live, prod, or production`);
      process.exit(2);
  }
} else {
  console.warn("No environment selected. Defaulting to staging environment.");
  console.warn('Pass "prod" or "production" as second arg to copy projects from production environment instead.');
}
projId = projId.trim();
// Announce what is about to happen, then wait briefly so the user can still abort
const confirmationPauseMs = 2000;
console.warn(`Fetching project with ID ${projId} from ${contextName} context, named "${context}"`);
console.warn("If that looks wrong, hit Ctrl+C right NOW!");
console.warn();
console.warn("Pausing for 2 seconds to give you time to hit Ctrl+C...");
await new Promise((wake) => {
  setTimeout(wake, confirmationPauseMs);
});
// Start running
console.warn("Setting up kubectl port forwarding for remote Mongo...");
const remoteMongoPort = await randomFreePort();
// Forward the free local port to port 27017 of deploy/db in the selected context
const portForwardArgs = [
  `--context=${context}`,
  "--namespace=languageforge",
  "port-forward",
  "deploy/db",
  `${remoteMongoPort}:27017`,
];
portForwardProcess = spawn("kubectl", portForwardArgs, { stdio: "pipe" });
// The first output on stdout is taken as the signal that forwarding is up
const portForwardingPromise = new Promise((resolve) => {
  portForwardProcess.stdout.on("data", () => {
    resolve();
  });
});
// Any output on stderr is treated as a fatal port-forwarding failure
portForwardProcess.stderr.on("data", (chunk) => {
  console.warn("Port forwarding failed:");
  console.warn(chunk.toString());
  console.warn("Exiting");
  process.exit(1);
});
console.warn("Setting up local Mongo connection...");
// `docker compose port` prints HOST:PORT. Take what follows the LAST colon so that an IPv6 host
// such as [::] (which itself contains colons) still yields the port.
const localMongoPort = run("docker compose port db 27017").split(":").at(-1).trim();
const localConnStr = `mongodb://admin:pass@localhost:${localMongoPort}/?authSource=admin`;
localConn = await MongoClient.connect(localConnStr);
const localAdmin = await localConn.db("scriptureforge").collection("users").findOne({ username: "admin" });
if (!localAdmin) {
  // Without this check the next line would fail with an unhelpful "cannot read properties of null"
  throw new Error('No user with username "admin" found in the local scriptureforge database');
}
const adminId = localAdmin._id.toString();
console.log(`Local admin ID: ${adminId}`);
console.warn("If that doesn't look right, hit Ctrl+C NOW");
await portForwardingPromise;
console.warn("Port forwarding is ready. Setting up remote Mongo connection...");
// Credentials inside a connection URI must be percent-encoded, otherwise a password containing
// characters such as @ : / ? # % produces a malformed URI.
// NOTE(review): a MONGO_PASS value that was already percent-encoded by hand as a workaround
// must now be supplied in its raw form.
const remoteConnStr = `mongodb://admin:${encodeURIComponent(mongoPass)}@localhost:${remoteMongoPort}`;
remoteConn = await MongoClient.connect(remoteConnStr);
console.warn("Remote Mongo connection established. Fetching project record...");
// Get project record
const project = await remoteConn
  .db("scriptureforge")
  .collection("projects")
  .findOne({ _id: new ObjectId(projId) });
if (!project) {
  // findOne yields null when nothing matches; fail with a message that says so
  throw new Error(`Project with ID ${projId} was not found in the ${contextName} environment`);
}
console.log("Project code:", project.projectCode);
const dbname = `sf_${project.projectCode}`;
// In the local copy, the local admin user becomes the only member (as project manager) and the owner
project.users = { [adminId]: { role: "project_manager" } };
project.ownerRef = new ObjectId(adminId);
// Mongo removed the .copyDatabase method in version 4.2, whose release notes said to just use mongodump/mongorestore if you want to do that
// Instead, each collection is copied by hand: read indexes and documents from the remote side,
// then replace the local collection of the same name with them.
console.warn(`Copying ${dbname} database...`);
const collections = await remoteConn.db(dbname).collections();
for (const remoteColl of collections) {
  const name = remoteColl.collectionName;
  console.log(` Copying ${name} collection...`);
  const indexes = await remoteColl.indexes();
  // The whole collection is held in memory at once before being inserted locally
  const docs = await remoteColl.find().toArray();
  const localColl = localConn.db(dbname).collection(name);
  try {
    // Dropping a collection removes its indexes too, so no separate dropIndexes() call is needed
    await localColl.drop();
  } catch (err) {
    // A collection that doesn't exist yet is fine; anything else is reported but not fatal
    const isMissingCollection = err?.codeName === "NamespaceNotFound" || err?.code === 26;
    if (!isMissingCollection) {
      console.warn(` Warning: could not drop local ${name} collection: ${err?.message ?? err}`);
    }
  }
  if (indexes?.length) await localColl.createIndexes(indexes);
  if (docs?.length) await localColl.insertMany(docs);
  console.log(` ${docs.length} documents copied`);
}
console.warn(`${dbname} database successfully copied`);
// Copy project record after its database has been copied, so there's never a race condition where the project exists but its entry database doesn't
console.warn("Copying project record...");
const localProjects = localConn.db("scriptureforge").collection("projects");
const projectFilter = { _id: new ObjectId(projId) };
// upsert: replace the local record if one with this ID exists, insert it otherwise
await localProjects.findOneAndReplace(projectFilter, project, { upsert: true });
// NOTE: mongodump/mongorestore approach below can be revived once Kubernetes 1.30 is installed on client *and* server, so kubectl exec is finally reliable
// console.warn(`About to try fetching ${dbname} database from remote, will retry until success`);
// let done = false;
// while (!done) {
// try {
// console.warn(`Fetching ${dbname} database...`);
// execSync(
// `kubectl --context="${context}" --namespace=languageforge exec -i deploy/db -- mongodump --archive -d "${dbname}" > ${tempdir}/dump`,
// );
// console.warn(`Uploading to local ${dbname} database...`);
// execSync(`docker exec -i lf-db mongorestore --archive --drop -d "${dbname}" ${localConnStr} < ${tempdir}/dump`);
// console.warn(`Successfully uploaded ${dbname} database`);
// done = true;
// } catch (err) {
// console.warn("mongodump failed, retrying...");
// }
// }
console.warn("Getting name of remote app pod...");
// The jsonpath expression prints the names of ALL matching pods separated by whitespace, so the
// output is split and a single pod is picked; using the raw output would break every later
// `pod/${remotePodname}` command whenever more than one app pod is running.
const runningAppPods = run(
  `kubectl --context="${context}" --namespace=languageforge get pod -o jsonpath="{.items[*]['metadata.name']}" -l app=app --field-selector "status.phase=Running"`,
)
  .split(/\s+/)
  .filter(Boolean);
if (runningAppPods.length === 0) {
  throw new Error(`No running app pod found in the ${contextName} environment`);
}
if (runningAppPods.length > 1) {
  // NOTE(review): assumes every app pod sees the same assets — confirm
  console.warn(`Found ${runningAppPods.length} running app pods, using ${runningAppPods[0]}`);
}
remotePodname = runningAppPods[0];
console.warn("Checking that remote assets really exist...");
const includeAudio = reallyExists(`/var/www/html/assets/lexicon/${dbname}/audio`);
const includePictures = reallyExists(`/var/www/html/assets/lexicon/${dbname}/pictures`);
console.log(`Copy audio? ${includeAudio ? "yes" : "no"}`);
console.log(`Copy pictures? ${includePictures ? "yes" : "no"}`);
// Names of the asset folders (relative to the project's assets directory) that will go into the tarball
const filesNeeded = [];
if (includeAudio) {
  filesNeeded.push("audio");
}
if (includePictures) {
  filesNeeded.push("pictures");
}
if (filesNeeded.length === 0) {
  console.warn("Project has no assets. Copy complete.");
  process.exit(0);
}
const tarTargets = filesNeeded.join(" ");
console.warn("Creating assets tarball in remote...");
// Setting remoteTarball also tells cleanup() that there is a remote file to delete
remoteTarball = `/tmp/assets-${dbname}.tar`;
execSync(
  `kubectl --context="${context}" --namespace=languageforge exec -c app pod/${remotePodname} -- tar chf ${remoteTarball} --owner=www-data --group=www-data -C "/var/www/html/assets/lexicon/${dbname}" ${tarTargets}`,
);
// Size of the tarball as reported on the remote side, used later to detect a truncated download
const sizeStr = run(
  `kubectl --context="${context}" --namespace=languageforge exec -c app pod/${remotePodname} -- sh -c "ls -l ${remoteTarball} | cut -d' ' -f5"`,
);
const correctSize = +sizeStr;
if (!(correctSize > 0)) {
  // With NaN or 0 the later "local size < remote size" comparison can never be true
  console.warn(`Could not determine remote tarball size from "${sizeStr}"; a truncated download will not be detected`);
}
console.warn(`Assets tarball size is ${sizeStr}`);
console.warn("Trying to fetch assets tarball with kubectl cp...");
// `failed` ends up true when kubectl cp errored out or delivered fewer bytes than the remote size
let failed;
try {
  execSync(
    `kubectl --context="${context}" --namespace=languageforge cp ${remotePodname}:${remoteTarball} ${tempdir}/assets-${dbname}.tar`,
  );
  failed = false;
} catch (_) {
  console.warn("kubectl cp failed. Will try to continue with rsync...");
  failed = true;
}
if (!failed) {
  const { size: localSize } = statSync(`${tempdir}/assets-${dbname}.tar`);
  failed = localSize < correctSize;
  if (failed) {
    console.warn(`Got only ${localSize} bytes instead of ${correctSize}. Will try to continue with rsync...`);
  }
}
// Fallback for when kubectl cp failed or delivered a truncated file: fetch the tarball with rsync,
// tunnelled through `kubectl exec`
if (failed) {
  console.warn("Ensuring rsync exists in target container...");
  // --namespace is required here like in every other kubectl call; without it kubectl looks for
  // the pod in the context's default namespace
  execSync(
    `kubectl exec --context="${context}" --namespace=languageforge -c app pod/${remotePodname} -- bash -c "which rsync || (apt update && apt install rsync -y)"`,
  );
  console.warn("\n===== IMPORTANT NOTE =====");
  console.warn(
    "The rsync transfer may (probably will) stall at 100%. You'll have to find the rsync process and kill it. Sorry about that.",
  );
  console.warn("===== IMPORTANT NOTE =====\n");
  // Retries without limit until one rsync run exits successfully; --partial keeps what was
  // already transferred between attempts
  let done = false;
  while (!done) {
    try {
      execSync(
        `rsync -v --partial --info=progress2 --rsync-path="/tmp/" --rsh="kubectl --context=${context} --namespace=languageforge exec -i -c app pod/${remotePodname} -- " "rsync:/tmp/assets-${dbname}.tar" "${tempdir}/"`,
        { stdio: "inherit" }, // Allows us to see rsync progress
      );
      done = true;
    } catch (err) {
      console.warn(`Rsync failed with error: ${err}. Retrying...`);
    }
  }
}
console.warn("Uploading assets tarball to local...");
// Directory inside the local lf-app container that receives the project's assets
const localAssetsDir = `/var/www/html/assets/lexicon/${dbname}`;
const downloadedTarball = `${tempdir}/assets-${dbname}.tar`;
// Make sure the target directory exists; a failing chown is tolerated
execSync(
  `docker exec lf-app mkdir -p "${localAssetsDir}" ; docker exec lf-app chown www-data:www-data "${localAssetsDir}" || true`,
);
// `docker cp -` reads a tar archive from stdin and extracts it into the container path
execSync(`docker cp - lf-app:${localAssetsDir}/ < ${downloadedTarball}`);
console.warn("Assets successfully uploaded");
process.exit(0);