Skip to content

Commit

Permalink
Merge pull request #49 from hlqviet/refactor/domain-processing
Browse files Browse the repository at this point in the history
Further enhance the domain processing code
  • Loading branch information
mrrfv authored Nov 19, 2023
2 parents c91aea1 + 7189599 commit 23cab68
Show file tree
Hide file tree
Showing 4 changed files with 72 additions and 54 deletions.
64 changes: 28 additions & 36 deletions cf_list_create.js
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ import {
extractDomain,
isComment,
isValidDomain,
memoize,
readFile,
} from "./lib/utils.js";

Expand All @@ -33,6 +34,7 @@ let processedDomainCount = 0;
let unnecessaryDomainCount = 0;
let duplicateDomainCount = 0;
let allowedDomainCount = 0;
const memoizedNormalizeDomain = memoize(normalizeDomain);

// Read allowlist
console.log(`Processing ${allowlistFilename}`);
Expand All @@ -43,7 +45,7 @@ await readFile(resolve(`./${allowlistFilename}`), (line) => {

if (isComment(_line)) return;

const domain = normalizeDomain(_line, true);
const domain = memoizedNormalizeDomain(_line, true);

if (!isValidDomain(domain)) return;

Expand All @@ -65,45 +67,36 @@ await readFile(resolve(`./${blocklistFilename}`), (line, rl) => {
if (isComment(_line)) return;

// Remove prefixes and suffixes in hosts, wildcard or adblock format
const domain = normalizeDomain(_line);
const domain = memoizedNormalizeDomain(_line);

// Check if it is a valid domain which is not a URL or does not contain
// characters like * in the middle of the domain
if (!isValidDomain(domain)) return;

processedDomainCount++;

if (allowlist.has(domain)) {
console.log(`Found ${domain} in allowlist - Skipping`);
allowedDomainCount++;
return;
}

if (blocklist.has(domain)) {
console.log(`Found ${domain} in blocklist already - Skipping`);
duplicateDomainCount++;
return;
}

// Get all the levels of the domain and check from the highest
// because we are blocking all subdomains
// Example: fourth.third.example.com => ["example.com", "third.example.com", "fourth.third.example.com"]
const anyDomainExists = extractDomain(domain)
.reverse()
.some((item) => {
if (blocklist.has(item)) {
if (item === domain) {
// The exact domain is already blocked
console.log(`Found ${item} in blocklist already - Skipping`);
duplicateDomainCount++;
} else {
// The higher-level domain is already blocked
// so it's not necessary to block this domain
console.log(
`Found ${item} in blocklist already - Skipping ${domain}`
);
unnecessaryDomainCount++;
}

return true;
}

return false;
});

if (anyDomainExists) return;
for (const item of extractDomain(domain).slice(1)) {
if (!blocklist.has(item)) continue;

if (allowlist.has(domain)) {
console.log(`Found ${domain} in allowlist - Skipping`);
allowedDomainCount++;
// The higher-level domain is already blocked
// so it's not necessary to block this domain
console.log(`Found ${item} in blocklist already - Skipping ${domain}`);
unnecessaryDomainCount++;
return;
}

Expand All @@ -124,8 +117,8 @@ console.log("\n\n");
console.log(`Number of processed domains: ${processedDomainCount}`);
console.log(`Number of duplicate domains: ${duplicateDomainCount}`);
console.log(`Number of unnecessary domains: ${unnecessaryDomainCount}`);
console.log(`Number of blocked domains: ${domains.length}`);
console.log(`Number of allowed domains: ${allowedDomainCount}`);
console.log(`Number of blocked domains: ${domains.length}`);
console.log(`Number of lists to be created: ${numberOfLists}`);
console.log("\n\n");

Expand All @@ -143,12 +136,11 @@ console.log("\n\n");

if (FAST_MODE) {
await createZeroTrustListsAtOnce(domains);
// TODO: make this less repetitive
await notifyWebhook(`CF List Create script finished running (${domains.length} domains, ${numberOfLists} lists)`);
return;
} else {
await createZeroTrustListsOneByOne(domains);
}

await createZeroTrustListsOneByOne(domains);

await notifyWebhook(`CF List Create script finished running (${domains.length} domains, ${numberOfLists} lists)`);
await notifyWebhook(
`CF List Create script finished running (${domains.length} domains, ${numberOfLists} lists)`
);
})();
1 change: 1 addition & 0 deletions download_lists.js
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ const downloadLists = async (filename, urls) => {
} catch (err) {
console.error(`An error occurred while processing ${filename}:\n`, err);
console.error("URLs:\n", urls);
throw err;
}
};

Expand Down
6 changes: 6 additions & 0 deletions lib/api.js
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ export const createZeroTrustListsOneByOne = async (items) => {
console.log(`Created "${listName}" list - ${totalListNumber} left`);
} catch (err) {
console.error(`Could not create "${listName}" - ${err.toString()}`);
throw err;
}
}
};
Expand All @@ -77,6 +78,7 @@ export const createZeroTrustListsAtOnce = async (items) => {
console.log("Created lists successfully");
} catch (err) {
console.error(`Error occurred while creating lists - ${err.toString()}`);
throw err;
}
};

Expand Down Expand Up @@ -106,6 +108,7 @@ export const deleteZeroTrustListsOneByOne = async (lists) => {
console.log(`Deleted ${name} list - ${totalListNumber} left`);
} catch (err) {
console.error(`Could not delete ${name} - ${err.toString()}`);
throw err;
}
}
};
Expand All @@ -124,6 +127,7 @@ export const deleteZeroTrustListsAtOnce = async (lists) => {
console.log("Deleted lists successfully");
} catch (err) {
console.error(`Error occurred while deleting lists - ${err.toString()}`);
throw err;
}
};

Expand Down Expand Up @@ -161,6 +165,7 @@ export const createZeroTrustRule = async (wirefilterExpression) => {
console.log("Created rule successfully");
} catch (err) {
console.error(`Error occurred while creating rule - ${err.toString()}`);
throw err;
}
};

Expand All @@ -180,5 +185,6 @@ export const deleteZeroTrustRule = async (id) => {
console.log("Deleted rule successfully");
} catch (err) {
console.error(`Error occurred while deleting rule - ${err.toString()}`);
throw err;
}
};
55 changes: 37 additions & 18 deletions lib/utils.js
Original file line number Diff line number Diff line change
Expand Up @@ -19,18 +19,18 @@ export const isValidDomain = (value) =>
* @param {string} domain The domain to be extracted.
* @returns {string[]}
*/
export const extractDomain = (domain) => {
  // Produces every suffix of the dot-separated labels, shortest first, so the
  // result runs from the top-level label down to the full input domain.
  // Example: "third.example.com" => ["com", "example.com", "third.example.com"]
  const parts = domain.split(".");
  const extractedDomains = [];

  // Walk from the last label backwards and append, which yields the same
  // ordering as prepending while walking forwards, without shifting the array.
  for (let i = parts.length - 1; i >= 0; i--) {
    extractedDomains.push(parts.slice(i).join("."));
  }

  return extractedDomains;
};

/**
* Checks if the value is a comment.
Expand All @@ -49,19 +49,16 @@ export const isComment = (value) =>
* @param {string[]} urls The URLs to the files to be downloaded.
*/
export const downloadFiles = async (filePath, urls) => {
const writeStream = createWriteStream(filePath, { flags: "a" });
const responses = await Promise.all(urls.map((url) => fetch(url)));

for (const response of responses) {
const readable = ReadStream.from(response.body, {
autoDestroy: true,
});
const writeStream = createWriteStream(filePath, { flags: "a" });

readable.on("end", () => {
writeStream.write("\n");
});

readable.pipe(writeStream, { end: false });
ReadStream.from(response.body)
.on("end", () => {
writeStream.write("\n");
})
.pipe(writeStream);
}
};

Expand Down Expand Up @@ -90,5 +87,27 @@ export const readFile = async (filePath, onLine) => {
console.error(
`Error occurred while reading ${basename(filePath)} - ${err.toString()}`
);
throw err;
}
};

/**
 * Memoizes a function.
 *
 * Results are cached per argument list. The cache key encodes both the type
 * and the string form of every argument, so calls such as `("a-b")` and
 * `("a", "b")`, or `("x", true)` and `("x", "true")`, never share an entry.
 * Arguments are compared by their string form, so this is meant for
 * primitive arguments; distinct objects that stringify identically still
 * share a cache entry.
 * @template T The argument type of the function.
 * @template R The return type of the function.
 * @param {(...fnArgs: T[]) => R} fn The function to be memoized.
 * @returns {(...fnArgs: T[]) => R} A wrapper that calls `fn` once per distinct argument list.
 */
export const memoize = (fn) => {
  const cache = new Map();

  return (...args) => {
    // JSON-encoding [type, value] pairs escapes any separator characters that
    // appear inside the arguments themselves and keeps the argument count
    // part of the key, which a plain join("-") cannot guarantee.
    const key = JSON.stringify(args.map((arg) => [typeof arg, String(arg)]));

    if (cache.has(key)) return cache.get(key);

    const result = fn(...args);

    cache.set(key, result);
    return result;
  };
};

0 comments on commit 23cab68

Please sign in to comment.