Refactor database composer code to be in line with other pages

This commit is contained in:
Yuri Sizov
2023-03-21 00:02:38 +01:00
parent a4760eab50
commit 1f75fdd2a1

View File

@@ -1,23 +1,13 @@
const fs = require('fs').promises; const fs = require('fs').promises;
const fsConstants = require('fs').constants; const fsConstants = require('fs').constants;
const path = require('path');
const fetch = require('node-fetch'); const fetch = require('node-fetch');
const teams = {};
const reviewers = {};
const authors = {};
const pulls = [];
const PULLS_PER_PAGE = 100;
let page_count = 1;
let last_cursor = "";
const ExitCodes = { const ExitCodes = {
"RequestFailure": 1, "RequestFailure": 1,
"ParseFailure": 2, "ParseFailure": 2,
}; };
const API_REPOSITORY_ID = `owner:"godotengine" name:"godot"`; const PULLS_PER_PAGE = 100;
const API_RATE_LIMIT = ` const API_RATE_LIMIT = `
rateLimit { rateLimit {
limit limit
@@ -36,184 +26,179 @@ const GH_MAGIC_KEYWORDS = [
const GH_MAGIC_RE = RegExp("(" + GH_MAGIC_KEYWORDS.join("|") + ") ([a-z0-9-_]+/[a-z0-9-_]+)?#([0-9]+)", "gi"); const GH_MAGIC_RE = RegExp("(" + GH_MAGIC_KEYWORDS.join("|") + ") ([a-z0-9-_]+/[a-z0-9-_]+)?#([0-9]+)", "gi");
const GH_MAGIC_FULL_RE = RegExp("(" + GH_MAGIC_KEYWORDS.join("|") + ") https://github.com/([a-z0-9-_]+/[a-z0-9-_]+)/issues/([0-9]+)", "gi"); const GH_MAGIC_FULL_RE = RegExp("(" + GH_MAGIC_KEYWORDS.join("|") + ") https://github.com/([a-z0-9-_]+/[a-z0-9-_]+)/issues/([0-9]+)", "gi");
async function fetchGithub(query) { class DataFetcher {
const init = {}; constructor(data_owner, data_repo) {
init.method = "POST"; this.api_repository_id = `owner:"${data_owner}" name:"${data_repo}"`;
init.headers = {};
init.headers["Content-Type"] = "application/json"; this.page_count = 1;
init.headers["Accept"] = "application/vnd.github.merge-info-preview+json"; this.last_cursor = "";
if (process.env.GRAPHQL_TOKEN) {
init.headers["Authorization"] = `token ${process.env.GRAPHQL_TOKEN}`;
} else if (process.env.GITHUB_TOKEN) {
init.headers["Authorization"] = `token ${process.env.GITHUB_TOKEN}`;
} }
init.body = JSON.stringify({ async _logResponse(data, name) {
query,
});
return await fetch("https://api.github.com/graphql", init);
}
async function logResponse(data, name) {
try {
try { try {
await fs.access("logs", fsConstants.R_OK | fsConstants.W_OK); try {
await fs.access("logs", fsConstants.R_OK | fsConstants.W_OK);
} catch (err) {
await fs.mkdir("logs");
}
await fs.writeFile(`logs/${name}.json`, JSON.stringify(data, null, 4), {encoding: "utf-8"});
} catch (err) { } catch (err) {
await fs.mkdir("logs"); console.error("Error saving log file: " + err);
} }
await fs.writeFile(`logs/${name}.json`, JSON.stringify(data, null, 4), {encoding: "utf-8"});
} catch (err) {
console.error("Error saving log file: " + err);
} }
}
_handleResponseErrors(res) {
function handleResponseErrors(res) { console.warn(` Failed to get pull requests for '${this.api_repository_id}'; server responded with ${res.status} ${res.statusText}`);
console.warn(` Failed to get pull requests for '${API_REPOSITORY_ID}'; server responded with ${res.status} ${res.statusText}`); const retry_header = res.headers.get("Retry-After");
const retry_header = res.headers.get("Retry-After"); if (retry_header) {
if (retry_header) { console.log(` Retry after: ${retry_header}`);
console.log(` Retry after: ${retry_header}`);
}
}
/**
 * Prints any errors that came back inside an otherwise successful response.
 *
 * Nothing is printed when the payload has no "errors" property. Otherwise a
 * single warning header is written, followed by one log line per error entry.
 *
 * @param {object} data - Parsed response payload.
 */
function handleDataErrors(data) {
  const errorsMissing = typeof data["errors"] === "undefined";
  if (!errorsMissing) {
    console.warn(` Server handled the request, but there were errors:`);

    // Each entry is reported by its type and message.
    data.errors.forEach(({ type, message }) => {
      console.log(` [${type}] ${message}`);
    });
  }
}
/**
 * Unwraps a connection-style object into a flat list of its nodes.
 *
 * @param {{edges: Array<{node: *}>}} object - Object holding an `edges` array.
 * @returns {Array<*>} The `node` value of every edge, in the original order.
 */
function mapNodes(object) {
  const { edges } = object;
  return edges.map((edge) => edge.node);
}
/**
 * Turns a team name into a slug.
 *
 * The name is lower-cased, every run of characters other than digits, ASCII
 * letters and '_' collapses into a single '-', and trailing '-' characters are
 * removed. A leading '-' is kept.
 *
 * @param {string} name - Team name.
 * @returns {string} The slug.
 */
function sluggifyTeam(name) {
  const lowered = name.toLowerCase();
  // Collapse runs of disallowed characters into '-'; '_' is allowed through.
  const dashed = lowered.replace(/[^0-9a-z_]+/g, "-");
  // Drop trailing '-' characters only.
  return dashed.replace(/[-]+$/, "");
}
async function checkRates() {
try {
const query = `
query {
${API_RATE_LIMIT}
} }
`; }
_handleDataErrors(data) {
if (typeof data["errors"] === "undefined") {
return;
}
console.warn(` Server handled the request, but there were errors:`);
data.errors.forEach((item) => {
console.log(` [${item.type}] ${item.message}`);
});
}
const res = await fetchGithub(query); async fetchGithub(query) {
if (res.status !== 200) { const init = {};
handleResponseErrors(res); init.method = "POST";
init.headers = {};
init.headers["Content-Type"] = "application/json";
init.headers["Accept"] = "application/vnd.github.merge-info-preview+json";
if (process.env.GRAPHQL_TOKEN) {
init.headers["Authorization"] = `token ${process.env.GRAPHQL_TOKEN}`;
} else if (process.env.GITHUB_TOKEN) {
init.headers["Authorization"] = `token ${process.env.GITHUB_TOKEN}`;
}
init.body = JSON.stringify({
query,
});
return await fetch("https://api.github.com/graphql", init);
}
async checkRates() {
try {
const query = `
query {
${API_RATE_LIMIT}
}
`;
const res = await this.fetchGithub(query);
if (res.status !== 200) {
this._handleResponseErrors(res);
process.exitCode = ExitCodes.RequestFailure;
return;
}
const data = await res.json();
await this._logResponse(data, "_rate_limit");
this._handleDataErrors(data);
const rate_limit = data.data["rateLimit"];
console.log(` [$${rate_limit.cost}] Available API calls: ${rate_limit.remaining}/${rate_limit.limit}; resets at ${rate_limit.resetAt}`);
} catch (err) {
console.error(" Error checking the API rate limits: " + err);
process.exitCode = ExitCodes.RequestFailure; process.exitCode = ExitCodes.RequestFailure;
return; return;
} }
const data = await res.json();
await logResponse(data, "_rate_limit");
handleDataErrors(data);
const rate_limit = data.data["rateLimit"];
console.log(` [$${rate_limit.cost}] Available API calls: ${rate_limit.remaining}/${rate_limit.limit}; resets at ${rate_limit.resetAt}`);
} catch (err) {
console.error(" Error checking the API rate limits: " + err);
process.exitCode = ExitCodes.RequestFailure;
return;
} }
}
async fetchPulls(page) {
async function fetchPulls(page) { try {
try { let after_cursor = "";
let after_cursor = ""; let after_text = "initial";
let after_text = "initial"; if (this.last_cursor !== "") {
if (last_cursor !== "") { after_cursor = `after: "${this.last_cursor}"`;
after_cursor = `after: "${last_cursor}"`; after_text = after_cursor;
after_text = after_cursor; }
}
// FIXME: mergeStateStatus for pullRequests is temporarily disabled as it seems to cause
// FIXME: mergeStateStatus for pullRequests is temporarily disabled as it seems to cause // a lot of 500 errors.
// a lot of 500 errors. const query = `
const query = ` query {
query { ${API_RATE_LIMIT}
${API_RATE_LIMIT} repository(${this.api_repository_id}) {
repository(${API_REPOSITORY_ID}) { pullRequests(first:${PULLS_PER_PAGE} ${after_cursor} states: OPEN) {
pullRequests(first:${PULLS_PER_PAGE} ${after_cursor} states: OPEN) { totalCount
totalCount pageInfo {
pageInfo { endCursor
endCursor hasNextPage
hasNextPage
}
edges {
node {
id
number
url
title
state
isDraft
mergeable
createdAt
updatedAt
bodyText
baseRef {
name
} }
edges {
author { node {
login
avatarUrl
url
... on User {
id id
} number
} url
title
milestone { state
id isDraft
title mergeable
url
} createdAt
updatedAt
labels (first: 100) {
edges { bodyText
node {
id baseRef {
name name
color
} }
}
} author {
login
reviewRequests(first: 100) { avatarUrl
edges { url
node {
id
requestedReviewer {
__typename
... on Team { ... on User {
id
}
}
milestone {
id
title
url
}
labels (first: 100) {
edges {
node {
id id
name name
avatarUrl color
} }
}
... on User { }
reviewRequests(first: 100) {
edges {
node {
id id
login requestedReviewer {
avatarUrl __typename
... on Team {
id
name
avatarUrl
}
... on User {
id
login
avatarUrl
}
}
} }
} }
} }
@@ -222,269 +207,307 @@ async function fetchPulls(page) {
} }
} }
} }
} `;
}
`; let page_text = page;
if (this.page_count > 1) {
let page_text = page; page_text = `${page}/${this.page_count}`;
if (page_count > 1) { }
page_text = `${page}/${page_count}`; console.log(` Requesting page ${page_text} of pull request data (${after_text}).`);
}
console.log(` Requesting page ${page_text} of pull request data (${after_text}).`); const res = await this.fetchGithub(query);
if (res.status !== 200) {
const res = await fetchGithub(query); this._handleResponseErrors(res);
if (res.status !== 200) { process.exitCode = ExitCodes.RequestFailure;
handleResponseErrors(res); return [];
}
const data = await res.json();
await this._logResponse(data, `data_page_${page}`);
this._handleDataErrors(data);
const rate_limit = data.data["rateLimit"];
const repository = data.data["repository"];
const pulls_data = mapNodes(repository.pullRequests);
console.log(` [$${rate_limit.cost}] Retrieved ${pulls_data.length} pull requests; processing...`);
this.last_cursor = repository.pullRequests.pageInfo.endCursor;
this.page_count = Math.ceil(repository.pullRequests.totalCount / PULLS_PER_PAGE);
return pulls_data;
} catch (err) {
console.error(" Error fetching pull request data: " + err);
process.exitCode = ExitCodes.RequestFailure; process.exitCode = ExitCodes.RequestFailure;
return []; return [];
} }
const data = await res.json();
await logResponse(data, `data_page_${page}`);
handleDataErrors(data);
const rate_limit = data.data["rateLimit"];
const repository = data.data["repository"];
const pulls_data = mapNodes(repository.pullRequests);
console.log(` [$${rate_limit.cost}] Retrieved ${pulls_data.length} pull requests; processing...`);
last_cursor = repository.pullRequests.pageInfo.endCursor;
page_count = Math.ceil(repository.pullRequests.totalCount / PULLS_PER_PAGE);
return pulls_data;
} catch (err) {
console.error(" Error fetching pull request data: " + err);
process.exitCode = ExitCodes.RequestFailure;
return [];
} }
} }
function processPulls(pullsRaw) { class DataProcessor {
try { constructor() {
pullsRaw.forEach((item) => { this.teams = {};
// Compile basic information about a PR. this.reviewers = {};
let pr = { this.authors = {};
"id": item.id, this.pulls = [];
"public_id": item.number, }
"url": item.url,
"diff_url": `${item.url}.diff`,
"patch_url": `${item.url}.patch`,
"title": item.title, _sluggifyTeam(name) {
"state": item.state, let slug = name
"is_draft": item.isDraft, .toLowerCase()
"authored_by": null, // Replace runs of non-alphanumerical characters with '-'; '_' is also allowed.
"created_at": item.createdAt, .replace(/[^0-9a-z_]+/g, "-")
"updated_at": item.updatedAt, // Trim trailing '-' characters.
.replace(/[-]+$/, "");
"target_branch": item.baseRef.name, return slug;
}
"mergeable_state": item.mergeable, _extractLinkedIssues(pullBody) {
"mergeable_reason": 'UNKNOWN', //item.mergeStateStatus, const links = [];
"labels": [], if (!pullBody) {
"milestone": null, return links;
"links": [], }
"teams": [], const matches = [
"reviewers": [], ...pullBody.matchAll(GH_MAGIC_RE),
}; ...pullBody.matchAll(GH_MAGIC_FULL_RE)
];
// Compose and link author information.
const author = { matches.forEach((item) => {
"id": "", let repository = item[2];
"user": "ghost", if (!repository) {
"avatar": "https://avatars.githubusercontent.com/u/10137?v=4", repository = "godotengine/godot";
"url": "https://github.com/ghost",
"pull_count": 0,
};
if (item.author != null) {
author["id"] = item.author.id;
author["user"] = item.author.login;
author["avatar"] = item.author.avatarUrl;
author["url"] = item.author.url;
} }
pr.authored_by = author.id;
const issue_number = item[3];
// Store the author if they haven't been stored. const issue_url = `https://github.com/${repository}/issues/${issue_number}`;
if (typeof authors[author.id] === "undefined") {
authors[author.id] = author; const exists = links.find((item) => {
return item.url === issue_url
});
if (exists) {
return;
} }
authors[author.id].pull_count++;
let keyword = item[1].toLowerCase();
if (keyword.startsWith("clo")) {
keyword = "closes";
} else if (keyword.startsWith("fix")) {
keyword = "fixes";
} else if (keyword.startsWith("reso")) {
keyword = "resolves";
}
links.push({
"full_match": item[0],
"keyword": keyword,
"repo": repository,
"issue": issue_number,
"url": issue_url,
});
});
return links;
}
// Add the milestone, if available. processPulls(pullsRaw) {
if (item.milestone) { try {
pr.milestone = { pullsRaw.forEach((item) => {
"id": item.milestone.id, // Compile basic information about a PR.
"title": item.milestone.title, let pr = {
"url": item.milestone.url, "id": item.id,
"public_id": item.number,
"url": item.url,
"diff_url": `${item.url}.diff`,
"patch_url": `${item.url}.patch`,
"title": item.title,
"state": item.state,
"is_draft": item.isDraft,
"authored_by": null,
"created_at": item.createdAt,
"updated_at": item.updatedAt,
"target_branch": item.baseRef.name,
"mergeable_state": item.mergeable,
"mergeable_reason": 'UNKNOWN', //item.mergeStateStatus,
"labels": [],
"milestone": null,
"links": [],
"teams": [],
"reviewers": [],
}; };
}
// Add labels, if available. // Compose and link author information.
let labels = mapNodes(item.labels); const author = {
labels.forEach((labelItem) => { "id": "",
pr.labels.push({ "user": "ghost",
"id": labelItem.id, "avatar": "https://avatars.githubusercontent.com/u/10137?v=4",
"name": labelItem.name, "url": "https://github.com/ghost",
"color": "#" + labelItem.color "pull_count": 0,
};
if (item.author != null) {
author["id"] = item.author.id;
author["user"] = item.author.login;
author["avatar"] = item.author.avatarUrl;
author["url"] = item.author.url;
}
pr.authored_by = author.id;
// Store the author if they haven't been stored.
if (typeof this.authors[author.id] === "undefined") {
this.authors[author.id] = author;
}
this.authors[author.id].pull_count++;
// Add the milestone, if available.
if (item.milestone) {
pr.milestone = {
"id": item.milestone.id,
"title": item.milestone.title,
"url": item.milestone.url,
};
}
// Add labels, if available.
let labels = mapNodes(item.labels);
labels.forEach((labelItem) => {
pr.labels.push({
"id": labelItem.id,
"name": labelItem.name,
"color": "#" + labelItem.color
});
});
pr.labels.sort((a, b) => {
if (a.name > b.name) return 1;
if (a.name < b.name) return -1;
return 0;
}); });
});
pr.labels.sort((a, b) => {
if (a.name > b.name) return 1;
if (a.name < b.name) return -1;
return 0;
});
// Look for linked issues in the body. // Look for linked issues in the body.
pr.links = extractLinkedIssues(item.body); pr.links = this._extractLinkedIssues(item.body);
// Extract requested reviewers. // Extract requested reviewers.
let review_requests = mapNodes(item.reviewRequests).map(it => it.requestedReviewer); let review_requests = mapNodes(item.reviewRequests).map(it => it.requestedReviewer);
// Add teams, if available. // Add teams, if available.
let requested_teams = review_requests.filter(it => it && it["__typename"] === "Team"); let requested_teams = review_requests.filter(it => it && it["__typename"] === "Team");
if (requested_teams.length > 0) { if (requested_teams.length > 0) {
requested_teams.forEach((teamItem) => { requested_teams.forEach((teamItem) => {
const team = {
"id": teamItem.id,
"name": teamItem.name,
"avatar": teamItem.avatarUrl,
"slug": this._sluggifyTeam(teamItem.name),
"pull_count": 0,
};
// Store the team if it hasn't been stored before.
if (typeof this.teams[team.id] == "undefined") {
this.teams[team.id] = team;
}
this.teams[team.id].pull_count++;
// Reference the team.
pr.teams.push(team.id);
});
} else {
// If there are no teams, use a fake "empty" team to track those PRs as well.
const team = { const team = {
"id": teamItem.id, "id": "",
"name": teamItem.name, "name": "No team assigned",
"avatar": teamItem.avatarUrl, "avatar": "",
"slug": sluggifyTeam(teamItem.name), "slug": "_",
"pull_count": 0, "pull_count": 0,
}; };
// Store the team if it hasn't been stored before. // Store the team if it hasn't been stored before.
if (typeof teams[team.id] == "undefined") { if (typeof this.teams[team.id] === "undefined") {
teams[team.id] = team; this.teams[team.id] = team;
} }
teams[team.id].pull_count++; this.teams[team.id].pull_count++;
// Reference the team. // Reference the team.
pr.teams.push(team.id); pr.teams.push(team.id);
});
} else {
// If there are no teams, use a fake "empty" team to track those PRs as well.
const team = {
"id": "",
"name": "No team assigned",
"avatar": "",
"slug": "_",
"pull_count": 0,
};
// Store the team if it hasn't been stored before.
if (typeof teams[team.id] === "undefined") {
teams[team.id] = team;
} }
teams[team.id].pull_count++;
// Reference the team. // Add individual reviewers, if available
pr.teams.push(team.id); let requested_reviewers = review_requests.filter(it => it && it["__typename"] === "User");
} if (requested_reviewers.length > 0) {
requested_reviewers.forEach((reviewerItem) => {
const reviewer = {
"id": reviewerItem.id,
"name": reviewerItem.login,
"avatar": reviewerItem.avatarUrl,
"slug": reviewerItem.login,
"pull_count": 0,
};
// Add individual reviewers, if available // Store the reviewer if it hasn't been stored before.
let requested_reviewers = review_requests.filter(it => it && it["__typename"] === "User"); if (typeof this.reviewers[reviewer.id] == "undefined") {
if (requested_reviewers.length > 0) { this.reviewers[reviewer.id] = reviewer;
requested_reviewers.forEach((reviewerItem) => { }
const reviewer = { this.reviewers[reviewer.id].pull_count++;
"id": reviewerItem.id,
"name": reviewerItem.login,
"avatar": reviewerItem.avatarUrl,
"slug": reviewerItem.login,
"pull_count": 0,
};
// Store the reviewer if it hasn't been stored before. // Reference the reviewer.
if (typeof reviewers[reviewer.id] == "undefined") { pr.reviewers.push(reviewer.id);
reviewers[reviewer.id] = reviewer; });
} }
reviewers[reviewer.id].pull_count++;
// Reference the reviewer. this.pulls.push(pr);
pr.reviewers.push(reviewer.id); });
}); } catch (err) {
} console.error(" Error parsing pull request data: " + err);
process.exitCode = ExitCodes.ParseFailure;
pulls.push(pr); }
});
} catch (err) {
console.error(" Error parsing pull request data: " + err);
process.exitCode = ExitCodes.ParseFailure;
} }
} }
function extractLinkedIssues(pullBody) { function mapNodes(object) {
const links = []; return object.edges.map((item) => item["node"])
if (!pullBody) {
return links;
}
const matches = [
...pullBody.matchAll(GH_MAGIC_RE),
...pullBody.matchAll(GH_MAGIC_FULL_RE)
];
matches.forEach((item) => {
let repository = item[2];
if (!repository) {
repository = "godotengine/godot";
}
const issue_number = item[3];
const issue_url = `https://github.com/${repository}/issues/${issue_number}`;
const exists = links.find((item) => {
return item.url === issue_url
});
if (exists) {
return;
}
let keyword = item[1].toLowerCase();
if (keyword.startsWith("clo")) {
keyword = "closes";
} else if (keyword.startsWith("fix")) {
keyword = "fixes";
} else if (keyword.startsWith("reso")) {
keyword = "resolves";
}
links.push({
"full_match": item[0],
"keyword": keyword,
"repo": repository,
"issue": issue_number,
"url": issue_url,
});
});
return links;
}
/**
 * Ends the process at once when a positive exit code has already been set.
 *
 * Does nothing while `process.exitCode` is unset or not greater than zero.
 */
function checkForExit() {
  const hasFailed = process.exitCode > 0;
  if (hasFailed) {
    process.exit();
  }
}
async function delay(msec) {
return new Promise(resolve => setTimeout(resolve, msec));
} }
async function main() { async function main() {
// Internal utility methods.
// Ends the process at once when a positive exit code has already been set;
// does nothing otherwise.
const checkForExit = () => {
  const hasFailed = process.exitCode > 0;
  if (hasFailed) {
    process.exit();
  }
};
// Returns a promise that fulfills (with no value) once `msec` milliseconds
// have elapsed on a timer.
const delay = async (msec) => new Promise((resolve) => {
  setTimeout(resolve, msec);
});
console.log("[*] Building local pull request database."); console.log("[*] Building local pull request database.");
let data_owner = "godotengine";
let data_repo = "godot";
process.argv.forEach((arg) => {
if (arg.indexOf("owner:") === 0) {
data_owner = arg.substring(6);
}
if (arg.indexOf("repo:") === 0) {
data_repo = arg.substring(5);
}
});
console.log(`[*] Configured for the "${data_owner}/${data_repo}" repository.`);
const dataFetcher = new DataFetcher(data_owner, data_repo);
const dataProcessor = new DataProcessor();
console.log("[*] Checking the rate limits before.") console.log("[*] Checking the rate limits before.")
await checkRates(); await dataFetcher.checkRates();
checkForExit(); checkForExit();
console.log("[*] Fetching pull request data from GitHub."); console.log("[*] Fetching pull request data from GitHub.");
// Pages are starting with 1 for better presentation. // Pages are starting with 1 for better presentation.
let page = 1; let page = 1;
while (page <= page_count) { while (page <= dataFetcher.page_count) {
const pullsRaw = await fetchPulls(page); const pullsRaw = await dataFetcher.fetchPulls(page);
processPulls(pullsRaw); dataProcessor.processPulls(pullsRaw);
checkForExit(); checkForExit();
page++; page++;
@@ -494,20 +517,22 @@ async function main() {
} }
console.log("[*] Checking the rate limits after.") console.log("[*] Checking the rate limits after.")
await checkRates(); await dataFetcher.checkRates();
checkForExit(); checkForExit();
console.log("[*] Finalizing database.") console.log("[*] Finalizing database.")
const output = { const output = {
"generated_at": Date.now(), "generated_at": Date.now(),
"teams": teams, "teams": dataProcessor.teams,
"reviewers": reviewers, "reviewers": dataProcessor.reviewers,
"authors": authors, "authors": dataProcessor.authors,
"pulls": pulls, "pulls": dataProcessor.pulls,
}; };
try { try {
console.log("[*] Storing database to file.") console.log("[*] Storing database to file.");
// NOTE: The repository owner and name are not respected here; the file will be overwritten.
await fs.writeFile("out/data.json", JSON.stringify(output), {encoding: "utf-8"}); await fs.writeFile("out/data.json", JSON.stringify(output), {encoding: "utf-8"});
console.log("[*] Database built.");
} catch (err) { } catch (err) {
console.error("Error saving database file: " + err); console.error("Error saving database file: " + err);
} }