const fs = require('fs').promises;
const fsConstants = require('fs').constants;
const fetch = require('node-fetch');

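// Exit codes reported via process.exitCode, tunables for talking to the
// GitHub API (page size, delay between calls, retry budget), and a GraphQL
// fragment that requests the current rate limit state alongside every query.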
const ExitCodes = {
    "RequestFailure": 1,
    "ParseFailure": 2,
};

const PULLS_PER_PAGE = 100;
const API_DELAY_MSEC = 2500;
const API_MAX_RETRIES = 10;

const API_RATE_LIMIT = `
  rateLimit {
    limit
    cost
    remaining
    resetAt
  }
`;

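// Fetches pull request and file tree data for one repository, using the
// GitHub GraphQL API for pull requests and the REST API for the file tree.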
class DataFetcher {
    constructor(data_owner, data_repo) {
        this.api_rest_path = `https://api.github.com/repos/${data_owner}/${data_repo}`;
        this.api_repository_id = `owner:"${data_owner}" name:"${data_repo}"`;

        this.page_count = 1;
        this.last_cursor = "";
    }

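    // Writes the raw API response to logs/<name>.json for debugging, creating
    // the logs folder on first use.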
    async _logResponse(data, name) {
        try {
            try {
                await fs.access("logs", fsConstants.R_OK | fsConstants.W_OK);
            } catch (err) {
                await fs.mkdir("logs");
            }

            await fs.writeFile(`logs/${name}.json`, JSON.stringify(data, null, 4), {encoding: "utf-8"});
        } catch (err) {
            console.error("Error saving log file: " + err);
        }
    }

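    // Reports a non-200 response, including the Retry-After header when the
    // server provides one.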
    _handleResponseErrors(queryID, res) {
        console.warn(`    Failed to get data from '${queryID}'; server responded with ${res.status} ${res.statusText}`);
        const retry_header = res.headers.get("Retry-After");
        if (retry_header) {
            console.log(`    Retry after: ${retry_header}`);
        }
    }

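    // Surfaces GraphQL-level errors that come back in an otherwise successful
    // (HTTP 200) response.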
    _handleDataErrors(data) {
        if (typeof data["errors"] === "undefined") {
            return;
        }

        console.warn(`    Server handled the request, but there were errors:`);
        data.errors.forEach((item) => {
            console.log(`    [${item.type}] ${item.message}`);
        });
    }

    async delay(msec) {
        return new Promise(resolve => setTimeout(resolve, msec));
    }

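    // Sends a GraphQL query, authenticating with GRAPHQL_TOKEN or GITHUB_TOKEN
    // from the environment, and retries up to `retries` times on a failed
    // request or an unparsable response body.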
    async fetchGithub(query, retries = 0) {
        const init = {};
        init.method = "POST";
        init.headers = {};
        init.headers["Content-Type"] = "application/json";
        if (process.env.GRAPHQL_TOKEN) {
            init.headers["Authorization"] = `token ${process.env.GRAPHQL_TOKEN}`;
        } else if (process.env.GITHUB_TOKEN) {
            init.headers["Authorization"] = `token ${process.env.GITHUB_TOKEN}`;
        } else {
            console.error("    Unable to find environment variable: `GRAPHQL_TOKEN` or `GITHUB_TOKEN`. Did you forget to set it in your local environment or a root `.env` file?");
            process.exitCode = ExitCodes.ParseFailure;
            return [null, null];
        }

        init.body = JSON.stringify({
            query,
        });

        let res = await fetch("https://api.github.com/graphql", init);
        let attempt = 0;

        while (true) {
            if (res.status === 200) {
                try {
                    const json = await res.json();
                    return [res, json];
                }
                catch (err) {
                    console.log(`    Failed due to invalid response body.`);
                }
            }
            else {
                console.log(`    Failed with status ${res.status}.`);
            }

            if (attempt >= retries) {
                return [res, null];
            }
            attempt += 1;
            console.log(`    Retrying (${attempt}/${retries})...`);

            // GitHub API is flaky, so we add an extra delay to let it calm down a bit.
            await this.delay(API_DELAY_MSEC);
            res = await fetch("https://api.github.com/graphql", init);
        }
    }

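    // Sends a GET request to the repository's REST API endpoint, using the
    // same token-based authentication as the GraphQL path.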
    async fetchGithubRest(query) {
        const init = {};
        init.method = "GET";
        init.headers = {};
        init.headers["Content-Type"] = "application/json";
        if (process.env.GRAPHQL_TOKEN) {
            init.headers["Authorization"] = `token ${process.env.GRAPHQL_TOKEN}`;
        } else if (process.env.GITHUB_TOKEN) {
            init.headers["Authorization"] = `token ${process.env.GITHUB_TOKEN}`;
        } else {
            console.error("    Unable to find environment variable: `GRAPHQL_TOKEN` or `GITHUB_TOKEN`. Did you forget to set it in your local environment or a root `.env` file?");
            process.exitCode = ExitCodes.ParseFailure;
            return null;
        }

        return await fetch(`${this.api_rest_path}${query}`, init);
    }

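    // Queries the current API rate limit state and prints how many calls
    // remain and when the quota resets.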
    async checkRates() {
        try {
            const query = `
            query {
                ${API_RATE_LIMIT}
            }
            `;

            const [res, data] = await this.fetchGithub(query);
            if (res.status !== 200 || data == null) {
                this._handleResponseErrors(this.api_repository_id, res);
                process.exitCode = ExitCodes.RequestFailure;
                return;
            }

            await this._logResponse(data, "_rate_limit");
            this._handleDataErrors(data);

            const rate_limit = data.data["rateLimit"];
            console.log(`    [$${rate_limit.cost}] Available API calls: ${rate_limit.remaining}/${rate_limit.limit}; resets at ${rate_limit.resetAt}`);
        } catch (err) {
            console.error("    Error checking the API rate limits: " + err);
            process.exitCode = ExitCodes.RequestFailure;
            return;
        }
    }

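    // Fetches one page of open pull requests via GraphQL, paginating with the
    // cursor left by the previous call. Note that only the first 100 changed
    // files of each pull request are requested.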
    async fetchPulls(page) {
        try {
            let after_cursor = "";
            let after_text = "initial";
            if (this.last_cursor !== "") {
                after_cursor = `after: "${this.last_cursor}"`;
                after_text = after_cursor;
            }

            const query = `
            query {
                ${API_RATE_LIMIT}

                repository(${this.api_repository_id}) {
                    pullRequests(first: ${PULLS_PER_PAGE} ${after_cursor} states: OPEN) {
                        totalCount
                        pageInfo {
                            endCursor
                            hasNextPage
                        }
                        edges {
                            node {
                                id
                                number
                                url
                                title
                                state
                                isDraft

                                createdAt
                                updatedAt

                                baseRef {
                                    name
                                }

                                author {
                                    login
                                    avatarUrl
                                    url

                                    ... on User {
                                        id
                                    }
                                }

                                milestone {
                                    id
                                    title
                                    url
                                }

                                files (first: 100) {
                                    edges {
                                        node {
                                            path
                                            changeType
                                        }
                                    }
                                }
                            }
                        }
                    }
                }
            }
            `;

            let page_text = page;
            if (this.page_count > 1) {
                page_text = `${page}/${this.page_count}`;
            }
            console.log(`    Requesting page ${page_text} of pull request data (${after_text}).`);

            const [res, data] = await this.fetchGithub(query, API_MAX_RETRIES);
            if (res.status !== 200 || data == null) {
                this._handleResponseErrors(this.api_repository_id, res);
                process.exitCode = ExitCodes.RequestFailure;
                return [];
            }

            await this._logResponse(data, `data_page_${page}`);
            this._handleDataErrors(data);

            const rate_limit = data.data["rateLimit"];
            const repository = data.data["repository"];
            const pulls_data = mapNodes(repository.pullRequests);

            console.log(`    [$${rate_limit.cost}] Retrieved ${pulls_data.length} pull requests; processing...`);

            this.last_cursor = repository.pullRequests.pageInfo.endCursor;
            this.page_count = Math.ceil(repository.pullRequests.totalCount / PULLS_PER_PAGE);

            return pulls_data;
        } catch (err) {
            console.error("    Error fetching pull request data: " + err);
            process.exitCode = ExitCodes.RequestFailure;
            return [];
        }
    }

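    // Fetches the full file tree of a branch through the REST API, in a
    // single recursive git trees request.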
    async fetchFiles(branch) {
        try {
            const query = `/git/trees/${branch}?recursive=1`;

            const res = await this.fetchGithubRest(query);
            if (res.status !== 200) {
                this._handleResponseErrors(query, res);
                process.exitCode = ExitCodes.RequestFailure;
                return [];
            }

            const data = await res.json();
            await this._logResponse(data, `data_files_${branch}`);
            this._handleDataErrors(data);

            const files_data = data.tree;

            console.log(`    [$0] Retrieved ${files_data.length} file system entries in '${branch}'; processing...`);

            return files_data;
        } catch (err) {
            console.error("    Error fetching repository file system data: " + err);
            process.exitCode = ExitCodes.RequestFailure;
            return [];
        }
    }
}

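// Transforms the raw API data into the records stored in the output database:
// pull requests, authors, branches, and per-branch file lists.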
class DataProcessor {
    constructor() {
        this.authors = {};
        this.pulls = [];
        this.branches = [];
        this.files = {};

        this._pullsByFile = {};
    }

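    // Maps git tree entry types to human-readable names.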
    _explainFileType(type) {
        switch (type) {
            case "blob":
                return "file";
            case "tree":
                return "folder";
            default:
                return "unknown";
        }
    }

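    // Compiles a record for every pull request, registers its author, target
    // branch, and milestone, and indexes the PR number against every changed
    // file as well as every ancestor folder of that file.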
    processPulls(pullsRaw) {
        try {
            pullsRaw.forEach((item) => {
                // Compile basic information about a PR.
                let pr = {
                    "id": item.id,
                    "public_id": item.number,
                    "url": item.url,
                    "diff_url": `${item.url}.diff`,
                    "patch_url": `${item.url}.patch`,

                    "title": item.title,
                    "state": item.state,
                    "is_draft": item.isDraft,
                    "authored_by": null,
                    "created_at": item.createdAt,
                    "updated_at": item.updatedAt,

                    "target_branch": item.baseRef.name,
                    "milestone": null,

                    "files": [],
                };

                // Store the target branch if it hasn't been stored.
                if (!this.branches.includes(pr.target_branch)) {
                    this.branches.push(pr.target_branch);
                }

                // Compose and link author information.
                const author = {
                    "id": "",
                    "user": "ghost",
                    "avatar": "https://avatars.githubusercontent.com/u/10137?v=4",
                    "url": "https://github.com/ghost",
                    "pull_count": 0,
                };
                if (item.author != null) {
                    author["id"] = item.author.id;
                    author["user"] = item.author.login;
                    author["avatar"] = item.author.avatarUrl;
                    author["url"] = item.author.url;
                }
                pr.authored_by = author.id;

                // Store the author if they haven't been stored.
                if (typeof this.authors[author.id] === "undefined") {
                    this.authors[author.id] = author;
                }
                this.authors[author.id].pull_count++;

                // Add the milestone, if available.
                if (item.milestone) {
                    pr.milestone = {
                        "id": item.milestone.id,
                        "title": item.milestone.title,
                        "url": item.milestone.url,
                    };
                }

                // Add changed files.
                let files = mapNodes(item.files);
                const visitedPaths = [];

                if (typeof this._pullsByFile[pr.target_branch] === "undefined") {
                    this._pullsByFile[pr.target_branch] = {};
                }

                files.forEach((fileItem) => {
                    let currentPath = fileItem.path;
                    while (currentPath !== "") {
                        if (visitedPaths.includes(currentPath)) {
                            // Go one level up.
                            const pathBits = currentPath.split("/");
                            pathBits.pop();
                            currentPath = pathBits.join("/");

                            continue;
                        }
                        visitedPaths.push(currentPath);

                        pr.files.push({
                            "path": currentPath,
                            "changeType": (currentPath === fileItem.path ? fileItem.changeType : ""),
                            "type": (currentPath === fileItem.path ? "file" : "folder"),
                        });

                        // Cache the pull information for every file and folder that it includes.
                        if (typeof this._pullsByFile[pr.target_branch][currentPath] === "undefined") {
                            this._pullsByFile[pr.target_branch][currentPath] = [];
                        }
                        this._pullsByFile[pr.target_branch][currentPath].push(pr.public_id);

                        // Go one level up.
                        const pathBits = currentPath.split("/");
                        pathBits.pop();
                        currentPath = pathBits.join("/");
                    }
                });
                // Sort file records by path (they don't have a name property).
                pr.files.sort((a, b) => {
                    if (a.path > b.path) return 1;
                    if (a.path < b.path) return -1;
                    return 0;
                });

                this.pulls.push(pr);
            });
        } catch (err) {
            console.error("    Error parsing pull request data: " + err);
            process.exitCode = ExitCodes.ParseFailure;
        }
    }

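    // Builds the file list for a branch from the raw git tree, linking each
    // entry to its parent folder and to the PRs indexed by processPulls().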
    processFiles(targetBranch, filesRaw) {
        try {
            this.files[targetBranch] = [];

            filesRaw.forEach((item) => {
                let file = {
                    "type": this._explainFileType(item.type),
                    "name": item.path.split("/").pop(),
                    "path": item.path,
                    "parent": "",
                    "pulls": [],
                };

                // Store the parent path for future reference.
                let parentPath = item.path.split("/");
                parentPath.pop();
                if (parentPath.length > 0) {
                    file.parent = parentPath.join("/");
                }

                // Fetch the PRs touching this file or folder from the cache.
                if (typeof this._pullsByFile[targetBranch] !== "undefined"
                    && typeof this._pullsByFile[targetBranch][file.path] !== "undefined") {

                    this._pullsByFile[targetBranch][file.path].forEach((pullNumber) => {
                        if (!file.pulls.includes(pullNumber)) {
                            file.pulls.push(pullNumber);
                        }
                    });
                }

                this.files[targetBranch].push(file);
            });
        } catch (err) {
            console.error("    Error parsing repository file system: " + err);
            process.exitCode = ExitCodes.ParseFailure;
        }
    }
}

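// Unwraps the nodes from a GraphQL connection's edges array.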
function mapNodes(object) {
    return object.edges.map((item) => item["node"]);
}

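// Entry point: reads the target repository from `owner:` and `repo:` command
// line arguments (defaulting to godotengine/godot), fetches and processes all
// open pull requests and the file tree of every target branch, then writes
// the combined database to the `out` folder.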
async function main() {
    // Internal utility methods.
    const checkForExit = () => {
        if (process.exitCode > 0) {
            process.exit();
        }
    };
    const delay = async (msec) => {
        return new Promise(resolve => setTimeout(resolve, msec));
    };

    console.log("[*] Building local pull request database.");

    let data_owner = "godotengine";
    let data_repo = "godot";
    process.argv.forEach((arg) => {
        if (arg.indexOf("owner:") === 0) {
            data_owner = arg.substring(6);
        }
        if (arg.indexOf("repo:") === 0) {
            data_repo = arg.substring(5);
        }
    });

    console.log(`[*] Configured for the "${data_owner}/${data_repo}" repository.`);
    const dataFetcher = new DataFetcher(data_owner, data_repo);
    const dataProcessor = new DataProcessor();

    console.log("[*] Checking the rate limits before.");
    await dataFetcher.checkRates();
    checkForExit();

    console.log("[*] Fetching pull request data from GitHub.");
    // Pages start at 1 for better presentation.
    let page = 1;
    while (page <= dataFetcher.page_count) {
        const pullsRaw = await dataFetcher.fetchPulls(page);
        dataProcessor.processPulls(pullsRaw);
        checkForExit();
        page++;

        // Wait for a bit before proceeding to avoid hitting the secondary rate limit in GitHub API.
        // See https://docs.github.com/en/rest/guides/best-practices-for-integrators#dealing-with-secondary-rate-limits.
        await delay(1500);
    }

    console.log("[*] Fetching repository file system from GitHub.");
    for (let branch of dataProcessor.branches) {
        const filesRaw = await dataFetcher.fetchFiles(branch);
        dataProcessor.processFiles(branch, filesRaw);
        checkForExit();
    }

    console.log("[*] Checking the rate limits after.");
    await dataFetcher.checkRates();
    checkForExit();

    console.log("[*] Finalizing database.");
    const output = {
        "generated_at": Date.now(),
        "authors": dataProcessor.authors,
        "pulls": dataProcessor.pulls,
        "branches": dataProcessor.branches,
        "files": dataProcessor.files,
    };

    try {
        console.log("[*] Storing database to file.");

        // Make sure the output folder exists, mirroring the logs folder handling.
        try {
            await fs.access("out", fsConstants.R_OK | fsConstants.W_OK);
        } catch (err) {
            await fs.mkdir("out");
        }

        await fs.writeFile(`out/${data_owner}.${data_repo}.data.json`, JSON.stringify(output), {encoding: "utf-8"});
        console.log("[*] Database built.");
    } catch (err) {
        console.error("Error saving database file: " + err);
    }
}

main();
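
// Usage sketch (the script filename is an assumption; adjust it to the actual
// file name in this repository):
//
//   GITHUB_TOKEN=<personal access token> node build.js owner:godotengine repo:godot
//
// The resulting database is written to out/<owner>.<repo>.data.json.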