mirror of
https://github.com/godotengine/godot-interactive-changelog.git
synced 2025-12-31 01:49:28 +03:00
const fs = require('fs').promises;
const fsConstants = require('fs').constants;
const nodeUtil = require('util');
const fetch = require('node-fetch');
const exec = nodeUtil.promisify(require('child_process').exec);

const ExitCodes = {
    "RequestFailure": 1,
    "ParseFailure": 2,
    "ExecFailure": 3,
    "IOFailure": 4,
};

const LogFormat = {
    "Raw": 0,
    "JSON": 1,
};

const COMMITS_PER_PAGE = 150;
const API_DELAY_MSEC = 2500;
const API_MAX_RETRIES = 5;
const API_RATE_LIMIT = `
    rateLimit {
        limit
        cost
        nodeCount
        remaining
        resetAt
    }
`;

const EXEC_MAX_BUFFER = 1024 * 1024 * 32;

const GIT_HEAD_COMMIT_RE = RegExp("^commit ([a-zA-Z0-9-_]+)$");
const GIT_HEAD_MERGE_RE = RegExp("^Merge: (.+) (.+)$");
const GIT_HEAD_AUTHOR_RE = RegExp("^Author: (.+)$");
const GIT_HEAD_COMMITTER_RE = RegExp("^Commit: (.+)$");
const GIT_BODY_LINE_RE = RegExp("^[\\s]{2,}(.*)$");
const GIT_BODY_CHERRYPICK_RE = RegExp("^[\\s]{2,}\\(cherry picked from commit ([a-zA-Z0-9-_]+)\\)$");
const COMMIT_CHERRYPICK_RE = RegExp("^\\(cherry picked from commit ([a-zA-Z0-9-_]+)\\)$", "gm");
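// Both cherry-pick patterns above target a trailer line of this shape (the
// hash here is hypothetical):
//
//     (cherry picked from commit 0123456789abcdef0123456789abcdef01234567)
//
// GIT_BODY_CHERRYPICK_RE finds it while the line is still indented in the
// `git log` output; COMMIT_CHERRYPICK_RE strips it from the reconstructed,
// de-indented message body.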
class DataFetcher {
    constructor(data_owner, data_repo) {
        this.data_owner = data_owner;
        this.data_repo = data_repo;

        this.repo_ssh_path = `git@github.com:${data_owner}/${data_repo}.git`;
        this.api_rest_path = `https://api.github.com/repos/${data_owner}/${data_repo}`;
        this.api_repository_id = `owner:"${data_owner}" name:"${data_repo}"`;
    }

    async _logResponse(data, name, format = LogFormat.JSON) {
        try {
            await ensureDir("./logs");

            let filename = `./logs/${name}`;
            let fileContent = "" + data;

            if (format === LogFormat.JSON) {
                filename = `./logs/${name}.json`;
                fileContent = JSON.stringify(data, null, 4);
            }

            await fs.writeFile(filename, fileContent, {encoding: "utf-8"});
        } catch (err) {
            console.error(" Error saving log file: " + err);
        }
    }

    _handleResponseErrors(queryID, res) {
        console.warn(` Failed to get data from '${queryID}'; server responded with ${res.status} ${res.statusText}`);
        const retry_header = res.headers.get("Retry-After");
        if (retry_header) {
            console.log(` Retry after: ${retry_header}`);
        }
    }

    _handleDataErrors(data) {
        if (typeof data["errors"] === "undefined") {
            return;
        }

        console.warn(` Server handled the request, but there were errors:`);
        data.errors.forEach((item) => {
            console.log(` [${item.type}] ${item.message}`);
        });
    }

    async delay(msec) {
        return new Promise(resolve => setTimeout(resolve, msec));
    }

    async checkoutRepo(fromTag, atCommit) {
        try {
            // Make sure that the temp folder exists and is empty.
            await ensureDir("./temp");
            await clearDir("./temp");

            // Check out a partial clone of the repository; we are only interested in its history.
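            // Note: `--filter=tree:0` below makes this a treeless partial clone.
            // Historical trees and blobs are fetched lazily, so we get the full
            // commit history without downloading the full file history.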
            await exec(`git clone --filter=tree:0 --branch ${fromTag} --single-branch ${this.repo_ssh_path}`, { cwd: "./temp", maxBuffer: EXEC_MAX_BUFFER });
            if (fromTag !== atCommit) {
                await exec(`git reset --hard ${atCommit}`, { cwd: `./temp/${this.data_repo}`, maxBuffer: EXEC_MAX_BUFFER });
            }
        } catch (err) {
            console.error(" Error checking out a copy of the target repository: " + err);
            process.exitCode = ExitCodes.ExecFailure;
            return;
        }
    }

    async countCommitHistory(fromCommit, toCommit) {
        try {
            const { stdout, stderr } = await exec(`git log --pretty=oneline ${fromCommit}..${toCommit}`, { cwd: `./temp/${this.data_repo}`, maxBuffer: EXEC_MAX_BUFFER });

            const commitHistory = stdout.trimEnd();
            await this._logResponse(commitHistory, "_commit_shortlog", LogFormat.Raw);
            return commitHistory.split("\n").length;
        } catch (err) {
            console.error(" Error extracting the commit history: " + err);
            process.exitCode = ExitCodes.ExecFailure;
            return 0;
        }
    }

    async getCommitHistory(fromCommit, toCommit) {
        try {
            const { stdout, stderr } = await exec(`git log --pretty=full ${fromCommit}..${toCommit}`, { cwd: `./temp/${this.data_repo}`, maxBuffer: EXEC_MAX_BUFFER });

            const commitHistory = stdout;
            await this._logResponse(commitHistory, "_commit_history", LogFormat.Raw);
            return commitHistory;
        } catch (err) {
            console.error(" Error extracting the commit history: " + err);
            process.exitCode = ExitCodes.ExecFailure;
            return "";
        }
    }

    async fetchGithub(query, retries = 0) {
        const init = {};
        init.method = "POST";
        init.headers = {};
        init.headers["Content-Type"] = "application/json";
        if (process.env.GRAPHQL_TOKEN) {
            init.headers["Authorization"] = `token ${process.env.GRAPHQL_TOKEN}`;
        } else if (process.env.GITHUB_TOKEN) {
            init.headers["Authorization"] = `token ${process.env.GITHUB_TOKEN}`;
        }

        init.body = JSON.stringify({
            query,
        });

        let res = await fetch("https://api.github.com/graphql", init);
        let attempt = 0;
        while (res.status !== 200 && attempt < retries) {
            attempt += 1;
            console.log(` Failed with status ${res.status}, retrying (${attempt}/${retries})...`);

            // GitHub API is flaky, so we add an extra delay to let it calm down a bit.
            await this.delay(API_DELAY_MSEC);
            res = await fetch("https://api.github.com/graphql", init);
        }

        return res;
    }

    async fetchGithubRest(query) {
        const init = {};
        init.method = "GET";
        init.headers = {};
        init.headers["Content-Type"] = "application/json";
        if (process.env.GRAPHQL_TOKEN) {
            init.headers["Authorization"] = `token ${process.env.GRAPHQL_TOKEN}`;
        } else if (process.env.GITHUB_TOKEN) {
            init.headers["Authorization"] = `token ${process.env.GITHUB_TOKEN}`;
        }

        return await fetch(`${this.api_rest_path}${query}`, init);
    }

    async checkRates() {
        try {
            const query = `
            query {
                ${API_RATE_LIMIT}
            }
            `;

            const res = await this.fetchGithub(query);
            if (res.status !== 200) {
                this._handleResponseErrors(this.api_repository_id, res);
                process.exitCode = ExitCodes.RequestFailure;
                return;
            }

            const data = await res.json();
            await this._logResponse(data, "_rate_limit");
            this._handleDataErrors(data);

            const rate_limit = data.data["rateLimit"];
            console.log(` [$${rate_limit.cost}][${rate_limit.nodeCount}] Available API calls: ${rate_limit.remaining}/${rate_limit.limit}; resets at ${rate_limit.resetAt}`);
        } catch (err) {
            console.error(" Error checking the API rate limits: " + err);
            process.exitCode = ExitCodes.RequestFailure;
            return;
        }
    }
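    // Note: the sub-query below is aliased as "commit_<hash>", so a single
    // GraphQL request can bundle many commits; fetchCommits() later strips the
    // "commit_" prefix to recover each hash from the response.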
    _getCommitQuery(commitHash) {
        return `
          commit_${commitHash}: repository (${this.api_repository_id}) {
            object (expression: "${commitHash}") {
              ... on Commit {
                oid
                commitUrl

                messageHeadline
                messageBody

                authors(first: 12) {
                  edges {
                    node {
                      user {
                        login
                        avatarUrl
                        url
                        id
                      }
                    }
                  }
                }

                associatedPullRequests (first: 20) {
                  edges {
                    node {
                      id
                      number
                      url
                      title
                      state
                      isDraft

                      createdAt
                      updatedAt

                      baseRef {
                        name
                        repository {
                          nameWithOwner
                        }
                      }

                      author {
                        login
                        avatarUrl
                        url

                        ... on User {
                          id
                        }
                      }

                      labels (first: 12) {
                        edges {
                          node {
                            id
                            name
                            color
                          }
                        }
                      }
                    }
                  }
                }
              }
            }
          }
        `
    }

    async fetchCommits(commitHashes, page, totalPages) {
        try {
            const query = `
            query {
                ${API_RATE_LIMIT}

                ${commitHashes.map((item) => {
                    return this._getCommitQuery(item) + "\n";
                })}
            }
            `;

            console.log(` Requesting batch ${page}/${totalPages} of commit and pull request data.`);

            const res = await this.fetchGithub(query, API_MAX_RETRIES);
            if (res.status !== 200) {
                this._handleResponseErrors(this.api_repository_id, res);
                process.exitCode = ExitCodes.RequestFailure;
                return {};
            }

            const data = await res.json();
            await this._logResponse(data, "data_commits");
            this._handleDataErrors(data);

            let commit_data = {};
            for (let dataKey in data.data) {
                if (!dataKey.startsWith("commit_")) {
                    continue;
                }
                commit_data[dataKey.substring(7)] = data.data[dataKey].object;
            }

            const rate_limit = data.data["rateLimit"];
            console.log(` [$${rate_limit.cost}][${rate_limit.nodeCount}] Retrieved ${Object.keys(commit_data).length} commits.`);
            console.log(` --`);
            return commit_data;
        } catch (err) {
            console.error(" Error fetching commit and pull request data: " + err);
            process.exitCode = ExitCodes.RequestFailure;
            return {};
        }
    }
}

class DataProcessor {
    constructor() {
        this.log = [];

        this.authors = {};
        this.commits = {};
        this.pulls = {};
    }

    _getCommitObject() {
        return {
            "hash": "",
            "is_merge": false,

            "authored_by": [],
            "author_raw": "",
            "committer_raw": "",

            "summary": "",
            "body": "",

            "is_cherrypick": false,
            "cherrypick_hash": "",

            "pull": "",
        };
    }

    _processAuthor(authorItem) {
        const author = {
            "id": "",
            "user": "ghost",
            "avatar": "https://avatars.githubusercontent.com/u/10137?v=4",
            "url": "https://github.com/ghost",

            "pull_count": 0,
            "commit_count": 0,
        };

        if (authorItem != null) {
            author["id"] = authorItem.id;
            author["user"] = authorItem.login;
            author["avatar"] = authorItem.avatarUrl;
            author["url"] = authorItem.url;
        }

        // Store the author if they haven't been stored already.
        if (typeof this.authors[author.id] === "undefined") {
            this.authors[author.id] = author;
        }

        return author.id;
    }
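    // For a cherry-pick we also synthesize a record for the original commit, so
    // the two can be linked later. The original is assumed to share the
    // cherry-pick's authorship and message, minus the "(cherry picked from ...)"
    // trailer.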
    _processCherrypick(commit) {
        if (!commit.is_cherrypick) {
            return;
        }

        const originalCommit = this._getCommitObject();
        originalCommit.hash = commit.cherrypick_hash;
        originalCommit.author_raw = commit.author_raw;
        originalCommit.committer_raw = commit.author_raw;

        originalCommit.summary = commit.summary;
        originalCommit.body = commit.body.replace(COMMIT_CHERRYPICK_RE, "").trim();

        this.commits[originalCommit.hash] = originalCommit;
    }

    _finishCommit(commit) {
        commit.body = commit.body.trim();

        this.log.push(commit.hash);
        this.commits[commit.hash] = commit;

        if (commit.is_cherrypick) {
            this._processCherrypick(commit);
        }
    }

    processLog(logRaw, logSize) {
        // Parse the log, given in its "full" format. Records are presented in
        // reverse chronological order, line by line, with each record spanning
        // several lines.
        // The general format for each record is as follows:
        //
        //     commit COMMIT_HASH
        //     Author: AUTHOR_NAME <AUTHOR_EMAIL>
        //     Commit: COMMITTER_NAME <COMMITTER_EMAIL>
        //
        //         MESSAGE_HEADER
        //
        //         MESSAGE_BODY_MULTILINE
        //
        // The last line of the body can also be as follows, for cherry-picked commits:
        //
        //         (cherry picked from commit ORIGINAL_COMMIT_HASH)
        //
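        // For instance, one record might look like this (all values hypothetical):
        //
        //     commit 0123456789abcdef0123456789abcdef01234567
        //     Author: Jane Doe <jane@example.com>
        //     Commit: John Committer <john@example.com>
        //
        //         Fix crash when loading an empty scene
        //
        //         (cherry picked from commit fedcba9876543210fedcba9876543210fedcba98)
        //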
        // The most straightforward way to parse this format is to go line by line and check
        // if we reach one of the metadata lines.
        let logLines = logRaw.split("\n");
        let commit = null;

        while (logLines.length > 0) {
            const line = logLines.shift();

            // Check if the file starts with the first commit record.
            if (commit == null && !GIT_HEAD_COMMIT_RE.test(line)) {
                console.error(" Error parsing commit log: Invalid format.");
                process.exitCode = ExitCodes.ParseFailure;
                break;
            }

            // Start parsing a new commit; store the existing one if applicable.
            let matches = line.match(GIT_HEAD_COMMIT_RE);
            if (matches) {
                if (commit != null) {
                    this._finishCommit(commit);
                }

                commit = this._getCommitObject();
                commit.hash = matches[1];
                continue;
            }

            // Check if this is a merge commit.
            matches = line.match(GIT_HEAD_MERGE_RE);
            if (matches) {
                commit.is_merge = true;
                continue;
            }

            // Parse the authorship information.
            matches = line.match(GIT_HEAD_AUTHOR_RE);
            if (matches) {
                commit.author_raw = matches[1];
                continue;
            }
            matches = line.match(GIT_HEAD_COMMITTER_RE);
            if (matches) {
                commit.committer_raw = matches[1];
                continue;
            }

            // By this point we should have the entire header, or we're broken.
            if (commit.hash === "" || commit.author_raw === "" || commit.committer_raw === "") {
                console.error(" Error parsing commit log: Invalid format.");
                process.exitCode = ExitCodes.ParseFailure;
                break;
            }

            // Start parsing the body.
            matches = line.match(GIT_BODY_LINE_RE);

            // Look for the first line of the commit message; it's our summary.
            if (commit.summary === "") {
                if (!matches) {
                    continue;
                }

                commit.summary = matches[1];
                continue;
            }

            // Treat as an empty line.
            if (!matches) {
                commit.body += "\n";
                continue;
            }
            // Use the capture group to strip leading spaces.
            commit.body += `${matches[1]}\n`;

            // Check if this is a cherry-pick.
            matches = line.match(GIT_BODY_CHERRYPICK_RE);
            if (matches) {
                commit.is_cherrypick = true;
                commit.cherrypick_hash = matches[1];
            }
        }

        // Store the last commit.
        if (commit != null) {
            this._finishCommit(commit);
        }

        if (this.log.length !== logSize) {
            console.error(` Error parsing commit log: Expected to receive ${logSize} commits, but got ${this.log.length} instead.`);
            process.exitCode = ExitCodes.ParseFailure;
        }
    }

    processCommits(commitsRaw, targetRepo) {
        try {
            for (let commitHash in commitsRaw) {
                if (commitsRaw[commitHash] == null) {
                    console.warn(` Requested data for a commit hash "${commitHash}", but received nothing.`);
                    continue;
                }
                if (typeof this.commits[commitHash] === "undefined") {
                    console.warn(` Received data for a commit hash "${commitHash}", but this commit is unknown.`);
                    continue;
                }
                const item = commitsRaw[commitHash];
                const commit = this.commits[commitHash];

                // Commits can have multiple authors, and we will list all of them. Also, associated PRs
                // can be authored by somebody else entirely. We will store them with the PR, and will
                // display them as well on the frontend.

                const commitAuthors = mapNodes(item.authors);
                commitAuthors.forEach((authorItem) => {
                    const authorId = this._processAuthor(authorItem.user);
                    commit.authored_by.push(authorId);
                    this.authors[authorId].commit_count++;
                });

                // Commits can have multiple PRs associated with them, so we need to be on the lookout
                // for rogue entries. Normally, there will be one pull per commit (except for direct
                // commits, which will have none), but GitHub may sometimes link commits to PRs in
                // other repos, or to otherwise unrelated PRs. So some form of filtering is required.

                const pullsRaw = mapNodes(item.associatedPullRequests)
                    .filter((pullData) => {
                        return pullData.baseRef && pullData.baseRef.repository.nameWithOwner === targetRepo;
                    });
                if (pullsRaw.length === 0) {
                    continue;
                }

                let pullItem = pullsRaw[0];
                commit.pull = pullItem.number;

                // Another commit is already linked to this PR.
                if (typeof this.pulls[pullItem.number] !== "undefined") {
                    this.pulls[pullItem.number].commits.push(commitHash);
                    continue;
                }

                // Compile basic information about a PR.
                let pr = {
                    "id": pullItem.id,
                    "public_id": pullItem.number,
                    "url": pullItem.url,
                    "diff_url": `${pullItem.url}.diff`,
                    "patch_url": `${pullItem.url}.patch`,

                    "title": pullItem.title,
                    "state": pullItem.state,
                    "is_draft": pullItem.isDraft,
                    "authored_by": null,
                    "created_at": pullItem.createdAt,
                    "updated_at": pullItem.updatedAt,

                    "target_branch": pullItem.baseRef.name,
                    "labels": [],

                    "commits": [ commitHash ],
                };
                this.pulls[pullItem.number] = pr;

                // Compose and link author information.
                const authorId = this._processAuthor(pullItem.author);
                pr.authored_by = authorId;
                this.authors[authorId].pull_count++;

                // Add labels, if available.
                let labels = mapNodes(pullItem.labels);
                labels.forEach((labelItem) => {
                    pr.labels.push({
                        "id": labelItem.id,
                        "name": labelItem.name,
                        "color": "#" + labelItem.color
                    });
                });
                pr.labels.sort((a, b) => {
                    if (a.name > b.name) return 1;
                    if (a.name < b.name) return -1;
                    return 0;
                });
            }
        } catch (err) {
            console.error(" Error parsing commit and pull request data: " + err);
            process.exitCode = ExitCodes.ParseFailure;
        }
    }

    getCommitHashes() {
        const commitHashes = [];

        for (let commitHash in this.commits) {
            const commit = this.commits[commitHash];
            if (commit.is_merge) {
                continue;
            }

            commitHashes.push(commitHash);
        }

        return commitHashes;
    }
}

class DataIO {
    constructor() {
        // Configurable parameters.
        this.data_owner = "godotengine";
        this.data_repo = "godot";
        this.data_version = "";
        this.skip_checkout = false;

        //
        this.config = null;
        this.git_tag = "";
        this.first_commit = "";
        this.last_commit = "";
    }
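    // parseArgs() consumes positional arguments of the form "key:value". A
    // hypothetical invocation could look like this (the script name is
    // illustrative, not taken from this file):
    //
    //     node build-db.js owner:godotengine repo:godot version:4.0 skip-checkout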
    parseArgs() {
        process.argv.forEach((arg) => {
            if (arg.indexOf("owner:") === 0) {
                this.data_owner = arg.substring(6);
            }
            if (arg.indexOf("repo:") === 0) {
                this.data_repo = arg.substring(5);
            }
            if (arg.indexOf("version:") === 0) {
                this.data_version = arg.substring(8);
            }

            if (arg === "skip-checkout") {
                this.skip_checkout = true;
            }
        });

        if (this.data_owner === "" || this.data_repo === "" || this.data_version === "") {
            console.error(" Error reading command-line arguments: owner, repo, and version cannot be empty.");
            process.exitCode = ExitCodes.IOFailure;
            return;
        }
    }
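    // The version config is a JSON file; a minimal sketch with hypothetical
    // values, covering only the fields read below:
    //
    //     {
    //         "git_tag": "4.0-stable",
    //         "ref": "4.0-stable",
    //         "from_ref": "3.6-stable"
    //     }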
    async loadConfig() {
        try {
            console.log("[*] Loading version configuration from a file.");

            const configPath = `./configs/${this.data_owner}.${this.data_repo}.${this.data_version}.json`;
            await fs.access(configPath, fsConstants.R_OK);
            const configContent = await fs.readFile(configPath);

            this.config = JSON.parse(configContent);
            this.git_tag = this.config.git_tag || this.config.ref;
            this.first_commit = this.config.from_ref;
            this.last_commit = this.config.ref;
        } catch (err) {
            console.error(" Error loading version config file: " + err);
            process.exitCode = ExitCodes.IOFailure;
            return;
        }
    }
    async saveData(output, fileName) {
        try {
            console.log("[*] Storing database to a file.");

            await ensureDir("./data");
            await fs.writeFile(`./data/${fileName}`, JSON.stringify(output), {encoding: "utf-8"});
        } catch (err) {
            console.error(" Error saving database file: " + err);
            process.exitCode = ExitCodes.IOFailure;
            return;
        }
    }
}

function mapNodes(object) {
    return object.edges.map((item) => item["node"]);
}
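// For example, with hypothetical data:
//   mapNodes({ edges: [{ node: { id: 1 } }, { node: { id: 2 } }] })
//   // => [{ id: 1 }, { id: 2 }]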
async function ensureDir(dirPath) {
    try {
        await fs.access(dirPath, fsConstants.R_OK | fsConstants.W_OK);
    } catch (err) {
        await fs.mkdir(dirPath);
    }
}

async function clearDir(rootPath) {
    try {
        const pathStat = await fs.stat(rootPath);
        if (!pathStat.isDirectory()) {
            return;
        }

        const removeDir = async (dirPath) => {
            const dirFiles = await fs.readdir(dirPath);
            for (let entryName of dirFiles) {
                if (entryName === "." || entryName === "..") {
                    continue;
                }

                const entryPath = `${dirPath}/${entryName}`;
                const entryStat = await fs.stat(entryPath);
                if (entryStat.isDirectory()) {
                    await removeDir(entryPath);
                    await fs.rmdir(entryPath);
                }
                else if (entryStat.isFile()) {
                    await fs.unlink(entryPath);
                }
            }
        };

        await removeDir(rootPath);
    } catch (err) {
        console.error(` Error clearing a folder at ${rootPath}: ` + err);
        process.exitCode = ExitCodes.IOFailure;
        return;
    }
}

async function main() {
    // Internal utility methods.
    const checkForExit = () => {
        if (process.exitCode > 0) {
            console.log(` Terminating with exit code ${process.exitCode}.`);
            process.exit();
        }
    };

    // Getting PRs between two commits is a complicated task, and must be done in
    // multiple steps. The GitHub API does not have a method for that, so we must improvise.
    // We also need to consider that there is no easy way to fetch information for
    // an arbitrary list of commits; the API can work on ranges, but not on lists.
    //
    // We do not need to run this operation constantly. Release versions don't change.
    // (Though some metadata of PRs can change, so re-indexing should be possible, on
    // demand.)
    // We also have to preconfigure some information, e.g. manually supply the tags
    // or hashes, which serve as release boundaries.

    console.log("[*] Building local commit and pull request database.");

    const dataIO = new DataIO();
    dataIO.parseArgs();
    checkForExit();

    await dataIO.loadConfig();
    checkForExit();

    console.log(`[*] Configured for the "${dataIO.data_owner}/${dataIO.data_repo}" repository; version ${dataIO.data_version}.`);

    const dataFetcher = new DataFetcher(dataIO.data_owner, dataIO.data_repo);
    const dataProcessor = new DataProcessor();

    console.log("[*] Checking the rate limits before.");
    await dataFetcher.checkRates();
    checkForExit();

    // First, we check out the repository at the specified branch/tag/hash. We will
    // use it to retrieve a clean commit log, ignoring merge commits. This step creates
    // a partial clone, as we are only interested in the history of the branch.
    // Still, it extracts all of the current files, so it may take a bit of time.

    if (!dataIO.skip_checkout) {
        console.log(`[*] Checking out the repository at "${dataIO.last_commit}".`);
        await dataFetcher.checkoutRepo(dataIO.git_tag, dataIO.last_commit);
        checkForExit();
    }

    console.log(`[*] Extracting the commit log between "${dataIO.first_commit}" and "${dataIO.last_commit}".`);
    const commitLogSize = await dataFetcher.countCommitHistory(dataIO.first_commit, dataIO.last_commit);
    const commitLog = await dataFetcher.getCommitHistory(dataIO.first_commit, dataIO.last_commit);
    checkForExit();

    // Second, we parse the extracted commit log to generate a list of commit hashes
    // for the next step. We also try to detect whether a commit is a cherry-pick,
    // rather than the original commit. We can rely on the commit message body
    // containing a certain string, from which we can take the original commit hash.

    dataProcessor.processLog(commitLog, commitLogSize);
    checkForExit();

    // This method returns only non-merge commits; we don't need to fetch anything about
    // merge commits. We only need them for the commit history.
    const commitHashes = dataProcessor.getCommitHashes();

    // Third, we generate a query to the GraphQL API to fetch the information about
    // linked PRs. The GraphQL API doesn't have a filter to extract data for a list of
    // commit hashes, but it supports having multiple sub-queries within the same request,
    // which is our way in.
    //
    // While paginated queries are limited to 100 entries per page, sub-queries do not
    // appear to be similarly limited. We are still limited by the total number of nodes
    // we can theoretically fetch, which is 500 000. As such, we still want to do this
    // in batches, so the number of nodes in each request is manageable.
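    //
    // As a rough estimate (assuming the limits set in _getCommitQuery): each commit
    // sub-query asks for up to 12 authors and 20 pull requests with 12 labels each,
    // i.e. about 12 + 20 * (1 + 12) = 272 nodes per commit, so a batch of 150
    // commits requests roughly 41 000 nodes, comfortably under the cap.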
    console.log("[*] Fetching commit data from GitHub.");
    let commitsRaw = {};

    const totalPages = Math.ceil(commitHashes.length / COMMITS_PER_PAGE);
    // Pages start at 1 for nicer presentation.
    let page = 1;
    while (page <= totalPages) {
        const batchHashes = commitHashes.splice(0, COMMITS_PER_PAGE);
        const batchCommits = await dataFetcher.fetchCommits(batchHashes, page, totalPages);
        checkForExit();

        Object.assign(commitsRaw, batchCommits);
        page++;

        // Wait for a bit before proceeding to avoid hitting the secondary rate limit in GitHub API.
        // See https://docs.github.com/en/rest/guides/best-practices-for-integrators#dealing-with-secondary-rate-limits.
        await dataFetcher.delay(API_DELAY_MSEC);

        // Add an extra delay every few requests, because the chance to trigger the hidden rate limit
        // seems to grow with the number of queries.
        if (page % 8 === 0) {
            console.log("[*] Waiting a bit for the API to cool down...");
            await dataFetcher.delay(API_DELAY_MSEC * 4);
        }
    }

    // Fourth, we consolidate the information. Commits are populated with links to their
    // respective PRs, and PRs store references to their commits. We will save this to
    // a file for the specified range, which should be between two stable releases.
    //
    // For intermediate releases (developer previews) we have preconfigured hashes and
    // can simply pass them to the final data. The frontend will handle the rest.

    console.log(`[*] Processing ${Object.keys(commitsRaw).length} commits.`);
    dataProcessor.processCommits(commitsRaw, `${dataIO.data_owner}/${dataIO.data_repo}`);
    checkForExit();

    console.log("[*] Checking the rate limits after.");
    await dataFetcher.checkRates();
    checkForExit();

    console.log("[*] Finalizing database.");
    const output = {
        "generated_at": Date.now(),
        "log": dataProcessor.log,
        "authors": dataProcessor.authors,
        "commits": dataProcessor.commits,
        "pulls": dataProcessor.pulls,
    };

    await dataIO.saveData(output, `${dataIO.data_owner}.${dataIO.data_repo}.${dataIO.data_version}.json`);
    checkForExit();

    console.log("[*] Database built.");
}

main();