Mirror of https://github.com/godotengine/godot-interactive-changelog.git
Store individual commit logs for each release within version
compose-db.js (291 lines changed)
@@ -117,6 +117,10 @@ class DataFetcher {
 
             const commitHistory = stdout.trimEnd();
             await this._logResponse(commitHistory, "_commit_shortlog", LogFormat.Raw);
+
+            if (commitHistory === "") {
+                return 0;
+            }
             return commitHistory.split("\n").length;
         } catch (err) {
             console.error(" Error extracting the commit history: " + err);
@@ -142,6 +146,27 @@ class DataFetcher {
         }
     }
+
+    async getCommitsBetween(fromCommit, toCommit, repoFolder = "") {
+        try {
+            if (repoFolder === "") {
+                repoFolder = `./temp/${this.data_repo}`;
+            }
+            const { stdout, stderr } = await exec(`git log --pretty=format:"%H" ${fromCommit}..${toCommit}`, { cwd: repoFolder, maxBuffer: EXEC_MAX_BUFFER });
+
+            const commitHashes = stdout;
+            await this._logResponse(commitHashes, "_commit_hashes", LogFormat.Raw);
+
+            if (commitHashes === "") {
+                return [];
+            }
+            return commitHashes.split("\n");
+        } catch (err) {
+            console.error(" Error extracting the commit history: " + err);
+            process.exitCode = ExitCodes.ExecFailure;
+            return [];
+        }
+    }
 
     async fetchGithub(query, retries = 0) {
         const init = {};
         init.method = "POST";
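The new getCommitsBetween() mirrors the existing commit-log helpers but returns the raw hash list for an arbitrary range. A minimal usage sketch, with placeholder refs rather than values from this commit:

    // Placeholder refs; in main() below the real values come from the config's "releases" entries.
    const releaseLog = await dataFetcher.getCommitsBetween("4.1-stable", "4.1.1-stable", "./temp/godot");
    // releaseLog is an array of full commit hashes (git log "%H", newest first),
    // or [] when the range is empty or the git invocation fails.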
@@ -333,15 +358,44 @@ class DataFetcher {
 class DataProcessor {
     constructor() {
         this.log = [];
+        this.releaseLogs = {};
 
         this.authors = {};
         this.commits = {};
         this.pulls = {};
+
+        this.oldData = {};
     }
 
-    _getCommitObject() {
+    takeData(dataObject) {
+        this.oldData = {
+            "log": dataObject.log || [],
+            "releaseLogs": dataObject.release_logs || {},
+
+            "commits": dataObject.commits || {},
+            "authors": dataObject.authors || {},
+            "pulls": dataObject.pulls || {},
+        };
+    }
+
+    consumeOldLog() {
+        this.log = this.oldData.log;
+        this.releaseLogs = this.oldData.releaseLogs;
+        this.commits = this.oldData.commits;
+    }
+
+    consumeOldCommits() {
+        this.authors = this.oldData.authors;
+        this.pulls = this.oldData.pulls;
+    }
+
+    _getCommitObject(commitHash) {
+        if (typeof this.oldData.commits[commitHash] !== "undefined") {
+            return this.oldData.commits[commitHash];
+        }
+
         return {
-            "hash": "",
+            "hash": commitHash,
             "is_merge": false,
 
            "authored_by": [],
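takeData() stashes a previously generated database so later steps can reuse it instead of rebuilding everything. A rough sketch of the intended flow, assuming oldData is shaped like the saved JSON (log, release_logs, commits, authors, pulls); the skip flags here stand in for dataIO.skip_gitlog and dataIO.skip_github:

    const dataProcessor = new DataProcessor();
    dataProcessor.takeData(oldData);                      // keep the previous run around
    if (skipGitlog) dataProcessor.consumeOldLog();        // reuse log, releaseLogs and commits
    if (skipGithub) dataProcessor.consumeOldCommits();    // reuse authors and pulls
    // _getCommitObject(hash) now returns the stored commit when one exists, so commits
    // already present in the old data are not rebuilt from scratch.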
@@ -389,8 +443,7 @@ class DataProcessor {
             return;
         }
 
-        const originalCommit = this._getCommitObject();
-        originalCommit.hash = commit.cherrypick_hash;
+        const originalCommit = this._getCommitObject(commit.cherrypick_hash);
         originalCommit.author_raw = commit.author_raw;
         originalCommit.committer_raw = commit.author_raw;
 
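The cherrypick_hash used to seed the original commit is taken from the commit message body; presumably this is the trailer that `git cherry-pick -x` appends. The parsing itself is not part of this diff; a hypothetical extraction could look like:

    // Illustration only; the real pattern lives elsewhere in compose-db.js.
    const match = commit.body.match(/\(cherry picked from commit ([0-9a-f]{40})\)/);
    const cherrypickHash = match ? match[1] : "";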
@@ -412,6 +465,9 @@ class DataProcessor {
     }
 
     processLog(logRaw, logSize) {
+        this.log = [];
+        this.releaseLogs = {};
+
         // Parse the log, given in its "full" format. Records are presented in
         // the chronological order, line by line, with each record spanning across
         // several lines.
@@ -452,8 +508,10 @@ class DataProcessor {
                    this._finishCommit(commit);
                }
 
-                commit = this._getCommitObject();
-                commit.hash = matches[1];
+                commit = this._getCommitObject(matches[1]);
+                // These fields may come from the old data, we will override them.
+                commit.summary = "";
+                commit.body = "";
                continue;
            }
 
@@ -524,6 +582,9 @@ class DataProcessor {
     }
 
     processCommits(commitsRaw, targetRepo) {
+        this.authors = {};
+        this.pulls = {};
+
         try {
             for (let commitHash in commitsRaw) {
                 if (commitsRaw[commitHash] == null) {
@@ -541,6 +602,7 @@ class DataProcessor {
            // can be authored by somebody else entirely. We will store them with the PR, and will
            // display them as well on the frontend.
 
+            commit.authored_by = [];
            const commitAuthors = mapNodes(item.authors);
            commitAuthors.forEach((authorItem) => {
                const authorId = this._processAuthor(authorItem.user);
@@ -618,6 +680,10 @@ class DataProcessor {
        }
    }
 
+    _processReleaseLog(releaseName, commitHashes) {
+        this.releaseLogs[releaseName] = commitHashes;
+    }
+
    getCommitHashes() {
        const commitHashes = [];
 
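Once _processReleaseLog() has run for every configured release, releaseLogs maps a release name to the plain hash list returned by getCommitsBetween(). A shape sketch with made-up names and shortened hashes:

    // Illustrative only; keys come from the "releases" entries in the version config.
    this.releaseLogs = {
        "4.1.1-stable": ["0123abcd", "4567ef89"],
        "4.1.2-stable": ["cafe0123"],
    };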
@@ -641,14 +707,21 @@ class DataIO {
        this.data_repo = "godot";
        this.data_version = "";
 
-        this.skip_checkout = false;
        this.checkout_dir = "";
 
+        //
+        // Execution flags.
+        this.update_data = false
+        this.skip_checkout = false;
+        this.skip_gitlog = false;
+        this.skip_github = false;
+
+        // Loaded configuration.
        this.config = null;
 
        this.git_tag = "";
        this.first_commit = ""
        this.last_commit = "";
+        this.releases = [];
    }
 
    parseArgs() {
@@ -663,11 +736,21 @@ class DataIO {
                this.data_version = arg.substring(8);
            }
 
+            if (arg.indexOf("dir:") === 0) {
+                this.checkout_dir = arg.substring(4);
+            }
+
+            if (arg === "update-data") {
+                this.update_data = true;
+            }
            if (arg === "skip-checkout") {
                this.skip_checkout = true;
            }
-            if (arg.indexOf("dir:") === 0) {
-                this.checkout_dir = arg.substring(4);
+            if (arg === "skip-gitlog") {
+                this.skip_gitlog = true;
+            }
+            if (arg === "skip-github") {
+                this.skip_github = true;
            }
        });
 
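Together with the existing version: and dir: arguments, the new flags allow partial, incremental rebuilds. A plausible invocation, assuming the script is run directly with Node and using an example version:

    node compose-db.js version:4.1 update-data skip-checkout skip-gitlog
    # Loads the existing ./data/<owner>.<repo>.4.1.json, skips the clone and the git log pass,
    # and only refreshes the commit/PR data fetched from GitHub.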
@@ -687,9 +770,11 @@ class DataIO {
            const configContent = await fs.readFile(configPath);
 
            this.config = JSON.parse(configContent);
+
            this.git_tag = this.config.git_tag || this.config.ref;
-            this.first_commit = this.config.from_ref;
-            this.last_commit = this.config.ref;
+            this.first_commit = this.config.from_ref || "";
+            this.last_commit = this.config.ref || "";
+            this.releases = this.config.releases || [];
        } catch (err) {
            console.error(" Error loading version config file: " + err);
            process.exitCode = ExitCodes.IOFailure;
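The version config now optionally carries a releases array next to the existing refs. A sketch of such a config, using only the fields read here and in main(), with illustrative values:

    {
        "git_tag": "4.1-stable",
        "from_ref": "4.0-stable",
        "ref": "4.1-stable",
        "releases": [
            { "name": "4.1.1-stable", "from_ref": "4.1-stable", "ref": "4.1.1-stable" }
        ]
    }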
@@ -697,14 +782,31 @@ class DataIO {
        }
    }
 
-    async saveData(output, fileName) {
+    async loadData(fileName) {
        try {
-            console.log("[*] Storing database to a file.");
+            console.log("[*] Loading version database from a file.");
 
-            await ensureDir("./data");
-            await fs.writeFile(`./data/${fileName}`, JSON.stringify(output), {encoding: "utf-8"});
+            const databasePath = `./data/${fileName}`;
+            await fs.access(databasePath, fsConstants.R_OK);
+            const dataContent = await fs.readFile(databasePath);
+
+            return JSON.parse(dataContent);
        } catch (err) {
-            console.error(" Error saving database file: " + err);
+            console.error(" Error loading version database file: " + err);
            process.exitCode = ExitCodes.IOFailure;
+            return null;
+        }
+    }
+
+    async saveData(fileName, dataObject) {
+        try {
+            console.log("[*] Storing version database to a file.");
+
+            await ensureDir("./data");
+            await fs.writeFile(`./data/${fileName}`, JSON.stringify(dataObject), {encoding: "utf-8"});
+        } catch (err) {
+            console.error(" Error saving version database file: " + err);
+            process.exitCode = ExitCodes.IOFailure;
            return;
        }
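loadData() is the counterpart of saveData(), and both now take the file name first. The round trip as main() uses it, with the file name built from owner, repo and version:

    const oldData = await dataIO.loadData(databaseName);   // null (and a failure exit code) on error
    // ... rebuild or reuse the data ...
    await dataIO.saveData(databaseName, output);            // writes ./data/<databaseName>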
@@ -786,21 +888,31 @@ async function main() {
    await dataIO.loadConfig();
    checkForExit();
 
+    const databaseName = `${dataIO.data_owner}.${dataIO.data_repo}.${dataIO.data_version}.json`;
+
    console.log(`[*] Configured for the "${dataIO.data_owner}/${dataIO.data_repo}" repository; version ${dataIO.data_version}.`);
 
    const dataFetcher = new DataFetcher(dataIO.data_owner, dataIO.data_repo);
    const dataProcessor = new DataProcessor();
 
+    if (dataIO.update_data) {
+        console.log(`[*] Loading existing data to perform an update.`);
+        const oldData = await dataIO.loadData(databaseName);
+        dataProcessor.takeData(oldData);
+    }
+
    console.log("[*] Checking the rate limits before.");
    await dataFetcher.checkRates();
    checkForExit();
 
    // First, we checkout the repository for the specified branch/tag/hash. We will
-    // use it to retrieve a clean commit log, ignoring merge commits. This step creates
-    // as shallow copy, as we are only interested in the history of the branch.
+    // use it to retrieve a clean commit log. This step creates a shallow copy of the
+    // repository, as we are only interested in the history of the branch.
    // Still, it extracts all of the current files, so it may take a bit of time.
 
-    if (!dataIO.skip_checkout) {
+    if (dataIO.skip_checkout) {
+        console.log(`[*] Skipping the repository checkout.`);
+    } else {
        console.log(`[*] Checking out the repository at "${dataIO.last_commit}".`);
        await dataFetcher.checkoutRepo(dataIO.git_tag, dataIO.last_commit);
        checkForExit();
@@ -810,69 +922,97 @@ async function main() {
        console.log(`[*] Using the local clone at "${dataIO.checkout_dir}".`);
    }
 
-    console.log(`[*] Extracting the commit log between "${dataIO.first_commit}" and "${dataIO.last_commit}".`);
-    const commitLogSize = await dataFetcher.countCommitHistory(dataIO.first_commit, dataIO.last_commit, dataIO.checkout_dir);
-    const commitLog = await dataFetcher.getCommitHistory(dataIO.first_commit, dataIO.last_commit, dataIO.checkout_dir);
-    checkForExit();
-
-    // Second, we parse the extracted commit log, to generate a list of commit hashes
-    // for the next step. We also try to extract the information about this being a
-    // cherry-pick, and not the original commit. We can rely on the commit message body
-    // containing a certain string, from which we can take the original commit hash.
-
-    dataProcessor.processLog(commitLog, commitLogSize);
-    checkForExit();
-
-    // This method returns only non-merge commits; we don't need to fetch anything about
-    // merge commits. We only need them for commit history.
-    const commitHashes = dataProcessor.getCommitHashes();
-
-    // Third, we generate a query to the GraphQL API to fetch the information about
-    // linked PRs. GraphQL API doesn't have a filter to extract data for a list of
-    // commit hashes, but it supports having multiple sub-queries within the same request,
-    // which is our way in.
-    //
-    // While paginated queries are limited to 100 entries per page, sub-queries do not
-    // appear to be similarly limited. We are still limited by the total number of nodes
-    // we can theoretically fetch, which is 500 000. As such, we still want to do this
-    // in batches, so the number of nodes in each request is manageable.
-
-    console.log("[*] Fetching commit data from GitHub.");
-    let commitsRaw = {};
-
-    const totalPages = Math.ceil(commitHashes.length / COMMITS_PER_PAGE);
-    // Pages are starting with 1 for better presentation.
-    let page = 1;
-    while (page <= totalPages) {
-        const batchHashes = commitHashes.splice(0, COMMITS_PER_PAGE);
-        const batchCommits = await dataFetcher.fetchCommits(batchHashes, page, totalPages);
-        checkForExit();
-
-        Object.assign(commitsRaw, batchCommits);
-        page++;
-
-        // Wait for a bit before proceeding to avoid hitting the secondary rate limit in GitHub API.
-        // See https://docs.github.com/en/rest/guides/best-practices-for-integrators#dealing-with-secondary-rate-limits.
-        await dataFetcher.delay(API_DELAY_MSEC);
-
-        // Add an extra delay every few requests, because the chance to trigger the hidden rate issue
-        // seems to grow with the number of queries.
-        if (page % 8 === 0) {
-            console.log("[*] Waiting a bit for the API to cool down...");
-            await dataFetcher.delay(API_DELAY_MSEC * 4);
-        }
-    }
-
-    // Fourth, we consolidate the information. Commits are populated with links to their
-    // respective PRs, and PRs store references to their commits. We will save this to
-    // a file for the specified range, which should be between two stable releases.
-    //
-    // For intermediate releases (developer previews) we have preconfigured hashes and
-    // can simply pass them to the final data. Frontend will handle the rest.
-
-    console.log(`[*] Processing ${Object.keys(commitsRaw).length} commits.`);
-    dataProcessor.processCommits(commitsRaw, `${dataIO.data_owner}/${dataIO.data_repo}`);
-    checkForExit();
+    if (dataIO.skip_gitlog) {
+        console.log(`[*] Skipping the commit log extraction.`);
+        dataProcessor.consumeOldLog();
+    } else {
+        console.log(`[*] Extracting the commit log between "${dataIO.first_commit}" and "${dataIO.last_commit}".`);
+        const commitLogSize = await dataFetcher.countCommitHistory(dataIO.first_commit, dataIO.last_commit, dataIO.checkout_dir);
+        const commitLog = await dataFetcher.getCommitHistory(dataIO.first_commit, dataIO.last_commit, dataIO.checkout_dir);
+        checkForExit();
+
+        // Second, we parse the extracted commit log, to generate a list of commit hashes
+        // for the next step. We also try to extract the information about this being a
+        // cherry-pick, and not the original commit. We can rely on the commit message body
+        // containing a certain string, from which we can take the original commit hash.
+
+        dataProcessor.processLog(commitLog, commitLogSize);
+        checkForExit();
+
+        // We also need to keep track of the commit history of each release within a version.
+        // Releases can, and most often do, include commits outside of the defined range. This
+        // happens when a contribution is authored before the defined range, but merged within
+        // it.
+
+        console.log(`[*] Extracting commit logs for releases.`);
+        for (let i = 0; i < dataIO.releases.length; i++) {
+            const release = dataIO.releases[i];
+
+            console.log(` Extracting the commit log for "${release.name}" (between "${release.from_ref}" and "${release.ref}").`);
+            const releaseLog = await dataFetcher.getCommitsBetween(release.from_ref, release.ref, dataIO.checkout_dir);
+            checkForExit();
+
+            console.log(` Processing the commit log for "${release.name}".`);
+            dataProcessor._processReleaseLog(release.name, releaseLog);
+            checkForExit();
+        }
+    }
+
+    // This method returns only non-merge commits; we don't need to fetch anything about
+    // merge commits. We only need them for a complete commit history.
+    const commitHashes = dataProcessor.getCommitHashes();
+
+    if (dataIO.skip_github) {
+        console.log(`[*] Skipping the commit data fetching from GitHub.`);
+        dataProcessor.consumeOldCommits();
+    } else {
+        // Third, we generate a query to the GraphQL API to fetch the information about
+        // linked PRs. GraphQL API doesn't have a filter to extract data for a list of
+        // commit hashes, but it supports having multiple sub-queries within the same request,
+        // which is our way in.
+        //
+        // While paginated queries are limited to 100 entries per page, sub-queries do not
+        // appear to be similarly limited. We are still limited by the total number of nodes
+        // we can theoretically fetch, which is 500 000. As such, we still want to do this
+        // in batches, so the number of nodes in each request is manageable.
+
+        console.log("[*] Fetching commit data from GitHub.");
+        let commitsRaw = {};
+
+        const totalPages = Math.ceil(commitHashes.length / COMMITS_PER_PAGE);
+        // Pages are starting with 1 for better presentation.
+        let page = 1;
+        while (page <= totalPages) {
+            const batchHashes = commitHashes.splice(0, COMMITS_PER_PAGE);
+            const batchCommits = await dataFetcher.fetchCommits(batchHashes, page, totalPages);
+            checkForExit();
+
+            Object.assign(commitsRaw, batchCommits);
+            page++;
+
+            // Wait for a bit before proceeding to avoid hitting the secondary rate limit in GitHub API.
+            // See https://docs.github.com/en/rest/guides/best-practices-for-integrators#dealing-with-secondary-rate-limits.
+            await dataFetcher.delay(API_DELAY_MSEC);
+
+            // Add an extra delay every few requests, because the chance to trigger the hidden rate issue
+            // seems to grow with the number of queries.
+            if (page % 8 === 0) {
+                console.log("[*] Waiting a bit for the API to cool down...");
+                await dataFetcher.delay(API_DELAY_MSEC * 4);
+            }
+        }
+
+        // Fourth, we consolidate the information. Commits are populated with links to their
+        // respective PRs, and PRs store references to their commits. We will save this to
+        // a file for the specified range, which should be between two stable releases.
+        //
+        // For intermediate releases (developer previews) we have preconfigured hashes and
+        // can simply pass them to the final data. Frontend will handle the rest.
+
+        console.log(`[*] Processing ${Object.keys(commitsRaw).length} commits.`);
+        dataProcessor.processCommits(commitsRaw, `${dataIO.data_owner}/${dataIO.data_repo}`);
+        checkForExit();
+    }
 
    console.log("[*] Checking the rate limits after.")
    await dataFetcher.checkRates();
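The GitHub fetching loop is unchanged in substance, only re-indented under the skip_github branch. As a reminder of how the splice-based paging behaves (COMMITS_PER_PAGE is defined elsewhere in compose-db.js; the value below is a stand-in):

    const COMMITS_PER_PAGE = 50;                      // stand-in value
    const hashes = ["a1", "b2", "c3" /* ... */];
    const totalPages = Math.ceil(hashes.length / COMMITS_PER_PAGE);
    // splice() removes each batch from the front of the array, so successive iterations
    // consume the list until it is empty.
    const batch = hashes.splice(0, COMMITS_PER_PAGE);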
@@ -882,12 +1022,13 @@ async function main() {
    const output = {
        "generated_at": Date.now(),
        "log": dataProcessor.log,
+        "release_logs": dataProcessor.releaseLogs,
        "authors": dataProcessor.authors,
        "commits": dataProcessor.commits,
        "pulls": dataProcessor.pulls,
    };
 
-    await dataIO.saveData(output, `${dataIO.data_owner}.${dataIO.data_repo}.${dataIO.data_version}.json`);
+    await dataIO.saveData(databaseName, output);
    checkForExit();
 
    console.log("[*] Database built.");
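With release_logs added, the saved database gains one more top-level key. A shape sketch of the file written to ./data/ (values simplified):

    {
        "generated_at": 1700000000000,                 // Date.now() at build time
        "log": [ /* commit log for the whole range */ ],
        "release_logs": { /* release name -> commit hashes */ },
        "authors": { /* ... */ },
        "commits": { /* ... */ },
        "pulls": { /* ... */ }
    }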