code scripts to find renamed files in the docs and create redirects

This commit is contained in:
Nathan Lovato
2021-11-25 08:54:47 -06:00
parent 87c8f73767
commit 4d1cf0019c
3 changed files with 209 additions and 0 deletions

View File

@@ -0,0 +1,95 @@
"""Uses git to list files that were renamed between two revisions and converts
that to a CSV table.
Use it to prepare and double-check data for create_redirects.py.
"""
import subprocess
import argparse
import csv
import sys
# Fail fast when git is unavailable: everything this script does shells out to it.
try:
    subprocess.check_output(["git", "--version"])
except (OSError, subprocess.CalledProcessError):
    # A missing executable surfaces as OSError (FileNotFoundError), while a git
    # that runs but exits non-zero surfaces as CalledProcessError.
    print("Git not found. It's required to run this program.")
    sys.exit(1)
def parse_command_line_args():
    """Parse the command line of the rename-listing script.

    Returns the ``argparse.Namespace`` holding ``revision1``, ``revision2``
    and ``output_file`` (``None`` when ``-f``/``--output-file`` is omitted).
    """
    description = (
        "Uses git to list files that were renamed between two revisions and "
        "converts that to a CSV table. Use it to prepare and double-check data for create_redirects.py."
    )
    cli = argparse.ArgumentParser(description=description)

    # Both revisions are mandatory positional arguments, declared in order.
    positionals = (
        ("revision1", "Start revision to get renamed files from."),
        ("revision2", "End revision to get renamed files from."),
    )
    for name, help_text in positionals:
        cli.add_argument(name, type=str, help=help_text)

    cli.add_argument("-f", "--output-file", type=str, help="Path to the output file")
    return cli.parse_args()
# Column order of the generated CSV; create_redirects.py expects these names.
_CSV_FIELDNAMES = ("source", "destination", "branch")


def _fail(message):
    """Report a fatal error on stderr and exit with status 1.

    stderr is used because stdout may carry the CSV itself when no output
    file is given.
    """
    print(message, file=sys.stderr)
    sys.exit(1)


def _parse_renamed_documents(name_status_output, branch):
    """Convert `git diff --name-status` rename lines into CSV rows.

    Only lines ending in ".rst" are kept. Each kept line is expected to hold
    three tab-separated fields: status, old path, new path.
    """
    rows = []
    for line in name_status_output.splitlines():
        if not line.endswith(".rst"):
            continue
        _, source, destination = line.split("\t")
        rows.append({"source": source, "destination": destination, "branch": branch})
    return rows


def _write_csv(rows, stream):
    """Write the header and all rows to an open text stream."""
    # Fixed field names keep the header valid even when there are no rows.
    writer = csv.DictWriter(stream, fieldnames=_CSV_FIELDNAMES)
    writer.writeheader()
    writer.writerows(rows)


def main():
    """List .rst files renamed between two revisions and emit them as CSV.

    Validates the two revisions given on the command line, asks git for the
    renames between them and writes a table with the columns source,
    destination and branch (the branch is the second revision). The table
    goes to the file given with -f/--output-file, or to stdout otherwise.

    Exits with status 1 when the revisions are identical, contain a "/", or
    cannot be resolved by git in the current repository.
    """
    args = parse_command_line_args()
    revisions = [args.revision1, args.revision2]

    # Explicit checks instead of `assert`, which is stripped under `python -O`.
    if args.revision1 == args.revision2:
        _fail("Revisions must be different.")
    for revision in revisions:
        if "/" in revision:
            _fail("Revisions must be local branches only.")

    # Ensure that both revisions are present in the local repository.
    for revision in revisions:
        try:
            subprocess.check_output(
                ["git", "rev-list", f"HEAD..{revision}"], stderr=subprocess.STDOUT
            )
        except subprocess.CalledProcessError:
            _fail(
                f"Revision {revision} not found in this repository. "
                "Please make sure that both revisions exist locally in your git repository."
            )

    # Get the list of renamed files between the two revisions.
    diff_output = subprocess.check_output(
        [
            "git",
            "diff",
            "--name-status",
            "--diff-filter=R",
            args.revision1,
            args.revision2,
        ]
    ).decode("utf-8")
    csv_data = _parse_renamed_documents(diff_output, branch=args.revision2)

    if args.output_file:
        # newline="" lets the csv module control line endings itself.
        with open(args.output_file, "w", newline="") as f:
            _write_csv(csv_data, f)
    else:
        _write_csv(csv_data, sys.stdout)


if __name__ == "__main__":
    main()

112
_tools/create_redirects.py Normal file
View File

@@ -0,0 +1,112 @@
"""Create page redirects for a specific branch of the docs.
Loads data from a CSV file with three columns: source, destination, branch
Where the source and destination are paths to RST files in the repository.
Pre-requisites:
- You need the dotenv and requests Python modules installed. We use dotenv to
let you store your API auth token privately.
You can install both by running: pip3 install -r requirements.txt
How to use:
- Generate a CSV file from two git revisions using convert_git_renames_to_csv.py
- Store your API token in a .env file in this directory like so:
RTD_AUTH_TOKEN=your_token_here
- Run this script, passing it the path to your generated CSV file as an
argument.
The script directly creates redirects using the CSV data. It does not check if a
redirect already exists or if it's correct.
"""
import argparse
import csv
import json
import os
import dotenv
# Every import from `requests` lives inside the guard so that a missing
# installation produces the message below instead of an ImportError traceback.
try:
    import requests

    # NOTE(review): `default_hooks` is not referenced anywhere in this script and
    # looks like an accidental editor auto-import; kept so the module's names do
    # not change.
    from requests.models import default_hooks
except ImportError:
    print(
        "Required third-party module `requests` not found. "
        "Please install it with `pip install requests` (or `pip3 install requests` on Linux)."
    )
    # Stop here: continuing would only fail later with a NameError.
    raise SystemExit(1)
# Load variables from a local .env file, if there is one, into the environment.
dotenv.load_dotenv()

# RTD_AUTH_TOKEN is the primary name; RTD_API_TOKEN is also accepted so that
# .env files written with that spelling keep working.
RTD_AUTH_TOKEN: str = os.environ.get("RTD_AUTH_TOKEN") or os.environ.get(
    "RTD_API_TOKEN", ""
)
if not RTD_AUTH_TOKEN:
    print("Missing auth token in .env file or .env file not found. Aborting.")
    # SystemExit rather than exit(): the latter is injected by `site` and is not
    # guaranteed to exist in every interpreter setup.
    raise SystemExit(1)

# NOTE(review): the project slug in this URL is "pip" - confirm this is the
# Read the Docs project the redirects are meant for.
REDIRECT_URL = "https://readthedocs.org/api/v3/projects/pip/redirects/"
REQUEST_HEADERS = {"Authorization": f"token {RTD_AUTH_TOKEN}"}
def parse_command_line_args():
    """Parse the command line of the redirect-creation script.

    Returns the ``argparse.Namespace`` holding ``csv_file`` (path to the input
    table) and ``dry_run`` (``True`` when ``-d``/``--dry-run`` is passed).
    """
    cli = argparse.ArgumentParser(
        description="Create page redirects for a specific branch of the docs."
    )

    csv_file_help = "Path to a CSV file with three columns: source, destination, branch."
    cli.add_argument("csv_file", type=str, help=csv_file_help)

    # Optional switch: report what would be done without doing it.
    dry_run_help = "Run the program and output information without side effects."
    cli.add_argument("-d", "--dry-run", action="store_true", help=dry_run_help)

    return cli.parse_args()
def _strip_rst_suffix(path):
    """Return path without a trailing ".rst"; other occurrences are left alone."""
    suffix = ".rst"
    return path[: -len(suffix)] if path.endswith(suffix) else path


def make_redirect(source, destination, branch, args):
    """Create one redirect from an old page to its new location.

    Args:
        source: Old path of the RST file, relative to the docs root.
        destination: New path of the RST file, relative to the docs root.
        branch: Docs version the redirect applies to; used in both URLs.
        args: Parsed command line. When ``args.dry_run`` is true the redirect
            is only printed and no request is sent.

    The outcome is reported with ``print``; nothing is returned.
    """
    # Currently, the program only works for the EN version of the docs
    source_slug = f"/en/{branch}/{_strip_rst_suffix(source)}"
    destination_slug = f"/en/{branch}/{_strip_rst_suffix(destination)}"
    # NOTE(review): the URLs carry the language and version prefix while the
    # redirect type is "page" - confirm against the Read the Docs redirects API
    # that this combination is the intended one.
    json_data = {"from_url": source_slug, "to_url": destination_slug, "type": "page"}

    if args.dry_run:
        print(f"{source_slug} -> {destination_slug}")
        return

    response = requests.post(
        REDIRECT_URL,
        # `json=` serialises the object itself; passing json.dumps(...) here would
        # send a JSON *string* instead of a JSON object.
        json=json_data,
        headers=REQUEST_HEADERS,
    )
    if response.status_code == 201:
        print(f"Created redirect {source_slug} -> {destination_slug}")
    else:
        print(
            f"Failed to create redirect {source_slug} -> {destination_slug}. "
            f"Status code: {response.status_code}"
        )
def main():
    """Read the redirect table from the CSV file and create each redirect.

    The CSV header must consist of exactly the columns source, destination
    and branch; otherwise the program exits with an error message. A file that
    holds only the header is valid and results in no redirects.
    """
    args = parse_command_line_args()
    expected_columns = {"source", "destination", "branch"}

    # newline="" is what the csv module asks for when reading files.
    with open(args.csv_file, "r", newline="") as f:
        reader = csv.DictReader(f)
        # Validate the header rather than the first row so that an empty table
        # does not crash with an IndexError.
        header = reader.fieldnames
        redirect_data = list(reader)

    # Explicit check instead of `assert`, which is stripped under `python -O`.
    if header is None or set(header) != expected_columns:
        raise SystemExit(
            "CSV file must have those three columns: source, destination, branch."
        )

    for row in redirect_data:
        make_redirect(row["source"], row["destination"], row["branch"], args)


if __name__ == "__main__":
    main()

2
_tools/requirements.txt Normal file
View File

@@ -0,0 +1,2 @@
python-dotenv==0.18.0
requests==2.20.0