Increase code syntax diversity, use GitHub permalinks
This commit is contained in:
237
scripts/permalinkify_code_urls.py
Normal file
237
scripts/permalinkify_code_urls.py
Normal file
@@ -0,0 +1,237 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""Convert raw.githubusercontent.com URLs in code_syntax.rs from branch refs to commit-SHA permalinks.
|
||||||
|
|
||||||
|
Usage:
|
||||||
|
# Dry-run (prints what would change):
|
||||||
|
python3 scripts/permalinkify_code_urls.py --dry-run
|
||||||
|
|
||||||
|
# Apply in-place:
|
||||||
|
python3 scripts/permalinkify_code_urls.py
|
||||||
|
|
||||||
|
# With a GitHub token for higher rate limits (recommended for 485 URLs):
|
||||||
|
GITHUB_TOKEN=ghp_xxx python3 scripts/permalinkify_code_urls.py
|
||||||
|
|
||||||
|
The script resolves each branch ref (master, main, dev, etc.) to the current
|
||||||
|
commit SHA via the GitHub API, then rewrites the URLs so they never change when
|
||||||
|
upstream repos push new commits or restructure files.
|
||||||
|
|
||||||
|
Before:
|
||||||
|
https://raw.githubusercontent.com/tokio-rs/tokio/master/tokio/src/sync/mutex.rs
|
||||||
|
After:
|
||||||
|
https://raw.githubusercontent.com/tokio-rs/tokio/a1b2c3d.../tokio/src/sync/mutex.rs
|
||||||
|
"""
|
||||||
|
|
||||||
|
import argparse
|
||||||
|
import json
|
||||||
|
import os
|
||||||
|
import re
|
||||||
|
import sys
|
||||||
|
import time
|
||||||
|
import urllib.error
|
||||||
|
import urllib.request
|
||||||
|
|
||||||
|
# Default file to rewrite, located relative to this script's own directory.
CODE_SYNTAX_PATH = os.path.join(
    os.path.dirname(__file__),
    "..",
    "src",
    "generator",
    "code_syntax.rs",
)

# A ref made of exactly 40 lowercase hex digits is treated as a full commit SHA.
SHA_RE = re.compile(r"^[0-9a-f]{40}$")
|
||||||
|
|
||||||
|
|
||||||
|
def github_headers():
    """Build the HTTP headers sent with every GitHub API request.

    Returns:
        A new dict containing the GitHub v3 JSON ``Accept`` header. When the
        ``GITHUB_TOKEN`` environment variable holds a non-blank value, an
        ``Authorization: token <value>`` header is added as well.
    """
    headers = {"Accept": "application/vnd.github.v3+json"}
    # Strip stray whitespace (e.g. a trailing newline picked up when the token
    # was copied from a file); left in place it would end up inside the
    # Authorization header value. A blank token is treated as "no token".
    token = os.environ.get("GITHUB_TOKEN", "").strip()
    if token:
        headers["Authorization"] = f"token {token}"
    return headers
|
||||||
|
|
||||||
|
|
||||||
|
def _try_resolve_branch(owner: str, repo: str, ref: str) -> str | None:
    """Try to resolve a single branch name to its commit SHA.

    Args:
        owner: GitHub account or organisation that owns the repository.
        repo: Repository name.
        ref: Branch name, used verbatim in the request path.

    Returns:
        The SHA reported for ``heads/<ref>``, or ``None`` when the lookup
        fails (branch not found, other HTTP error, or network failure).
    """
    url = f"https://api.github.com/repos/{owner}/{repo}/git/ref/heads/{ref}"
    req = urllib.request.Request(url, headers=github_headers())
    try:
        with urllib.request.urlopen(req, timeout=15) as resp:
            data = json.loads(resp.read())
        return data["object"]["sha"]
    except urllib.error.HTTPError as err:
        # 404 is the expected "no such branch" answer; anything else (e.g. a
        # 403 from rate limiting) is worth surfacing instead of hiding.
        if err.code != 404:
            print(
                f" WARNING: HTTP {err.code} while resolving branch {owner}/{repo} ref={ref}",
                file=sys.stderr,
            )
        return None
    except OSError as err:
        # URLError, timeouts and connection resets are all OSError subclasses.
        # Treat them as a failed lookup so one flaky request does not abort
        # the whole run.
        print(
            f" WARNING: network error while resolving branch {owner}/{repo} ref={ref}: {err}",
            file=sys.stderr,
        )
        return None
|
||||||
|
|
||||||
|
|
||||||
|
def _try_resolve_tag(owner: str, repo: str, ref: str) -> str | None:
    """Try to resolve a tag name to its commit SHA.

    When the ref's object is itself of type ``"tag"``, a second request to
    that object's URL is made and the SHA of the object it points at is
    returned. Otherwise the ref's own object SHA is returned.

    Args:
        owner: GitHub account or organisation that owns the repository.
        repo: Repository name.
        ref: Tag name, used verbatim in the request path.

    Returns:
        The resolved SHA, or ``None`` when the lookup fails (tag not found,
        other HTTP error, or network failure).
    """
    url = f"https://api.github.com/repos/{owner}/{repo}/git/ref/tags/{ref}"
    req = urllib.request.Request(url, headers=github_headers())
    try:
        with urllib.request.urlopen(req, timeout=15) as resp:
            obj = json.loads(resp.read())["object"]
        if obj["type"] != "tag":
            return obj["sha"]
        # The ref points at a tag object: follow it one level to its target.
        deref_req = urllib.request.Request(obj["url"], headers=github_headers())
        with urllib.request.urlopen(deref_req, timeout=15) as deref_resp:
            tag_data = json.loads(deref_resp.read())
        return tag_data["object"]["sha"]
    except urllib.error.HTTPError as err:
        # 404 is the expected "no such tag" answer; anything else (e.g. a
        # 403 from rate limiting) is worth surfacing instead of hiding.
        if err.code != 404:
            print(
                f" WARNING: HTTP {err.code} while resolving tag {owner}/{repo} ref={ref}",
                file=sys.stderr,
            )
        return None
    except OSError as err:
        # URLError, timeouts and connection resets are all OSError subclasses.
        # Treat them as a failed lookup so one flaky request does not abort
        # the whole run.
        print(
            f" WARNING: network error while resolving tag {owner}/{repo} ref={ref}: {err}",
            file=sys.stderr,
        )
        return None
|
||||||
|
|
||||||
|
|
||||||
|
def resolve_ref_to_sha(owner: str, repo: str, ref: str) -> str | None:
    """Resolve a branch/tag ref to its commit SHA via the GitHub API.

    A ref that already matches ``SHA_RE`` (a full 40-character SHA) is
    returned unchanged without any API call. Otherwise the ref is tried as a
    branch first, then as a tag.

    The ref is looked up exactly as given; no attempt is made to expand it
    into a longer, slash-containing branch name.

    Args:
        owner: GitHub account or organisation that owns the repository.
        repo: Repository name.
        ref: Branch name, tag name, or full commit SHA.

    Returns:
        The commit SHA, or ``None`` (after printing a warning to stderr) when
        neither lookup succeeds.
    """
    if SHA_RE.match(ref):
        return ref

    # Branch takes precedence over a tag of the same name.
    sha = _try_resolve_branch(owner, repo, ref) or _try_resolve_tag(owner, repo, ref)
    if sha:
        return sha

    print(f" WARNING: could not resolve {owner}/{repo} ref={ref}", file=sys.stderr)
    return None
|
||||||
|
|
||||||
|
|
||||||
|
def check_rate_limit():
    """Report the current GitHub API quota for the "core" resource.

    Prints the remaining/limit counts and the seconds until reset, plus a
    warning on stderr when fewer than 50 requests remain.

    Returns:
        The remaining request count, or ``None`` if the status could not be
        fetched or parsed (the error is printed to stderr).
    """
    request = urllib.request.Request(
        "https://api.github.com/rate_limit", headers=github_headers()
    )
    try:
        with urllib.request.urlopen(request, timeout=10) as response:
            payload = json.loads(response.read())
        core = payload["resources"]["core"]
        remaining, limit = core["remaining"], core["limit"]
        reset_in = max(0, core["reset"] - int(time.time()))
        print(f"GitHub API rate limit: {remaining}/{limit} remaining, resets in {reset_in}s")
        if remaining < 50:
            print(
                "WARNING: Low rate limit. Set GITHUB_TOKEN env var for 5000 req/hr.",
                file=sys.stderr,
            )
        return remaining
    except Exception as e:
        # Best-effort check: any failure is reported and treated as "unknown".
        print(f"Could not check rate limit: {e}", file=sys.stderr)
        return None
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    """Command-line entry point: pin raw.githubusercontent.com URLs to commit SHAs.

    Steps:
      1. Read the target file (``--file``, default ``CODE_SYNTAX_PATH``).
      2. Collect the unique ``(owner, repo, ref)`` triples, taking the first
         path segment after the repo name as the ref.
      3. Resolve every ref that is not already a full SHA with
         ``resolve_ref_to_sha``.
      4. Replace ``owner/repo/<ref>/`` with ``owner/repo/<sha>/`` throughout
         the text and write it back, unless ``--dry-run`` was given.

    Exits with status 1 when the quota reported by ``check_rate_limit`` is
    smaller than the number of refs that need resolving.
    """
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument(
        "--dry-run",
        action="store_true",
        help="Print changes without modifying the file",
    )
    parser.add_argument(
        "--file",
        default=CODE_SYNTAX_PATH,
        help="Path to code_syntax.rs",
    )
    args = parser.parse_args()

    # Explicit UTF-8 avoids depending on the platform default encoding, and
    # newline="" keeps the file's existing line endings untouched on the
    # read/write round trip.
    with open(args.file, encoding="utf-8", newline="") as f:
        content = f.read()

    # Only the first path segment after the repo is treated as the ref, so
    # branch names that contain a slash (e.g. "series/3.x") are not handled:
    # their first segment will simply fail to resolve and be reported.
    # Segments stop at whitespace and quotes so that several URLs on one line
    # are all found and a closing quote never leaks into a ref.
    segment = r"""[^/\s"']+"""
    url_re = re.compile(
        r"https://raw\.githubusercontent\.com/"
        rf"(?P<owner>{segment})/(?P<repo>{segment})/(?P<ref>{segment})/"
    )
    urls_found = url_re.findall(content)

    # Map each unique (owner, repo, ref) to its SHA; None means "not resolved
    # yet". Refs that already are full SHAs map to themselves.
    ref_keys: dict[tuple[str, str, str], str | None] = {}
    for owner, repo, ref in urls_found:
        ref_keys.setdefault((owner, repo, ref), ref if SHA_RE.match(ref) else None)

    pending = sorted(key for key, sha in ref_keys.items() if sha is None)
    to_resolve = len(pending)
    already_pinned = len(ref_keys) - to_resolve

    print(f"Found {len(urls_found)} URLs across {len(ref_keys)} unique (owner/repo/ref) combos")
    print(f" Already pinned to SHA: {already_pinned}")
    print(f" Need resolution: {to_resolve}")

    if to_resolve == 0:
        print("Nothing to do — all URLs already use commit SHAs.")
        return

    # This is a lower bound: a ref that is not a branch costs extra requests
    # for the tag lookup.
    remaining = check_rate_limit()
    if remaining is not None and remaining < to_resolve:
        print(
            f"ERROR: Need {to_resolve} API calls but only {remaining} remaining. "
            "Set GITHUB_TOKEN or wait for reset.",
            file=sys.stderr,
        )
        sys.exit(1)

    # Resolve each unique ref
    resolved = 0
    failed = 0
    for done, key in enumerate(pending, start=1):
        sha = resolve_ref_to_sha(*key)
        if sha:
            ref_keys[key] = sha
            resolved += 1
        else:
            failed += 1
        # Progress
        if done % 10 == 0 or done == to_resolve:
            print(f" Progress: {done}/{to_resolve} ({resolved} resolved, {failed} failed)")
        # Be polite to the API. Dry runs and failed lookups hit the API just
        # the same, so pause after every lookup.
        time.sleep(0.1)

    print(f"\nResolved {resolved}/{to_resolve} refs ({failed} failures)")

    # Apply the replacements
    replacements = 0
    new_content = content
    for (owner, repo, ref), sha in ref_keys.items():
        if not sha or sha == ref:
            continue
        old_prefix = f"raw.githubusercontent.com/{owner}/{repo}/{ref}/"
        new_prefix = f"raw.githubusercontent.com/{owner}/{repo}/{sha}/"
        count = new_content.count(old_prefix)
        if count > 0:
            if args.dry_run:
                print(f" {owner}/{repo}: {ref} -> {sha[:12]}... ({count} URLs)")
            new_content = new_content.replace(old_prefix, new_prefix)
            replacements += count

    print(f"\nTotal URL replacements: {replacements}")

    if args.dry_run:
        print("\n(dry-run mode — no file modified)")
    else:
        with open(args.file, "w", encoding="utf-8", newline="") as f:
            f.write(new_content)
        print(f"Wrote {args.file}")


if __name__ == "__main__":
    main()
|
||||||
18
src/app.rs
18
src/app.rs
@@ -2089,18 +2089,15 @@ impl App {
|
|||||||
let chosen = self.code_drill_language_override.clone().unwrap();
|
let chosen = self.code_drill_language_override.clone().unwrap();
|
||||||
|
|
||||||
// Step 2: Check if we need to download (only if not already attempted)
|
// Step 2: Check if we need to download (only if not already attempted)
|
||||||
if self.config.code_downloads_enabled
|
if self.config.code_downloads_enabled && !self.code_download_attempted {
|
||||||
&& !self.code_download_attempted
|
let queue =
|
||||||
&& !is_language_cached(&self.config.code_download_dir, &chosen)
|
build_code_download_queue(&chosen, &self.config.code_download_dir);
|
||||||
{
|
if !queue.is_empty() {
|
||||||
if let Some(lang) = language_by_key(&chosen) {
|
self.code_intro_download_total = queue.len();
|
||||||
if !lang.repos.is_empty() {
|
self.code_download_queue = queue;
|
||||||
let repo_idx = self.rng.gen_range(0..lang.repos.len());
|
|
||||||
self.code_download_queue = vec![(chosen.clone(), repo_idx)];
|
|
||||||
self.code_intro_download_total = 1;
|
|
||||||
self.code_intro_downloaded = 0;
|
self.code_intro_downloaded = 0;
|
||||||
self.code_intro_downloading = true;
|
self.code_intro_downloading = true;
|
||||||
self.code_intro_current_repo = lang.repos[repo_idx].key.to_string();
|
self.code_intro_current_repo.clear();
|
||||||
self.code_download_action = CodeDownloadCompleteAction::StartCodeDrill;
|
self.code_download_action = CodeDownloadCompleteAction::StartCodeDrill;
|
||||||
self.code_download_job = None;
|
self.code_download_job = None;
|
||||||
self.code_download_attempted = true;
|
self.code_download_attempted = true;
|
||||||
@@ -2108,7 +2105,6 @@ impl App {
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
// Step 3: If language has no built-in AND no cache → fallback
|
// Step 3: If language has no built-in AND no cache → fallback
|
||||||
if !is_language_cached(&self.config.code_download_dir, &chosen) {
|
if !is_language_cached(&self.config.code_download_dir, &chosen) {
|
||||||
|
|||||||
File diff suppressed because it is too large
Load Diff
@@ -5192,8 +5192,8 @@ mod review_tests {
|
|||||||
.expect("de should be selectable");
|
.expect("de should be selectable");
|
||||||
|
|
||||||
let output = render_settings_to_string(&app);
|
let output = render_settings_to_string(&app);
|
||||||
assert!(output.contains("German"));
|
assert!(output.contains("Deutsch"));
|
||||||
assert!(!output.contains("German (preview)"));
|
assert!(!output.contains("Deutsch (preview)"));
|
||||||
assert!(output.contains("de_qwertz"));
|
assert!(output.contains("de_qwertz"));
|
||||||
assert!(!output.contains("qwerty (preview)"));
|
assert!(!output.contains("qwerty (preview)"));
|
||||||
}
|
}
|
||||||
@@ -5396,7 +5396,7 @@ mod review_tests {
|
|||||||
assert!(output.contains("default adaptive drill will mix in keys"));
|
assert!(output.contains("default adaptive drill will mix in keys"));
|
||||||
assert!(output.contains("focus only on this branch"));
|
assert!(output.contains("focus only on this branch"));
|
||||||
assert!(output.contains("from this branch in the Skill Tree."));
|
assert!(output.contains("from this branch in the Skill Tree."));
|
||||||
assert!(output.contains("Proceed? (y/n)"));
|
assert!(output.contains("[y] Unlock"));
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
@@ -5408,7 +5408,7 @@ mod review_tests {
|
|||||||
let output = render_skill_tree_to_string_with_size(&app, 90, 24);
|
let output = render_skill_tree_to_string_with_size(&app, 90, 24);
|
||||||
assert!(output.contains("focus only on this branch"));
|
assert!(output.contains("focus only on this branch"));
|
||||||
assert!(output.contains("from this branch in the Skill Tree."));
|
assert!(output.contains("from this branch in the Skill Tree."));
|
||||||
assert!(output.contains("Proceed? (y/n)"));
|
assert!(output.contains("[y] Unlock"));
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
|
|||||||
Reference in New Issue
Block a user