diff --git a/scripts/release.py b/scripts/release.py index 15ae73055f..ea00ecceeb 100755 --- a/scripts/release.py +++ b/scripts/release.py @@ -2,30 +2,74 @@ """ a release-master multitool """ +from __future__ import print_function, unicode_literals +import sys from path import path -from git import Repo +from git import Repo, Commit +from git.refs.symbolic import SymbolicReference import argparse from datetime import date, timedelta from dateutil.parser import parse as parse_datestring import re -from collections import OrderedDict +import collections +import functools import textwrap +import requests +import json +import getpass +try: + from pygments.console import colorize +except ImportError: + colorize = lambda color, text: text -IGNORED_EMAILS = set(("vagrant@precise32.(none)",)) JIRA_RE = re.compile(r"\b[A-Z]{2,}-\d+\b") +PR_BRANCH_RE = re.compile(r"remotes/edx/pr/(\d+)") PROJECT_ROOT = path(__file__).abspath().dirname() repo = Repo(PROJECT_ROOT) git = repo.git +class memoized(object): + """ + Decorator. Caches a function's return value each time it is called. + If called later with the same arguments, the cached value is returned + (not reevaluated). + + https://wiki.python.org/moin/PythonDecoratorLibrary#Memoize + """ + def __init__(self, func): + self.func = func + self.cache = {} + + def __call__(self, *args): + if not isinstance(args, collections.Hashable): + # uncacheable. a list, for instance. + # better to not cache than blow up. + return self.func(*args) + if args in self.cache: + return self.cache[args] + else: + value = self.func(*args) + self.cache[args] = value + return value + + def __repr__(self): + '''Return the function's docstring.''' + return self.func.__doc__ + + def __get__(self, obj, objtype): + '''Support instance methods.''' + return functools.partial(self.__call__, obj) + + def make_parser(): parser = argparse.ArgumentParser(description="release master multitool") parser.add_argument( - '--previous', '--prev', '-p', metavar="GITREV", default="origin/release", - help="previous release [origin/release]") + '--previous', '--prev', '-p', metavar="GITREV", default="edx/release", + help="previous release [%(default)s]") parser.add_argument( '--current', '--curr', '-c', metavar="GITREV", default="HEAD", - help="current release candidate [HEAD]") + help="current release candidate [%(default)s]") parser.add_argument( '--date', '-d', help="expected release date: defaults to " @@ -39,6 +83,132 @@ def make_parser(): return parser +def ensure_pr_fetch(): + """ + Make sure that the git repository contains a remote called "edx" that has + two fetch URLs; one for the main codebase, and one for pull requests. + Returns True if the environment was modified in any way, False otherwise. + """ + modified = False + remotes = git.remote().splitlines() + if not "edx" in remotes: + git.remote("add", "edx", "https://github.com/edx/edx-platform.git") + modified = True + # it would be nice to use the git-python API to do this, but it doesn't seem + # to support configurations with more than one value per key. :( + edx_fetches = git.config("remote.edx.fetch", get_all=True).splitlines() + pr_fetch = '+refs/pull/*/head:refs/remotes/edx/pr/*' + if pr_fetch not in edx_fetches: + git.config("remote.edx.fetch", pr_fetch, add=True) + git.fetch("edx") + modified = True + return modified + + +def get_github_creds(): + """ + Returns Github credentials if they exist, as a two-tuple of (username, token). + Otherwise, return None. + """ + netrc_auth = requests.utils.get_netrc_auth("https://api.github.com") + if netrc_auth: + return netrc_auth + config_file = path("~/.config/edx-release").expand() + if config_file.isfile(): + with open(config_file) as f: + config = json.load(f) + github_creds = config.get("credentials", {}).get("api.github.com", {}) + username = github_creds.get("username", "") + token = github_creds.get("token", "") + if username and token: + return (username, token) + return None + + +def create_github_creds(): + """ + https://developer.github.com/v3/oauth_authorizations/#create-a-new-authorization + """ + headers = {"User-Agent": "edx-release"} + payload = {"note": "edx-release"} + username = raw_input("Github username: ") + password = getpass.getpass("Github password: ") + response = requests.post( + "https://api.github.com/authorizations", + auth=(username, password), + headers=headers, data=json.dumps(payload), + ) + # is the user using two-factor authentication? + otp_header = response.headers.get("X-GitHub-OTP") + if not response.ok and otp_header and otp_header.startswith("required;"): + # get two-factor code, redo the request + headers["X-GitHub-OTP"] = raw_input("Two-factor authentication code: ") + response = requests.post( + "https://api.github.com/authorizations", + auth=(username, password), + headers=headers, data=json.dumps(payload), + ) + if not response.ok: + raise requests.exceptions.RequestException(response.json()["message"]) + return (username, response.json()["token"]) + + +def ensure_github_creds(attempts=3): + """ + Make sure that we have Github OAuth credentials. This will check the user's + .netrc file, as well as the ~/.config/edx-release file. If no credentials + exist in either place, it will prompt the user to create OAuth credentials, + and store them in ~/.config/edx-release. + + Returns False if we found credentials, True if we had to create them. + """ + if get_github_creds(): + return False + + # Looks like we need to create the OAuth creds + print("We need to set up OAuth authentication with Github's API. " + "Your password will not be stored.", file=sys.stderr) + token = None + for _ in range(attempts): + try: + username, token = create_github_creds() + except requests.exceptions.RequestException as e: + print( + "Invalid authentication: {}".format(e.message), + file=sys.stderr, + ) + continue + else: + break + if token: + print("Successfully authenticated to Github", file=sys.stderr) + if not token: + print("Too many invalid authentication attempts.", file=sys.stderr) + return False + + config_file = path("~/.config/edx-release").expand() + # make sure parent directory exists + config_file.parent.makedirs_p() + # read existing config if it exists + if config_file.isfile(): + with open(config_file) as f: + config = json.load(f) + else: + config = {} + # update config + if not "credentials" in config: + config["credentials"] = {} + if not "api.github.com" in config["credentials"]: + config["credentials"]["api.github.com"] = {} + config["credentials"]["api.github.com"]["username"] = username + config["credentials"]["api.github.com"]["token"] = token + # write it back out + with open(config_file, "w") as f: + json.dump(config, f) + + return True + + def default_release_date(): """ Returns a date object corresponding to the expected date of the next release: @@ -58,83 +228,202 @@ def parse_ticket_references(text): return JIRA_RE.findall(text) -def emails(commit_range): - """ - Returns a set of all email addresses responsible for the commits between - the two commit references. - """ - # %ae prints the authored_by email for the commit - # %n prints a newline - # %ce prints the committed_by email for the commit - emails = set(git.log(commit_range, format='%ae%n%ce').splitlines()) - return emails - IGNORED_EMAILS +class DoesNotExist(Exception): + def __init__(self, message, commit, branch): + self.message = message + self.commit = commit + self.branch = branch -def commits_by_email(commit_range, include_merge=False): +def get_merge_commit(commit, branch="master"): """ - Return a ordered dictionary of {email: commit_list} + Given a commit that was merged into the given branch, return the merge commit + for that event. + + http://stackoverflow.com/questions/8475448/find-merge-commit-which-include-a-specific-commit + """ + commit_range = "{}..{}".format(commit, branch) + ancestry_paths = git.rev_list(commit_range, ancestry_path=True).splitlines() + first_parents = git.rev_list(commit_range, first_parent=True).splitlines() + both = set(ancestry_paths) & set(first_parents) + for commit_hash in reversed(ancestry_paths): + if commit_hash in both: + return repo.commit(commit_hash) + # no merge commit! + msg = "No merge commit for {commit} in {branch}!".format( + commit=commit, branch=branch, + ) + raise DoesNotExist(msg, commit, branch) + + +def get_pr_info(num): + """ + Returns the info from the Github API + """ + url = "https://api.github.com/repos/edx/edx-platform/pulls/{num}".format(num=num) + username, token = get_github_creds() + headers = { + "Authorization": "token {}".format(token), + "User-Agent": "edx-release", + } + response = requests.get(url, headers=headers) + result = response.json() + if not response.ok: + raise requests.exceptions.RequestException(result["message"]) + return result + + +def get_merged_prs(start_ref, end_ref): + """ + Return the set of all pull requests (as integers) that were merged between + the start_ref and end_ref. + """ + ensure_pr_fetch() + start_unmerged_branches = set( + branch.strip() for branch in + git.branch(all=True, no_merged=start_ref).splitlines() + ) + end_merged_branches = set( + branch.strip() for branch in + git.branch(all=True, merged=end_ref).splitlines() + ) + merged_between_refs = start_unmerged_branches & end_merged_branches + merged_prs = set() + for branch in merged_between_refs: + match = PR_BRANCH_RE.search(branch) + if match: + merged_prs.add(int(match.group(1))) + return merged_prs + + +@memoized +def prs_by_email(start_ref, end_ref): + """ + Returns an ordered dictionary of {email: pr_list} + Email is the email address of the person who merged the pull request The dictionary is alphabetically ordered by email address - The commit list is ordered by commit author date + The pull request list is ordered by merge date """ - kwargs = {} - if not include_merge: - kwargs["no-merges"] = True + unordered_data = collections.defaultdict(set) + for pr_num in get_merged_prs(start_ref, end_ref): + ref = "refs/remotes/edx/pr/{num}".format(num=pr_num) + branch = SymbolicReference(repo, ref) + try: + merge = get_merge_commit(branch.commit, end_ref) + except DoesNotExist: + pass # this commit will be included in the commits_without_prs table + else: + unordered_data[merge.author.email].add((pr_num, merge)) - data = OrderedDict() - for email in sorted(emails(commit_range)): - authored_commits = set(repo.iter_commits( - commit_range, author=email, **kwargs - )) - committed_commits = set(repo.iter_commits( - commit_range, committer=email, **kwargs - )) - commits = authored_commits | committed_commits - data[email] = sorted(commits, key=lambda c: c.authored_date) - return data + ordered_data = collections.OrderedDict() + for email in sorted(unordered_data.keys()): + ordered = sorted(unordered_data[email], key=lambda pair: pair[1].authored_date) + ordered_data[email] = [num for num, merge in ordered] + return ordered_data -def generate_table(commit_range, include_merge=False): +def generate_pr_table(start_ref, end_ref): """ - Return a string corresponding to a commit table to embed in Confluence + Return a string corresponding to a pull request table to embed in Confluence """ - header = u"||Author||Summary||Commit||JIRA||Verified?||" - commit_link = "[commit|https://github.com/edx/edx-platform/commit/{sha}]" + header = "|| Merged By || Author || Title || PR || JIRA || Verified? ||" + pr_link = "[#{num}|https://github.com/edx/edx-platform/pull/{num}]" + user_link = "[@{user}|https://github.com/{user}]" rows = [header] - cbe = commits_by_email(commit_range, include_merge) - for email, commits in cbe.items(): - for i, commit in enumerate(commits): - rows.append(u"| {author} | {summary} | {commit} | {jira} | {verified} |".format( - author=email if i == 0 else "", - summary=commit.summary.replace("|", "\|"), - commit=commit_link.format(sha=commit.hexsha), - jira=", ".join(parse_ticket_references(commit.message)), + prbe = prs_by_email(start_ref, end_ref) + for email, pull_requests in prbe.items(): + for i, pull_request in enumerate(pull_requests): + try: + pr_info = get_pr_info(pull_request) + title = pr_info["title"] or "" + body = pr_info["body"] or "" + author = pr_info["user"]["login"] + except requests.exceptions.RequestException as e: + message = ( + "Warning: could not fetch data for #{num}: " + "{message}".format(num=pull_request, message=e.message) + ) + print(colorize("red", message), file=sys.stderr) + title = "?" + body = "?" + author = "" + rows.append("| {merged_by} | {author} | {title} | {pull_request} | {jira} | {verified} |".format( + merged_by=email if i == 0 else "", + author=user_link.format(user=author) if author else "", + title=title.replace("|", "\|"), + pull_request=pr_link.format(num=pull_request), + jira=", ".join(parse_ticket_references(body)), verified="", )) - return u"\n".join(rows) + return "\n".join(rows) -def generate_email(commit_range, release_date=None): +@memoized +def get_commits_not_in_prs(start_ref, end_ref): + """ + Return a tuple of commits that exist between start_ref and end_ref, + but were not merged to the end_ref. If everyone is following the + pull request process correctly, this should return an empty tuple. + """ + return tuple(Commit.iter_items( + repo, + "{start}..{end}".format(start=start_ref, end=end_ref), + first_parent=True, no_merges=True, + )) + + +def generate_commit_table(start_ref, end_ref): + """ + Return a string corresponding to a commit table to embed in Comfluence. + The commits in the table should only be commits that are not in the + pull request table. + """ + header = "|| Author || Summary || Commit || JIRA || Verified? ||" + commit_link = "[commit|https://github.com/edx/edx-platform/commit/{sha}]" + rows = [header] + commits = get_commits_not_in_prs(start_ref, end_ref) + for commit in commits: + rows.append("| {author} | {summary} | {commit} | {jira} | {verified} |".format( + author=commit.author.email, + summary=commit.summary.replace("|", "\|"), + commit=commit_link.format(sha=commit.hexsha), + jira=", ".join(parse_ticket_references(commit.message)), + verified="", + )) + return "\n".join(rows) + + +def generate_email(start_ref, end_ref, release_date=None): """ Returns a string roughly approximating an email. """ if release_date is None: release_date = default_release_date() + prbe = prs_by_email(start_ref, end_ref) email = """ To: {emails} - You've made changes that are about to be released. All of the commits - that you either authored or committed are listed below. Please verify them on - stage.edx.org and stage-edge.edx.org. + You merged at least one pull request for edx-platform that is going out + in this upcoming release, and you are responsible for verifying those + changes on the staging servers before the code is released. Please go + to the release page to do so: - Please record your notes on https://edx-wiki.atlassian.net/wiki/display/ENG/Release+Page%3A+{date} - and add any bugs found to the Release Candidate Bugs section. + https://edx-wiki.atlassian.net/wiki/display/ENG/Release+Page%3A+{date} - If you are a non-affiliated open-source contributor to edx-platform, - the edX employee who merged in your pull request will manually verify - your change(s), and you may disregard this message. + The staging servers are: + + https://www.stage.edx.org + https://stage-edge.edx.org + + Note that you are responsible for verifying any pull requests that you + merged, whether you wrote the code or not. (If you didn't write the code, + you can and should try to get the person who wrote the code to help + verify the changes -- but even if you can't, you're still responsible!) + If you find any bugs, please notify me and record the bugs on the + release page. Thanks! """.format( - emails=", ".join(sorted(emails(commit_range))), + emails=", ".join(prbe.keys()), date=release_date.isoformat(), ) return textwrap.dedent(email).strip() @@ -146,14 +435,15 @@ def main(): if isinstance(args.date, basestring): # user passed in a custom date, so we need to parse it args.date = parse_datestring(args.date).date() - commit_range = "{0}..{1}".format(args.previous, args.current) + + ensure_github_creds() if args.table: - print(generate_table(commit_range, include_merge=args.merge)) + print(generate_pr_table(args.previous, args.current)) return print("EMAIL:") - print(generate_email(commit_range, release_date=args.date).encode('UTF-8')) + print(generate_email(args.previous, args.current, release_date=args.date).encode('UTF-8')) print("\n") print("Wiki Table:") print( @@ -161,7 +451,22 @@ def main(): "in your release wiki page" ) print("\n") - print(generate_table(commit_range, include_merge=args.merge).encode('UTF-8')) + print(generate_pr_table(args.previous, args.current)) + commits_without_prs = get_commits_not_in_prs(args.previous, args.current) + if commits_without_prs: + num = len(commits_without_prs) + plural = num > 1 + print("\n") + print( + "There {are} {num} {commits} in this release that did not come in " + "through pull requests!".format( + num=num, are="are" if plural else "is", + commits="commits" if plural else "commit" + ) + ) + print("\n") + print(generate_commit_table(args.previous, args.current)) + if __name__ == "__main__": main()