diff options
author | Wladimir J. van der Laan <laanwj@gmail.com> | 2017-03-13 16:09:38 +0100 |
---|---|---|
committer | Wladimir J. van der Laan <laanwj@gmail.com> | 2017-03-13 16:13:38 +0100 |
commit | a327e8ea30e10abdce910a3e37c78e00d0918548 (patch) | |
tree | 06c554f84c450923774a1a1dcad5cc04ef15224f /contrib | |
parent | 8040ae6fc576e9504186f2ae3ff2c8125de1095c (diff) |
devtools: Make github-merge compute SHA512 from git, instead of worktree
This changes tree_sha512sum() to request the objects for hashing from
git instead of from the working tree.
The change should make the process more deterministic (it hashes what
will be pushed) and hopefully avoids the frequent miscomputed SHA512s
that happen now.
Diffstat (limited to 'contrib')
-rwxr-xr-x | contrib/devtools/github-merge.py | 45 |
1 file changed, 37 insertions, 8 deletions
def tree_sha512sum(commit='HEAD'):
    """Calculate the Tree-SHA512 of a commit.

    Hashes the blob contents as stored in git (via `git cat-file --batch`)
    rather than reading the working tree, so the result covers exactly what
    will be pushed and cannot be skewed by local working-tree modifications.

    Arguments:
        commit: committish to hash (defaults to 'HEAD')

    Returns the overall SHA512 hex digest as a str.
    Raises IOError if git produces unexpected or truncated output.
    """
    # Request metadata for the entire tree, recursively.
    files = []
    blob_by_name = {}
    for line in subprocess.check_output([GIT, 'ls-tree', '--full-tree', '-r', commit]).splitlines():
        name_sep = line.index(b'\t')
        metadata = line[:name_sep].split()  # perms, 'blob', blobid
        # Explicit check rather than assert: asserts are stripped under -O,
        # and this validates external (git) output, not an internal invariant.
        if metadata[1] != b'blob':
            raise IOError('Unexpected object type %r in git ls-tree output' % metadata[1])
        name = line[name_sep + 1:]
        files.append(name)
        blob_by_name[name] = metadata[2]

    files.sort()
    # Open one connection to git-cat-file in batch mode and request data for
    # all blobs over it: much faster than launching a process per file.
    p = subprocess.Popen([GIT, 'cat-file', '--batch'], stdout=subprocess.PIPE, stdin=subprocess.PIPE)
    try:
        overall = hashlib.sha512()
        for f in files:
            blob = blob_by_name[f]
            # Request the blob's contents by id.
            p.stdin.write(blob + b'\n')
            p.stdin.flush()
            # Read the reply header: blobid, "blob", size.
            reply = p.stdout.readline().split()
            if not (reply[0] == blob and reply[1] == b'blob'):
                raise IOError('Unexpected reply from git cat-file: %r' % (reply,))
            size = int(reply[2])
            # Hash the blob data in 64 KiB chunks.
            intern = hashlib.sha512()
            ptr = 0
            while ptr < size:
                bs = min(65536, size - ptr)
                piece = p.stdout.read(bs)
                if len(piece) != bs:
                    raise IOError('Premature EOF reading git cat-file output')
                intern.update(piece)
                ptr += bs
            dig = intern.hexdigest()
            # Consume (and verify) the LF that follows the blob data.
            if p.stdout.read(1) != b'\n':
                raise IOError('Missing newline after blob data in git cat-file output')
            # Fold "<hexdigest> <filename>\n" into the overall hash.
            overall.update(dig.encode("utf-8"))
            overall.update(" ".encode("utf-8"))
            overall.update(f)
            overall.update("\n".encode("utf-8"))
        p.stdin.close()
        if p.wait():
            raise IOError('Non-zero return value executing git cat-file')
    finally:
        # Don't leak the child process if an exception escaped mid-stream.
        if p.poll() is None:
            p.kill()
            p.wait()
    return overall.hexdigest()