aboutsummaryrefslogtreecommitdiff
path: root/contrib
diff options
context:
space:
mode:
author: Wladimir J. van der Laan <laanwj@gmail.com> 2017-03-13 16:09:38 +0100
committer: Wladimir J. van der Laan <laanwj@gmail.com> 2017-03-13 16:13:38 +0100
commit: a327e8ea30e10abdce910a3e37c78e00d0918548 (patch)
tree: 06c554f84c450923774a1a1dcad5cc04ef15224f /contrib
parent: 8040ae6fc576e9504186f2ae3ff2c8125de1095c (diff)
devtools: Make github-merge compute SHA512 from git, instead of worktree
This changes tree_sha512sum() to request the objects for hashing from git instead of from the working tree. The change should make the process more deterministic (it hashes what will be pushed) and hopefully avoid the frequently miscomputed SHA512 hashes that occur now.
Diffstat (limited to 'contrib')
-rwxr-xr-xcontrib/devtools/github-merge.py45
1 files changed, 37 insertions, 8 deletions
diff --git a/contrib/devtools/github-merge.py b/contrib/devtools/github-merge.py
index f1b6a12fd0..3fee39143d 100755
--- a/contrib/devtools/github-merge.py
+++ b/contrib/devtools/github-merge.py
@@ -78,24 +78,53 @@ def get_symlink_files():
ret.append(f.decode('utf-8').split("\t")[1])
return ret
-def tree_sha512sum():
- files = sorted(subprocess.check_output([GIT, 'ls-tree', '--full-tree', '-r', '--name-only', 'HEAD']).splitlines())
+def tree_sha512sum(commit='HEAD'):
+ # request metadata for entire tree, recursively
+ files = []
+ blob_by_name = {}
+ for line in subprocess.check_output([GIT, 'ls-tree', '--full-tree', '-r', commit]).splitlines():
+ name_sep = line.index(b'\t')
+ metadata = line[:name_sep].split() # perms, 'blob', blobid
+ assert(metadata[1] == b'blob')
+ name = line[name_sep+1:]
+ files.append(name)
+ blob_by_name[name] = metadata[2]
+
+ files.sort()
+ # open connection to git-cat-file in batch mode to request data for all blobs
+ # this is much faster than launching it per file
+ p = subprocess.Popen([GIT, 'cat-file', '--batch'], stdout=subprocess.PIPE, stdin=subprocess.PIPE)
overall = hashlib.sha512()
for f in files:
+ blob = blob_by_name[f]
+ # request blob
+ p.stdin.write(blob + b'\n')
+ p.stdin.flush()
+ # read header: blob, "blob", size
+ reply = p.stdout.readline().split()
+ assert(reply[0] == blob and reply[1] == b'blob')
+ size = int(reply[2])
+ # hash the blob data
intern = hashlib.sha512()
- fi = open(f, 'rb')
- while True:
- piece = fi.read(65536)
- if piece:
+ ptr = 0
+ while ptr < size:
+ bs = min(65536, size - ptr)
+ piece = p.stdout.read(bs)
+ if len(piece) == bs:
intern.update(piece)
else:
- break
- fi.close()
+ raise IOError('Premature EOF reading git cat-file output')
+ ptr += bs
dig = intern.hexdigest()
+ assert(p.stdout.read(1) == b'\n') # ignore LF that follows blob data
+ # update overall hash with file hash
overall.update(dig.encode("utf-8"))
overall.update(" ".encode("utf-8"))
overall.update(f)
overall.update("\n".encode("utf-8"))
+ p.stdin.close()
+ if p.wait():
+ raise IOError('Non-zero return value executing git cat-file')
return overall.hexdigest()