aboutsummaryrefslogtreecommitdiff
path: root/contrib/devtools/github-merge.py
diff options
context:
space:
mode:
Diffstat (limited to 'contrib/devtools/github-merge.py')
-rwxr-xr-xcontrib/devtools/github-merge.py45
1 files changed, 37 insertions, 8 deletions
diff --git a/contrib/devtools/github-merge.py b/contrib/devtools/github-merge.py
index f1b6a12fd0..3fee39143d 100755
--- a/contrib/devtools/github-merge.py
+++ b/contrib/devtools/github-merge.py
@@ -78,24 +78,53 @@ def get_symlink_files():
ret.append(f.decode('utf-8').split("\t")[1])
return ret
-def tree_sha512sum():
- files = sorted(subprocess.check_output([GIT, 'ls-tree', '--full-tree', '-r', '--name-only', 'HEAD']).splitlines())
+def tree_sha512sum(commit='HEAD'):
+ # request metadata for entire tree, recursively
+ files = []
+ blob_by_name = {}
+ for line in subprocess.check_output([GIT, 'ls-tree', '--full-tree', '-r', commit]).splitlines():
+ name_sep = line.index(b'\t')
+ metadata = line[:name_sep].split() # perms, 'blob', blobid
+ assert(metadata[1] == b'blob')
+ name = line[name_sep+1:]
+ files.append(name)
+ blob_by_name[name] = metadata[2]
+
+ files.sort()
+ # open connection to git-cat-file in batch mode to request data for all blobs
+ # this is much faster than launching it per file
+ p = subprocess.Popen([GIT, 'cat-file', '--batch'], stdout=subprocess.PIPE, stdin=subprocess.PIPE)
overall = hashlib.sha512()
for f in files:
+ blob = blob_by_name[f]
+ # request blob
+ p.stdin.write(blob + b'\n')
+ p.stdin.flush()
+ # read header: blob, "blob", size
+ reply = p.stdout.readline().split()
+ assert(reply[0] == blob and reply[1] == b'blob')
+ size = int(reply[2])
+ # hash the blob data
intern = hashlib.sha512()
- fi = open(f, 'rb')
- while True:
- piece = fi.read(65536)
- if piece:
+ ptr = 0
+ while ptr < size:
+ bs = min(65536, size - ptr)
+ piece = p.stdout.read(bs)
+ if len(piece) == bs:
intern.update(piece)
else:
- break
- fi.close()
+ raise IOError('Premature EOF reading git cat-file output')
+ ptr += bs
dig = intern.hexdigest()
+ assert(p.stdout.read(1) == b'\n') # ignore LF that follows blob data
+ # update overall hash with file hash
overall.update(dig.encode("utf-8"))
overall.update(" ".encode("utf-8"))
overall.update(f)
overall.update("\n".encode("utf-8"))
+ p.stdin.close()
+ if p.wait():
+ raise IOError('Non-zero return value executing git cat-file')
return overall.hexdigest()