aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJody Bruchon <jody@jodybruchon.com>2020-09-17 21:45:40 -0400
committerJody Bruchon <jody@jodybruchon.com>2020-09-17 21:45:40 -0400
commitfda63a4e873a7e5ce8282b63e30546c2c64e0458 (patch)
tree33bd31e49c4214f83bb470d2fe8fef2932fbd0ce
parent1d74d8d9f631c8ba610f581d59e825252ab16185 (diff)
Randomize archive order before populating search tree
This doesn't result in an elegant, perfectly balanced search tree, but it's absolutely good enough. This commit completely mitigates the worst-case scenario where the archive file is sorted. Signed-off-by: Jody Bruchon <jody@jodybruchon.com>
-rw-r--r--youtube_dlc/YoutubeDL.py47
1 files changed, 12 insertions, 35 deletions
diff --git a/youtube_dlc/YoutubeDL.py b/youtube_dlc/YoutubeDL.py
index 6da32960a..03bb245ba 100644
--- a/youtube_dlc/YoutubeDL.py
+++ b/youtube_dlc/YoutubeDL.py
@@ -122,17 +122,14 @@ class ArchiveTree(object):
# Tree insertion
def at_insert(self, line):
-# print("at_insert: ", line)
cur = self
while True:
-# print("comparing ", line, cur.line)
if cur.line:
if line < cur.line:
if cur.left is None:
cur.left = ArchiveTree(line)
return
else:
-# print("LEFT")
cur = cur.left
continue
elif line > cur.line:
@@ -140,7 +137,6 @@ class ArchiveTree(object):
cur.right = ArchiveTree(line)
return
else:
-# print("RIGHT")
cur = cur.right
continue
else:
@@ -426,43 +422,24 @@ class YoutubeDL(object):
if ioe.errno != errno.ENOENT:
raise
lmax = len(lines)
- if lmax >= 4:
+ if lmax > 10:
# Populate binary search tree by splitting the archive list in half
# and then adding from the outside edges inward
# This mitigates the worst case where the archive has been sorted
- ptrLL = 0
- ptrLR = lmax // 2
- ptrRL = ptrLR + 1
- ptrRR = lmax - 1
- inserted = 0
- while True:
-# print("ptrs: %d %d %d %d" % (ptrLL, ptrLR, ptrRL, ptrRR))
- if ptrLR > ptrLL:
- self.archive.at_insert(lines[ptrLR])
- inserted += 1
- ptrLR -= 1;
- if ptrRL < ptrRR:
- self.archive.at_insert(lines[ptrRL])
- inserted += 1
- ptrRL += 1;
- if ptrLL < ptrLR:
- self.archive.at_insert(lines[ptrLL])
- inserted += 1
- ptrLL += 1;
- if ptrRR > ptrRL:
- self.archive.at_insert(lines[ptrRR])
- inserted += 1
- ptrRR -= 1;
- if ptrLL == ptrLR and ptrRL == ptrRR:
- print("inserted: %d, lmax: %d" % (inserted, lmax))
+ pos = 0
+ while pos < lmax:
+ if lmax - pos <= 2:
break
- elif lmax > 0:
- # Skip multi-line logic for a single line
- for idx in lines:
- self.archive.at_insert(idx)
- else:
+ target = random.randrange(pos + 1, lmax - 1)
+ temp = lines[pos]
+ lines[pos] = lines[target]
+ lines[target] = lines[pos]
+ pos += 1
+ elif lmax < 1:
# No lines were loaded
return False
+ for x in lines:
+ self.archive.at_insert(x)
return True
def check_deprecated(param, option, suggestion):