Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
92 changes: 52 additions & 40 deletions scripts/retrieve_ci_failures.py
Original file line number Diff line number Diff line change
Expand Up @@ -282,6 +282,48 @@ def diff_failure_vs_fix(repo, failure_commits, fix_commits):
return None


def process_diff(bug_id, obj, upload, repo_path):
diff_path = os.path.join("data", "ci_failures_diffs", f"{bug_id}.diff")
diff_zst_path = f"{diff_path}.zst"

if os.path.exists(diff_path) or os.path.exists(diff_zst_path):
return (0, 0)

if upload and utils.exists_s3(diff_zst_path):
return (0, 0)

try:
diff = diff_failure_vs_fix(
repo_path,
[
utils.hg2git(commit["node"])
for commit in obj["commits"]
if commit["backedoutby"]
],
[
utils.hg2git(commit["node"])
for commit in obj["commits"]
if not commit["backedoutby"] and not commit["backsout"]
],
)
except requests.exceptions.HTTPError as e:
logger.error("Failure mapping hg commit hash to git commit hash %s", e)
return (1, 0)

if diff is not None and len(diff) > 0:
with open(diff_path, "wb") as f:
f.write(diff)

utils.zstd_compress(diff_path)

os.remove(diff_path)

if upload:
utils.upload_s3([diff_zst_path])
else:
return (0, 1)


def generate_diffs(repo_url, repo_path, fixed_by_commit_pushes, upload):
if not os.path.exists(repo_path):
for attempt in tenacity.Retrying(
Expand All @@ -298,48 +340,18 @@ def generate_diffs(repo_url, repo_path, fixed_by_commit_pushes, upload):

diff_errors = 0
mapping_errors = 0
for bug_id, obj in tqdm(
fixed_by_commit_pushes.items(), total=len(fixed_by_commit_pushes)
):
diff_path = os.path.join("data", "ci_failures_diffs", f"{bug_id}.diff")
diff_zst_path = f"{diff_path}.zst"
if os.path.exists(diff_path) or os.path.exists(diff_zst_path):
continue
diffs = [(bug_id, obj) for bug_id, obj in fixed_by_commit_pushes.items()]

if upload and utils.exists_s3(diff_zst_path):
continue

try:
diff = diff_failure_vs_fix(
repo_path,
[
utils.hg2git(commit["node"])
for commit in obj["commits"]
if commit["backedoutby"]
],
[
utils.hg2git(commit["node"])
for commit in obj["commits"]
if not commit["backedoutby"] and not commit["backsout"]
],
)
except requests.exceptions.HTTPError as e:
logger.error("Failure mapping hg commit hash to git commit hash %s", e)
mapping_errors += 1
continue

if diff is not None and len(diff) > 0:
with open(diff_path, "wb") as f:
f.write(diff)

utils.zstd_compress(diff_path)

os.remove(diff_path)
with ThreadPoolExecutor() as executor:
futures = [
executor.submit(process_diff, bug_id, obj, upload, repo_path)
for bug_id, obj in diffs
]

if upload:
utils.upload_s3([diff_zst_path])
else:
diff_errors += 1
for future in tqdm(as_completed(futures), totla=len(futures)):
mapping_error, diff_error = future.result()
mapping_errors += mapping_error
diff_errors += diff_error

logger.info(f"Failed mapping {mapping_errors} hashes")
logger.info(f"Failed generating {diff_errors} diffs")
Expand Down