Improvements to Browserstack golden/gm runs (#10967) d2f21b08aa

A few quality-of-life improvements to the way the Browserstack runs (and image diffing) work and look.

- Add a Device Summary to the generated diff page that shows the list of devices and how many tests passed/failed/were missing for each (with failures sorted to the top). This makes it easier, when things are failing a lot (like currently in Vulkan), to see where the failures are
- Fix an error when running the Browserstack script on Windows
- Add the Android OS version to the device name (there are multiple of the same device make/model on Browserstack running on different OS versions, and we were squishing their results together into one pile)
- Simplify the device name for devices with a redundant model name (i.e. Google_Pixel_6 instead of Google_Pixel_6_Pixel_6)
- Set a display name on Browserstack (mostly so that the Browserstack dashboard will stop showing a warning that some tests don't have names). The display name contains the backend name (or "default" if it was unspecified), so browsing between gl and vulkan runs is easier
- Unified color definitions (and changed a couple to be easier to read, especially in the device summary page)
- Fixed how some of the text was being injected into the template to not contain a bunch of (conveniently invisible) \ns (that is, the actual text \n was in the document, not as newlines)
- Diff page no longer shows the "pass" and "identical" sections if it's in fails_only mode

Co-authored-by: Josh Jersild <joshua@rive.app>
This commit is contained in:
JoshJRive
2025-11-04 22:24:29 +00:00
parent ee71941573
commit dbe049473c
5 changed files with 143 additions and 62 deletions

View File

@@ -1 +1 @@
2daf848d0a0b9bc684474d646081b12263be17ae
d2f21b08aa321ca6a243e0c063e5c064f2a9f9be

View File

@@ -6,6 +6,7 @@ import subprocess
import os.path
import pathlib
import sys
from collections import defaultdict
if not "NO_VENV" in os.environ.keys():
from venv import create
@@ -389,6 +390,11 @@ def diff_directory_shallow(candidates_path, output_path, golden_path, device_nam
return (entries, missing, success)
# Sort descending total failure count (missing + failure), then by descending failure count, then by name
# (put the failed ones at the top)
def device_entry_sort_key(kv):
    """Return the sort key for one ``(device_name, stats)`` pair.

    Devices are ordered by descending total problem count (missing
    candidates plus failures), then by descending failure count, then by
    ascending device name, so the worst devices come first.

    ``stats`` is any mapping of result type to count. Counts are read with
    ``.get(..., 0)`` so that sorting never inserts keys into a
    ``defaultdict`` as a side effect and a mapping that lacks a count key
    is treated as having zero of that result type.
    """
    stats = kv[1]
    failed = stats.get("failed", 0)
    missing = stats.get("missing_candidate", 0)
    # Negate the counts so that an ascending sort puts the largest first.
    return (-(missing + failed), -failed, kv[0])
# returns entries sorted into identical, passing and failing as well as html str list of each
# based on arguments passed, we may or may not return all of the string lists, but we always return the object lists
def sort_entries(entries):
@@ -403,11 +409,50 @@ def sort_entries(entries):
sorted_failed_entires = sorted(failed_entires, reverse=True)
sorted_failed_str = [str(entry) for entry in sorted_failed_entires]
# Build a list of stat counts by device name
all_device_stats = dict()
for entry in entries:
if entry.device not in all_device_stats:
all_device_stats[entry.device] = defaultdict(int)
all_device_stats[entry.device]["url"] = entry.browserstack_details['browser_url'] if entry.browserstack_details is not None else ' '
all_device_stats[entry.device][entry.type] += 1
# Now build the device summary text using the template
device_summary_str = ""
with open(os.path.join(TEMPLATE_PATH, "device_summary_entry.html")) as t:
device_summary_entry_template = t.read()
for device_name, device_summary in sorted(all_device_stats.items(), key=device_entry_sort_key):
none_succeeded = device_summary["pass"] == 0 and device_summary["identical"] == 0
any_failed = device_summary["failed"] > 0
any_missing = device_summary["missing_candidate"] > 0
device_summary_str += device_summary_entry_template.format(
name=device_name,
url=device_summary["url"],
failed_count=device_summary["failed"] if any_failed else "-",
failed_class="fail" if any_failed else "deemphasize",
missing_count=device_summary["missing_candidate"] if any_missing else "-",
missing_class="fail" if any_missing else "deemphasize",
pass_count=device_summary["pass"],
pass_class="fail" if none_succeeded else "success",
identical_count=device_summary["identical"],
identical_class="fail" if none_succeeded else "" if device_summary["identical"] == 0 else "success")
# if we are only doing fails then only sort those and return empty html lists for "pass" and "identical" we still build and return
# identical and pass object lists for cleaning, but we dont bother sorting them
if args.fails_only:
return (sorted_failed_entires, pass_entires, identical_entires, sorted_failed_str, [], [], missing_golden_str, missing_candidate_str)
return (
sorted_failed_entires,
pass_entires,
identical_entires,
sorted_failed_str,
[],
[],
missing_golden_str,
missing_candidate_str,
device_summary_str,
len(all_device_stats))
# now sort passed entires and build the html list
@@ -416,22 +461,35 @@ def sort_entries(entries):
# if we are cleaning then return empty html list for identical. do everything else the same
if args.clean:
return (sorted_failed_entires, sorted_passed_entires, identical_entires, sorted_failed_str, sorted_passed_str, [], missing_golden_str, missing_candidate_str)
return (sorted_failed_entires, sorted_passed_entires, identical_entires, sorted_failed_str, sorted_passed_str, [], missing_golden_str, missing_candidate_str, device_summary_str, len(all_device_stats))
# otherwise build identical html entry list and include it in the return
identical_str = [str(entry) for entry in identical_entires]
return (sorted_failed_entires, sorted_passed_entires, identical_entires, sorted_failed_str, sorted_passed_str, identical_str, missing_golden_str, missing_candidate_str)
return (
sorted_failed_entires,
sorted_passed_entires,
identical_entires,
sorted_failed_str,
sorted_passed_str,
identical_str,
missing_golden_str,
missing_candidate_str,
device_summary_str,
len(all_device_stats))
def write_html(templates_path, failed_entries, passing_entries, identical_entries, missing_golden_entries, missing_candidate_entries, output_path):
def write_html(templates_path, failed_entries, passing_entries, identical_entries, missing_golden_entries, missing_candidate_entries, device_summary_str, device_number, output_path):
with open(os.path.join(templates_path, "index.html")) as t:
index_template = t.read()
html = index_template.format(identical=identical_entries, passing=passing_entries,
failed=failed_entries, failed_number=len(failed_entries),
html = index_template.format(identical=" ".join(identical_entries), passing=" ".join(passing_entries),
failed=" ".join(failed_entries), failed_number=len(failed_entries),
passing_number=len(passing_entries), identical_number=len(identical_entries),
missing_candidate=missing_candidate_entries, missing_candidate_number=len(missing_candidate_entries),
missing_golden=missing_golden_entries, missing_golden_number=len(missing_golden_entries))
missing_candidate=" ".join(missing_candidate_entries), missing_candidate_number=len(missing_candidate_entries),
missing_golden=" ".join(missing_golden_entries), missing_golden_number=len(missing_golden_entries),
device_summaries=device_summary_str, device_number=device_number,
pass_hide_class="hidden" if args.fails_only else "",
identical_hide_class="hidden" if args.fails_only or args.clean else "")
with open(os.path.join(output_path, "index.html"), "w") as file:
file.write(html)
@@ -469,7 +527,7 @@ def diff_directory_deep(candidates_path, output_path):
if args.pack:
shallow_copy_images(folder.path, output)
(failed, passed, identical, failed_str, passed_str, identical_str, missing_golden_str, missing_candidate_str) = sort_entries(all_entries)
(failed, passed, identical, failed_str, passed_str, identical_str, missing_golden_str, missing_candidate_str, device_summary_str, device_number) = sort_entries(all_entries)
to_clean = []
to_check = []
@@ -498,7 +556,7 @@ def diff_directory_deep(candidates_path, output_path):
os.remove(os.path.join(args.goldens, f"{obj.name}.png"))
write_html(TEMPLATE_PATH, failed_str, passed_str, identical_str, missing_golden_str, missing_candidate_str, output_path)
write_html(TEMPLATE_PATH, failed_str, passed_str, identical_str, missing_golden_str, missing_candidate_str, device_summary_str, device_number, output_path)
print(f"total entries {len(all_entries)}")
write_min_csv(len(passed), len(failed), len(identical), len(all_entries), output_path + "/issues.csv")
@@ -528,9 +586,9 @@ def main(argv=None):
else:
(entries, missing, success) = diff_directory_shallow(args.candidates, args.output, args.goldens)
if len(entries) > 0:
(failed, passed, identical, failed_str, passed_str, identical_str, missing_golden_str, missing_candidate_str) = sort_entries(entries)
(failed, passed, identical, failed_str, passed_str, identical_str, missing_golden_str, missing_candidate_str, device_summary_str, device_number) = sort_entries(entries)
assert(len(failed) + len(passed) + len(identical) + len(missing_candidate_str) + len(missing_golden_str) == len(entries))
write_html(TEMPLATE_PATH, failed_str, passed_str, identical_str, missing_golden_str, missing_candidate_str, args.output)
write_html(TEMPLATE_PATH, failed_str, passed_str, identical_str, missing_golden_str, missing_candidate_str, device_summary_str, device_number, args.output)
# note could add these to the html output but w/e
missing_candidates = [os.path.basename(entry.candidates_path_abs) for entry in missing if entry.type == 'missing_candidate']
write_csv(entries, args.goldens, args.candidates, args.output, missing_candidates)

View File

@@ -0,0 +1,8 @@
<tr class="device_summary_entry">
<th class="device_entry_name"><a target="_blank" href="{url}">{name}</a></th>
<th class="{failed_class}">{failed_count}</th>
<th class="{missing_class}">{missing_count}</th>
<th class="{pass_class}">{pass_count}</th>
<th class="{identical_class}">{identical_count}</th>
</tr>

View File

@@ -34,7 +34,6 @@
<td> <a href="{candidate}"><img src="{candidate}" height="192"></a></td>
<td> <a href="{diff0}"><img src="{diff0}" height="192"></a></td>
<td> <a href="{diff1}"><img src="{diff1}" height="192"></a></td>
</tr>
</tbody>
</table>

View File

@@ -5,15 +5,36 @@
<link rel="icon" href="favicon.ico">
<link href="https://unpkg.com/bonsai.css@latest/dist/bonsai.min.css" rel="stylesheet">
<style>
:root {{
--standard-text-color: #c2baba;
--failure-text-color: #f04646;
--success-text-color: #46d046;
--deemphasized-text-color: #687e7e;
--far-background-color: #18191a;
--panel-background-color: #43484b;
--header-background-color: #292b2c;
--alternate-row-background-color: #373B3D;
}}
details h2:not(:first-of-type) {{
padding-top: 50px;
}}
header {{
text-align: left;
}}
body {{
background-color: #18191a;
background-color: var(--far-background-color);
color: var(--standard-text-color);
}}
.hidden {{
display: none;
}}
details {{
background-color: #43484b;
background-color: var(--panel-background-color);
}}
td {{
@@ -24,46 +45,29 @@
text-align: left;
}}
img {{
float: left;
details summary {{
background-color: var(--header-background-color);
overflow: auto;
color: var(--standard-text-color);
}}
.success {{
background-color: rgb(8, 128, 8);
overflow:auto;
.success {{
color: var(--success-text-color);
}}
.success_summary {{
background-color: #292b2c;
color: rgb(8, 128, 8);
overflow:auto;
.fail {{
color: var(--failure-text-color);
}}
.failed {{
background-color: rgb(175, 13, 13);
overflow:auto;
}}
.failed_summary {{
background-color: #292b2c;
color: rgb(175, 13, 13);
overflow:auto;
}}
.identical_summary {{
background-color: #292b2c;
color: rgb(194, 186, 186);
overflow:auto;
.deemphasize {{
color: var(--deemphasized-text-color);
}}
.legend_summary {{
background-color: #292b2c;
color: rgb(194, 186, 186);
overflow:auto;
font-weight: bold;
}}
.entry {{
float: left;
font-size: 20px;
vertical-align:top;
}}
@@ -71,63 +75,75 @@
.entry_table_values {{
border-spacing: 10px 0;
text-align: right;
color: rgb(177, 184, 177);
padding-top: 20px;
}}
.entry_table_images {{
border-spacing: 10px 0;
text-align: right;
color: rgb(177, 184, 177);
}}
.header_table {{
width:100%;
text-align:left;
color: rgb(194, 186, 186);
}}
.white_text {{
color: rgb(194, 186, 186);
}}
.off_white {{
background-color: whitesmoke;
.header_table tbody tr:nth-child(odd),
.device_summary_values tbody tr:nth-child(odd) {{
background-color: var(--alternate-row-background-color);
}}
</style>
</head>
<body>
<details open>
<summary class="failed_summary">{failed_number} Failed Tests</summary>
<summary class="fail">{failed_number} Failed Tests</summary>
<p>
{failed}
</p>
</details>
<details open>
<summary class="failed_summary">{missing_candidate_number} Missing Candidate</summary>
<summary class="fail">{missing_candidate_number} Missing Candidate</summary>
<p>
{missing_candidate}
</p>
</details>
<details open>
<summary class="failed_summary">{missing_golden_number} Missing Golden</summary>
<summary class="fail">{missing_golden_number} Missing Golden</summary>
<p>
{missing_golden}
</p>
</details>
<details>
<summary class="success_summary">{passing_number} Passing Tests</summary>
<details class="{pass_hide_class}">
<summary class="success">{passing_number} Passing Tests</summary>
<p>
{passing}
</p>
</details>
<details>
<summary class="identical_summary">{identical_number} Identical Images</summary>
<details class="{identical_hide_class}">
<summary>{identical_number} Identical Images</summary>
<p>
{identical}
</p>
</details>
<details>
<summary class="legend_summary"><b>legend</b></summary>
<summary class="device_summary">Device Summary ({device_number} device(s))</summary>
<table class="device_summary_values">
<thead>
<tr>
<th>Device Name</th>
<th>Fail Count</th>
<th>Missing Count</th>
<th>Passing Count</th>
<th>Identical Count</th>
</tr>
</thead>
<tbody>
{device_summaries}
</tbody>
</table>
</details>
<details>
<summary class="legend_summary">legend</summary>
<table class="header_table">
<thead>
<tr>