-
-
Notifications
You must be signed in to change notification settings - Fork 169
/
specificity.sql
125 lines (112 loc) · 3.37 KB
/
specificity.sql
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
#standardSQL
CREATE TEMPORARY FUNCTION getSpecificityInfo(css STRING)
RETURNS STRUCT<
ruleCount NUMERIC,
selectorCount NUMERIC,
distribution ARRAY<STRUCT<specificity STRING, specificity_cmp STRING, freq INT64>>
>
LANGUAGE js
OPTIONS (library = "gs://httparchive/lib/css-utils.js")
AS '''
try {
function extractSpecificity(ast) {
let ret = {
selectorCount: 0,
ruleCount: 0,
specificityCount: {},
maxSpecifity: [0, 0, 0]
};
let ss = [0, 0, 0];
walkRules(ast, rule => {
ret.ruleCount++;
for (let selector of rule.selectors) {
ret.selectorCount++;
let s = parsel.specificity(selector);
ss = ss.map((a, i) => a + s[i]);
let max = Math.max(...s);
incrementByKey(ret.specificityCount, max <= 5? s + "" : "higher");
let base = Math.max(...ret.maxSpecifity, ...s);
if (parsel.specificityToNumber(s, base) > parsel.specificityToNumber(ret.maxSpecifity, base)) {
ret.maxSpecifity = s;
ret.maxSpecifitySelector = selector;
}
}
}, {type: "rule"});
ret.selectorsPerRule = ret.selectorCount / ret.ruleCount;
ret.avgSpecificity = ss.map(s => s / ret.selectorCount);
return ret;
}
function toComparableString(specificity) {
if (!specificity) {
return null;
}
if (specificity.split(',').length !== 3) {
return null;
}
// The highest unit of specificity is 9398, so we need 5 digits of padding.
return specificity.split(',').map(i => i.padStart(5, '0')).join('') + specificity;
}
const ast = JSON.parse(css);
let specificity = extractSpecificity(ast);
let ruleCount = specificity.ruleCount;
let selectorCount = specificity.selectorCount;
let distribution = Object.entries(specificity.specificityCount).map(([specificity, freq]) => {
return {
specificity,
freq,
specificity_cmp: toComparableString(specificity)
}
});
return {
ruleCount,
selectorCount,
distribution
};
} catch (e) {
return null;
}
''';
# https://www.stevenmoseley.com/blog/tech/high-performance-sql-correlated-scalar-aggregate-reduction-queries
CREATE TEMPORARY FUNCTION extractSpecificity(specificity_cmp STRING) RETURNS STRING AS (
SUBSTR(specificity_cmp, 16)
);
SELECT
percentile,
client,
extractSpecificity(APPROX_QUANTILES(max_specificity_cmp, 1000)[OFFSET(percentile * 10)]) AS max_specificity,
extractSpecificity(APPROX_QUANTILES(median_specificity_cmp, 1000)[OFFSET(percentile * 10)]) AS median_specificity
FROM (
SELECT
client,
MAX(specificity_cmp) AS max_specificity_cmp,
MIN(IF(freq_cdf >= 0.5, specificity_cmp, NULL)) AS median_specificity_cmp
FROM (
SELECT
client,
page,
bin.specificity_cmp,
SUM(bin.freq) OVER (PARTITION BY client, page ORDER BY bin.specificity_cmp) / SUM(bin.freq) OVER (PARTITION BY client, page) AS freq_cdf
FROM (
SELECT
client,
page,
getSpecificityInfo(css) AS info
FROM
`httparchive.almanac.parsed_css`
WHERE
date = '2021-07-01' AND
# Limit the size of the CSS to avoid OOM crashes.
LENGTH(css) < 0.1 * 1024 * 1024),
UNNEST(info.distribution) AS bin
WHERE
bin.specificity_cmp IS NOT NULL)
GROUP BY
client,
page),
UNNEST([10, 25, 50, 75, 90, 95, 99, 100]) AS percentile
GROUP BY
percentile,
client
ORDER BY
percentile,
client