-
-
Notifications
You must be signed in to change notification settings - Fork 169
/
keyword_totals.sql
76 lines (70 loc) · 1.65 KB
/
keyword_totals.sql
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
#standardSQL
CREATE TEMPORARY FUNCTION getGlobalKeywords(css STRING)
RETURNS ARRAY<STRUCT<property STRING, keyword STRING, freq INT64>>
LANGUAGE js
OPTIONS (library = "gs://httparchive/lib/css-utils.js")
AS '''
try {
function compute(ast) {
let ret = {};
walkDeclarations(ast, ({property, value}) => {
let key = value;
ret[value] = ret[value] || {};
incrementByKey(ret[value], "total");
incrementByKey(ret[value], property);
}, {
values: ["inherit", "initial", "unset", "revert"]
});
for (let keyword in ret) {
ret[keyword] = sortObject(ret[keyword]);
}
return ret;
}
var ast = JSON.parse(css);
var kw = compute(ast);
return Object.entries(kw).flatMap(([keyword, properties]) => {
return Object.entries(properties).map(([property, freq]) => {
return {property, keyword, freq};
});
});
} catch (e) {
return [];
}
''';
SELECT
*,
pages / total_pages AS pct_pages
FROM (
SELECT
client,
kw.keyword,
kw.property,
SUM(kw.freq) AS freq,
SUM(SUM(IF(kw.property = 'total', 0, kw.freq))) OVER (PARTITION BY client, kw.keyword) AS total,
SUM(kw.freq) / SUM(SUM(IF(kw.property = 'total', 0, kw.freq))) OVER (PARTITION BY client, kw.keyword) AS pct,
COUNT(DISTINCT page) AS pages
FROM
`httparchive.almanac.parsed_css`,
UNNEST(getGlobalKeywords(css)) AS kw
WHERE
date = '2021-07-01'
GROUP BY
client,
keyword,
property)
JOIN (
SELECT
_TABLE_SUFFIX AS client,
COUNT(0) AS total_pages
FROM
`httparchive.summary_pages.2021_07_01_*`
GROUP BY
client)
USING
(client)
WHERE
pct >= 0.01
ORDER BY
client,
keyword,
pct DESC