User:CrowleyBot/task/1
Appearance
Technical details
- Maintain a list of what is legitimate after what.
- "Synonyms" is usually a child of a POS (part-of-speech) section. It can also be L3.
- "Alternative forms" is usually L3. It can also be a child of a POS section.
- If something will be L3 after the process, it should be checked.
- [1] is caused by [2]. It is partially reverted.
- The error report of the second batch. Pages in the error report are not touched.
Source code
from prelude import *
from botaccount import *
"normal-enwikt.py"
# Open a session on English Wiktionary and log the bot account in.
en = mwc.Site('en.wiktionary.org', clients_useragent=UA)
en.login(UN, PWD)

# ``pn`` (page names) and ``n`` (their count) are not assigned in this part of
# the script; the disabled lines below show how they were derived from ``epgl``.
#pn = list(map(lambda p: p.name, epgl))
#n = len(pn)

defaultsummary = ''

# Per-page state; every list is indexed like ``pn``.
etxtl = [en.Pages[name].text() for name in pn]  # fetched page text
esecl = [[] for _ in range(n)]                   # heading nodes, filled by process1
etxtn = n * ['']                                 # rewritten text, filled by process4

todo = []
todo1 = []
todo2 = []
todo3 = []

summary = n * ['']                               # edit summaries, filled by process4
log = []                                         # level changes recorded by process4
@fct.total_ordering
class node:
    """A heading of a wikitext page, doubling as a node of the section tree.

    Nodes compare by ``kyu`` only; ``total_ordering`` derives the remaining
    comparison operators from ``__eq__`` and ``__lt__``.

    Attributes:
        kyu: current heading level (the number of ``=`` emitted by ``str``).
        oldkyu: the level the node was created with.
        title: heading text, or the ``dummy`` value for a dummy node.
        a, b, c, z, t: stored as given.  As used by ``process1``: ``a``/``b``
            are the start/end offsets of the heading match, ``c`` the offset
            where the section body ends, ``z`` the newline run after the
            heading and ``t`` the body text.
        l, r: initialised to None (not used by the code shown here).
        f: parent node, None until linked.
        s: list of child nodes.
        tp: type code, -1 until assigned.
    """

    def __init__(self, kyu=0, title='', a=0, b=0, c=0, z='', t='', dummy=0):
        """Create a node; a truthy ``dummy`` is stored in place of ``title``."""
        # Every attribute is initialised for dummy nodes too, so that repr(),
        # printtree() and tree linking never hit a missing attribute.
        self.kyu, self.oldkyu = kyu, kyu
        self.title = dummy if dummy else title
        self.a, self.b, self.c, self.z, self.t = a, b, c, z, t
        self.l, self.r, self.f, self.s, self.tp = None, None, None, [], -1

    def __bool__(self):
        """Return True when the title is a string (False for e.g. ``dummy=1``)."""
        return isinstance(self.title, str)

    def __eq__(self, other):
        """Compare levels; defer to the other operand if it has no ``kyu``."""
        try:
            return self.kyu == other.kyu
        except AttributeError:
            return NotImplemented

    def __lt__(self, other):
        """Order by level; defer to the other operand if it has no ``kyu``."""
        try:
            return self.kyu < other.kyu
        except AttributeError:
            return NotImplemented

    def __str__(self):
        """Return the heading line, e.g. ``===Noun===`` for level 3."""
        bars = '=' * self.kyu
        return "%s%s%s" % (bars, self.title, bars)

    def __repr__(self):
        """Return the heading line immediately followed by the body text."""
        bars = '=' * self.kyu
        return "%s%s%s%s" % (bars, self.title, bars, self.t)

    def printtree(self, i=0):
        """Print this heading and its descendants, one space of indent per depth."""
        print(' ' * i + str(self))
        for child in self.s:
            child.printtree(i + 1)
def process1():
    """Split the text of page ``i`` into heading nodes and store them in ``esecl[i]``.

    The resulting list starts with a level-0 root node whose body is the text
    before the first heading; each following node is one heading, with ``t``
    holding the text up to the next heading (or the end of the page).
    """
    # *? for non-greedy
    # On en.wikt, User:Erutuon has ensured the sameness of '=', no redundant
    # spaces and no '=' in section titles, as well as no L1
    heading_rx = re.compile(r'^(===*)\s*([^=]*?)\s*(===*)\s*?(\n+)', flags=re.MULTILINE)

    def _to_node(match):
        # The level is the shorter of the two '=' runs around the title.
        level = min(len(match[1]), len(match[3]))
        return node(level, match[2], match.start(), match.end(), match.end(), match[4])

    text = etxtl[i]
    end = len(text)

    sections = [node(0, '', 0, 0, 0)]
    sections.extend(_to_node(m) for m in heading_rx.finditer(text))

    # The sentinel only marks where the last section stops; it is never stored.
    sentinel = node(0, '', end, end, end)
    for current, following in zip(sections, sections[1:] + [sentinel]):
        current.c = following.a
        current.t = text[current.b:current.c]

    esecl[i] = sections
# "Root" have different meaning in namespace Reconstruction
# "Proverbs" and "Citations" can be 9
# Heading titles grouped by type code: the position of a group in ``typelst``
# is the code that ``typedct`` maps each of its titles to.  The empty groups
# keep the positions aligned; codes 0, 2, 4 and 6 are assigned directly by
# ``gentype`` inside ``process3`` rather than looked up here.
typelst = [
    [],  # 0
    [],  # 1
    [],  # 2
    ['Etymology', 'Reconstruction'],  # 3
    [],  # 4
    ['Pronunciation'],  # 5
    [],  # 6
    [  # 7
        'Adjectival noun', 'Adjective', 'Adverb', 'Affix', 'Article',
        'Classifier', 'Clitic', 'Combining form', 'Conjunction',
        'Contraction', 'Counter', 'Cuneiform sign', 'Definitions',
        'Determiner', 'Demonstrative', 'Dependent noun', 'Final',
        'Han character', 'Hanja', 'Hanzi', 'Hán tự', 'Ideophone', 'Idiom',
        'Infix', 'Interfix', 'Interjection', 'Kanji', 'Letter', 'Noun',
        'Number', 'Numeral', 'Participle', 'Particle', 'Phrase',
        'Postposition', 'Predicative', 'Prefix', 'Preposition',
        'Prepositional phrase', 'Preverb', 'Pronoun', 'Proper noun',
        'Proverb', 'Relative', 'Romanization', 'Root', 'Sign values',
        'Suffix', 'Syllable', 'Symbol', 'Verb', 'Verbal noun',
    ],
    [  # 8
        'Conjugation', 'Declension', 'Derived characters', 'Forms',
        'Inflection', 'Readings', 'Related characters',
    ],
    [  # 9
        'Affixed forms', 'Antonyms', 'Compounds', 'Coordinate terms',
        'Derived terms', 'Descendants', 'Gallery', 'Holonyms', 'Hypernyms',
        'Hyponyms', 'Idioms', 'Meronyms', 'Mutation', 'Paronyms',
        'Quotations', 'Related terms', 'Synonyms', 'Translations', 'Trivia',
        'Troponyms', 'Usage notes',
    ],
    [  # 10
        'Alternative forms', 'Alternative reconstructions',
        'Alternative scripts', 'Dialectal variants', 'Notes',
        'Reconstruction notes', 'Statistics',
    ],
    ['Further reading', 'See also', 'References'],  # 11
    [],  # 12
    ['Anagrams', 'Glyph origin'],  # 13
]

# Title -> type code.  Unknown titles (and the empty title) map to False.
typedct = defaultdict(bool)
typedct[''] = False
# Filled from a generator expression so that no module-level name is bound:
# a plain ``for i, ...`` loop here would overwrite the global ``i`` that the
# process*() functions read as the index of the current page.
typedct.update(
    (title, code)
    for code, titles in enumerate(typelst)
    for title in titles
)
def processt():
    """Add the title of every heading deeper than level 2 on page ``i`` to ``st``."""
    deep_titles = (sec.title for sec in esecl[i] if sec.kyu > 2)
    for heading_title in deep_titles:
        st.add(heading_title)
def process3():
    """Rebuild the heading tree of page ``i`` and assign corrected levels.

    Consecutive headings of ``esecl[i]`` are visited in pairs.  Each heading
    gets a type code and is attached to the nearest admissible node on the
    ancestor chain of a starting candidate; attaching sets its new ``kyu``.

    Returns:
        True when every heading was typed and attached.  Otherwise a
        diagnostic tuple is printed, ``i`` is appended to ``fail``, the tuple
        is appended to ``fail1`` and False is returned.
    """
    sections = esecl[i]

    # Admissible (parent type, child type) pairs.
    allowed = {(0, 2), (2, 3), (2, 4), (2, 5), (2, 6), (2, 7), (2, 9), (2, 10), (2, 11), (2, 12), (2, 13), (2, 14), (3, 6), (4, 5), (4, 6), (4, 7), (4, 9), (4, 10), (4, 11), (4, 12), (4, 14), (5, 14), (6, 3), (6, 7), (6, 9), (6, 10), (6, 11), (6, 12), (6, 14), (7, 8), (7, 9), (7, 10), (7, 11)}

    def assign_type(sec):
        # Sets sec.tp; returns False when the heading cannot be typed.
        if sec.oldkyu in [0, 2]:
            # The root and L2 headings use their level as type code.
            sec.tp = sec.kyu
            return True
        if sec.oldkyu in [1, 7]:
            return False
        heading = sec.title
        if 'Etymology ' in heading or 'Reconstruction ' in heading:
            sec.tp = 4
            return True
        if 'Pronunciation ' in heading:
            sec.tp = 6
            return True
        # Strip " <digits>" before the table lookup; unknown titles give False.
        sec.tp = typedct[re.sub(r' \d+', '', heading)]
        return sec.tp != False

    def attach(parent, child):
        # Children of the level-0 root become L2; otherwise one below the parent.
        parent.s.append(child)
        child.f = parent
        child.kyu = 2 if parent.kyu == 0 else parent.kyu + 1

    def try_attach(candidate, child, floor):
        # Walk up from candidate while its level is at least `floor`, attaching
        # child to the first node whose type admits it.
        while candidate and candidate.kyu >= floor:
            if (candidate.tp, child.tp) in allowed:
                attach(candidate, child)
                return True
            candidate = candidate.f
        return False

    def report(stage, p, q, prev, cur):
        # Record the failure for page i and hand back the False result.
        entry = (stage, i, p, q, str(prev), str(cur))
        print(entry)
        fail.append(i)
        fail1.append(entry)
        return False

    sections[0].tp = 0
    for (p, prev), (q, cur) in its.pairwise(enumerate(sections)):
        if not assign_type(cur):
            return report("gentype", p, q, prev, cur)

        if (prev.tp == 4 and cur.tp in [5, 6, 7, 10]) or (prev.tp == 6 and cur.tp in [7, 10]):
            # Etymology n and Pronunciation n should have a child
            attached = try_attach(prev, cur, 0)
        elif prev.oldkyu > cur.oldkyu:
            # The heading moved outwards: find the first ancestor of prev that
            # sits above cur's level and try from there before trying from
            # prev itself (not climbing past that ancestor's level).
            ancestor = prev.f
            while ancestor.kyu >= cur.kyu:
                ancestor = ancestor.f
            attached = try_attach(ancestor, cur, 0) or try_attach(prev, cur, ancestor.kyu + 1)
        elif prev.oldkyu == cur.oldkyu:
            # Same original level: prefer a sibling of prev, then a child of it.
            attached = try_attach(prev.f, cur, 0) or try_attach(prev, cur, prev.kyu)
        else:
            attached = try_attach(prev, cur, 0)

        if not attached:
            return report("resolve", p, q, prev, cur)
    return True
def process4():
    """Build the edit summary and the rewritten text for page ``i``.

    Every heading whose level changed is listed in ``summary[i]`` and recorded
    in ``log``; ``etxtn[i]`` becomes the page text re-assembled from the nodes
    of ``esecl[i]`` with their current levels.
    """
    sections = esecl[i]
    changes = []
    for sec in sections[1:]:  # the root node is skipped
        parent = sec.f
        if sec.kyu != sec.oldkyu:
            changes.append('L%d -> L%d: %s' % (sec.oldkyu, sec.kyu, sec.title))
            log.append((i, sec.kyu, sec.title, parent.title))
        # NOTE(review): the source's indentation was lost; this check is
        # assumed to run for every heading, independently of the level-change
        # test above -- confirm against the original script.
        if sec.oldkyu - parent.oldkyu >= 2:
            # The heading originally skipped at least one level below its parent.
            summary[i] = 'Fix L%d after L%d. ' % (sec.oldkyu, parent.oldkyu)
    summary[i] += ', '.join(changes)
    # Heading line + newline run after it + body text, for every node in order.
    etxtn[i] = ''.join(str(sec) + sec.z + sec.t for sec in sections)
def process5():
    """Pass page ``i``, its rewritten text and its summary to ``tryedit``."""
    page = en.Pages[pn[i]]
    new_text = etxtn[i]
    edit_summary = summary[i]
    # ``fail`` is handed over as well; presumably tryedit records failed edits
    # in it -- verify against tryedit's definition.
    tryedit(page, new_text, edit_summary, fail=fail)