Module:Internet Archive: Difference between revisions
Appearance
Content deleted Content added
No edit summary |
per discussion on talk page |
||
(26 intermediate revisions by 4 users not shown) | |||
Line 5: | Line 5: | ||
Notes: |
Notes: |
||
1. Internet Archive runs |
1. Internet Archive runs the Elasticsearch search engine as of 4 Nov 2015 |
||
The search strategies will be updated when the switch occurs; the current strategy is based on Lucene. |
|||
2. Program flowchart: |
2. Program flowchart: |
||
Break name down into number of words |
|||
Check if author has birth-death data on Wikidata |
|||
Build a base URL based on number of words (1,2,3,4,5+), use of sopt=t switch, and availability of birth-death dates |
|||
If birth-death data |
|||
If any words contain extended-ascii characters |
|||
Check number of words in name |
|||
append extra code for wildcards based on sopt=t or w |
|||
return finished URL |
|||
If accented characters in name append wildcard search to end. |
|||
If not birth-death data |
|||
Repeat same as above but exclude birth-death data. |
|||
3. URL length should not exceed 2000 characters or it will break certain popular browsers |
|||
3. Lucene has a number of known issues with searching |
|||
A. Names with accented letters (é) - aka extended ascii - are problematic. There are records on IA in which the accent has been |
|||
dropped, so é appears as e in the record. A search strategy therefore has to use wildcards in place of extended ascii characters. The "?" |
|||
wildcard does not work correctly on Lucene, so "*" is recommended instead. Wildcards severely slow down search times, and after about |
|||
5 or 8 wildcards the search may even time out. Thus, only use wildcards in a single expression in a search string. |
|||
B. Extended ascii doesn't work correctly if 1. it is not surrounded by quotes and 2. the search string contains numbers and/or wildcards |
|||
somewhere in it and 3. there are multiple () statements. The extended ascii character is then misinterpreted as ascii, e.g. é -> Ã© |
|||
Try for example : (Évariste Régis Huc) OR (Évariste Régis Huc 1813-1860) OR (É. R. Huc) OR (É. R. Huc 1813-1860) OR (Évariste R. Huc) OR (Évariste R. Huc 1813-1860) OR (Évariste Huc) OR (Évariste Huc 1813-1860) |
|||
C. B can be nullified by enclosing the string in quotes, but this creates a literal string and many permutations must be searched |
|||
on ("John Smith" OR "Smith, John" etc). For names longer than 2 words it could exceed URL limits. URLs are limited to about 2000 |
|||
characters to account for most browsers (IE is 2083). Thus, search strategies used are a balance between possibilities |
|||
and URL length. |
|||
4. Wildcard (*) replacements should be avoided in the first letter of the first word, and with any single-letter words |
|||
5. Changing search formulations will have impacts on existing uses of the template and off-line tools which are optimized for these search recipes. |
|||
]] |
]] |
||
Line 46: | Line 35: | ||
local args = pframe.args |
local args = pframe.args |
||
local tname = "Internet Archive author" -- name of calling template. Change if template rename. |
local tname = "Internet Archive author" -- name of calling template. Change if template rename. |
||
local name = nil -- article name (default: current page) |
local name = nil -- article name (default: current page name) |
||
dname = nil -- display name (default: current page name) |
|||
local sname = nil -- search name (default: current page name) |
local sname = nil -- search name (default: current page name) |
||
local sopt = nil -- search options (default: nil) |
|||
local byabout = "Works by or about" |
|||
byabout = "Works by or about" |
|||
local tagline = "at [[Internet Archive]]" |
|||
tagline = "at the [[Internet Archive]]" |
|||
local urlhead = "//archive.org/search.php?query=" |
|||
urlhead = "https://archive.org/search.php?query=" |
|||
mydate = "" -- birth-death date |
|||
--- Determine name |
--- Determine name |
||
name = trimArg(args.name) -- When using template outside main article space, the 'name' parameter is required (not optional) |
|||
if args.name == "" or args.name == nil then |
|||
if not name then |
|||
name = mw.title.getCurrentTitle().text |
name = mw.title.getCurrentTitle().text |
||
dname = name |
|||
sname = dname |
|||
else |
|||
name = mw.text.trim(args.name) |
|||
dname = name |
|||
sname = dname |
|||
end |
end |
||
dname = mw.ustring.gsub(name,'%s+%([^%(]-%)$', '') -- Remove the final disambig parentheses |
|||
if args.sname ~= nil and args.sname ~= "" then |
|||
sname = dname |
|||
if trimArg(args.sname) then |
|||
sname = trimArg(args.sname) |
|||
end |
end |
||
if args.dname |
if trimArg(args.dname) then |
||
dname = |
dname = trimArg(args.dname) |
||
end |
end |
||
dname = mw.ustring.gsub(dname,"%s%(.*%)", "") -- remove disambiguation () |
|||
sname = mw.ustring.gsub(sname,"%s%(.*%)", "") |
|||
--- Determine search option |
|||
sopt = trimArg(args.sopt) |
|||
if sopt then |
|||
sopt = mw.ustring.lower(sopt) |
|||
if sopt == "tight" then sopt = "t" end |
|||
if sopt == "tightx" then sopt = "tx" end |
|||
if sopt == "wild" then sopt = "w" end |
|||
if sopt ~= "t" and sopt ~= "tx" and sopt ~= "w" then sopt = "unknown" end |
|||
end |
|||
--- Determine tagline |
--- Determine tagline |
||
if args.coda |
if trimArg(args.coda) then |
||
tagline = tagline .. " " .. |
tagline = tagline .. " " .. trimArg(args.coda) |
||
end |
end |
||
--- Custom search. Do early to avoid unnecessary processing. |
--- Custom search. Do early to avoid unnecessary processing. |
||
if args.search |
if trimArg(args.search) then |
||
local search = p.ia_url_encode( |
local search = p.ia_url_encode(trimArg(args.search)) |
||
return "[" .. urlhead .. search .. " " .. byabout .. " " .. dname .. "] " .. tagline |
return "[" .. urlhead .. search .. " " .. byabout .. " " .. dname .. "] " .. tagline |
||
end |
end |
||
-- Determine media string |
-- Determine media string |
||
media = p.mediaTypes(args.media) |
|||
if media == "" then |
|||
mediaopen = "%28" -- added a default mediatype Dec 2015 see p.mediaTypes() |
|||
else |
|||
mediaopen = "%28" |
|||
end |
|||
-- Determine date of birth and death |
-- Determine date of birth and death |
||
local temp = mw.text.split(p.bdDate(args.birth, args.death, name), " ") |
local temp = mw.text.split(p.bdDate(args.birth, args.death, name), " ") |
||
local birth = temp[1] |
local birth = temp[1] |
||
Line 95: | Line 96: | ||
return "Error in [[:Template:"..tname.."]]: [[" ..name.. "]] doesn't exist." |
return "Error in [[:Template:"..tname.."]]: [[" ..name.. "]] doesn't exist." |
||
end |
end |
||
--- Split sname into words and count words |
--- Split sname into words and count words |
||
local N = mw.text.split(sname, " ") |
local N = mw.text.split(sname, " ") |
||
local l, count = mw.ustring.gsub(sname, "%S+", "") |
local l, count = mw.ustring.gsub(sname, "%S+", "") |
||
if count == 0 then |
|||
return "Error in [[:Template:"..tname.."]]: Zero-word name." |
|||
end |
|||
--- Date string |
|||
if birth ~= "none" and death ~= "none" then |
|||
if p.ia_extendedascii(N[count]) == 1 then |
|||
mydate = "%20OR%20%28%22"..birth.."-"..death.."%22%20AND%20%28%22"..p.urlX(N[count]).."%22%20OR%20"..p.urlX(p.ia_deaccent(N[count])).."%29%29" |
|||
else |
|||
mydate = "%20OR%20%28%22"..birth.."-"..death.."%22%20AND%20"..p.urlX(N[count]).."%29" |
|||
end |
|||
end |
|||
--- wild string |
|||
wild = "%29" |
|||
if sopt == "w" and p.ia_extendedascii(sname) == 1 then |
|||
if p.wildcheck(N, count) == 1 then |
|||
myurl = p.wildfix(N, count) |
|||
return p.IArender() |
|||
end |
|||
if count < 3 or count > 3 then |
|||
-- (first last) |
|||
wild = "%20OR%20%28" .. p.ia_url_encode(p.ia_extendedascii2wildcard(sname)) .. "%29%29" |
|||
end |
|||
if count == 3 then |
|||
-- (first last) |
|||
wild = "%20OR%20%28" .. p.ia_url_encode(p.ia_extendedascii2wildcard(N[1])) .. "%20" .. p.ia_url_encode(p.ia_extendedascii2wildcard(N[3])) .. "%29%29" |
|||
end |
|||
end |
|||
--[[ |
--[[ |
||
Line 106: | Line 136: | ||
]] |
]] |
||
if count == 1 then |
|||
if birth == "none" or death == "none" then |
|||
myurl = p.oneWord(sname) |
|||
if sopt == "t" and p.ia_extendedascii(sname) == 1 then |
|||
local plainname = p.ia_deaccent(sname) |
|||
local search = "%28subject%3A%22"..nameurl.."%22%20OR%20creator%3A%22"..nameurl.."%22%20OR%20description%3A%22"..nameurl.."%22%20OR%20title%3A%22"..nameurl.."%22" |
|||
local A1 = "%20OR%20%22"..p.urlX(plainname) |
|||
if p.ia_extendedascii(sname) == 1 then |
|||
myurl = myurl .. A1 .. "%22" |
|||
local wild = "%20OR%20" .. p.ia_url_encode(p.ia_extendedasciireplace(sname)) |
|||
return p.IArender() |
|||
return "[" .. urlhead .. media .. search .. wild .. "%29" .. " " .. byabout .. " " .. dname .. "] " .. tagline |
|||
else |
|||
return "[" .. urlhead .. media .. search .. "%29" .. " " .. byabout .. " " .. dname .. "] " .. tagline |
|||
end |
end |
||
return p.IArender() |
|||
end |
end |
||
if count == 2 then |
if count == 2 then |
||
local FIRST = 1 |
|||
myurl = p.twoWords(N, sopt) |
|||
if sopt == "t" and p.ia_extendedascii(sname) == 1 then |
|||
local plainname = p.ia_deaccent(sname) |
|||
local PN = mw.text.split(plainname, " ") |
|||
-- Last, First |
|||
local A1 = "%20OR%20%22"..p.urlX(PN[2]).."%2C%20"..p.urlX(PN[1]) |
|||
-- First Last |
|||
local A2 = "%22%20OR%20%22"..p.urlX(PN[1]).."%20"..p.urlX(PN[2]) |
|||
myurl = myurl .. A1 .. A2 .. "%22" |
|||
return p.IArender() |
|||
end |
|||
return p.IArender() |
|||
local firstinitial = mw.ustring.sub(N[FIRST], 1, 1) |
|||
-- Last, First |
|||
local S1 = "%28subject%3A%22"..N[LAST].."%2C%20"..N[FIRST] |
|||
-- First Last |
|||
local S3 = "%22%20OR%20subject%3A%22"..N[FIRST].."%20"..N[LAST] |
|||
local SALL = S1..S3 |
|||
-- Last, First |
|||
local C1 = "%22%20OR%20creator%3A%22"..N[LAST].."%2C%20"..N[FIRST] |
|||
-- First Last |
|||
local C3 = "%22%20OR%20creator%3A%22"..N[FIRST].."%20"..N[LAST] |
|||
-- Last, F. |
|||
local C5 = "%22%20OR%20creator%3A%22"..N[LAST].."%2C%20"..firstinitial.."%2E" |
|||
local CALL = C1..C3..C5 |
|||
-- First Last |
|||
local T1 = "%22%20OR%20title%3A%22"..N[FIRST].."%20"..N[LAST] |
|||
local TALL = T1 |
|||
-- Last, First |
|||
local D1 = "%22%20OR%20description%3A%22"..N[LAST].."%2C%20"..N[FIRST] |
|||
-- First Last |
|||
local D3 = "%22%20OR%20description%3A%22"..N[FIRST].."%20"..N[LAST] |
|||
local DALL = D1..D3.."%22" |
|||
if p.ia_extendedascii(sname) == 1 then |
|||
local wild = "%20OR%20" .. p.ia_url_encode(p.ia_extendedasciireplace(sname)) |
|||
return "[" .. urlhead .. media .. SALL .. CALL .. TALL .. DALL .. wild .. "%29" .. " " .. byabout .. " " .. dname .. "] " .. tagline |
|||
else |
|||
return "[" .. urlhead .. media .. SALL .. CALL .. TALL .. DALL .. "%29" .. " " .. byabout .. " " .. dname .. "] " .. tagline |
|||
end |
|||
end |
end |
||
if count == 3 then |
if count == 3 then |
||
local FIRST = 1 |
|||
local MIDDLE = 2 |
|||
local LAST = 3 |
|||
local firstinitial = mw.ustring.sub(N[FIRST], 1, 1) |
|||
local middleinitial = mw.ustring.sub(N[MIDDLE], 1, 1) |
|||
myurl = p.threeWords(N, sopt) |
|||
-- CAUTION: This is near the max 2000 character URL limit for most browsers when using long names |
|||
-- such as "René-Nicolas Dufriche Desgenettes". As such it does not include dates. |
|||
if sopt == "t" and p.ia_extendedascii(sname) == 1 then |
|||
-- Last, First Middle |
|||
local plainname = p.ia_deaccent(sname) |
|||
local S1 = "%28subject%3A%22"..N[LAST].."%2C%20"..N[FIRST].."%20"..N[MIDDLE] |
|||
local PN = mw.text.split(plainname, " ") |
|||
-- Last, First M. |
|||
local FIRST = p.urlX(PN[1]) |
|||
local S2 = "%22%20OR%20subject%3A%22"..N[LAST].."%2C%20"..N[FIRST].."%20"..middleinitial.."%2E" |
|||
local MIDDLE = p.urlX(PN[2]) |
|||
local LAST = p.urlX(PN[3]) |
|||
local S3 = "%22%20OR%20subject%3A%22"..N[LAST].."%2C%20"..firstinitial.."%2E%20"..middleinitial.."%2E" |
|||
local firstinitialp = p.urlX( p.firstLetter(PN[1]) ) |
|||
-- First Middle Last |
|||
local middleinitialp = p.urlX( p.firstLetter(PN[2]) ) |
|||
local S4 = "%22%20OR%20subject%3A%22"..N[FIRST].."%20"..N[MIDDLE].."%20"..N[LAST] |
|||
-- First |
-- First Middle Last |
||
local |
local A1 = "%20OR%20%22"..FIRST.."%20"..MIDDLE.."%20"..LAST |
||
-- |
-- Last, First Middle |
||
local |
local A2 = "%22%20OR%20%22"..LAST.."%2C%20"..FIRST.."%20"..MIDDLE |
||
-- Last, First |
-- Last, First M. |
||
local |
local A3 = "%22%20OR%20%22"..LAST.."%2C%20"..FIRST.."%20"..middleinitialp.."%2E" |
||
-- |
-- Last, F. M. |
||
local |
local A4 = "%22%20OR%20%22"..LAST.."%2C%20"..firstinitialp..".%20"..middleinitialp.."%2E" |
||
local |
local ALL = A1 .. A2 .. A3 .. A4 .. "%22" |
||
myurl = myurl .. ALL |
|||
return p.IArender() |
|||
local C1 = "%22%20OR%20creator%3A%22"..N[FIRST].."%20"..N[MIDDLE].."%20"..N[LAST] |
|||
end |
|||
local C2 = "%22%20OR%20creator%3A%22"..N[FIRST].."%20"..middleinitial.."%2E%20"..N[LAST] |
|||
-- F. M. Last |
|||
local C3 = "%22%20OR%20creator%3A%22"..firstinitial.."%2E%20"..middleinitial.."%2E%20"..N[LAST] |
|||
-- F. Middle Last |
|||
local C4 = "%22%20OR%20creator%3A%22"..firstinitial.."%2E%20"..N[MIDDLE].."%20"..N[LAST] |
|||
-- Last, First Middle |
|||
local C5 = "%22%20OR%20creator%3A%22"..N[LAST].."%2C%20"..N[FIRST].."%20"..N[MIDDLE] |
|||
-- Last, First M. |
|||
local C6 = "%22%20OR%20creator%3A%22"..N[LAST].."%2C%20"..N[FIRST].."%20"..middleinitial.."%2E" |
|||
-- Last, F. M. |
|||
local C7 = "%22%20OR%20creator%3A%22"..N[LAST].."%2C%20"..firstinitial.."%2E%20"..middleinitial.."%2E" |
|||
-- Last, F. M. |
|||
local C8 = "%22%20OR%20creator%3A%22"..N[LAST].."%2C%20"..firstinitial.."%2E%20"..N[MIDDLE] |
|||
-- First Last |
|||
local C9 = "%22%20OR%20creator%3A%22"..N[FIRST].."%20"..N[LAST] |
|||
-- Last, First |
|||
local C10 = "%22%20OR%20creator%3A%22"..N[LAST].."%2C%20"..N[FIRST] |
|||
local CALL = C1..C2..C3..C4..C5..C6..C7..C8..C9..C10 |
|||
-- First Middle Last |
|||
local T1 = "%22%20OR%20title%3A%22"..N[FIRST].."%20"..N[MIDDLE].."%20"..N[LAST] |
|||
-- First M. Last |
|||
local T2 = "%22%20OR%20title%3A%22"..N[FIRST].."%20"..middleinitial.."%2E%20"..N[LAST] |
|||
-- F. M. Last |
|||
local T3 = "%22%20OR%20title%3A%22"..firstinitial.."%2E%20"..middleinitial.."%2E%20"..N[LAST] |
|||
-- First Last |
|||
local T4 = "%22%20OR%20title%3A%22"..N[FIRST].."%20"..N[LAST] |
|||
local TALL = T1..T2..T3..T4 |
|||
-- First Middle Last |
|||
local D1 = "%22%20OR%20description%3A%22"..N[FIRST].."%20"..N[MIDDLE].."%20"..N[LAST] |
|||
-- First M. Last |
|||
local D2 = "%22%20OR%20description%3A%22"..N[FIRST].."%20"..middleinitial.."%2E%20"..N[LAST] |
|||
-- F. M. Last |
|||
local D3 = "%22%20OR%20description%3A%22"..firstinitial.."%2E%20"..middleinitial.."%2E%20"..N[LAST] |
|||
-- Last, First Middle |
|||
local D4 = "%22%20OR%20description%3A%22"..N[LAST].."%2C%20"..N[FIRST].."%20"..N[MIDDLE] |
|||
-- Last, First M. |
|||
local D5 = "%22%20OR%20description%3A%22"..N[LAST].."%2C%20"..N[FIRST].."%20"..middleinitial.."%2E" |
|||
-- First Last |
|||
local D6 = "%22%20OR%20description%3A%22"..N[FIRST].."%20"..N[LAST] |
|||
-- Last, First |
|||
local D7 = "%22%20OR%20description%3A%22"..N[LAST].."%2C%20"..N[FIRST] |
|||
local DALL = D1..D2..D3..D4..D5..D6..D7.."%22" |
|||
return p.IArender() |
|||
local wild = "%20OR%20" .. p.ia_url_encode(p.ia_extendedasciireplace(sname)) |
|||
return "[" .. urlhead .. media .. SALL .. CALL .. TALL .. DALL .. wild .. "%29" .. " " .. byabout .. " " .. dname .. "] " .. tagline |
|||
else |
|||
return "[" .. urlhead .. media .. SALL .. CALL .. TALL .. DALL .. "%29" .. " " .. byabout .. " " .. dname .. "] " .. tagline |
|||
end |
|||
end |
end |
||
if count == 4 then |
if count == 4 then |
||
local FIRST = 1 |
|||
local SECOND = 2 |
|||
local THIRD = 3 |
|||
local LAST = 4 |
|||
myurl = p.fourWords(N, sopt) |
|||
local secondinitial = mw.ustring.sub(N[SECOND], 1, 1) |
|||
if sopt == "t" and p.ia_extendedascii(sname) == 1 then |
|||
local thirdinitial = mw.ustring.sub(N[THIRD], 1, 1) |
|||
local plainname = p.ia_deaccent(sname) |
|||
local PN = mw.text.split(plainname, " ") |
|||
-- Last, First Second Third |
|||
local FIRST = p.urlX(PN[1]) |
|||
local S1 = "%28subject%3A%22"..N[LAST].."%2C%20"..N[FIRST].."%20"..N[SECOND].."%20"..N[THIRD] |
|||
local SECOND = p.urlX(PN[2]) |
|||
local THIRD = p.urlX(PN[3]) |
|||
local S2 = "%22%20OR%20subject%3A%22"..N[FIRST].."%20"..N[SECOND].."%20"..N[THIRD].."%20"..N[LAST] |
|||
local LAST = p.urlX(PN[4]) |
|||
local firstinitialp = p.urlX( p.firstLetter(PN[1]) ) |
|||
local C1 = "%22%20OR%20creator%3A%22"..N[LAST].."%2C%20"..N[FIRST].."%20"..N[SECOND].."%20"..N[THIRD] |
|||
local secondinitialp = p.urlX( p.firstLetter(PN[2]) ) |
|||
-- First Second Third Last |
|||
local thirdinitialp = p.urlX( p.firstLetter(PN[3]) ) |
|||
local C2 = "%22%20OR%20creator%3A%22"..N[FIRST].."%20"..N[SECOND].."%20"..N[THIRD].."%20"..N[LAST] |
|||
-- Last, |
-- Last, First Second Third |
||
local |
local A1 = "%20OR%20%22"..LAST.."%2C%20"..FIRST.."%20"..SECOND.."%20"..THIRD |
||
-- First Second Third Last |
-- First Second Third Last |
||
local |
local A2 = "%22%20OR%20%22"..FIRST.."%20"..SECOND.."%20"..THIRD.."%20"..LAST |
||
-- |
-- Last, F. S. T. |
||
local |
local A3 = "%22%20OR%20%22"..LAST.."%2C%20"..firstinitialp.."%2E%20"..secondinitialp.."%2E%20"..thirdinitialp.."%2E" |
||
local |
local ALL = A1 .. A2 .. A3 .. "%22" |
||
myurl = myurl .. ALL |
|||
return p.IArender() |
|||
end |
|||
local wild = "%20OR%20" .. p.ia_url_encode(p.ia_extendedasciireplace(sname)) |
|||
return "[" .. urlhead .. media .. SALL .. wild .. "%29" .. " " .. byabout .. " " .. dname .. "] " .. tagline |
|||
return p.IArender() |
|||
local X1 = "%20OR%20" .. p.ia_url_encode(sname) |
|||
return "[" .. urlhead .. media .. SALL .. X1 .. "%29" .. " " .. byabout .. " " .. dname .. "] " .. tagline |
|||
end |
|||
end |
end |
||
if count > 4 then |
if count > 4 then |
||
myurl = "" |
|||
local search = "%28" .. nameurl .. "%29" |
|||
if p.ia_extendedascii(sname) == 1 then |
if sopt == "w" and p.ia_extendedascii(sname) == 1 then |
||
myurl = "%28" |
|||
local wild = "%20OR%20" .. p.ia_url_encode(p.ia_extendedasciireplace(sname)) |
|||
return "[" .. urlhead .. media .. search .. wild .. " " .. byabout .. " " .. dname .. "] " .. tagline |
|||
else |
|||
return "[" .. urlhead .. media .. search .. " " .. byabout .. " " .. dname .. "] " .. tagline |
|||
end |
end |
||
myurl = myurl .. "%28" .. p.ia_url_encode(sname) |
|||
if sopt == "w" and p.ia_extendedascii(sname) == 1 then |
|||
myurl = myurl .. "%29" |
|||
end |
|||
if sopt == "t" and p.ia_extendedascii(sname) == 1 then |
|||
local plainname = p.ia_deaccent(sname) |
|||
local A1 = "%29%20OR%20%28"..p.ia_url_encode(plainname) |
|||
myurl = myurl .. A1 |
|||
return p.IArender() |
|||
end |
|||
return p.IArender() |
|||
end |
end |
||
return "Unknown error (1). Please check documentation for [[Template:"..tname.."]]" |
|||
else -- Date available |
|||
end |
|||
if count == 1 then |
|||
-- Build final output and render |
|||
local nameurl = p.ia_url_encode(sname) |
|||
function p.IArender() |
|||
local search = "%28subject%3A%22"..nameurl.."%22%20OR%20creator%3A%22"..nameurl.."%22%20OR%20description%3A%22"..nameurl.."%22%20OR%20title%3A%22"..nameurl.."%22" |
|||
return "[" .. urlhead .. mediaopen .. myurl .. wild .. mydate .. media .. " " .. byabout .. " " .. dname .. "] " .. tagline |
|||
if p.ia_extendedascii(sname) == 1 then |
|||
local wild = "%20OR%20" .. p.ia_url_encode(p.ia_extendedasciireplace(sname)) |
|||
return "[" .. urlhead .. media .. search .. wild .. "%29" .. " " .. byabout .. " " .. dname .. "] " .. tagline |
|||
else |
|||
return "[" .. urlhead .. media .. search .. "%29" .. " " .. byabout .. " " .. dname .. "] " .. tagline |
|||
end |
|||
end |
|||
end |
|||
if count == 2 then |
|||
local FIRST = 1 |
|||
function p.oneWord(sname) |
|||
local LAST = 2 |
|||
local nameurl = p.ia_url_encode(sname) |
|||
local A1 = "%28subject%3A%22"..nameurl |
|||
local A2 = "%22%20OR%20creator%3A%22"..nameurl |
|||
local A3 = "%22%20OR%20description%3A%22"..nameurl |
|||
local A4 = "%22%20OR%20title%3A%22"..nameurl |
|||
return A1 .. A2 .. A3 .. A4 .. "%22" |
|||
end |
|||
function p.twoWords(N, sopt) |
|||
local firstinitial = mw.ustring.sub(N[FIRST], 1, 1) |
|||
local FIRST = p.urlX(N[1]) |
|||
local LAST = p.urlX(N[2]) |
|||
local firstinitial = p.urlX( p.firstLetter(N[1]) ) |
|||
-- Last, First, birthyear-deathyear |
|||
local S1 = "%28subject%3A%22"..N[LAST].."%2C%20"..N[FIRST].."%2C%20"..birth.."-"..death |
|||
-- Last, First |
-- Last, First |
||
local |
local S1 = "%28subject%3A%22"..LAST.."%2C%20"..FIRST |
||
-- First Last, birthyear-deathyear |
|||
local S3 = "%22%20OR%20subject%3A%22"..N[FIRST].."%20"..N[LAST].."%2C%20"..birth.."-"..death |
|||
-- First Last |
-- First Last |
||
local |
local S2 = "%22%20OR%20subject%3A%22"..FIRST.."%20"..LAST |
||
local SALL = S1..S2 |
local SALL = S1..S2 |
||
-- Last, First, birthyear-deathyear |
|||
local C1 = "%22%20OR%20creator%3A%22"..N[LAST].."%2C%20"..N[FIRST].."%2C%20"..birth.."-"..death |
|||
-- Last, First |
-- Last, First |
||
local |
local C1 = "%22%20OR%20creator%3A%22"..LAST.."%2C%20"..FIRST |
||
-- First Last, birthyear-deathyear |
|||
local C3 = "%22%20OR%20creator%3A%22"..N[FIRST].."%20"..N[LAST].."%2C%20"..birth.."-"..death |
|||
-- First Last |
-- First Last |
||
local |
local C2 = "%22%20OR%20creator%3A%22"..FIRST.."%20"..LAST |
||
local CALL = C1..C2 |
|||
local C5 = "%22%20OR%20creator%3A%22"..N[LAST].."%2C%20"..firstinitial.."%2E" |
|||
local CALL = C1..C2..C3..C4..C5 |
|||
-- First Last |
-- First Last |
||
local T1 = "%22%20OR%20title%3A%22".. |
local T1 = "%22%20OR%20title%3A%22"..FIRST.."%20"..LAST |
||
local TALL = T1 |
local TALL = T1 |
||
-- Last, First, birthyear-deathyear |
|||
local D1 = "%22%20OR%20description%3A%22"..N[LAST].."%2C%20"..N[FIRST].."%2C%20"..birth.."-"..death |
|||
-- Last, First |
-- Last, First |
||
local |
local D1 = "%22%20OR%20description%3A%22"..LAST.."%2C%20"..FIRST |
||
-- First Last, birthyear-deathyear |
|||
local D3 = "%22%20OR%20description%3A%22"..N[FIRST].."%20"..N[LAST].."%2C%20"..birth.."-"..death |
|||
-- First Last |
-- First Last |
||
local |
local D2 = "%22%20OR%20description%3A%22"..FIRST.."%20"..LAST |
||
local DALL = D1..D2 |
local DALL = D1..D2 |
||
if |
if sopt == "t" or sopt == "tx" then |
||
return SALL .. CALL .. TALL .. DALL .. "%22" |
|||
local wild = "%20OR%20" .. p.ia_url_encode(p.ia_extendedasciireplace(sname)) |
|||
return "[" .. urlhead .. media .. SALL .. CALL .. TALL .. DALL .. wild .. "%29" .. " " .. byabout .. " " .. dname .. "] " .. tagline |
|||
else |
else |
||
-- Last, F. |
|||
return "[" .. urlhead .. media .. SALL .. CALL .. TALL .. DALL .. "%29" .. " " .. byabout .. " " .. dname .. "] " .. tagline |
|||
local C3 = "%22%20OR%20creator%3A%22"..LAST.."%2C%20"..firstinitial.."%2E" |
|||
local CALL = CALL..C3 |
|||
return SALL .. CALL .. TALL .. DALL .. "%22" |
|||
end |
end |
||
end |
|||
function p.threeWords(N, sopt) |
|||
if count == 3 then |
|||
-- CAUTION: The following is near the max 2000 character URL limit for most browsers when using long names |
|||
local FIRST = 1 |
|||
-- such as "René-Nicolas Dufriche Desgenettes". |
|||
local MIDDLE = 2 |
|||
local LAST = 3 |
|||
local |
local FIRST = p.urlX(N[1]) |
||
local |
local MIDDLE = p.urlX(N[2]) |
||
local LAST = p.urlX(N[3]) |
|||
local firstinitial = p.urlX( p.firstLetter(N[1]) ) |
|||
-- CAUTION: This is near the max 2000 character URL limit for most browsers when using long names |
|||
local middleinitial = p.urlX( p.firstLetter(N[2]) ) |
|||
-- such as "René-Nicolas Dufriche Desgenettes". As such it does not include dates. |
|||
-- Last, First Middle |
-- Last, First Middle |
||
local S1 = "%28subject%3A%22".. |
local S1 = "%28subject%3A%22"..LAST.."%2C%20"..FIRST.."%20"..MIDDLE |
||
-- Last, First M. |
-- Last, First M. |
||
local S2 = "%22%20OR%20subject%3A%22".. |
local S2 = "%22%20OR%20subject%3A%22"..LAST.."%2C%20"..FIRST.."%20"..middleinitial.."%2E" |
||
-- Last, F. M. |
-- Last, F. M. |
||
local S3 = "%22%20OR%20subject%3A%22".. |
local S3 = "%22%20OR%20subject%3A%22"..LAST.."%2C%20"..firstinitial.."%2E%20"..middleinitial.."%2E" |
||
-- First Middle Last |
-- First Middle Last |
||
local S4 = "%22%20OR%20subject%3A%22".. |
local S4 = "%22%20OR%20subject%3A%22"..FIRST.."%20"..MIDDLE.."%20"..LAST |
||
-- First M. Last |
-- First M. Last |
||
local S5 = "%22%20OR%20subject%3A%22".. |
local S5 = "%22%20OR%20subject%3A%22"..FIRST.."%20"..middleinitial.."%2E%20"..LAST |
||
-- F. M. Last |
-- F. M. Last |
||
local S6 = "%22%20OR%20subject%3A%22"..firstinitial.."%2E%20"..middleinitial.."%2E%20".. |
local S6 = "%22%20OR%20subject%3A%22"..firstinitial.."%2E%20"..middleinitial.."%2E%20"..LAST |
||
local SALL = S1..S2..S3..S4..S5..S6 |
|||
-- Last, First |
|||
local S7 = "%22%20OR%20subject%3A%22"..N[LAST].."%2C%20"..N[FIRST] |
|||
-- First Last |
|||
local S8 = "%22%20OR%20subject%3A%22"..N[FIRST].."%20"..N[LAST] |
|||
local SALL = S1..S2..S3..S4..S5..S6..S7..S8 |
|||
-- First Middle Last |
-- First Middle Last |
||
local C1 = "%22%20OR%20creator%3A%22".. |
local C1 = "%22%20OR%20creator%3A%22"..FIRST.."%20"..MIDDLE.."%20"..LAST |
||
-- First M. Last |
-- First M. Last |
||
local C2 = "%22%20OR%20creator%3A%22".. |
local C2 = "%22%20OR%20creator%3A%22"..FIRST.."%20"..middleinitial.."%2E%20"..LAST |
||
-- F. M. Last |
-- F. M. Last |
||
local C3 = "%22%20OR%20creator%3A%22"..firstinitial.."%2E%20"..middleinitial.."%2E%20".. |
local C3 = "%22%20OR%20creator%3A%22"..firstinitial.."%2E%20"..middleinitial.."%2E%20"..LAST |
||
-- F. Middle Last |
-- F. Middle Last |
||
local C4 = "%22%20OR%20creator%3A%22"..firstinitial.."%2E%20".. |
local C4 = "%22%20OR%20creator%3A%22"..firstinitial.."%2E%20"..MIDDLE.."%20"..LAST |
||
-- Last, First Middle |
-- Last, First Middle |
||
local C5 = "%22%20OR%20creator%3A%22".. |
local C5 = "%22%20OR%20creator%3A%22"..LAST.."%2C%20"..FIRST.."%20"..MIDDLE |
||
-- Last, First M. |
-- Last, First M. |
||
local C6 = "%22%20OR%20creator%3A%22".. |
local C6 = "%22%20OR%20creator%3A%22"..LAST.."%2C%20"..FIRST.."%20"..middleinitial.."%2E" |
||
-- Last, F. M. |
-- Last, F. M. |
||
local C7 = "%22%20OR%20creator%3A%22".. |
local C7 = "%22%20OR%20creator%3A%22"..LAST.."%2C%20"..firstinitial.."%2E%20"..middleinitial.."%2E" |
||
-- Last, F. M. |
-- Last, F. M. |
||
local C8 = "%22%20OR%20creator%3A%22".. |
local C8 = "%22%20OR%20creator%3A%22"..LAST.."%2C%20"..firstinitial.."%2E%20"..MIDDLE |
||
local CALL = C1..C2..C3..C4..C5..C6..C7..C8 |
|||
-- First Last |
|||
local C9 = "%22%20OR%20creator%3A%22"..N[FIRST].."%20"..N[LAST] |
|||
-- Last, First |
|||
local C10 = "%22%20OR%20creator%3A%22"..N[LAST].."%2C%20"..N[FIRST] |
|||
local CALL = C1..C2..C3..C4..C5..C6..C7..C8..C9..C10 |
|||
-- First Middle Last |
-- First Middle Last |
||
local T1 = "%22%20OR%20title%3A%22".. |
local T1 = "%22%20OR%20title%3A%22"..FIRST.."%20"..MIDDLE.."%20"..LAST |
||
-- First M. Last |
-- First M. Last |
||
local T2 = "%22%20OR%20title%3A%22".. |
local T2 = "%22%20OR%20title%3A%22"..FIRST.."%20"..middleinitial.."%2E%20"..LAST |
||
-- F. M. Last |
-- F. M. Last |
||
local T3 = "%22%20OR%20title%3A%22"..firstinitial.."%2E%20"..middleinitial.."%2E%20".. |
local T3 = "%22%20OR%20title%3A%22"..firstinitial.."%2E%20"..middleinitial.."%2E%20"..LAST |
||
local TALL = T1..T2..T3 |
|||
local T4 = "%22%20OR%20title%3A%22"..N[FIRST].."%20"..N[LAST] |
|||
local TALL = T1..T2..T3..T4 |
|||
-- First Middle Last |
-- First Middle Last |
||
local D1 = "%22%20OR%20description%3A%22".. |
local D1 = "%22%20OR%20description%3A%22"..FIRST.."%20"..MIDDLE.."%20"..LAST |
||
-- First M. Last |
-- First M. Last |
||
local D2 = "%22%20OR%20description%3A%22".. |
local D2 = "%22%20OR%20description%3A%22"..FIRST.."%20"..middleinitial.."%2E%20"..LAST |
||
-- F. M. Last |
-- F. M. Last |
||
local D3 = "%22%20OR%20description%3A%22"..firstinitial.."%2E%20"..middleinitial.."%2E%20".. |
local D3 = "%22%20OR%20description%3A%22"..firstinitial.."%2E%20"..middleinitial.."%2E%20"..LAST |
||
-- Last, First Middle |
-- Last, First Middle |
||
local D4 = "%22%20OR%20description%3A%22".. |
local D4 = "%22%20OR%20description%3A%22"..LAST.."%2C%20"..FIRST.."%20"..MIDDLE |
||
-- Last, First M. |
-- Last, First M. |
||
local D5 = "%22%20OR%20description%3A%22".. |
local D5 = "%22%20OR%20description%3A%22"..LAST.."%2C%20"..FIRST.."%20"..middleinitial.."%2E" |
||
local DALL = D1..D2..D3..D4..D5 |
|||
local D6 = "%22%20OR%20description%3A%22"..N[FIRST].."%20"..N[LAST] |
|||
-- Last, First |
|||
local D7 = "%22%20OR%20description%3A%22"..N[LAST].."%2C%20"..N[FIRST] |
|||
local DALL = D1..D2..D3..D4..D5..D6..D7.."%22" |
|||
if |
if sopt == "t" or sopt == "tx" then |
||
return SALL .. CALL .. TALL .. DALL .. "%22" |
|||
local wild = "%20OR%20" .. p.ia_url_encode(p.ia_extendedasciireplace(sname)) |
|||
return "[" .. urlhead .. media .. SALL .. CALL .. TALL .. DALL .. wild .. "%29" .. " " .. byabout .. " " .. dname .. "] " .. tagline |
|||
else |
else |
||
-- Last, First |
|||
return "[" .. urlhead .. media .. SALL .. CALL .. TALL .. DALL .. "%29" .. " " .. byabout .. " " .. dname .. "] " .. tagline |
|||
local S7 = "%22%20OR%20subject%3A%22"..LAST.."%2C%20"..FIRST |
|||
-- First Last |
|||
local S8 = "%22%20OR%20subject%3A%22"..FIRST.."%20"..LAST |
|||
local SALL = SALL..S7..S8 |
|||
-- First Last |
|||
local C9 = "%22%20OR%20creator%3A%22"..FIRST.."%20"..LAST |
|||
-- Last, First |
|||
local C10 = "%22%20OR%20creator%3A%22"..LAST.."%2C%20"..FIRST |
|||
local CALL = CALL..C9..C10 |
|||
-- First Last |
|||
local T4 = "%22%20OR%20title%3A%22"..FIRST.."%20"..LAST |
|||
local TALL = TALL..T4 |
|||
-- First Last |
|||
local D6 = "%22%20OR%20description%3A%22"..FIRST.."%20"..LAST |
|||
-- Last, First |
|||
local D7 = "%22%20OR%20description%3A%22"..LAST.."%2C%20"..FIRST |
|||
local DALL = DALL..D6..D7 |
|||
return SALL .. CALL .. TALL .. DALL .. "%22" |
|||
end |
end |
||
end |
|||
function p.fourWords(N, sopt) |
|||
end |
|||
local FIRST = p.urlX(N[1]) |
|||
if count == 4 then |
|||
local |
local SECOND = p.urlX(N[2]) |
||
local |
local THIRD = p.urlX(N[3]) |
||
local |
local LAST = p.urlX(N[4]) |
||
local LAST = 4 |
|||
local firstinitial = |
local firstinitial = p.firstLetter(N[1]) |
||
local secondinitial = |
local secondinitial = p.firstLetter(N[2]) |
||
local thirdinitial = |
local thirdinitial = p.firstLetter(N[3]) |
||
if sopt == "t" or sopt == "tx" then |
|||
-- Last, First Second Third |
|||
local S1 = "%28subject%3A%22"..N[LAST].."%2C%20"..N[FIRST].."%20"..N[SECOND].."%20"..N[THIRD].."%2C%20"..birth.."-"..death |
|||
local S1 = "%28subject%3A%22"..LAST.."%2C%20"..FIRST.."%20"..SECOND.."%20"..THIRD |
|||
-- First Second Third Last |
|||
local S2 = "%22%20OR%20subject%3A%22"..FIRST.."%20"..SECOND.."%20"..THIRD.."%20"..LAST |
|||
-- Last, First Second Third |
|||
local C1 = "%22%20OR%20creator%3A%22"..LAST.."%2C%20"..FIRST.."%20"..SECOND.."%20"..THIRD |
|||
-- First Second Third Last |
|||
local C2 = "%22%20OR%20creator%3A%22"..FIRST.."%20"..SECOND.."%20"..THIRD.."%20"..LAST |
|||
-- First Second Third Last |
|||
local T1 = "%22%20OR%20title%3A%22"..FIRST.."%20"..SECOND.."%20"..THIRD.."%20"..LAST |
|||
-- First Second Third Last |
|||
local D1 = "%22%20OR%20description%3A%22"..FIRST.."%20"..SECOND.."%20"..THIRD.."%20"..LAST |
|||
return S1..S2..C1..C2..T1..D1.."%22" |
|||
end |
|||
-- Last, First Second Third |
|||
local S1 = "%28subject%3A%22"..LAST.."%2C%20"..FIRST.."%20"..SECOND.."%20"..THIRD |
|||
-- First Second Third Last |
-- First Second Third Last |
||
local S2 = "%22%20OR%20subject%3A%22".. |
local S2 = "%22%20OR%20subject%3A%22"..FIRST.."%20"..SECOND.."%20"..THIRD.."%20"..LAST |
||
-- Last, First Second Third |
-- Last, First Second Third |
||
local C1 = "%22%20OR%20creator%3A%22".. |
local C1 = "%22%20OR%20creator%3A%22"..LAST.."%2C%20"..FIRST.."%20"..SECOND.."%20"..THIRD |
||
-- First Second Third Last |
-- First Second Third Last |
||
local C2 = "%22%20OR%20creator%3A%22".. |
local C2 = "%22%20OR%20creator%3A%22"..FIRST.."%20"..SECOND.."%20"..THIRD.."%20"..LAST |
||
-- Last, F. S. T. |
-- Last, F. S. T. |
||
local C3 = "%22%20OR%20creator%3A%22".. |
local C3 = "%22%20OR%20creator%3A%22"..LAST.."%2C%20"..firstinitial.."%2E%20"..secondinitial.."%2E%20"..thirdinitial.."%2E" |
||
-- First Second Third Last |
-- First Second Third Last |
||
local T1 = "%22%20OR%20title%3A%22".. |
local T1 = "%22%20OR%20title%3A%22"..FIRST.."%20"..SECOND.."%20"..THIRD.."%20"..LAST |
||
-- First Second Third Last |
-- First Second Third Last |
||
local D1 = "%22%20OR%20description%3A%22".. |
local D1 = "%22%20OR%20description%3A%22"..FIRST.."%20"..SECOND.."%20"..THIRD.."%20"..LAST |
||
local SALL = S1..S2..C1..C2..C3..T1..D1.."%22" |
|||
return S1..S2..C1..C2..C3..T1..D1.."%22" |
|||
if p.ia_extendedascii(sname) == 1 then |
|||
local wild = "%20OR%20" .. p.ia_url_encode(p.ia_extendedasciireplace(sname)) |
|||
return "[" .. urlhead .. media .. SALL .. wild .. "%29" .. " " .. byabout .. " " .. dname .. "] " .. tagline |
|||
else |
|||
local X1 = "%20OR%20" .. p.ia_url_encode(sname) |
|||
return "[" .. urlhead .. media .. SALL .. X1 .. "%29" .. " " .. byabout .. " " .. dname .. "] " .. tagline |
|||
end |
|||
end |
|||
-- ElasticSearch speed/resource problems if first letter of first word is "*" wildcard ie. accented letter |
|||
if count > 4 then |
|||
-- Build special search in these cases. |
|||
-- https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-query-string-query.html#_wildcards |
|||
function p.wildfix(N, count) |
|||
--- Split along "-" and use only first word ie. John-Taylor-Smith becomes John |
|||
local nameurl = p.ia_url_encode(sname) |
|||
local NF = mw.text.split(N[1], "-") |
|||
local NL = mw.text.split(N[count], "-") |
|||
-- ..but use full name for 1-word names |
|||
if p.ia_extendedascii(sname) == 1 then |
|||
if count == 1 then |
|||
local wild = "%20OR%20" .. p.ia_url_encode(p.ia_extendedasciireplace(sname)) |
|||
NF[1] = N[1] |
|||
return "[" .. urlhead .. media .. search .. wild .. " " .. byabout .. " " .. dname .. "] " .. tagline |
|||
NL[1] = N[1] |
|||
end |
|||
return "[" .. urlhead .. media .. search .. " " .. byabout .. " " .. dname .. "] " .. tagline |
|||
-- ((Fïrst OR First) AND (Lást OR Last)) |
|||
return "%28%28%22" .. NF[1] .. "%22%20OR%20" .. p.ia_deaccent(NF[1]) .. "%29%20AND%20%28%22" .. NL[1] .. "%22%20OR%20" .. p.ia_deaccent(NL[1]) .. "%29" |
|||
end |
|||
-- Return 1 if the first letter of first word, or any single-letter word, is extended ascii |
|||
function p.wildcheck(N, count) |
|||
local i = 0 |
|||
-- first letter of first word is extended ascii |
|||
if N[1]:byte(1) < 32 or N[1]:byte(1) > 126 then return 1 end |
|||
-- any single-letter word that is composed of only extended ascii |
|||
while i < count do |
|||
i = i + 1 |
|||
if N[i]:len() == 1 then |
|||
if N[i]:byte(1) < 32 or N[i]:byte(1) > 126 then return 1 end |
|||
end |
|||
end |
end |
||
return 0 |
|||
return "Unknown error (1). Please check documentation for [[Template:"..tname.."]]" |
|||
end |
|||
function trimArg(arg) |
|||
if arg == "" or arg == nil then |
|||
return nil |
|||
else |
|||
return mw.text.trim(arg) |
|||
end |
|||
end |
end |
||
function p.mediaTypes(argsmedia) |
function p.mediaTypes(argsmedia) |
||
-- Added a default mediatype Dec 2015 due to too many false positives in the software mediatype, caused by birth-death dates catching numbers in source codes |
|||
local media = "" |
|||
local media = "-mediatype:software" |
|||
if argsmedia ~="" and argsmedia ~=nil then |
if argsmedia ~="" and argsmedia ~=nil then |
||
Line 502: | Line 498: | ||
i = i + 1 |
i = i + 1 |
||
if(mw.ustring.lower(medialist[i]) == "text" or mw.ustring.lower(medialist[i]) == "texts") then |
if(mw.ustring.lower(medialist[i]) == "text" or mw.ustring.lower(medialist[i]) == "texts") then |
||
media = media .. p.ia_url_encode(" OR mediatype:texts") |
|||
if(i == 1) then |
|||
media = media .. p.ia_url_encode("(mediatype:texts") |
|||
else |
|||
media = media .. p.ia_url_encode(" OR mediatype:texts") |
|||
end |
|||
end |
end |
||
if(mw.ustring.lower(medialist[i]) == "audio") then |
if(mw.ustring.lower(medialist[i]) == "audio") then |
||
media = media .. p.ia_url_encode(" OR mediatype:audio") |
|||
if(i == 1) then |
|||
media = media .. p.ia_url_encode("(mediatype:audio") |
|||
else |
|||
media = media .. p.ia_url_encode(" OR mediatype:audio") |
|||
end |
|||
end |
end |
||
if(mw.ustring.lower(medialist[i]) == "video") then |
if(mw.ustring.lower(medialist[i]) == "video") then |
||
media = media .. p.ia_url_encode(" OR mediatype:video") |
|||
if(i == 1) then |
|||
media = media .. p.ia_url_encode("(mediatype:video") |
|||
else |
|||
media = media .. p.ia_url_encode(" OR mediatype:video") |
|||
end |
|||
end |
end |
||
until i == acount |
until i == acount |
||
if media ~= nil then |
|||
media = media .. ")%20AND%20" |
|||
else |
|||
media = "" |
|||
end |
|||
else |
|||
media = "" |
|||
end |
end |
||
media = "%29%20AND%20%28" .. media .. "%29" |
|||
return media |
return media |
||
end |
end |
||
Line 645: | Line 626: | ||
end |
end |
||
end |
|||
--- URL-encode special characters |
|||
--- Note: this function was added later to deal with "&" characters instead of using p.ia_url_encode since |
|||
--- that may break existing instances of the template. |
|||
function p.urlX(str) |
|||
if (str) then |
|||
str = mw.ustring.gsub (str, "&", "%%26") |
|||
end |
|||
return str |
|||
end |
end |
||
Line 663: | Line 654: | ||
function p.ia_extendedascii(str) |
function p.ia_extendedascii(str) |
||
for i = 1, str:len() do |
for i = 1, str:len() do |
||
if (str:byte(i) >= 32 and str:byte(i) <= 126) and str:byte(i) ~= 39 then |
if (str:byte(i) >= 32 and str:byte(i) <= 126) and str:byte(i) ~= 39 then -- 39 = "'" |
||
--do nothing |
--do nothing |
||
else |
else |
||
Line 671: | Line 662: | ||
return 0 |
return 0 |
||
end |
end |
||
-- UTF-8 aware replacement for string.sub() which doesn't support UTF-8. |
|||
-- Note: Using instead of mw.ustring.sub() which I suspect(?) might be cause of intermittent error, and faster here for first-letter job. |
|||
-- Source: prapin @ Stack Overflow http://stackoverflow.com/questions/13235091/extract-the-first-letter-of-a-utf-8-string-with-lua |
|||
function p.firstLetter(str) |
|||
return str:match("[%z\1-\127\194-\244][\128-\191]*") |
|||
end |
|||
-- Replace all extended ascii characters with wildcard '*' |
-- Replace all extended ascii characters with wildcard '*' |
||
-- Replace "-" with <space> eg. Pierre-Jean -> Pierre Jean |
|||
function p.ia_extendedasciireplace(str) |
|||
function p.ia_extendedascii2wildcard(str) |
|||
local s = "" |
local s = "" |
||
local j = 0 |
local j = 0 |
||
Line 684: | Line 684: | ||
-- We only worry about - (45) and " (34) since the others are unlikely to appear in a proper name. |
-- We only worry about - (45) and " (34) since the others are unlikely to appear in a proper name. |
||
-- Also ' (39) since it is sometimes the extended character ’ |
-- Also ' (39) since it is sometimes the extended character ’ |
||
if |
if k == 34 or k == 39 then |
||
s = s .. "*" |
s = s .. "*" |
||
elseif k == 45 then |
|||
s = s .. " " |
|||
else |
else |
||
s = s .. str:sub(i,i) |
s = s .. str:sub(i,i) |
||
Line 699: | Line 701: | ||
end |
end |
||
return s |
return s |
||
end |
|||
-- Replace accented letters with non-accented equivalent letters |
|||
-- Note: this is not a complete list of all possible accented letters. It is |
|||
-- all of the accented letters found in the first 10,000 names using |
|||
-- the Internet Archive author template. |
|||
function p.ia_deaccent(str) |
|||
local s = str |
|||
s = mw.ustring.gsub(s, "á", "a") |
|||
s = mw.ustring.gsub(s, "a︡", "a") |
|||
s = mw.ustring.gsub(s, "Á", "A") |
|||
s = mw.ustring.gsub(s, "ă", "a") |
|||
s = mw.ustring.gsub(s, "â", "a") |
|||
s = mw.ustring.gsub(s, "æ", "ae") |
|||
s = mw.ustring.gsub(s, "Æ", "AE") |
|||
s = mw.ustring.gsub(s, "à", "a") |
|||
s = mw.ustring.gsub(s, "ā", "a") |
|||
s = mw.ustring.gsub(s, "Ā", "A") |
|||
s = mw.ustring.gsub(s, "ą", "a") |
|||
s = mw.ustring.gsub(s, "å", "a") |
|||
s = mw.ustring.gsub(s, "Å", "A") |
|||
s = mw.ustring.gsub(s, "ã", "a") |
|||
s = mw.ustring.gsub(s, "ä", "a") |
|||
s = mw.ustring.gsub(s, "Ä", "A") |
|||
s = mw.ustring.gsub(s, "β", "B") |
|||
s = mw.ustring.gsub(s, "ć", "c") |
|||
s = mw.ustring.gsub(s, "č", "c") |
|||
s = mw.ustring.gsub(s, "Č", "C") |
|||
s = mw.ustring.gsub(s, "ç", "c") |
|||
s = mw.ustring.gsub(s, "Ç", "C") |
|||
s = mw.ustring.gsub(s, "ĉ", "c") |
|||
s = mw.ustring.gsub(s, "ď", "d") |
|||
s = mw.ustring.gsub(s, "đ", "d") |
|||
s = mw.ustring.gsub(s, "é", "e") |
|||
s = mw.ustring.gsub(s, "É", "E") |
|||
s = mw.ustring.gsub(s, "ě", "e") |
|||
s = mw.ustring.gsub(s, "ê", "e") |
|||
s = mw.ustring.gsub(s, "è", "e") |
|||
s = mw.ustring.gsub(s, "È", "E") |
|||
s = mw.ustring.gsub(s, "ε", "e") |
|||
s = mw.ustring.gsub(s, "ē", "e") |
|||
s = mw.ustring.gsub(s, "Ē", "E") |
|||
s = mw.ustring.gsub(s, "ę", "e") |
|||
s = mw.ustring.gsub(s, "ð", "e") |
|||
s = mw.ustring.gsub(s, "ë", "e") |
|||
s = mw.ustring.gsub(s, "Ë", "E") |
|||
s = mw.ustring.gsub(s, "γ", "Y") |
|||
s = mw.ustring.gsub(s, "ħ", "h") |
|||
s = mw.ustring.gsub(s, "i︠a︡", "ia") |
|||
s = mw.ustring.gsub(s, "í", "i") |
|||
s = mw.ustring.gsub(s, "i︠", "i") |
|||
s = mw.ustring.gsub(s, "ĭ", "i") |
|||
s = mw.ustring.gsub(s, "Í", "I") |
|||
s = mw.ustring.gsub(s, "î", "i") |
|||
s = mw.ustring.gsub(s, "Î", "I") |
|||
s = mw.ustring.gsub(s, "ì", "i") |
|||
s = mw.ustring.gsub(s, "ī", "i") |
|||
s = mw.ustring.gsub(s, "ł", "i") |
|||
s = mw.ustring.gsub(s, "ï", "i") |
|||
s = mw.ustring.gsub(s, "Ï", "I") |
|||
s = mw.ustring.gsub(s, "ĺ", "I") |
|||
s = mw.ustring.gsub(s, "Ĺ", "L") |
|||
s = mw.ustring.gsub(s, "μ", "u") |
|||
s = mw.ustring.gsub(s, "µ", "u") |
|||
s = mw.ustring.gsub(s, "ń", "n") |
|||
s = mw.ustring.gsub(s, "ň", "n") |
|||
s = mw.ustring.gsub(s, "ņ", "n") |
|||
s = mw.ustring.gsub(s, "ñ", "n") |
|||
s = mw.ustring.gsub(s, "Ñ", "N") |
|||
s = mw.ustring.gsub(s, "ó", "o") |
|||
s = mw.ustring.gsub(s, "Ó", "O") |
|||
s = mw.ustring.gsub(s, "ô", "o") |
|||
s = mw.ustring.gsub(s, "œ", "oe") |
|||
s = mw.ustring.gsub(s, "ò", "o") |
|||
s = mw.ustring.gsub(s, "ō", "o") |
|||
s = mw.ustring.gsub(s, "ø", "o") |
|||
s = mw.ustring.gsub(s, "Ø", "o") |
|||
s = mw.ustring.gsub(s, "õ", "o") |
|||
s = mw.ustring.gsub(s, "ö", "o") |
|||
s = mw.ustring.gsub(s, "ő", "o") |
|||
s = mw.ustring.gsub(s, "Ö", "O") |
|||
s = mw.ustring.gsub(s, "φ", "o") |
|||
s = mw.ustring.gsub(s, "ŕ", "r") |
|||
s = mw.ustring.gsub(s, "ř", "r") |
|||
s = mw.ustring.gsub(s, "Ř", "R") |
|||
s = mw.ustring.gsub(s, "ß", "ss") |
|||
s = mw.ustring.gsub(s, "ś", "s") |
|||
s = mw.ustring.gsub(s, "Ś", "S") |
|||
s = mw.ustring.gsub(s, "š", "s") |
|||
s = mw.ustring.gsub(s, "ṣ", "s") |
|||
s = mw.ustring.gsub(s, "Š", "S") |
|||
s = mw.ustring.gsub(s, "ş", "s") |
|||
s = mw.ustring.gsub(s, "Ş", "S") |
|||
s = mw.ustring.gsub(s, "ŝ", "s") |
|||
s = mw.ustring.gsub(s, "σ", "s") |
|||
s = mw.ustring.gsub(s, "ť", "t") |
|||
s = mw.ustring.gsub(s, "ţ", "t") |
|||
s = mw.ustring.gsub(s, "τ", "t") |
|||
s = mw.ustring.gsub(s, "þ", "p") |
|||
s = mw.ustring.gsub(s, "Þ", "p") |
|||
s = mw.ustring.gsub(s, "ú", "u") |
|||
s = mw.ustring.gsub(s, "Ú", "U") |
|||
s = mw.ustring.gsub(s, "û", "u") |
|||
s = mw.ustring.gsub(s, "ù", "u") |
|||
s = mw.ustring.gsub(s, "ū", "u") |
|||
s = mw.ustring.gsub(s, "ů", "u") |
|||
s = mw.ustring.gsub(s, "ü", "u") |
|||
s = mw.ustring.gsub(s, "Ü", "U") |
|||
s = mw.ustring.gsub(s, "ŵ", "w") |
|||
s = mw.ustring.gsub(s, "ý", "y") |
|||
s = mw.ustring.gsub(s, "ŷ", "y") |
|||
s = mw.ustring.gsub(s, "¥", "y") |
|||
s = mw.ustring.gsub(s, "ÿ", "y") |
|||
s = mw.ustring.gsub(s, "Ÿ", "Y") |
|||
s = mw.ustring.gsub(s, "ź", "z") |
|||
s = mw.ustring.gsub(s, "Ž", "Z") |
|||
s = mw.ustring.gsub(s, "ž", "z") |
|||
s = mw.ustring.gsub(s, "ż", "z") |
|||
s = mw.ustring.gsub(s, "Ż", "Z") |
|||
return s |
|||
end |
end |
||
Latest revision as of 19:42, 9 August 2024
This module is subject to page protection. It is a highly visible module in use by a very large number of pages, or is substituted very frequently. Because vandalism or mistakes would affect many pages, and even trivial editing might cause substantial load on the servers, it is protected from editing. |
This Lua module is used on approximately 19,000 pages and changes may be widely noticed. Test changes in the module's /sandbox or /testcases subpages, or in your own module sandbox. Consider discussing changes on the talk page before implementing them. |
Usage
There is currently 1 template that invokes this module: {{Internet Archive author}}.
If future Lua scripts for Internet Archive are created (books, film, audio, etc.), this module would be the natural place to add them.
--[[
For functions related to Internet Archive
Notes:
1. Internet Archive runs the Elasticsearch search engine as of 4 Nov 2015
2. Program flowchart:
Break name down into number of words
Build a base URL based on number of words (1,2,3,4,5+), use of sopt=t switch, and availability of birth-death dates
If any words contain extended-ascii characters
append extra code for wildcards based on sopt=t or w
return finished URL
3. URL length should not exceed 2000 characters or it will break certain popular browsers
4. Wildcard (*) replacements should be avoided in the first letter of the first word, and with any single-letter words
5. Changing search formulations will have impacts on existing uses of the template and off-line tools which are optimized for these search recipes.
]]
local p = {}
--[[
For Template:Internet Archive author

Builds the wikitext external link
  [<search URL> Works by or about <display name>] at the [[Internet Archive]]

Template parameters (read from the parent frame):
  name    article name (default: current page title)
  sname   search name (default: name without a trailing "(disambiguation)" suffix)
  dname   display name (default: same as the sname default)
  sopt    search option: "t"/"tight", "tx"/"tightx" or "w"/"wild"; any other
          value becomes "unknown", which matches none of the option tests below
  coda    text appended to the tagline
  search  custom query; when given, it is used instead of the generated query
  media   passed to p.mediaTypes()
  birth, death   passed to p.bdDate()

Returns the finished wikitext, or an error message string.
]]
function p.author(frame)

	local pframe = frame:getParent()
	local args = pframe.args

	local tname = "Internet Archive author" -- name of calling template. Change if template rename.
	local name  -- article name (default: current page name)
	local sname -- search name (default: current page name)
	local sopt  -- search options (default: nil)

	-- NOTE: dname, byabout, tagline, urlhead, mydate, media, mediaopen, wild and
	-- myurl are deliberately left global: p.IArender() reads them to build the
	-- final link, so they must not be turned into locals of this function.
	dname = nil -- display name (default: current page name)
	byabout = "Works by or about"
	tagline = "at the [[Internet Archive]]"
	urlhead = "https://archive.org/search.php?query=" -- link straight to archive.org, not via a mirror host
	mydate = "" -- birth-death date

	--- Determine name
	name = trimArg(args.name) -- When using template outside main article space, the 'name' parameter is required (not optional)
	if not name then
		name = mw.title.getCurrentTitle().text
	end

	dname = mw.ustring.gsub(name, '%s+%([^%(]-%)$', '') -- Remove the final disambig parentheses
	sname = dname

	local argsname = trimArg(args.sname)
	if argsname then
		sname = argsname
	end
	local argdname = trimArg(args.dname)
	if argdname then
		dname = argdname
	end

	--- Determine search option
	sopt = trimArg(args.sopt)
	if sopt then
		sopt = mw.ustring.lower(sopt)
		if sopt == "tight" then sopt = "t" end
		if sopt == "tightx" then sopt = "tx" end
		if sopt == "wild" then sopt = "w" end
		if sopt ~= "t" and sopt ~= "tx" and sopt ~= "w" then sopt = "unknown" end
	end

	--- Determine tagline
	local coda = trimArg(args.coda)
	if coda then
		tagline = tagline .. " " .. coda
	end

	--- Custom search. Do early to avoid unnecessary processing.
	local customsearch = trimArg(args.search)
	if customsearch then
		local search = p.ia_url_encode(customsearch)
		return "[" .. urlhead .. search .. " " .. byabout .. " " .. dname .. "] " .. tagline
	end

	-- Determine media string
	media = p.mediaTypes(args.media)
	mediaopen = "%28" -- a default mediatype was added Dec 2015 (see p.mediaTypes()), so the group is always opened

	-- Determine date of birth and death
	local temp = mw.text.split(p.bdDate(args.birth, args.death, name), " ")
	local birth = temp[1]
	local death = temp[2]
	if birth == "Error" or death == "Error" then
		return "Error in [[:Template:"..tname.."]]: [[" ..name.. "]] doesn't exist."
	end

	--- Split sname into words and count words
	-- Split on whitespace runs so that N lines up with count (which counts
	-- %S+ runs); splitting on a single " " yields empty words for "A  B".
	local N = mw.text.split(sname, "%s+")
	local _, count = mw.ustring.gsub(sname, "%S+", "")
	if count == 0 then
		return "Error in [[:Template:"..tname.."]]: Zero-word name."
	end

	--- Date string
	if birth ~= "none" and death ~= "none" then
		if p.ia_extendedascii(N[count]) == 1 then
			mydate = "%20OR%20%28%22"..birth.."-"..death.."%22%20AND%20%28%22"..p.urlX(N[count]).."%22%20OR%20"..p.urlX(p.ia_deaccent(N[count])).."%29%29"
		else
			mydate = "%20OR%20%28%22"..birth.."-"..death.."%22%20AND%20"..p.urlX(N[count]).."%29"
		end
	end

	local hasaccent = (p.ia_extendedascii(sname) == 1)
	local tightaccent = (sopt == "t" and hasaccent)
	local wildaccent = (sopt == "w" and hasaccent)

	--- wild string
	wild = "%29"
	if wildaccent then
		if p.wildcheck(N, count) == 1 then
			myurl = p.wildfix(N, count)
			return p.IArender()
		end
		if count == 3 then
			-- (first last)
			wild = "%20OR%20%28" .. p.ia_url_encode(p.ia_extendedascii2wildcard(N[1])) .. "%20" .. p.ia_url_encode(p.ia_extendedascii2wildcard(N[3])) .. "%29%29"
		else
			-- (whole name)
			wild = "%20OR%20%28" .. p.ia_url_encode(p.ia_extendedascii2wildcard(sname)) .. "%29%29"
		end
	end

	--[[
	Format URL
	]]

	if count == 1 then
		myurl = p.oneWord(sname)
		if tightaccent then
			local plainname = p.ia_deaccent(sname)
			local A1 = "%20OR%20%22"..p.urlX(plainname)
			myurl = myurl .. A1 .. "%22"
		end
		return p.IArender()
	end

	if count == 2 then
		myurl = p.twoWords(N, sopt)
		if tightaccent then
			local PN = mw.text.split(p.ia_deaccent(sname), "%s+")
			-- Last, First
			local A1 = "%20OR%20%22"..p.urlX(PN[2]).."%2C%20"..p.urlX(PN[1])
			-- First Last
			local A2 = "%22%20OR%20%22"..p.urlX(PN[1]).."%20"..p.urlX(PN[2])
			myurl = myurl .. A1 .. A2 .. "%22"
		end
		return p.IArender()
	end

	if count == 3 then
		myurl = p.threeWords(N, sopt)
		if tightaccent then
			local PN = mw.text.split(p.ia_deaccent(sname), "%s+")
			local FIRST = p.urlX(PN[1])
			local MIDDLE = p.urlX(PN[2])
			local LAST = p.urlX(PN[3])
			local firstinitialp = p.urlX( p.firstLetter(PN[1]) )
			local middleinitialp = p.urlX( p.firstLetter(PN[2]) )
			-- First Middle Last
			local A1 = "%20OR%20%22"..FIRST.."%20"..MIDDLE.."%20"..LAST
			-- Last, First Middle
			local A2 = "%22%20OR%20%22"..LAST.."%2C%20"..FIRST.."%20"..MIDDLE
			-- Last, First M.
			local A3 = "%22%20OR%20%22"..LAST.."%2C%20"..FIRST.."%20"..middleinitialp.."%2E"
			-- Last, F. M.
			local A4 = "%22%20OR%20%22"..LAST.."%2C%20"..firstinitialp..".%20"..middleinitialp.."%2E"
			myurl = myurl .. A1 .. A2 .. A3 .. A4 .. "%22"
		end
		return p.IArender()
	end

	if count == 4 then
		myurl = p.fourWords(N, sopt)
		if tightaccent then
			local PN = mw.text.split(p.ia_deaccent(sname), "%s+")
			local FIRST = p.urlX(PN[1])
			local SECOND = p.urlX(PN[2])
			local THIRD = p.urlX(PN[3])
			local LAST = p.urlX(PN[4])
			local firstinitialp = p.urlX( p.firstLetter(PN[1]) )
			local secondinitialp = p.urlX( p.firstLetter(PN[2]) )
			local thirdinitialp = p.urlX( p.firstLetter(PN[3]) )
			-- Last, First Second Third
			local A1 = "%20OR%20%22"..LAST.."%2C%20"..FIRST.."%20"..SECOND.."%20"..THIRD
			-- First Second Third Last
			local A2 = "%22%20OR%20%22"..FIRST.."%20"..SECOND.."%20"..THIRD.."%20"..LAST
			-- Last, F. S. T.
			local A3 = "%22%20OR%20%22"..LAST.."%2C%20"..firstinitialp.."%2E%20"..secondinitialp.."%2E%20"..thirdinitialp.."%2E"
			myurl = myurl .. A1 .. A2 .. A3 .. "%22"
		end
		return p.IArender()
	end

	if count > 4 then
		-- Five or more words: search for the whole name as given.
		myurl = ""
		if wildaccent then
			myurl = "%28"
		end
		myurl = myurl .. "%28" .. p.ia_url_encode(sname)
		if wildaccent then
			myurl = myurl .. "%29"
		end
		if tightaccent then
			local plainname = p.ia_deaccent(sname)
			local A1 = "%29%20OR%20%28"..p.ia_url_encode(plainname)
			myurl = myurl .. A1
		end
		return p.IArender()
	end

	-- Defensive fallback; every count >= 1 is handled above.
	return "Unknown error (1). Please check documentation for [[Template:"..tname.."]]"

end
-- Build final output and render.
-- Assembles the external link from the global pieces prepared by p.author():
-- urlhead, mediaopen, myurl, wild, mydate, media (the URL) and
-- byabout, dname, tagline (the visible text).
function p.IArender()
	local query = urlhead .. mediaopen .. myurl .. wild .. mydate .. media
	local label = byabout .. " " .. dname
	return "[" .. query .. " " .. label .. "] " .. tagline
end
-- Query for a one-word name: the URL-encoded name, quoted, in each of the
-- subject, creator, description and title fields, OR-ed together.
-- The opening "%28" is left unclosed here.
function p.oneWord(sname)
	local encoded = p.ia_url_encode(sname)
	local fields = { "subject", "creator", "description", "title" }
	local terms = {}
	for i = 1, #fields do
		terms[i] = fields[i] .. "%3A%22" .. encoded
	end
	return "%28" .. table.concat(terms, "%22%20OR%20") .. "%22"
end
-- Query for a two-word name.
--   N    : table of name words (N[1] = first, N[2] = last)
--   sopt : search option; "t" and "tx" omit the "Last, F." creator form
-- The opening "%28" is left unclosed here.
function p.twoWords(N, sopt)
	local first = p.urlX(N[1])
	local last = p.urlX(N[2])
	local initial = p.urlX( p.firstLetter(N[1]) )

	local lastfirst = last .. "%2C%20" .. first -- Last, First
	local firstlast = first .. "%20" .. last    -- First Last

	local query = "%28subject%3A%22" .. lastfirst
	query = query .. "%22%20OR%20subject%3A%22" .. firstlast
	query = query .. "%22%20OR%20creator%3A%22" .. lastfirst
	query = query .. "%22%20OR%20creator%3A%22" .. firstlast

	if not (sopt == "t" or sopt == "tx") then
		-- Last, F.
		query = query .. "%22%20OR%20creator%3A%22" .. last .. "%2C%20" .. initial .. "%2E"
	end

	query = query .. "%22%20OR%20title%3A%22" .. firstlast
	query = query .. "%22%20OR%20description%3A%22" .. lastfirst
	query = query .. "%22%20OR%20description%3A%22" .. firstlast

	return query .. "%22"
end
-- Query for a three-word name.
--   N    : table of name words (N[1] = first, N[2] = middle, N[3] = last)
--   sopt : search option; "t" and "tx" omit the two-word (no middle name) forms
-- The opening "%28" is left unclosed here.
function p.threeWords(N, sopt)

	-- CAUTION: The following is near the max 2000 character URL limit for most browsers when using long names
	-- such as "René-Nicolas Dufriche Desgenettes".

	local first = p.urlX(N[1])
	local middle = p.urlX(N[2])
	local last = p.urlX(N[3])
	local fi = p.urlX( p.firstLetter(N[1]) )
	local mi = p.urlX( p.firstLetter(N[2]) )

	local comma, space, dot = "%2C%20", "%20", "%2E"

	-- Every spelling of the name used below
	local last_first_middle = last .. comma .. first .. space .. middle      -- Last, First Middle
	local last_first_m      = last .. comma .. first .. space .. mi .. dot   -- Last, First M.
	local last_f_m          = last .. comma .. fi .. dot .. space .. mi .. dot -- Last, F. M.
	local last_f_middle     = last .. comma .. fi .. dot .. space .. middle  -- Last, F. Middle
	local first_middle_last = first .. space .. middle .. space .. last      -- First Middle Last
	local first_m_last      = first .. space .. mi .. dot .. space .. last   -- First M. Last
	local f_m_last          = fi .. dot .. space .. mi .. dot .. space .. last -- F. M. Last
	local f_middle_last     = fi .. dot .. space .. middle .. space .. last  -- F. Middle Last
	local last_first        = last .. comma .. first                         -- Last, First
	local first_last        = first .. space .. last                         -- First Last

	local wide = not (sopt == "t" or sopt == "tx")

	-- Collect field:"form terms in order; they are joined with '" OR ' afterwards
	local terms = {}
	local function add(field, forms)
		for _, form in ipairs(forms) do
			terms[#terms + 1] = field .. "%3A%22" .. form
		end
	end

	add("subject", { last_first_middle, last_first_m, last_f_m, first_middle_last, first_m_last, f_m_last })
	if wide then
		add("subject", { last_first, first_last })
	end

	add("creator", { first_middle_last, first_m_last, f_m_last, f_middle_last, last_first_middle, last_first_m, last_f_m, last_f_middle })
	if wide then
		add("creator", { first_last, last_first })
	end

	add("title", { first_middle_last, first_m_last, f_m_last })
	if wide then
		add("title", { first_last })
	end

	add("description", { first_middle_last, first_m_last, f_m_last, last_first_middle, last_first_m })
	if wide then
		add("description", { first_last, last_first })
	end

	return "%28" .. table.concat(terms, "%22%20OR%20") .. "%22"
end
-- Query for a four-word name.
--   N    : table of name words (N[1]..N[3] = given names, N[4] = last)
--   sopt : search option; "t" and "tx" omit the "Last, F. S. T." creator form
-- The opening "%28" is left unclosed here.
function p.fourWords(N, sopt)

	local FIRST = p.urlX(N[1])
	local SECOND = p.urlX(N[2])
	local THIRD = p.urlX(N[3])
	local LAST = p.urlX(N[4])

	local inverted = LAST.."%2C%20"..FIRST.."%20"..SECOND.."%20"..THIRD  -- Last, First Second Third
	local natural = FIRST.."%20"..SECOND.."%20"..THIRD.."%20"..LAST      -- First Second Third Last

	local S1 = "%28subject%3A%22"..inverted
	local S2 = "%22%20OR%20subject%3A%22"..natural
	local C1 = "%22%20OR%20creator%3A%22"..inverted
	local C2 = "%22%20OR%20creator%3A%22"..natural
	local T1 = "%22%20OR%20title%3A%22"..natural
	local D1 = "%22%20OR%20description%3A%22"..natural

	-- Last, F. S. T. (skipped for the tight options)
	local C3 = ""
	if sopt ~= "t" and sopt ~= "tx" then
		-- Initials go through p.urlX like every other name part (and like
		-- p.twoWords / p.threeWords) so an "&" initial cannot break the URL.
		local firstinitial = p.urlX( p.firstLetter(N[1]) )
		local secondinitial = p.urlX( p.firstLetter(N[2]) )
		local thirdinitial = p.urlX( p.firstLetter(N[3]) )
		C3 = "%22%20OR%20creator%3A%22"..LAST.."%2C%20"..firstinitial.."%2E%20"..secondinitial.."%2E%20"..thirdinitial.."%2E"
	end

	return S1..S2..C1..C2..C3..T1..D1.."%22"
end
-- ElasticSearch speed/resource problems if first letter of first word is "*" wildcard ie. accented letter
-- Build special search in these cases.
-- https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-query-string-query.html#_wildcards
--   N     : table of name words
--   count : number of words in N
-- Returns ((Fïrst OR First) AND (Lást OR Last) with the outer parenthesis left open.
function p.wildfix(N, count)
	local firstword, lastword
	if count == 1 then
		-- 1-word names: use the full word for both halves
		firstword = N[1]
		lastword = N[1]
	else
		--- Split along "-" and use only first part ie. John-Taylor-Smith becomes John
		firstword = mw.text.split(N[1], "-")[1]
		lastword = mw.text.split(N[count], "-")[1]
	end

	local firsthalf = "%28%22" .. firstword .. "%22%20OR%20" .. p.ia_deaccent(firstword) .. "%29"
	local lasthalf = "%28%22" .. lastword .. "%22%20OR%20" .. p.ia_deaccent(lastword) .. "%29"
	return "%28" .. firsthalf .. "%20AND%20" .. lasthalf
end
-- Return 1 if the first letter of first word, or any single-letter word, is extended ascii
--   N     : table of name words
--   count : number of words in N
-- Returns 0 otherwise.
function p.wildcheck(N, count)

	-- true when the byte exists and is outside printable ASCII (32..126)
	local function outsideascii(b)
		return b ~= nil and (b < 32 or b > 126)
	end

	-- first letter of first word is extended ascii
	if N[1] and outsideascii(N[1]:byte(1)) then
		return 1
	end

	-- any single-letter word that is composed of only extended ascii.
	-- Length is measured in characters, not bytes: an accented letter is at
	-- least two bytes in UTF-8, so a byte-length test of 1 could never match.
	for i = 1, count do
		local word = N[i]
		if word and mw.ustring.len(word) == 1 and outsideascii(word:byte(1)) then
			return 1
		end
	end

	return 0
end
-- Normalise a template argument: nil and "" both become nil, anything else
-- is returned with mw.text.trim() applied.
-- Kept global because p.author(), defined earlier in the file, calls it by name.
function trimArg(arg)
	if arg ~= nil and arg ~= "" then
		return mw.text.trim(arg)
	end
	return nil
end
-- Build the media-type clause appended to the end of the query.
--   argsmedia : the template's media argument: whitespace-separated words
--               among text/texts, audio, video (case-insensitive); other
--               words are ignored. May be nil or "".
-- Returns "%29%20AND%20%28" .. clause .. "%29", where clause always starts
-- with "-mediatype:software".
function p.mediaTypes(argsmedia)

	-- Added a default mediatype Dec 2015 due to too many false positives in the software mediatype, caused by birth-death dates catching numbers in source codes
	local media = "-mediatype:software"

	if argsmedia ~= "" and argsmedia ~= nil then
		-- Walk the non-blank words directly. This is safe for whitespace-only
		-- input and for repeated spaces, where a split list and a separate
		-- word count disagree.
		for word in mw.ustring.gmatch(argsmedia, "%S+") do
			local mtype = mw.ustring.lower(word)
			if mtype == "text" or mtype == "texts" then
				media = media .. p.ia_url_encode(" OR mediatype:texts")
			elseif mtype == "audio" then
				media = media .. p.ia_url_encode(" OR mediatype:audio")
			elseif mtype == "video" then
				media = media .. p.ia_url_encode(" OR mediatype:video")
			end
		end
	end

	media = "%29%20AND%20%28" .. media .. "%29"
	return media
end
-- Alt way to get b/d dates via getContent()
--   argsbirth, argsdeath : template overrides; used (trimmed) when non-empty
--   name                 : title of the page whose wikitext is scanned for
--                          "Category:NNNN births" / "Category:NNNN deaths"
-- Returns "<birth> <death>" where each part is a year or "none", or the
-- single word "Error" when the page cannot be loaded.
function p.bdDateAlt(argsbirth, argsdeath, name)

	local pagetext = nil

	-- Load the page. mw.title.new() returns nil for an invalid title.
	local t = mw.title.new(name)
	if t and t.exists then
		pagetext = t:getContent()
	end
	if pagetext == nil then
		return "Error"
	end

	-- Remove false positives: HTML comments and <nowiki> sections.
	-- "-" is a pattern quantifier and must be escaped to match literally.
	pagetext = mw.ustring.gsub(pagetext, "<!%-%-.-%-%->", "")
	pagetext = mw.ustring.gsub(pagetext, "<nowiki>.-</nowiki>", "")

	-- Year from "[[Category:<year> <kind>]]", or "none" when absent
	local function yearFromCategory(kind)
		local category = mw.ustring.match(pagetext, "%[%[%s-[Cc]ategory:%s-%d+%.?%d*%s-" .. kind .. "%s-%]%]")
		if category ~= nil then
			return mw.ustring.match(category, "%d+%.?%d*")
		end
		return "none"
	end

	-- "Category:1900 births"
	local birth
	if argsbirth == "" or argsbirth == nil then
		birth = yearFromCategory("births")
	else
		birth = mw.text.trim(argsbirth)
	end

	-- "Category:2000 deaths"
	local death
	if argsdeath == "" or argsdeath == nil then
		death = yearFromCategory("deaths")
	else
		death = mw.text.trim(argsdeath)
	end

	return birth .. " " .. death
end
-- Get b/d dates via Wikidata.
--   argsbirth, argsdeath : template overrides; used (trimmed) when non-empty
--   name                 : page title, only used by the p.bdDateAlt() fallback
-- Returns "<birth> <death>" where each part is a year or "none"; may return
-- "Error" via p.bdDateAlt().
function p.bdDate(argsbirth, argsdeath, name)

	local entity = mw.wikibase.getEntityObject()
	if not entity or not entity.claims then
		-- Alternative if template not on a page in mainspace. This is needed since Wikidata can only be retrieved
		-- for the article where the template is located.
		return p.bdDateAlt(argsbirth, argsdeath, name)
	end

	-- Note: The below uses formatPropertyValues() to get and format the date from Wikidata.
	-- For an alternative method, see sandbox revision dated 5:58 am, 15 October 2014

	-- Year taken from the formatted value of a date property: the last word
	-- when it starts with a digit, or the word before a trailing BCE/BC/AD
	-- marker. "none" when no year can be found.
	local function yearFromProperty(property)
		local value = entity:formatPropertyValues(property)["value"]
		local words = {}
		for word in mw.ustring.gmatch(value, "%S+") do
			words[#words + 1] = word
		end
		local count = #words
		if count > 0 then
			local lastword = words[count]
			if string.find(lastword, "^%d") then
				return lastword
			elseif string.find(lastword, "BC") or string.find(lastword, "AD") then
				-- "BC" also covers "BCE"; guard against a lone era marker
				return words[count - 1] or "none"
			end
		end
		return "none"
	end

	local birth
	if argsbirth == "" or argsbirth == nil then
		birth = yearFromProperty('P569')
	else
		birth = mw.text.trim(argsbirth)
	end

	local death
	if argsdeath == "" or argsdeath == nil then
		death = yearFromProperty('P570')
	else
		death = mw.text.trim(argsdeath)
	end

	-- When Wikidata has neither date, "none none" is returned as-is; the
	-- p.bdDateAlt() fallback for that case is intentionally not used.
	return birth .. " " .. death
end
--- URL-encode special characters
--- Note: this function was added later to deal with "&" characters instead of using p.ia_url_encode since
--- that may break existing instances of the template.
--- Replaces every "&" with "%26"; a nil/false argument is returned unchanged.
function p.urlX(str)
	if not str then
		return str
	end
	local encoded = mw.ustring.gsub(str, "&", "%%26")
	return encoded
end
--- URL-encode a string
--- http://lua-users.org/wiki/StringRecipes
---
--- Newlines become CRLF, every character that is not alphanumeric, space,
--- "-", "_", "." or "~" is percent-encoded, then spaces become "+".
--- A nil/false argument is returned unchanged.
function p.ia_url_encode(str)
	if (str) then
		str = mw.ustring.gsub (str, "\n", "\r\n")
		str = mw.ustring.gsub (str, "([^%w %-%_%.%~])",
			function (c)
				-- c is one Unicode character and may be several bytes long;
				-- percent-encode every byte, not just the first one.
				return (string.gsub (c, ".",
					function (b) return string.format ("%%%02X", string.byte(b)) end))
			end)
		str = mw.ustring.gsub (str, " ", "+")
	end
	return str
end
-- Does str contain extended ascii? 1 = yes, 0 = no
-- A byte counts as "extended" when it is outside 32..126, or is "'" (39).
function p.ia_extendedascii(str)
	for i = 1, #str do
		local b = str:byte(i)
		if b < 32 or b > 126 or b == 39 then -- 39 = "'"
			return 1
		end
	end
	return 0
end
-- UTF-8 aware replacement for string.sub() which doesn't support UTF-8.
-- Note: Using instead of mw.ustring.sub() which I suspect(?) might be cause of intermittent error, and faster here for first-letter job.
-- Source: prapin @ Stack Overflow http://stackoverflow.com/questions/13235091/extract-the-first-letter-of-a-utf-8-string-with-lua
-- Returns the first match of "one ASCII or UTF-8 lead byte plus its
-- continuation bytes" in str, or nil when there is none.
function p.firstLetter(str)
	local utf8char = "[%z\1-\127\194-\244][\128-\191]*"
	local letter = str:match(utf8char)
	return letter
end
-- Replace all extended ascii characters with wildcard '*'
-- Replace "-" with <space> eg. Pierre-Jean -> Pierre Jean
-- Exactly one '*' is emitted per non-ASCII character, whatever its UTF-8
-- length (2, 3 or 4 bytes). Bytes below 32 and byte 127 also become '*'.
function p.ia_extendedascii2wildcard(str)
	local out = {}

	for i = 1, str:len() do
		local k = str:byte(i)

		if k >= 32 and k <= 126 then
			-- For list of Lucene special characters needing to be escaped:
			-- http://lucene.apache.org/core/4_10_0/queryparser/org/apache/lucene/queryparser/classic/package-summary.html#Escaping_Special_Characters
			-- We only worry about - (45) and " (34) since the others are unlikely to appear in a proper name.
			-- Also ' (39) since it is sometimes the extended character ’
			if k == 34 or k == 39 then
				out[#out + 1] = "*"
			elseif k == 45 then
				out[#out + 1] = " "
			else
				out[#out + 1] = str:sub(i, i)
			end
		elseif k < 128 or k >= 192 then
			-- Control byte / DEL, or the lead byte of a multi-byte UTF-8
			-- character: one wildcard for the whole character.
			out[#out + 1] = "*"
		end
		-- Bytes 128..191 are UTF-8 continuation bytes, already covered by
		-- the wildcard emitted for their lead byte.
	end

	return table.concat(out)
end
-- Replace accented letters with non-accented equivalent letters
-- Note: this is not a complete list of all possible accented letters. It is
--       all of the accented letters found in the first 10,000 names using
--       the Internet Archive author template.
--
-- The rules are applied in the listed order, each one to the result of the
-- previous one. Every pattern is a literal UTF-8 sequence without pattern
-- magic characters, so plain byte-level string.gsub matches exactly the same
-- characters as a Unicode-aware gsub would on valid UTF-8 input.
-- Returns the rewritten string; characters without a rule are kept as they are.
-- NOTE(review): several look-alike rules (β -> B, γ -> Y, ð -> e, φ -> o, ...)
-- are kept exactly as they were; confirm they are intentional.
do
  local deaccent_rules = {
    { "á", "a" }, { "a︡", "a" }, { "Á", "A" }, { "ă", "a" }, { "â", "a" },
    { "æ", "ae" }, { "Æ", "AE" }, { "à", "a" }, { "ā", "a" }, { "Ā", "A" },
    { "ą", "a" }, { "å", "a" }, { "Å", "A" }, { "ã", "a" }, { "ä", "a" },
    { "Ä", "A" },
    { "β", "B" },
    { "ć", "c" }, { "č", "c" }, { "Č", "C" }, { "ç", "c" }, { "Ç", "C" },
    { "ĉ", "c" },
    { "ď", "d" }, { "đ", "d" },
    { "é", "e" }, { "É", "E" }, { "ě", "e" }, { "ê", "e" }, { "è", "e" },
    { "È", "E" }, { "ε", "e" }, { "ē", "e" }, { "Ē", "E" }, { "ę", "e" },
    { "ð", "e" }, { "ë", "e" }, { "Ë", "E" },
    { "γ", "Y" },
    { "ħ", "h" },
    -- The combining half marks after "a" were already removed by the
    -- second rule, so this entry is kept only for reference; "i︠" below
    -- finishes the job.
    { "i︠a︡", "ia" },
    { "í", "i" }, { "i︠", "i" }, { "ĭ", "i" }, { "Í", "I" }, { "î", "i" },
    { "Î", "I" }, { "ì", "i" }, { "ī", "i" },
    { "ł", "l" }, -- l with stroke is an "l", not an "i"
    { "ï", "i" }, { "Ï", "I" },
    { "ĺ", "l" }, -- lowercase l with acute: "l", not capital "I"
    { "Ĺ", "L" },
    { "μ", "u" }, { "µ", "u" },
    { "ń", "n" }, { "ň", "n" }, { "ņ", "n" }, { "ñ", "n" }, { "Ñ", "N" },
    { "ó", "o" }, { "Ó", "O" }, { "ô", "o" }, { "œ", "oe" }, { "ò", "o" },
    { "ō", "o" }, { "ø", "o" },
    { "Ø", "O" }, -- keep the capital
    { "õ", "o" }, { "ö", "o" }, { "ő", "o" }, { "Ö", "O" }, { "φ", "o" },
    { "ŕ", "r" }, { "ř", "r" }, { "Ř", "R" },
    { "ß", "ss" }, { "ś", "s" }, { "Ś", "S" }, { "š", "s" }, { "ṣ", "s" },
    { "Š", "S" }, { "ş", "s" }, { "Ş", "S" }, { "ŝ", "s" }, { "σ", "s" },
    { "ť", "t" }, { "ţ", "t" }, { "τ", "t" },
    { "þ", "p" },
    { "Þ", "P" }, -- keep the capital
    { "ú", "u" }, { "Ú", "U" }, { "û", "u" }, { "ù", "u" }, { "ū", "u" },
    { "ů", "u" }, { "ü", "u" }, { "Ü", "U" },
    { "ŵ", "w" },
    { "ý", "y" }, { "ŷ", "y" }, { "¥", "y" }, { "ÿ", "y" }, { "Ÿ", "Y" },
    { "ź", "z" }, { "Ž", "Z" }, { "ž", "z" }, { "ż", "z" }, { "Ż", "Z" },
  }

  function p.ia_deaccent(str)
    local s = str
    for i = 1, #deaccent_rules do
      local rule = deaccent_rules[i]
      -- Single assignment keeps the string and drops the match count.
      s = string.gsub(s, rule[1], rule[2])
    end
    return s
  end
end
-- Return the table p, which carries the functions defined on it above.
return p