Module:Delink: Difference between revisions
Appearance
Content deleted Content added
handle nested links |
m Changed protection settings for "Module:Delink": Dependency of fully-protected (and on WP:CASC) Template:Fix ([Edit=Require administrator access] (indefinite) [Move=Require administrator access] (indefinite)) |
||
(40 intermediate revisions by 8 users not shown) | |||
Line 1: | Line 1: | ||
-- This module de-links most wikitext. |
-- This module de-links most wikitext. |
||
require("strict") |
|||
p = {} |
|||
local |
local p = {} |
||
-- Assume we have already delinked internal wikilinks, and that |
|||
local getArgs |
|||
-- we have been passed some text between two square brackets [foo]. |
|||
-- Check if the text has a valid URL prefix and at least one valid URL character. |
|||
local valid_url_prefixes = {"//", "https://", "https://", "ftp://", "gopher://", "mailto:", "news:", "irc://"} |
|||
local url_prefix |
|||
for i,v in ipairs(valid_url_prefixes) do |
|||
if mw.ustring.match(s, '^%[' .. v ..'[^" ].*%]' ) then |
|||
url_prefix = v |
|||
break |
|||
end |
|||
end |
|||
-- Get display text |
|||
if not url_prefix then |
|||
return s |
|||
else |
|||
s = mw.ustring.match(s, "^%[" .. url_prefix .. "(.*)%]") -- Grab all of the text after the URL prefix and before the final square bracket. |
|||
s = mw.ustring.match(s, '^.-(["<> ].*)') or "" -- Grab all of the text after the first URL separator character ("<> ). |
|||
s = mw.ustring.match(s, "^ ?(.*)") -- If the separating character was a space, trim it off. |
|||
return s |
|||
end |
|||
end |
|||
local function delinkReversePipeTrick(s) |
local function delinkReversePipeTrick(s) |
||
if s:match("^%[%[|.*[|\n]") then -- Check for newlines or multiple pipes. |
|||
return s |
|||
end |
|||
else |
|||
return s:match("%[%[|(.*)%]%]") |
|||
end |
|||
end |
end |
||
local function delinkPipeTrick(s) |
local function delinkPipeTrick(s) |
||
-- We need to deal with colons, brackets, and commas, per [[Help:Pipe trick]]. |
|||
local linkarea, display = "", "" |
|||
-- First, remove the text before the first colon, if any. |
|||
-- We need to deal with colons, brackets, and commas, per [[Help:Pipe trick]]. |
|||
if s:match(":") then |
|||
s = s:match("%[%[.-:(.*)|%]%]") |
|||
-- First, remove the text before the first colon, if any. |
|||
-- If there are no colons, grab all of the text apart from the square brackets and the pipe. |
|||
if mw.ustring.match(s, ":") then |
|||
else |
|||
s = mw.ustring.match(s, "%[%[.-:(.*)|%]%]") |
|||
s = s:match("%[%[(.*)|%]%]") |
|||
-- If there are no colons, grab all of the text apart from the square brackets and the pipe. |
|||
end |
|||
else |
|||
s = mw.ustring.match(s, "%[%[(.*)|%]%]") |
|||
-- Next up, brackets and commas. |
|||
end |
|||
if s:match("%(.-%)$") then -- Brackets trump commas. |
|||
s = s:match("(.-) ?%(.-%)$") |
|||
-- Next up, brackets and commas. |
|||
elseif s:match(",") then -- If there are no brackets, display only the text before the first comma. |
|||
if mw.ustring.match(s, "%(.-%)$") then -- Brackets trump commas. |
|||
s = s:match("(.-),.*$") |
|||
end |
|||
elseif mw.ustring.match(s, ",") then -- If there are no brackets, display only the text before the first comma. |
|||
return s |
|||
s = mw.ustring.match(s, "(.-),.*$") |
|||
end |
|||
return s |
|||
end |
end |
||
-- Return wikilink target |wikilinks=target |
|||
local function delinkWikilink(s) |
|||
local function getDelinkedTarget(s) |
|||
-- Deal with the reverse pipe trick. |
|||
local result = s |
|||
if mw.ustring.match(s, "%[%[|") then |
|||
-- Deal with the reverse pipe trick. |
|||
return delinkReversePipeTrick(s) |
|||
if result:match("%[%[|") then |
|||
end |
|||
return delinkReversePipeTrick(result) |
|||
end |
|||
-- Check for bad titles. To do this we need to find the |
|||
-- title area of the link, i.e. the part before any pipes. |
|||
result = mw.uri.decode(result, "PATH") -- decode percent-encoded entities. Leave underscores and plus signs. |
|||
local titlearea |
|||
result = mw.text.decode(result, true) -- decode HTML entities. |
|||
if mw.ustring.match(s, "|") then -- Find if we're dealing with a piped link. |
|||
titlearea= mw.ustring.match(s, "^%[%[(.-)|.*%]%]") |
|||
-- Check for bad titles. To do this we need to find the |
|||
else |
|||
-- title area of the link, i.e. the part before any pipes. |
|||
titlearea = mw.ustring.match(s, "^%[%[(.-)%]%]") |
|||
local target_area |
|||
end |
|||
if result:match("|") then -- Find if we're dealing with a piped link. |
|||
target_area = result:match("^%[%[(.-)|.*%]%]") |
|||
if not mw.title.makeTitle("", titlearea) then |
|||
else |
|||
return s -- If it's not a valid link, return the whole string. |
|||
target_area = result:match("^%[%[(.-)%]%]") |
|||
end |
|||
end |
|||
local other_invalid_link_strings = { '�' } |
|||
for i,v in ipairs(other_invalid_link_strings) do |
|||
if mw.ustring.match(titlearea, v) then |
|||
return s |
|||
end |
|||
end |
|||
-- Check for categories, interwikis, and files. |
|||
local colonprefix = mw.ustring.match(s, "%[%[(.-):.*%]%]") or "" -- Get the text before the first colon. |
|||
if mw.language.isKnownLanguageTag(colonprefix) |
|||
or mw.ustring.match(colonprefix, "^[Cc]ategory$") |
|||
or mw.ustring.match(colonprefix, "^[Ff]ile$") |
|||
or mw.ustring.match(colonprefix, "^[Ii]mage$") then |
|||
return "" |
|||
end |
|||
-- Remove the colon if the link is using the [[Help:Colon trick]]. |
|||
if mw.ustring.match(s, "%[%[:") then |
|||
s = "[[" .. mw.ustring.match(s, "%[%[:(.*%]%])") |
|||
end |
|||
-- Deal with links using the [[Help:Pipe trick]]. |
|||
if mw.ustring.match(s, "^%[%[[^|]*|%]%]") then |
|||
return delinkPipeTrick(s) |
|||
end |
|||
-- Find the display area of the wikilink |
|||
local display |
|||
if mw.ustring.match(s, "|") then -- Find if we're dealing with a piped link. |
|||
display = mw.ustring.match(s, "^%[%[.-|(.+)%]%]") |
|||
else |
|||
display = mw.ustring.match(s, "^%[%[(.-)%]%]") |
|||
end |
|||
-- Check for bad characters. |
|||
return display |
|||
if mw.ustring.match(target_area, "[%[%]<>{}%%%c\n]") and mw.ustring.match(target_area, "[%[%]<>{}%%%c\n]") ~= "?" then |
|||
return s |
|||
end |
|||
return target_area |
|||
end |
end |
||
local function |
local function getDelinkedLabel(s) |
||
local result = s |
|||
-- Deal with the reverse pipe trick. |
|||
local result = "" |
|||
if result:match("%[%[|") then |
|||
return delinkReversePipeTrick(result) |
|||
-- Delink wikilinks. We need to search character by character rather |
|||
end |
|||
-- than just use gsub, otherwise nested links aren't detected properly. |
|||
while mw.ustring.len(text) > 0 do |
|||
result = mw.uri.decode(result, "PATH") -- decode percent-encoded entities. Leave underscores and plus signs. |
|||
text = mw.ustring.gsub(text, "^%[%[.-%]%]", delinkWikilink, 1) |
|||
result = mw.text.decode(result, true) -- decode HTML entities. |
|||
text = mw.ustring.sub(text, 2, -1) |
|||
-- Check for bad titles. To do this we need to find the |
|||
end |
|||
-- title area of the link, i.e. the part before any pipes. |
|||
local target_area |
|||
-- Reset the variables. |
|||
if result:match("|") then -- Find if we're dealing with a piped link. |
|||
text = result |
|||
target_area = result:match("^%[%[(.-)|.*%]%]") |
|||
else |
|||
target_area = result:match("^%[%[(.-)%]%]") |
|||
-- Delink URLs. |
|||
end |
|||
while mw.ustring.len(text) > 0 do |
|||
text = mw.ustring.gsub(text, "^%[.-%]", delinkURL, 1) |
|||
-- Check for bad characters. |
|||
result = result .. mw.ustring.sub(text, 1, 1) |
|||
if mw.ustring.match(target_area, "[%[%]<>{}%%%c\n]") and mw.ustring.match(target_area, "[%[%]<>{}%%%c\n]") ~= "?" then |
|||
text = mw.ustring.sub(text, 2, -1) |
|||
return s |
|||
end |
|||
end |
|||
return result |
|||
-- Check for categories, interwikis, and files. |
|||
local colon_prefix = result:match("%[%[(.-):.*%]%]") or "" -- Get the text before the first colon. |
|||
local ns = mw.site.namespaces[colon_prefix] -- see if this is a known namespace |
|||
if mw.language.isKnownLanguageTag(colon_prefix) or (ns and (ns.canonicalName == "File" or ns.canonicalName == "Category")) then |
|||
return "" |
|||
end |
|||
-- Remove the colon if the link is using the [[Help:Colon trick]]. |
|||
if result:match("%[%[:") then |
|||
result = "[[" .. result:match("%[%[:(.*%]%])") |
|||
end |
|||
-- Deal with links using the [[Help:Pipe trick]]. |
|||
if mw.ustring.match(result, "^%[%[[^|]*|%]%]") then |
|||
return delinkPipeTrick(result) |
|||
end |
|||
-- Find the display area of the wikilink |
|||
if result:match("|") then -- Find if we're dealing with a piped link. |
|||
result = result:match("^%[%[.-|(.+)%]%]") |
|||
-- Remove new lines from the display of multiline piped links, |
|||
-- where the pipe is before the first new line. |
|||
result = result:gsub("\n", "") |
|||
else |
|||
result = result:match("^%[%[(.-)%]%]") |
|||
end |
|||
return result |
|||
end |
|||
local function delinkURL(s) |
|||
-- Assume we have already delinked internal wikilinks, and that |
|||
-- we have been passed some text between two square brackets [foo]. |
|||
-- If the text contains a line break it is not formatted as a URL, regardless of other content. |
|||
if s:match("\n") then |
|||
return s |
|||
end |
|||
-- Check if the text has a valid URL prefix and at least one valid URL character. |
|||
local valid_url_prefixes = {"//", "https://", "https://", "ftp://", "gopher://", "mailto:", "news:", "irc://"} |
|||
local url_prefix |
|||
for _ ,v in ipairs(valid_url_prefixes) do |
|||
if mw.ustring.match(s, '^%[' .. v ..'[^"%s].*%]' ) then |
|||
url_prefix = v |
|||
break |
|||
end |
|||
end |
|||
-- Get display text |
|||
if not url_prefix then |
|||
return s |
|||
end |
|||
s = s:match("^%[" .. url_prefix .. "(.*)%]") -- Grab all of the text after the URL prefix and before the final square bracket. |
|||
s = s:match('^.-(["<> ].*)') or "" -- Grab all of the text after the first URL separator character ("<> ). |
|||
s = mw.ustring.match(s, "^%s*(%S.*)$") or "" -- If the separating character was a space, trim it off. |
|||
local s_decoded = mw.text.decode(s, true) |
|||
if mw.ustring.match(s_decoded, "%c") then |
|||
return s |
|||
end |
|||
return s_decoded |
|||
end |
|||
local function delinkLinkClass(text, pattern, delinkFunction) |
|||
if type(text) ~= "string" then |
|||
error("Attempt to de-link non-string input.", 2) |
|||
end |
|||
if type(pattern) ~= "string" or mw.ustring.sub(pattern, 1, 1) ~= "^" then |
|||
error('Invalid pattern detected. Patterns must begin with "^".', 2) |
|||
end |
|||
-- Iterate over the text string, and replace any matched text. using the |
|||
-- delink function. We need to iterate character by character rather |
|||
-- than just use gsub, otherwise nested links aren't detected properly. |
|||
local result = "" |
|||
while text ~= "" do |
|||
-- Replace text using one iteration of gsub. |
|||
text = mw.ustring.gsub(text, pattern, delinkFunction, 1) |
|||
-- Append the left-most character to the result string. |
|||
result = result .. mw.ustring.sub(text, 1, 1) |
|||
text = mw.ustring.sub(text, 2, -1) |
|||
end |
|||
return result |
|||
end |
|||
function p._delink(args) |
|||
local text = args[1] or "" |
|||
if args.refs == "yes" then |
|||
-- Remove any [[Help:Strip markers]] representing ref tags. In most situations |
|||
-- this is not a good idea - only use it if you know what you are doing! |
|||
text = mw.ustring.gsub(text, "UNIQ%w*%-ref%-%d*%-QINU", "") |
|||
end |
|||
if args.comments ~= "no" then |
|||
text = text:gsub("<!%-%-.-%-%->", "") -- Remove html comments. |
|||
end |
|||
if args.wikilinks ~= "no" and args.wikilinks ~= "target" then |
|||
-- De-link wikilinks and return the label portion of the wikilink. |
|||
text = delinkLinkClass(text, "^%[%[.-%]%]", getDelinkedLabel) |
|||
elseif args.wikilinks == "target" then |
|||
-- De-link wikilinks and return the target portions of the wikilink. |
|||
text = delinkLinkClass(text, "^%[%[.-%]%]", getDelinkedTarget) |
|||
end |
|||
if args.urls ~= "no" then |
|||
text = delinkLinkClass(text, "^%[.-%]", delinkURL) -- De-link URLs. |
|||
end |
|||
if args.whitespace ~= "no" then |
|||
-- Replace single new lines with a single space, but leave double new lines |
|||
-- and new lines only containing spaces or tabs before a second new line. |
|||
text = mw.ustring.gsub(text, "([^\n \t][ \t]*)\n([ \t]*[^\n \t])", "%1 %2") |
|||
text = text:gsub("[ \t]+", " ") -- Remove extra tabs and spaces. |
|||
end |
|||
return text |
|||
end |
end |
||
function p.delink(frame) |
function p.delink(frame) |
||
if not getArgs then |
|||
local args |
|||
getArgs = require('Module:Arguments').getArgs |
|||
if frame == mw.getCurrentFrame() then |
|||
end |
|||
-- We're being called via #invoke. If the invoking template passed any args, use |
|||
return p._delink(getArgs(frame, {wrappers = 'Template:Delink'})) |
|||
-- them. Otherwise, use the args that were passed into the template. |
|||
args = frame:getParent().args |
|||
for k, v in pairs(frame.args) do |
|||
args = frame.args |
|||
break |
|||
end |
|||
else |
|||
-- We're being called from another module or from the debug console, so assume |
|||
-- the args are passed in directly. |
|||
args = frame |
|||
end |
|||
return _delink(args) |
|||
end |
end |
||
Latest revision as of 04:47, 17 February 2024
This module is subject to page protection. It is a highly visible module in use by a very large number of pages, or is substituted very frequently. Because vandalism or mistakes would affect many pages, and even trivial editing might cause substantial load on the servers, it is protected from editing. |
This Lua module is used in system messages, and on approximately 3,250,000 pages, or roughly 5% of all pages. Changes to it can cause immediate changes to the Wikipedia user interface. To avoid major disruption and server load, any changes should be tested in the module's /sandbox or /testcases subpages, or in your own module sandbox. The tested changes can be added to this page in a single edit. Please discuss changes on the talk page before implementing them. |
This module implements the {{delink}} template. Please see the template page for documentation.
-- This module de-links most wikitext.
require("strict")
local p = {}
local getArgs
-- Resolve a link written with the reverse pipe trick, e.g. "[[|label]]".
--
-- @param s  The matched link text, including the surrounding square brackets.
-- @return   The label between "[[|" and the closing "]]". `s` is returned
--           unchanged when the link contains a second pipe or a line break,
--           or when "[[|" is not at the very start of `s`.
local function delinkReversePipeTrick(s)
	-- A further pipe or a newline after the leading "[[|" disqualifies the link.
	if s:match("^%[%[|.*[|\n]") then
		return s
	end
	-- The capture is anchored just like the guard above. Callers only check
	-- that "[[|" occurs somewhere in `s`; without the anchor, input such as
	-- "[[a[[|b|c]]" would skip the guard and lose everything before the
	-- inner "[[|". Returning `s` lets the caller's character-by-character
	-- scan reach the inner link on a later iteration instead.
	return s:match("^%[%[|(.*)%]%]") or s
end
-- Compute the display text of a pipe-trick link such as "[[Help:Foo (bar)|]]",
-- following the colon, bracket and comma rules of [[Help:Pipe trick]].
--
-- @param s  The link text, in the form "[[target|]]".
-- @return   The target with any prefix up to the first colon removed and then
--           either a trailing parenthetical or everything from the first
--           comma onwards removed.
local function delinkPipeTrick(s)
	-- Step 1: strip the brackets and pipe, plus the text before the first
	-- colon when the link contains one.
	local title
	if s:find(":", 1, true) then
		title = s:match("%[%[.-:(.*)|%]%]")
	else
		title = s:match("%[%[(.*)|%]%]")
	end

	-- Step 2: a trailing "(...)" takes precedence over commas.
	if title:find("%(.-%)$") then
		return (title:match("(.-) ?%(.-%)$"))
	end

	-- Step 3: without a trailing parenthetical, keep only the text before
	-- the first comma.
	if title:find(",", 1, true) then
		return (title:match("(.-),.*$"))
	end

	return title
end
-- Return the target of a wikilink. Used when |wikilinks=target.
--
-- @param s  A single wikilink including its square brackets, as matched by
--           "^%[%[.-%]%]".
-- @return   For reverse-pipe-trick links, whatever delinkReversePipeTrick
--           returns. Otherwise the text before the first pipe (or the whole
--           link body when there is no pipe), taken after percent- and
--           HTML-entity decoding. `s` is returned unchanged when that target
--           contains a character that is rejected as part of a title.
local function getDelinkedTarget(s)
	-- Deal with the reverse pipe trick.
	if s:match("%[%[|") then
		return delinkReversePipeTrick(s)
	end

	local result = mw.uri.decode(s, "PATH") -- decode percent-encoded entities. Leave underscores and plus signs.
	result = mw.text.decode(result, true) -- decode HTML entities.

	-- Check for bad titles. To do this we need to find the
	-- title area of the link, i.e. the part before any pipes.
	local target_area
	if result:match("|") then -- Find if we're dealing with a piped link.
		target_area = result:match("^%[%[(.-)|.*%]%]")
	else
		target_area = result:match("^%[%[(.-)%]%]")
	end

	-- Check for bad characters. One match is enough: the character class
	-- cannot yield "?", so a second match compared against "?" adds nothing.
	if mw.ustring.match(target_area, "[%[%]<>{}%%%c\n]") then
		return s
	end
	return target_area
end
-- Return the label (display text) of a wikilink. This is the default
-- wikilink handling.
--
-- @param s  A single wikilink including its square brackets, as matched by
--           "^%[%[.-%]%]".
-- @return   One of:
--           * the result of delinkReversePipeTrick for "[[|..." links;
--           * `s` unchanged when the target contains a character that is
--             rejected as part of a title;
--           * "" for links whose prefix is a known language tag or resolves
--             to the File or Category namespace;
--           * the result of delinkPipeTrick for "[[target|]]" links;
--           * otherwise the text after the first pipe with newlines removed,
--             or the whole link body when there is no pipe.
local function getDelinkedLabel(s)
	local result = s

	-- Deal with the reverse pipe trick.
	if result:match("%[%[|") then
		return delinkReversePipeTrick(result)
	end

	result = mw.uri.decode(result, "PATH") -- decode percent-encoded entities. Leave underscores and plus signs.
	result = mw.text.decode(result, true) -- decode HTML entities.

	-- Check for bad titles. To do this we need to find the
	-- title area of the link, i.e. the part before any pipes.
	local target_area
	if result:match("|") then -- Find if we're dealing with a piped link.
		target_area = result:match("^%[%[(.-)|.*%]%]")
	else
		target_area = result:match("^%[%[(.-)%]%]")
	end

	-- Check for bad characters. One match is enough: the character class
	-- cannot yield "?", so a second match compared against "?" adds nothing.
	if mw.ustring.match(target_area, "[%[%]<>{}%%%c\n]") then
		return s
	end

	-- Check for categories, interwikis, and files.
	local colon_prefix = result:match("%[%[(.-):.*%]%]") or "" -- Get the text before the first colon.
	local ns = mw.site.namespaces[colon_prefix] -- see if this is a known namespace
	if mw.language.isKnownLanguageTag(colon_prefix)
		or (ns and (ns.canonicalName == "File" or ns.canonicalName == "Category"))
	then
		return ""
	end

	-- Remove the colon if the link is using the [[Help:Colon trick]].
	if result:match("%[%[:") then
		result = "[[" .. result:match("%[%[:(.*%]%])")
	end

	-- Deal with links using the [[Help:Pipe trick]].
	if mw.ustring.match(result, "^%[%[[^|]*|%]%]") then
		return delinkPipeTrick(result)
	end

	-- Find the display area of the wikilink.
	if result:match("|") then -- Find if we're dealing with a piped link.
		result = result:match("^%[%[.-|(.+)%]%]")
		-- Remove new lines from the display of multiline piped links,
		-- where the pipe is before the first new line.
		-- (Assigning to `result` discards gsub's replacement count.)
		result = result:gsub("\n", "")
	else
		result = result:match("^%[%[(.-)%]%]")
	end

	return result
end
-- Reduce an external link "[url label]" to its label.
--
-- Internal wikilinks are assumed to have been de-linked already, so `s` is
-- some text between a single pair of square brackets, e.g. "[foo]".
--
-- @param s  The bracketed text, brackets included.
-- @return   `s` unchanged when it contains a line break or does not start
--           with a recognised URL prefix followed by a valid URL character.
--           Otherwise the text after the URL with leading whitespace trimmed
--           ("" when there is none). The HTML-entity-decoded form is
--           returned, unless decoding yields a control character, in which
--           case the undecoded label is returned.
local function delinkURL(s)
	-- A span containing a line break is never formatted as a URL.
	if s:find("\n", 1, true) then
		return s
	end

	-- Find the first prefix that is followed by at least one valid URL character.
	local valid_url_prefixes = {"//", "http://", "https://", "ftp://", "gopher://", "mailto:", "news:", "irc://"}
	local url_prefix
	for i = 1, #valid_url_prefixes do
		local candidate = valid_url_prefixes[i]
		if mw.ustring.match(s, '^%[' .. candidate ..'[^"%s].*%]' ) then
			url_prefix = candidate
			break
		end
	end
	if url_prefix == nil then
		return s
	end

	-- Everything after the URL prefix and before the final square bracket.
	local after_prefix = s:match("^%[" .. url_prefix .. "(.*)%]")
	-- Everything from the first URL separator character ("<> ) onwards.
	local label = after_prefix:match('^.-(["<> ].*)') or ""
	-- Drop leading whitespace left over from the separator.
	label = mw.ustring.match(label, "^%s*(%S.*)$") or ""

	local decoded = mw.text.decode(label, true)
	if mw.ustring.match(decoded, "%c") then
		return label
	end
	return decoded
end
-- Apply `delinkFunction` to every occurrence of `pattern` in `text`.
--
-- @param text            The string to process.
-- @param pattern         A pattern that must begin with "^"; it is tried at
--                        the start of the remaining text on every step.
-- @param delinkFunction  Replacement function handed to mw.ustring.gsub.
-- @return                The processed string.
-- Raises an error (reported at the caller's position) when `text` is not a
-- string, or when `pattern` is not a string beginning with "^".
local function delinkLinkClass(text, pattern, delinkFunction)
	if type(text) ~= "string" then
		error("Attempt to de-link non-string input.", 2)
	end
	if type(pattern) ~= "string" or mw.ustring.sub(pattern, 1, 1) ~= "^" then
		error('Invalid pattern detected. Patterns must begin with "^".', 2)
	end

	-- Iterate over the text string, and replace any matched text using the
	-- delink function. We need to iterate character by character rather
	-- than just use gsub, otherwise nested links aren't detected properly.
	--
	-- Consumed characters are collected in a table and joined once at the
	-- end, rather than re-copying a growing result string on every step.
	local pieces = {}
	while text ~= "" do
		-- Replace text using one iteration of gsub.
		text = mw.ustring.gsub(text, pattern, delinkFunction, 1)
		-- Move the left-most character over to the output.
		pieces[#pieces + 1] = mw.ustring.sub(text, 1, 1)
		text = mw.ustring.sub(text, 2, -1)
	end
	return table.concat(pieces)
end
-- De-link wikitext according to the options in `args`.
--
-- @param args  Table of arguments:
--   [1]         the wikitext to process (defaults to "");
--   refs        "yes" to remove strip markers that stand for ref tags;
--   comments    anything but "no" removes HTML comments;
--   wikilinks   "no" leaves wikilinks alone, "target" replaces each wikilink
--               with its target, anything else replaces it with its label;
--   urls        anything but "no" de-links external links;
--   whitespace  anything but "no" joins single line breaks and collapses
--               runs of spaces and tabs.
-- @return      The processed text.
function p._delink(args)
	local text = args[1] or ""

	if args.refs == "yes" then
		-- Remove any [[Help:Strip markers]] representing ref tags. In most situations
		-- this is not a good idea - only use it if you know what you are doing!
		--
		-- Current markers look like \127'"`UNIQ--ref-0000000A-QINU`"'\127: two
		-- hyphens after UNIQ and a hexadecimal counter. The legacy pattern
		-- below cannot match that form, so the whole marker is removed here,
		-- delimiters included.
		text = text:gsub("\127'\"`UNIQ%-%-ref%-%x+%-QINU`\"'\127", "")
		-- Legacy marker form, kept for backward compatibility.
		text = mw.ustring.gsub(text, "UNIQ%w*%-ref%-%d*%-QINU", "")
	end

	if args.comments ~= "no" then
		text = text:gsub("<!%-%-.-%-%->", "") -- Remove html comments.
	end

	if args.wikilinks == "target" then
		-- De-link wikilinks and return the target portions of the wikilink.
		text = delinkLinkClass(text, "^%[%[.-%]%]", getDelinkedTarget)
	elseif args.wikilinks ~= "no" then
		-- De-link wikilinks and return the label portion of the wikilink.
		text = delinkLinkClass(text, "^%[%[.-%]%]", getDelinkedLabel)
	end

	if args.urls ~= "no" then
		text = delinkLinkClass(text, "^%[.-%]", delinkURL) -- De-link URLs.
	end

	if args.whitespace ~= "no" then
		-- Replace single new lines with a single space, but leave double new lines
		-- and new lines only containing spaces or tabs before a second new line.
		text = mw.ustring.gsub(text, "([^\n \t][ \t]*)\n([ \t]*[^\n \t])", "%1 %2")
		text = text:gsub("[ \t]+", " ") -- Remove extra tabs and spaces.
	end

	return text
end
-- Entry point that takes a frame: collects the arguments via
-- Module:Arguments (treating Template:Delink as a wrapper template) and
-- passes them on to p._delink.
--
-- @param frame  The frame object to read arguments from.
-- @return       Whatever p._delink returns for those arguments.
function p.delink(frame)
	-- Load Module:Arguments on first use only; later calls reuse the cached function.
	getArgs = getArgs or require('Module:Arguments').getArgs
	local args = getArgs(frame, {wrappers = 'Template:Delink'})
	return p._delink(args)
end
return p