Jump to content

Module:Delink

Permanently protected module
From Wikipedia, the free encyclopedia

This is an old revision of this page, as edited by Mr. Stradivarius (talk | contribs) at 23:16, 3 April 2013 (better whitespace matching for urls). The present address (URL) is a permanent link to this revision, which may differ significantly from the current revision.

-- This module de-links most wikitext.

p = {}

local function delinkURL(s)
    -- Assume we have already delinked internal wikilinks, and that
    -- we have been passed some text between two square brackets [foo].
    
    -- Check if the text has a valid URL prefix and at least one valid URL character.
    local valid_url_prefixes = {"//", "http://", "https://", "ftp://", "gopher://", "mailto:", "news:", "irc://"} 
    local url_prefix
    for i,v in ipairs(valid_url_prefixes) do
        if mw.ustring.match(s, '^%[' .. v ..'[^" ].*%]' ) then
            url_prefix = v
            break
        end
    end
    
    -- Get display text
    if not url_prefix then
        return s
    else
        s = mw.ustring.match(s, "^%[" .. url_prefix .. "(.*)%]") -- Grab all of the text after the URL prefix and before the final square bracket.
        s = mw.ustring.match(s, '^.-(["<>%s].*)') or "" -- Grab all of the text after the first URL separator character.
        s = mw.ustring.match(s, "^%s*(.-)") -- Remove any whitespace from the start of the display text.
        return s
    end
end

local function delinkReversePipeTrick(s)
    if mw.ustring.match(s, "^%[%[|.*|") then -- Check for multiple pipes.
        return s
    else
        return mw.ustring.match(s, "%[%[|(.*)%]%]")
    end
end

local function delinkPipeTrick(s)
    local linkarea, display = "", ""
    -- We need to deal with colons, brackets, and commas, per [[Help:Pipe trick]].
    
    -- First, remove the text before the first colon, if any.
    if mw.ustring.match(s, ":") then
        s = mw.ustring.match(s, "%[%[.-:(.*)|%]%]")
    -- If there are no colons, grab all of the text apart from the square brackets and the pipe.
    else
        s = mw.ustring.match(s, "%[%[(.*)|%]%]")
    end
    
    -- Next up, brackets and commas.
    if mw.ustring.match(s, "%(.-%)$") then -- Brackets trump commas.
        s = mw.ustring.match(s, "(.-) ?%(.-%)$")
    elseif mw.ustring.match(s, ",") then -- If there are no brackets, display only the text before the first comma.
        s = mw.ustring.match(s, "(.-),.*$")
    end
    return s
end

local function delinkWikilink(s)
    -- Deal with the reverse pipe trick.
    if mw.ustring.match(s, "%[%[|") then
        return delinkReversePipeTrick(s)
    end
    
    -- Check for bad titles. To do this we need to find the
    -- title area of the link, i.e. the part before any pipes.
    local titlearea
    if mw.ustring.match(s, "|") then -- Find if we're dealing with a piped link.
        titlearea= mw.ustring.match(s, "^%[%[(.-)|.*%]%]")
    else
        titlearea = mw.ustring.match(s, "^%[%[(.-)%]%]")
    end
    -- Clear the title of the fragment (the section link).
    if not mw.title.makeTitle("", titlearea) then
        return s -- If it's not a valid link, return the whole string.
    end
    local other_invalid_link_strings = { '&#x0000;' }
    for i,v in ipairs(other_invalid_link_strings) do
        if mw.ustring.match(titlearea, v) then
            return s
        end
    end
    
    -- Check for categories, interwikis, and files.
    local colonprefix = mw.ustring.match(s, "%[%[(.-):.*%]%]") or "" -- Get the text before the first colon.
    if mw.language.isKnownLanguageTag(colonprefix)
    or mw.ustring.match(colonprefix, "^[Cc]ategory$")
    or mw.ustring.match(colonprefix, "^[Ff]ile$")
    or mw.ustring.match(colonprefix, "^[Ii]mage$") then
        return ""
    end
    
    -- Remove the colon if the link is using the [[Help:Colon trick]].
    if mw.ustring.match(s, "%[%[:") then
        s = "[[" .. mw.ustring.match(s, "%[%[:(.*%]%])")
    end
    
    -- Deal with links using the [[Help:Pipe trick]].
    if mw.ustring.match(s, "^%[%[[^|]*|%]%]") then
        return delinkPipeTrick(s)
    end
    
    -- Find the display area of the wikilink
    local display
    if mw.ustring.match(s, "|") then -- Find if we're dealing with a piped link.
        display = mw.ustring.match(s, "^%[%[.-|(.+)%]%]")
    else
        display = mw.ustring.match(s, "^%[%[(.-)%]%]")
    end

    return display
end

local function _delink(args)
    local text = args[1] or ""
    local result = ""
    
    -- Delink wikilinks. We need to search character by character rather 
    -- than just use gsub, otherwise nested links aren't detected properly.
    while mw.ustring.len(text) > 0 do
        text = mw.ustring.gsub(text, "^%[%[.-%]%]", delinkWikilink, 1)
        result = result .. mw.ustring.sub(text, 1, 1)
        text = mw.ustring.sub(text, 2, -1)
    end
    
    -- Reset the variables.
    text = result
    result = ""
    
    -- Delink URLs.
    while mw.ustring.len(text) > 0 do
        text = mw.ustring.gsub(text, "^%[.-%]", delinkURL, 1)
        result = result .. mw.ustring.sub(text, 1, 1)
        text = mw.ustring.sub(text, 2, -1)
    end
    return result
end

function p.delink(frame)
    local args
    if frame == mw.getCurrentFrame() then
        -- We're being called via #invoke. If the invoking template passed any args, use
        -- them. Otherwise, use the args that were passed into the template.
        args = frame:getParent().args
        for k, v in pairs(frame.args) do
            args = frame.args
            break
        end
    else
        -- We're being called from another module or from the debug console, so assume
        -- the args are passed in directly.
        args = frame
    end
 
    return _delink(args)
end

return p