Module:UrlToWiki

Ka Wikipedia

Documentation for this module may be created at Module:UrlToWiki/doc

-- This module takes a URL from a Wikimedia project and returns the equivalent wikitext. 
-- Any actions such as edit, history, etc., are stripped, and percent-encoded characters 
-- are converted to normal text.

-- Table of functions exported by this module. Declared local so that loading the
-- module does not create (or overwrite) a global named "p".
local p = {}

-- Interwiki data loaded from Module:InterwikiTable. The code below reads the fields
-- `domain`, `iw_prefix`, `takes_lang_prefix` and `title_prefix` of its entries.
local interwiki_table = mw.loadData("Module:InterwikiTable")

-- Find the interwiki table entry that corresponds to a host name.
--
-- host: the host name to look up; anything other than a string raises an error.
-- Returns the key of the first entry visited by pairs() whose `domain` field,
-- used as a ustring pattern, matches the host. Returns nil when no entry matches.
local function getHostId(host)
    if type(host) ~= "string" then
        error("Non-string value for host name detected.")
    end
    for site_id, site in pairs(interwiki_table) do
        -- `domain` is handed to mw.ustring.match as the pattern argument.
        if mw.ustring.match(host, site.domain) then
            return site_id
        end
    end
    return nil
end

-- Work out which interwiki prefix is needed to link to `host` from the current page.
--
-- host: host name of the URL. It is lower-cased with the content language before
--       being matched. Anything other than a string raises an error.
--
-- Returns three values:
--   1. the interwiki prefix: the project code, the language code, or
--      "project:lang"; nil or false when no prefix applies;
--   2. the language code found at the start of the host name, or false/nil when
--      there is none or it is not needed;
--   3. the key of the matching interwiki table entry.
-- Returns nothing at all when the host is not found in the interwiki table.
local function getInterwiki(host)
    if type(host) ~= "string" then
        error("Non-string value for host name detected.")
    end
    host = mw.language.getContentLanguage():lc(host)
    local host_id = getHostId(host)
    if not host_id then return end -- Unknown host: language and interwiki are unknown too.

    local site = interwiki_table[host_id]
    local first_label = "^(.-)%." -- Captures the text before the first period.

    -- Find the language in the host name, if applicable.
    local lang = mw.ustring.match(host, first_label)
    if not lang or not mw.language.isSupportedLanguage(lang) then
        lang = false
    end
    -- A language prefix is not necessary if the domain stored in the interwiki table
    -- already starts with a language code. The capture is nil when the stored domain
    -- contains no period, so check for that before asking whether it is a language
    -- (the host-name check above is guarded the same way).
    local domain_lang = mw.ustring.match(site.domain, first_label)
    if domain_lang and mw.language.isSupportedLanguage(domain_lang) then
        lang = false
    end

    -- No need for an interwiki link if we are on the same site as the URL.
    local current_host = mw.uri.new(mw.title.getCurrentTitle():fullUrl()).host
    if host == current_host then
        return nil, lang, host_id
    end

    -- Is the URL language the same as the language label of the current host?
    local same_lang = lang and lang == mw.ustring.match(current_host, first_label)

    -- Is the URL on the same project as the current page (but a different language)?
    local same_project = getHostId(current_host) == host_id

    -- Return the interwiki prefix, omitting the language or the project code if
    -- it is not necessary.
    local project = site.iw_prefix[1]
    if same_lang or ( not lang and site.takes_lang_prefix == false ) then
        return project, lang, host_id
    elseif same_project then
        return lang, lang, host_id
    elseif not lang then -- The language code is bad but the rest of the host name is ok.
        return nil, nil, host_id
    else
        return project .. ":" .. lang, lang, host_id
    end
end

-- Report whether `wikitext` starts with a prefix that needs the [[Help:Colon trick]]:
-- a supported language code (interwiki link), or the name or an alias of
-- namespace 6 (files) or namespace 14 (categories). Comparison is done on
-- lower-cased text using the content language.
local function needsColonTrick(wikitext)
    local colon_prefix = mw.ustring.match(wikitext, "^(.-):.*$") or "" -- Text before the first colon.
    if mw.language.isSupportedLanguage(colon_prefix) then
        return true
    end
    local content_lang = mw.language.getContentLanguage()
    local prefix_lc = content_lang:lc(colon_prefix)
    local namespaces = mw.site.namespaces
    for _, ns_id in ipairs({6, 14}) do
        local ns = namespaces[ns_id]
        if content_lang:lc(ns.name) == prefix_lc then
            return true
        end
        for _, alias in ipairs(ns.aliases) do
            if content_lang:lc(alias) == prefix_lc then
                return true
            end
        end
    end
    return false
end

-- Convert a URL into wikitext.
--
-- args[1]          the URL (required; an error is raised when it is missing).
-- args[2]          optional display text for a piped link; must be a string.
-- args.link        "no" returns the bare text instead of a [[...]] link.
-- args.section     "no" leaves the URL fragment off an extracted page title.
-- args.colontrick  "no" disables the leading colon for file, category and
--                  language-prefixed links.
--
-- Returns the wikitext string. When no page title can be extracted (unrecognised
-- host, a bugzilla URL without an id, or text without a host), the URL is
-- returned as it was parsed.
local function _urlToWiki(args)
    local url = args[1] or error("No URL specified")
    url = mw.text.trim(url) -- Trim whitespace.
    url = mw.uri.new(url)
    local host = url.host -- Get the host name.
    -- Fall back to empty values so that a URL without a path or without a query
    -- string cannot cause an index/type error further down.
    local path = url.path or ""
    local query = type(url.query) == "table" and url.query or {}

    -- Get the interwiki prefix. Lang and host_id are passed through so we don't
    -- have to work them out again.
    local interwiki, lang, host_id
    if host then
        interwiki, lang, host_id = getInterwiki(host)
    end
    -- Whether the resulting wikitext will be linked. Default is yes; users may override.
    local link = args.link ~= "no"

    -- The title prefix is only usable when the site's language requirement is met.
    local site = host_id and interwiki_table[host_id]
    local title_prefix
    if site and not ( site.takes_lang_prefix == true and not lang ) then
        title_prefix = site.title_prefix
    end

    -- Get the page title.
    local pagetitle
    local is_full_url = false -- True when pagetitle is the whole URL rather than a title.
    if title_prefix and mw.ustring.sub(path, 1, mw.ustring.len(title_prefix)) == title_prefix then
        -- The URL path starts with the title prefix from the interwiki table.
        pagetitle = mw.ustring.sub(path, mw.ustring.len(title_prefix) + 1, -1)
    elseif host_id and mw.ustring.match(path, "index%.php") and query.title then
        -- An "index.php" URL on a recognised site: the title is in the query string.
        pagetitle = query.title
    elseif host_id == "bugzilla" and query.id then
        -- Special case for Bugzilla.
        pagetitle = query.id
    else
        -- Nothing to extract: hand back the URL itself.
        pagetitle = tostring(url)
        is_full_url = true
        if host_id == "bugzilla" then
            interwiki = false -- Disable the interwiki prefix as we are returning a full URL.
            link = false -- Don't use double square brackets for URLs.
        elseif host and not host_id then
            link = false -- A valid URL but not a recognised interwiki: don't link it.
        end
        -- Otherwise (no host) this is whatever text we were passed, and it stays linked.
    end

    -- Get the fragment and pre-process percent-encoded characters.
    local fragment = url.fragment
    if fragment then
        fragment = mw.ustring.gsub(fragment, "%.([0-9A-F][0-9A-F])", "%%%1")
    end

    -- Assemble the wikilink.
    local wikitext = pagetitle
    if interwiki then
        wikitext = interwiki .. ":" .. wikitext
    end
    -- tostring(url) already carries the fragment, so appending it again would
    -- duplicate it ("page#sec#sec"); only add it to extracted page titles.
    if fragment and not is_full_url and args.section ~= "no" then
        wikitext = wikitext .. "#" .. fragment
    end

    -- Decode percent-encoded characters and convert underscores to spaces.
    wikitext = mw.uri.decode(wikitext, "WIKI")
    -- If the wikitext is to be linked, re-encode illegal characters. Don't re-encode
    -- characters from invalid URLs to make the default [[{{{1}}}]] display correctly.
    if link and host then
        wikitext = mw.ustring.gsub(wikitext, "[<>%[%]|{}%c\n]", mw.uri.encode)
    end

    -- Use the [[Help:Colon trick]] with categories, interwikis, and files, unless the
    -- user says not to or we are not linking (due to [[bugzilla:12974]]).
    if link and args.colontrick ~= "no" and needsColonTrick(wikitext) then
        wikitext = ":" .. wikitext
    end

    -- Add the link.
    if link then
        local display = args[2] -- The display text in piped links.
        if display then
            if type(display) ~= "string" then
                error("Non-string display value detected")
            end
            display = mw.text.trim(display) -- Trim whitespace.
            wikitext = wikitext .. "|" .. display
        end
        wikitext = "[[" .. wikitext .. "]]"
    end

    return wikitext
end

-- Entry point of the module.
--
-- frame: either the frame object supplied by #invoke, or a plain table of
--        arguments when called from another module or the debug console.
-- Returns the wikitext produced by _urlToWiki for the cleaned-up arguments.
-- Raises an error when the arguments are not a table, or when no URL is given.
function p.urlToWiki(frame)
    local origArgs
    if frame == mw.getCurrentFrame() then
        -- We're being called via #invoke. If the invoking template passed any args, use
        -- them. Otherwise, use the args that were passed into the template.
        origArgs = frame:getParent().args
        -- The Scribunto manual states that next() and # do not work on frame.args,
        -- so a pairs loop is used to find out whether it has any entry.
        for _ in pairs(frame.args) do
            origArgs = frame.args
            break
        end
    else
        -- We're being called from another module or from the debug console, so assume
        -- the args are passed in directly.
        origArgs = frame
    end
    if type(origArgs) ~= "table" then
        error("urlToWiki expects a frame or a table of arguments", 2)
    end

    -- ParserFunctions considers the empty string to be false, so to preserve the previous
    -- behavior of the template, drop any blank string arguments, so Lua will consider
    -- them false too. Values that are not strings (possible when another module calls
    -- us directly) cannot be blank, so they are passed through untouched instead of
    -- being fed to the pattern matcher.
    local args = {}
    for k, v in pairs(origArgs) do
        if type(v) ~= "string" or mw.ustring.match(v, '%S') then
            args[k] = v
        end
    end
    return _urlToWiki(args)
end

-- Hand the table of exported functions (p.urlToWiki) back to whoever loaded this module.
return p