Module:String utilities

local module_name = "string_utilities"
local export = {}

local rfind = mw.ustring.find

local format_escapes = {
    ["op"] = "{",
    ["cl"] = "}",
}

function export.format(str, tbl)
    return (string.gsub(str, "{(\\?)((\\?)[^{}]*)}", function (p1, name, p2)
        if #p1 + #p2 == 1 then
            return format_escapes[name] or error(module_name .. ".format: unrecognized escape sequence '{\\" .. name .. "}'")
        else
            return tbl[name] or error(module_name .. ".format: '" .. name .. "' not found in table")
        end
    end))
end

-- Reimplementation of mw.ustring.split() that includes any capturing
-- groups in the splitting pattern. This works like Python's re.split()
-- function, except that it has Lua's behavior when the split pattern
-- is empty (i.e. advancing by one character at a time; Python returns the
-- whole remainder of the string).
function export.capturing_split(str, pattern)
    local ret = {}
    -- (.-) corresponds to (.*?) in Python or Perl; () captures the
    -- current position after matching.
    pattern = "(.-)" .. pattern .. "()"
    local start = 1
    while true do
        -- Did we reach the end of the string?
        if start > #str then
            table.insert(ret, "")
            return ret
        end
        -- match() returns all captures as multiple return values;
        -- we need to insert into a table to get them all.
        local captures = {mw.ustring.match(str, pattern, start)}
        -- If no match, add the remainder of the string.
        if #captures == 0 then
            table.insert(ret, mw.ustring.sub(str, start))
            return ret
        end
        local newstart = table.remove(captures)
        -- Special case: If we don't advance by any characters, then advance
        -- by one character; this avoids an infinite loop, and makes splitting
        -- by an empty string work the way mw.ustring.split() does. If we
        -- reach the end of the string this way, return immediately, so we
        -- don't get a final empty string.
        if newstart == start then
            table.insert(ret, mw.ustring.sub(str, start, start))
            table.remove(captures, 1)
            start = start + 1
            if start > #str then
            	return ret
            end
        else
            table.insert(ret, table.remove(captures, 1))
            start = newstart
        end
        -- Insert any captures from the splitting pattern.
        for _, x in ipairs(captures) do
            table.insert(ret, x)
        end
    end
end

local function uclcfirst(text, dolower)
	local function douclcfirst(text)
		-- Actual function to re-case of the first letter.
		local first_letter = mw.ustring.sub(text, 1, 1)
		first_letter = dolower and mw.ustring.lower(first_letter) or mw.ustring.upper(first_letter)
		return first_letter .. mw.ustring.sub(text, 2)
	end
	-- If there's a link at the beginning, re-case the first letter of the
	-- link text. This pattern matches both piped and unpiped links.
	-- If the link is not piped, the second capture (linktext) will be empty.
	local link, linktext, remainder = mw.ustring.match(text, "^%[%[([^|%]]+)%|?(.-)%]%](.*)$")
	if link then
		return "[[" .. link .. "|" .. douclcfirst(linktext ~= "" and linktext or link) .. "]]" .. remainder
	end
	return douclcfirst(text)
end

function export.ucfirst(text)
	return uclcfirst(text, false)
end

function export.lcfirst(text)
	return uclcfirst(text, true)
end

function export.add_indefinite_article(text, uppercase)
	local is_vowel = false
	-- If there's a link at the beginning, examine the first letter of the
	-- link text. This pattern matches both piped and unpiped links.
	-- If the link is not piped, the second capture (linktext) will be empty.
	local link, linktext, remainder = mw.ustring.match(text, "^%[%[([^|%]]+)%|?(.-)%]%](.*)$")
	if link then
		is_vowel = rfind(linktext ~= "" and linktext or link, "^[AEIOUaeiou]")
	else
		is_vowel = rfind(text, "^[AEIOUaeiou]")
	end
	return (is_vowel and (uppercase and "An " or "an ") or (uppercase and "A " or "a ")) .. text
end

return export

Content Disclaimer

Informasi ini disarikan dari Wikipedia dan disajikan kembali untuk tujuan edukasi. Konten tersedia di bawah lisensi CC BY-SA 3.0. Kami tidak bertanggung jawab atas ketidakakuratan data yang bersumber dari kontribusi publik tersebut.

  1. The information displayed on this website is sourced in part or in whole from Wikipedia and has been adapted for the purpose of restating it. We strive to provide accurate and relevant information, however:
  2. There is no guarantee of absolute accuracy. Wikipedia is an open, collaborative project that can be edited by anyone, so information is subject to change.
  3. It is not intended to constitute professional advice. The content displayed is for informational and educational purposes only. For important decisions (e.g., medical, legal, or financial), please consult a professional.
  4. Content copyright. Wikipedia is licensed under the Creative Commons Attribution-ShareAlike License (CC BY-SA). This means that content may be reused with appropriate attribution and shared under a similar license.
  5. Responsible use. Any risk arising from the use of information from this website is entirely the responsibility of the user.