Module:HTMLParse

Template-info.png Documentation

This module parses and manipulates HTML strings.

There are currently 4 functions:

  • pluralize
  • make_possessive
  • strip_formatting
  • capitalize


pluralize

Pluralizes an HTML string by inserting an "s" between the final alphabetic character and the closing tag (</...>) that immediately follows it.

For example, <span>book</span> becomes <span>books</span>.

Notes:

  • Won't pluralize anything that already ends in "s".
  • Won't pluralize anything that doesn't match the Lua pattern %a</ (an alphabetic character immediately followed by a closing tag).
  • If passing a template that uses a cargo query as the html parameter, be sure that the cargo query is marked with no html or this Module might not be able to expand the template correctly.

Usage

{{#invoke: HTMLParse | pluralize | html = (html string here) }}

Examples

wikitext result
{{#invoke: HTMLParse | pluralize | html = {{Qud text|&Cultra-light &brobot| notxml }} }}
ultra-light robots
{{#invoke: HTMLParse | pluralize | html = <span style="color:red">big </span><span style="color:blue">book</span> }}
big books

make_possessive

Identical to pluralize, except that instead of inserting an s, make_possessive inserts an 's.

strip_formatting

Strips HTML tags from the input string. For example, <span style="color:red;">help!</span> becomes help!.

Example:

{{#invoke: HTMLParse | strip_formatting | html = <span style="color: #d74200;">c</span><span style="color: #cfc041;">r</span><span style="color: #00c420;">a</span><span style="color: #0096ff;">y</span><span style="color: #da5bd6;">o</span><span style="color: #77bfcf;">n</span><span style="color: #FFFFFF;">s</span> }}

Result:

crayons

capitalize

Capitalizes the word while preserving HTML formatting. It only takes one argument.

{{#invoke: HTMLParse | capitalize | html = {{qud text|&Cbrinestalk &Gstalks}} }}

brinestalk stalks turns into: Brinestalk stalks


-- Module table; the public functions are attached to it and it is returned at the end of the file.
local p = {}
-- Grammar helper module; this file calls its pluralize and capitalize functions.
local conjugate = require'Module:Grammar/Conjugate'

--- Pluralizes the text of an HTML string.
-- Splits the string at the last alphabetic character that is immediately
-- followed by a closing tag ("</") and passes everything up to and including
-- that character to conjugate.pluralize; the remainder is appended unchanged.
-- @param frame Scribunto frame object; the input is read from frame.args.html.
-- @param apostrophe Optional string forwarded as the second argument of
--   conjugate.pluralize (defaults to ''); make_possessive passes "'".
-- @return The modified string; the input unchanged when no alphabetic
--   character directly precedes a closing tag; '' when no html argument
--   was supplied.
function p.pluralize(frame, apostrophe)
    apostrophe = apostrophe or ''
    local htmlString = frame.args.html
    if htmlString == nil then
        return ''   --no html argument: nothing to pluralize (avoids an error from calling find on nil)
    end
    if htmlString:find('{', 1, true) then
        htmlString = frame:preprocess(htmlString)   --expand any templates that may have been passed to this module
    end
    --the greedy leading .* makes the split land on the LAST "%a</" in the string;
    --both captures are local so they no longer leak into the global environment
    local prefix, postfix = htmlString:match('(.*%a)(</.*)')
    if not prefix then
        return htmlString   --return unmodified string (couldn't find alpha character followed by closing HTML tag)
    end
    return conjugate.pluralize(prefix, apostrophe) .. postfix
end

--- Possessive variant of p.pluralize.
-- Delegates to p.pluralize, supplying a single apostrophe as its second
-- argument.
-- @param frame Scribunto frame object, forwarded untouched to p.pluralize.
-- @return Whatever p.pluralize returns for this frame.
function p.make_possessive(frame)
    local possessiveMarker = "'"
    return p.pluralize(frame, possessiveMarker)
end

--- Capitalizes the text of an HTML string while keeping its leading tags.
-- Finds the first run of non-"/" characters that ends in ">" (normally the
-- opening tag or tags at the start of the string), keeps everything up to the
-- end of that run untouched, and passes the rest to conjugate.capitalize.
-- When the string contains no such run, the whole string is passed to
-- conjugate.capitalize.
-- @param frame Scribunto frame object; the input is read from frame.args.html.
-- @return The resulting string, or '' when no html argument was supplied.
function p.capitalize(frame)
    local htmlString = frame.args.html
    if htmlString == nil then
        return ''   --no html argument: nothing to capitalize (avoids an error from calling find on nil)
    end
    if htmlString:find('{', 1, true) then
        htmlString = frame:preprocess(htmlString)   --expand any templates that may have been passed to this module
    end
    --a single scan gives the end of the leading tag run; only the end index is needed
    local _, tagEnd = htmlString:find('[^/]*>')
    if not tagEnd then
        return conjugate.capitalize(htmlString)
    end
    --take the prefix from position 1 rather than from the start of the match, so that
    --text preceding the match (e.g. a self-closing tag containing "/") is not dropped
    local prefix = htmlString:sub(1, tagEnd)
    local postfix = htmlString:sub(tagEnd + 1)
    mw.log('prefix: ' .. prefix)
    mw.log('postfix: ' .. postfix)
    return prefix .. conjugate.capitalize(postfix)
end

--- Removes HTML tags from a string.
-- The html argument is always run through frame:preprocess first; afterwards
-- every substring that starts with "<" and runs to the next ">" is deleted.
-- @param frame Scribunto frame object; the input is read from frame.args.html.
-- @return The preprocessed string with all such tag substrings removed.
function p.strip_formatting(frame)
    local expanded = frame:preprocess(frame.args.html)
    --gsub also returns a replacement count; binding one local keeps only the string
    local stripped = expanded:gsub("<[^>]->", "")
    return stripped
end

-- Return the module table so its functions can be called from outside this file.
return p