web-mini/web_mini/html.py
Ari Archer c4779f49a3
1.4.0 : improve minification, fix bugs
Signed-off-by: Ari Archer <ari.web.xyz@gmail.com>
2023-10-31 21:47:23 +02:00

80 lines
2.3 KiB
Python

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""minify html"""
from __future__ import annotations
import re
from typing import List
from . import const, recache
html_fns: recache.ReCache = recache.ReCache()
HTML_TAG_RE: re.Pattern[str] = re.compile(r"<[^>]+>", re.I | re.M | re.S)
HTML_TAG_SPACE_RE: re.Pattern[str] = re.compile(r">\s+<")
HTML_TAG_BREAK_RE: re.Pattern[str] = re.compile(r"\s{2,}|[\r\n]")
@html_fns.recache(r"<!--.*?-->", re.S)
def html_remove_comments(pat: re.Pattern[str], html: str) -> str:
"""remove html comments"""
return pat.sub("", html)
def html_remove_type(html: str) -> str:
"""remove html `type` attr from <script> and <style>"""
return (
html.replace('<style type="text/css"', "<style")
.replace("<style type=text/css", "<style")
.replace('<script type="text/javascript"', "<script")
.replace("<script type=text/javascript", "<script")
)
@html_fns.recache(r"(?:" + "|".join(const.UNNEEDED_HTML_TAGS) + r")", re.I)
def html_remove_unneeded_tags(pat: re.Pattern[str], html: str) -> str:
"""remove optional html tags"""
return pat.sub("", html)
@html_fns.recache(r'([a-zA-Z]+)="([a-zA-Z0-9-_\.]+)"')
def html_unquote_attrs(pat: re.Pattern[str], html: str) -> str:
"""remove all html quotes on attibutes if possible"""
return HTML_TAG_RE.sub(lambda m: pat.sub(r"\1=\2", m.group()), html)
@html_fns.recache(
r"(<\s*?(?:pre|code|textarea).*?>|<\s*?/\s*?(?:pre|code|textarea)\s*?>)",
re.I,
)
def html_remove_whitespace(pat: re.Pattern[str], html: str) -> str:
"""remove useless html whitespace ( does not minify whitespace-sensitive tags )"""
tags: int = 0
split: List[str] = [
HTML_TAG_SPACE_RE.sub("> <", tag) if idx % 2 == 1 else tag
for idx, tag in enumerate(pat.split(html))
]
for idx, tag in enumerate(split):
if idx % 2 == 0 and tags == 0:
split[idx] = HTML_TAG_BREAK_RE.sub(" ", tag)
elif (idx + 1) % 2 == 0:
tags += -1 if "/" in tag else 1
return "".join(split).strip()
def minify_html(html: str) -> str:
"""run all html stages"""
html_fns.compileall()
html = html_remove_comments(html)
html = html_remove_type(html)
html = html_remove_unneeded_tags(html)
html = html_remove_whitespace(html)
html = html_unquote_attrs(html)
return html