formatitko/html.py

262 lines
7.6 KiB
Python
Raw Normal View History

2023-02-02 02:50:33 +01:00
from panflute import *
from pygments import highlight
from pygments.lexers import get_lexer_by_name
from pygments.formatters import HtmlFormatter
from pygments.util import ClassNotFound
2023-02-06 01:00:45 +01:00
import os
2023-02-02 02:50:33 +01:00
from whitespace import NBSP
2023-02-02 16:39:18 +01:00
from transform import FQuoted
2023-02-03 14:54:16 +01:00
from katex import KatexClient
from util import inlinify
from group import Group
2023-02-06 01:00:45 +01:00
from images import ImageProcessor
2023-02-02 02:50:33 +01:00
2023-02-06 01:00:45 +01:00
def html(e: Element, k: KatexClient, i: ImageProcessor, indent_level: int=0, indent_str: str="\t") -> str:
    """Render a panflute element (or a ListContainer of elements) as HTML.

    Most element types return their own output early. Everything else is
    rendered generically at the end of the function as
    ``<tag attributes>content_head + children + text + content_foot</tag>``:
    on a single line for inline elements, as an indented multi-line block
    otherwise.

    Args:
        e: The element or ListContainer to render.
        k: KaTeX client; used to render Math elements and to open/close a
            group around Group elements.
        i: Image processor; used to convert/resize images and to build their
            public URLs.
        indent_level: Current nesting depth, used to indent block output.
        indent_str: String repeated ``indent_level`` times as indentation.

    Returns:
        The HTML for ``e``; an empty string for elements whose ``only``
        attribute is set to something other than ``"html"`` and for raw
        elements in a non-HTML format.
    """
    # Elements marked with an `only` attribute are emitted solely by the
    # output format named there.
    if hasattr(e, "attributes") and "only" in e.attributes and e.attributes["only"] != "html":
        return ""

    if isinstance(e, ListContainer):
        return ''.join(html(child, k, i, indent_level, indent_str) for child in e)

    # Pieces from which the generic output at the end of the function is
    # assembled. The branches below adjust them or return early.
    tag = e.tag.lower()
    attributes = ""
    content_foot = ""
    content_head = ""

    if isinstance(e, Str):
        # Keep non-breaking spaces (U+00A0) explicit in the HTML source.
        return e.text.replace("\u00a0", "&nbsp;")

    # Element types whose HTML tag differs from the lowercased panflute tag.
    # Header, Caption and TableCell are handled by dedicated branches below.
    tags = {
        BulletList: "ul",
        Doc: "main",
        Emph: "em",
        Para: "p",
        LineBlock: "p",
        ListItem: "li",
        SmallCaps: "span",
        Strikeout: "strike",
        Subscript: "sub",
        Superscript: "sup",
        Underline: "u",
        TableBody: "tbody",
        TableHead: "thead",
        TableFoot: "tfoot",
        TableRow: "tr",
    }
    if type(e) in tags:
        tag = tags[type(e)]

    not_implemented = (Citation, Cite, Definition, DefinitionItem, DefinitionList)
    if type(e) in not_implemented:
        return f'<!-- FIXME: {type(e)}s not implemented -->'

    # Elements that map to a fixed string.
    simple_string = {
        NBSP: "&nbsp;",
        Space: " ",
        Null: "",
        LineBreak: f"\n{indent_level*indent_str}<br>\n{indent_level*indent_str}",
        SoftBreak: " ",
        HorizontalRule: f"{indent_level*indent_str}<hr>\n",
    }
    if type(e) in simple_string:
        return simple_string[type(e)]

    if hasattr(e, "identifier") and e.identifier != "":
        attributes += f' id="{e.identifier}"'
    if hasattr(e, "classes") and len(e.classes) != 0:
        attributes += f' class="{" ".join(e.classes)}"'
    # TODO: Pass attributes down to HTML too

    if isinstance(e, CodeBlock):
        return _render_code_block(e)

    if isinstance(e, Figure):
        content_foot = html(e.caption, k, i, indent_level+1, indent_str)

    if isinstance(e, Caption):
        tag = "figcaption"

    if isinstance(e, Image):
        return _render_image(e, k, i)

    if isinstance(e, Header):
        tag = "h"+str(e.level)

    if isinstance(e, Link):
        tag = "a"
        attributes += f' href="{e.url}"'
        if e.title:
            attributes += f' title="{e.title}"'

    if isinstance(e, LineItem):
        return indent_level*indent_str + html(e.content, k, i) + "<br>\n"

    if isinstance(e, Note):
        content_head = "("
        content_foot = ")"
        # When inlinify() yields something, render the note inline; otherwise
        # fall through to the generic rendering below.
        inlined = inlinify(e)
        if inlined is not None:
            return f' <note>({html(inlined, k, i, 0, "")})</note>'

    if isinstance(e, OrderedList):
        tag = "ol"
        if e.start and e.start != 1:
            attributes += f' start="{e.start}"'
        html_styles = {
            "Decimal": "1",
            "LowerRoman": "i",
            "UpperRoman": "I",
            "LowerAlpha": "a",
            "UpperAlpha": "A",
        }
        # Styles without an entry above (e.g. "DefaultStyle") get no
        # `type` attribute.
        list_type = html_styles.get(e.style)
        if list_type is not None:
            attributes += f' type="{list_type}"'
        # FIXME: Delimiter styles

    if isinstance(e, Table):
        content_head = html(e.head, k, i, indent_level+1, indent_str)
        content_foot = html(e.foot, k, i, indent_level+1, indent_str)
        # FIXME: Fancy pandoc tables, using colspec

    if isinstance(e, TableCell):
        tag = "td"
        if e.colspan != 1:
            attributes += f' colspan="{e.colspan}"'
        if e.rowspan != 1:
            attributes += f' rowspan="{e.rowspan}"'
        aligns = {
            "AlignLeft": "left",
            "AlignRight": "right",
            "AlignCenter": "center",
        }
        if e.alignment and e.alignment != "AlignDefault":
            attributes += f' style="text-align: {aligns[e.alignment]}"'

    if isinstance(e, FQuoted):
        return _render_quoted(e, k, i)

    if isinstance(e, Group):
        k.begingroup()
        ret = html(e.content, k, i, indent_level, indent_str)
        k.endgroup()
        return ret

    if isinstance(e, Math):
        formats = {
            "DisplayMath": True,
            "InlineMath": False,
        }
        # FIXME: Currently, all bits of math are isolated from each other, this
        # means that \defs and alike work only inside a single math block
        # and are forgotten in the next one.
        return indent_level*indent_str + k.render(e.text, {"displayMode": formats[e.format]})

    if isinstance(e, RawInline):
        return e.text if e.format == "html" else ""

    if isinstance(e, RawBlock):
        return f'{e.text}\n' if e.format == "html" else ""

    if isinstance(e, Inline):
        inner = html(e.content, k, i, 0, "") if hasattr(e, "_content") else ""
        text = e.text if hasattr(e, "text") else ""
        return f'<{tag}{attributes}>{content_head}{inner}{text}{content_foot}</{tag}>'

    # Generic block rendering. Plain elements get no wrapping tag.
    wrap = not isinstance(e, Plain)
    parts = []
    if wrap:
        parts.append(f"{indent_level*indent_str}<{tag}{attributes}>\n")
    parts.append(content_head)
    if hasattr(e, "_content"):
        # Inline children do not indent themselves, so indent their line here.
        if len(e.content) > 0 and isinstance(e.content[0], Inline):
            parts.append((indent_level+1)*indent_str)
        parts.append(html(e.content, k, i, indent_level+1, indent_str))
    if hasattr(e, "text"):
        parts.append(e.text)
    parts.append(f"{content_foot}\n")
    if wrap:
        parts.append(f"{indent_level*indent_str}</{tag}>\n")
    return "".join(parts)


def _render_code_block(e: CodeBlock) -> str:
    """Render a CodeBlock, highlighted with Pygments when `highlight` is set."""
    lexer = None
    if e.attributes.get("highlight") in (True, "True"):
        # The first class that names a known Pygments lexer picks the language.
        for cl in e.classes:
            try:
                lexer = get_lexer_by_name(cl)
            except ClassNotFound:
                continue
            break

    if lexer is None:
        # Highlighting not requested, or no class matched a lexer.
        # NOTE: the text is inserted verbatim, it is not HTML-escaped.
        return f'<pre>{e.text}</pre>'

    formatter = HtmlFormatter(style=e.attributes.get("style", "default"))
    result = highlight(e.text, lexer, formatter)
    style = formatter.get_style_defs(".highlight")
    return f'<style>{style}</style>{result}'


def _render_image(e: Image, k: KatexClient, i: ImageProcessor) -> str:
    """Render an Image as `<img>`, with a linked `srcset` for png/jpeg output."""
    url = e.url
    # NOTE(review): `source_dir` is presumably set on every image by an
    # earlier processing step -- confirm; a missing key raises KeyError here.
    source_dir = e.attributes["source_dir"]

    # Pick the output format from the source file extension.
    ext = os.path.splitext(url)[1][1:]
    if ext in ("svg", "png", "jpeg", "gif"):
        url = i.process_image(url, ext, source_dir)
    elif ext in ("pdf", "epdf"):
        url = i.process_image(url, "png", source_dir, dpi=300)
    elif ext == "jpg":
        url = i.process_image(url, "jpeg", source_dir)
    else:
        url = i.process_image(url, "png", source_dir)

    ext = os.path.splitext(url)[1][1:]
    srcset = []
    if ext in ("png", "jpeg") and e.attributes.get("no-srcset", False) in (False, "False"):
        # This is inspired by @vojta001's blogPhoto shortcode he made for
        # patek.cz:
        # https://gitlab.com/patek-devs/patek.cz/-/blob/master/themes/patek/layouts/shortcodes/blogPhoto.html
        width, height = i.get_image_size(url, [source_dir, i.public_dir])
        sizes = [(640, 360, 85), (1280, 720, 85), (1920, 1080, 90)]  # (width, height, quality)
        for max_width, max_height, size_quality in sizes:
            # Once the image fits a size, list the image itself and stop.
            if width <= max_width and height <= max_height:
                srcset.append((f'{i.web_path}/{url}', f'{width}w'))
                break
            quality = size_quality if ext == "jpeg" else None
            resized = i.process_image(url, ext, source_dir, width=max_width, height=max_height, quality=quality)
            srcset.append((f'{i.web_path}/{resized}', f'{max_width}w'))

    url = i.web_path + "/" + url
    style_attr = " style=width:" + e.attributes["width"] if "width" in e.attributes else ""
    alt = e.title or html(e.content, k, i, 0, "")
    attributes = f'{style_attr} alt="{alt}"'

    if srcset:
        srcset_attr = ", ".join(" ".join(src) for src in srcset)
        return f'<a href="{url}"><img src="{srcset[-1][0]}" srcset="{srcset_attr}"{attributes}></a>'
    return f'<img src="{url}"{attributes}>'


def _render_quoted(e: FQuoted, k: KatexClient, i: ImageProcessor) -> str:
    """Render an FQuoted element with quote marks chosen by its style."""
    marks = {
        ("cs", "SingleQuote"): ("&sbquo;", "&lsquo;"),
        ("cs", "DoubleQuote"): ("&bdquo;", "&ldquo;"),
        ("en", "SingleQuote"): ("&lsquo;", "&rsquo;"),
        ("en", "DoubleQuote"): ("&ldquo;", "&rdquo;"),
    }
    # Any other style uses plain ASCII quotes; double quotes are the default.
    fallback = ("'", "'") if e.quote_type == "SingleQuote" else ('"', '"')
    opening, closing = marks.get((e.style, e.quote_type), fallback)
    return f'{opening}{html(e.content, k, i, 0, "")}{closing}'
2023-02-02 16:39:18 +01:00