|
|
|
from panflute import Cite, Emph, Image, LineBreak, Link, Math, Note, RawInline, SmallCaps, Str, Strikeout, Subscript, Superscript, Underline
|
|
|
|
from panflute import BulletList, Citation, CodeBlock, Definition, DefinitionItem, DefinitionList, Header, HorizontalRule, LineBlock, LineItem, ListItem, Null, OrderedList, Para, Plain, RawBlock, TableBody, TableFoot, TableHead
|
|
|
|
from panflute import TableRow, TableCell, Caption, Doc
|
|
|
|
from panflute import ListContainer, Element
|
|
|
|
from typing import Union, Dict
|
|
|
|
|
|
|
|
import re
|
|
|
|
import os
|
|
|
|
import io
|
|
|
|
import warnings
|
|
|
|
|
|
|
|
from pygments import highlight
|
|
|
|
from pygments.lexers import get_lexer_by_name
|
|
|
|
from pygments.formatters import HtmlFormatter
|
|
|
|
from pygments.util import ClassNotFound
|
|
|
|
|
|
|
|
from .whitespace import NBSP
|
|
|
|
from .context import Group
|
|
|
|
from .output_generator import OutputGenerator
|
|
|
|
from .katex import KatexClient
|
|
|
|
from .images import ImageProcessor
|
|
|
|
from .util import inlinify
|
|
|
|
|
|
|
|
class HTMLGenerator(OutputGenerator):
|
|
|
|
def __init__(self, output_file, katexClient: KatexClient, imageProcessor: ImageProcessor, *args, **kwargs):
    """Remember the KaTeX client and image processor, then initialise the parent.

    `output_file` and any extra positional or keyword arguments are passed
    on unchanged to the parent initializer.
    """
    # Both collaborators are stored before the parent initializer runs.
    self.katexClient, self.imageProcessor = katexClient, imageProcessor
    super().__init__(output_file, *args, **kwargs)
|
|
|
|
|
|
|
|
def generate(self, e: Union[Element, ListContainer]):
    """Generate `e` unless it is marked for a different output format.

    An element that has an ``only`` attribute with a value other than
    ``"html"`` is skipped. Everything else is handed to the parent
    implementation.
    """
    if hasattr(e, "attributes"):
        attrs = e.attributes
        if "only" in attrs and attrs["only"] != "html":
            return
    super().generate(e)
|
|
|
|
|
|
|
|
def htmlescapespecialchars(self, text: str) -> str:
    """Escape the characters that are special in HTML text and attribute values.

    ``&``, ``<``, ``>``, ``"`` and ``'`` are replaced by character
    references. No-break spaces (U+00A0) are deliberately left untouched
    instead of being turned into ``&nbsp;``.

    Returns the escaped copy of `text`.
    """
    # translate() works in a single pass, so the ampersands introduced by the
    # replacements are never escaped a second time.
    return text.translate(str.maketrans({
        "&": "&amp;",
        "<": "&lt;",
        ">": "&gt;",
        '"': "&quot;",
        "'": "&#39;",
    }))
|
|
|
|
|
|
|
|
def stag(self, tag: str, attributes: Union[Dict[str,str], None]=None) -> str:
    """Return the opening tag ``<tag key="value" ...>``.

    Attribute values are escaped with ``self.htmlescapespecialchars``; the
    tag name and attribute names are emitted verbatim. `attributes` may be
    omitted (or ``None``) for a tag without attributes.
    """
    # None replaces the former shared mutable ``{}`` default.
    if attributes is None:
        attributes = {}
    words = [tag]
    words.extend(
        f"{key}=\"{self.htmlescapespecialchars(value)}\""
        for key, value in attributes.items()
    )
    return "<" + " ".join(words) + ">"
|
|
|
|
|
|
|
|
def etag(self, tag: str, attributes: Union[Dict[str,str], None]=None) -> str:
    """Return the closing tag ``</tag>``.

    `attributes` is accepted but not used; a closing tag carries none.
    """
    # None replaces the former shared mutable ``{}`` default.
    return "</" + tag + ">"
|
|
|
|
|
|
|
|
def ntag(self, tag: str, attributes: Union[Dict[str,str], None]=None) -> str:
    """Return the markup for `tag` with `attributes` by delegating to ``self.stag``.

    `attributes` may be omitted (or ``None``) for a tag without attributes.
    """
    # None replaces the former shared mutable ``{}`` default; ``stag`` is
    # always handed a real dict.
    return self.stag(tag, {} if attributes is None else attributes)
|
|
|
|
|
|
|
|
def tagname(self, e) -> str:
    """Return the HTML tag name used for element `e`.

    Headers map to ``h<level>``. Other elements are looked up by their exact
    type; a type without an entry falls back to its lower-cased class name.
    """
    if isinstance(e, Header):
        return "h" + str(e.level)

    kind = type(e)
    known_tags = {
        BulletList: "ul",
        Doc: "main",
        Emph: "em",
        Caption: "figcaption",
        Para: "p",
        LineBlock: "p",
        LineBreak: "br",
        Link: "a",
        ListItem: "li",
        HorizontalRule: "hr",
        OrderedList: "ol",
        SmallCaps: "span",
        Strikeout: "strike",
        Subscript: "sub",
        Superscript: "sup",
        Underline: "u",
        TableBody: "tbody",
        TableHead: "thead",
        TableFoot: "tfoot",
        TableRow: "tr",
        TableCell: "td",
    }
    if kind in known_tags:
        return known_tags[kind]
    # No dedicated mapping: derive the tag from the class name.
    return kind.__name__.lower()
|
|
|
|
|
|
|
|
def common_attributes(self, e) -> Dict[str,str]:
    """Collect the ``id`` and ``class`` HTML attributes of `e`.

    ``id`` is taken from a non-empty ``identifier``; ``class`` is the
    space-joined ``classes`` when there are any. Elements lacking either
    attribute simply contribute nothing for it.
    """
    result = {}
    ident = getattr(e, "identifier", "")
    if ident != "":
        result["id"] = ident
    class_names = getattr(e, "classes", ())
    if len(class_names) != 0:
        result["class"] = " ".join(class_names)
    return result
|
|
|
|
|
|
|
|
def generate_Str(self, e: Str):
    """Write the text of `e` with HTML special characters escaped."""
    escaped = self.htmlescapespecialchars(e.text)
    self.write(escaped)
|
|
|
|
|
|
|
|
def generate_NBSP(self, e: NBSP):
    """Write a Unicode no-break space (U+00A0) for `e`."""
    # Spelled as an escape so the character cannot be confused with, or
    # silently normalised to, an ordinary space. The raw Unicode character is
    # written rather than an HTML entity, because we trust Unicode.
    self.write("\u00a0")
|
|
|
|
|
|
|
|
def generate_Null(self, e: Null):
    """Write nothing for a null element."""
    return None
|
|
|
|
|
|
|
|
def generate_Doc(self, e: Doc):
    """Write the syntax-highlighting stylesheet, then the document as ``<main>``.

    The pygments style is read from the ``highlight-style`` metadata value
    and is ``"default"`` when that value is ``None``.
    """
    style_name = e.get_metadata("highlight-style")
    if style_name is None:
        style_name = "default"
    formatter = HtmlFormatter(style=style_name)

    # CSS rules for the ".highlight" selector, wrapped in a <style> element.
    self.writeln("<style>")
    self.writeraw(formatter.get_style_defs(".highlight"))
    self.writeln("</style>")

    self.generate_simple_block_tag(e, "main", self.common_attributes(e))
|
|
|
|
|
|
|
|
def generate_CodeBlock(self, e: CodeBlock):
    """Write a code block, syntax-highlighted with pygments when requested.

    Highlighting is attempted when the block has classes and its
    ``highlight`` attribute is ``True`` or ``'True'``. The first class that
    names a pygments lexer is used; if no class does, a ``UserWarning`` is
    issued. Whenever no lexer was found (or highlighting was not requested)
    the block is written as a plain ``<pre>`` element.
    """
    lexer = None
    # .get(): a block without a "highlight" attribute is simply not
    # highlighted instead of raising KeyError.
    wants_highlight = e.attributes.get("highlight") in (True, 'True')
    if e.classes and wants_highlight:
        # Syntax highlighting using pygments
        for cl in e.classes:
            try:
                lexer = get_lexer_by_name(cl)
            except ClassNotFound:
                continue
            break
        else:
            # Only reached when no class matched a lexer.
            warnings.warn(f"Syntax highligher does not have lexer for element with these classes: {e.classes}", UserWarning)

    if lexer:
        # Fall back to the "default" pygments style when the block carries no
        # "style" attribute.
        formatter = HtmlFormatter(style=e.attributes.get("style", "default"))
        self.writeraw(highlight(e.text, lexer, formatter))
    else:
        self.generate_raw_block_tag(e, "pre", self.common_attributes(e))
|
|
|
|
|
|
|
|
def generate_Image(self, e: Image):
    """Process, publish and write an image as a linked ``<img>`` tag.

    The image is run through the image processor (e.g. PDF sources become
    PNG). PNG and JPEG results additionally get a ``srcset`` of resized
    variants unless the ``no-srcset`` attribute disables it. The ``alt``
    text is the image title, or the rendered image content when there is no
    title. The ``<img>`` is wrapped in a link to the published image.
    """
    original_url = e.url

    processor_args = self.get_image_processor_args(e.attributes)

    # The directory of the current file, will also look for images there.
    source_dir = e.attributes["source_dir"]

    source_ext = os.path.splitext(original_url)[1][1:]

    # Conversions between various formats. Even supported formats have to be
    # 'converted' because the processing contains finding and moving them to
    # the output directory.
    if source_ext in ("svg", "png", "jpeg", "gif"):
        target_format = source_ext
    elif source_ext in ("pdf", "epdf"):
        if "dpi" not in processor_args:
            processor_args["dpi"] = 300
        target_format = "png"
    elif source_ext == "jpg":
        target_format = "jpeg"
    else:
        target_format = "png"
    url = self.imageProcessor.process_image(original_url, target_format, source_dir, **processor_args)

    # Srcset generation - multiple alternative sizes of images browsers can
    # choose from.
    ext = os.path.splitext(url)[1][1:]
    srcset = []
    if ext in ("png", "jpeg") and (
        "no-srcset" not in e.attributes
        or e.attributes["no-srcset"] in (False, 'False')
    ):
        # This is inspired by @vojta001's blogPhoto shortcode he made for
        # patek.cz:
        # https://gitlab.com/patek-devs/patek.cz/-/blob/master/themes/patek/layouts/shortcodes/blogPhoto.html
        width, height = self.imageProcessor.get_image_size(url, [self.imageProcessor.cache_dir])
        # (width, height, quality)
        candidate_sizes = ((640, 360, 85), (1280, 720, 85), (1920, 1080, 90))
        for max_width, max_height, jpeg_quality in candidate_sizes:
            if width <= max_width and height <= max_height:
                # The image already fits this size: offer it as is and stop.
                srcset.append((f'{self.imageProcessor.web_path}/{url}', f'{width}w'))
                break
            quality = jpeg_quality if ext == "jpeg" else None
            cache_img = self.imageProcessor.process_image(url, ext, self.imageProcessor.cache_dir, width=max_width, height=max_height, quality=quality)
            self.imageProcessor.publish_image(cache_img)
            srcset.append((f'{self.imageProcessor.web_path}/{cache_img}', f'{max_width}w'))

    self.imageProcessor.publish_image(url)
    url = self.imageProcessor.web_path + "/" + url

    attributes = self.common_attributes(e)
    if "width" in e.attributes:
        attributes["width"] = e.attributes["width"]

    if e.title:
        attributes["alt"] = e.title
    else:
        # No title: render the image's content with a throwaway generator
        # writing into memory and use that output as the alt text.
        fake_out = io.StringIO()
        HTMLGenerator(fake_out, self.katexClient, self.imageProcessor).generate(e.content)
        attributes["alt"] = fake_out.getvalue()

    if srcset:
        # The last srcset entry doubles as the plain src.
        attributes["src"] = srcset[-1][0]
        attributes["srcset"] = ", ".join(" ".join(src) for src in srcset)
    else:
        attributes["src"] = url

    img = RawInline(self.ntag("img", attributes))
    self.generate(Link(img, url=url))
|
|
|
|
|
|
|
|
def generate_Group(self, e: Group):
    """Generate the content of `e` between a KaTeX ``begingroup``/``endgroup`` pair."""
    katex = self.katexClient
    katex.begingroup()
    self.generate(e.content)
    katex.endgroup()
|
|
|
|
|
|
|
|
def generate_Plain(self, e: Plain):
    """Generate the content of `e` without any wrapping tag."""
    content = e.content
    self.generate(content)
|
|
|
|
|
|
|
|
def generate_LineItem(self, e: LineItem):
    """Generate the content of `e`, then write ``<br>`` and end the line."""
    line_break = "<br>"
    self.generate(e.content)
    self.write(line_break)
    self.endln()
|
|
|
|
|
|
|
|
# Footnotes are written in place, inside parentheses, rather than as real
# footnotes. (This is how KSP did it before.)
def generate_Note(self, e: Note):
    """Write a note in parentheses, wrapped in the tag ``self.tagname(e)`` gives.

    If ``inlinify(e)`` yields inline content, the note is written on the
    current line. Otherwise the note's block content is generated between an
    opening and a closing line, bracketed by ``iup()``/``ido()`` (presumably
    raising and lowering the indentation level).
    """
    inline = inlinify(e)
    tag = self.tagname(e)
    if inline is not None:
        self.write(self.stag(tag) + " (")
        self.generate(inline)
        self.write(") " + self.etag(tag))
        return

    self.writeln(self.stag(tag) + "(")
    self.iup()
    self.generate(e.content)
    self.ido()
    # The closing parenthesis goes inside the tag, mirroring the opening one
    # and the inline branch above (it used to be written after the end tag).
    self.writeln(")" + self.etag(tag))
|
|
|
|
|
|
|
|
def generate_Math(self, e: Math):
    """Render the math source of `e` with the KaTeX client and write the result.

    ``DisplayMath`` is rendered with ``displayMode`` on, ``InlineMath`` with
    it off; any other format raises ``KeyError``.
    """
    display_mode_by_format = {"DisplayMath": True, "InlineMath": False}
    options = {"displayMode": display_mode_by_format[e.format]}
    rendered = self.katexClient.render(e.text, options)
    self.writeln(rendered)
|
|
|
|
|
|
|
|
def generate_RawInline(self, e: RawInline):
    """Write raw inline text verbatim, but only when its format is ``html``."""
    if e.format != "html":
        # Raw content meant for another output format is dropped.
        return
    self.write(e.text)
|
|
|
|
|
|
|
|
def generate_RawBlock(self, e: RawBlock):
    """Write a raw block verbatim, but only when its format is ``html``."""
    if e.format != "html":
        # Raw content meant for another output format is dropped.
        return
    self.writeraw(e.text)
|
|
|
|
|
|
|
|
def generate_Link(self, e: Link):
    """Write `e` as an inline tag with ``href`` and, if set, ``title``.

    The link's own attributes take precedence over same-named entries from
    ``self.common_attributes(e)``.
    """
    link_attrs = {"href": e.url}
    if e.title:
        link_attrs["title"] = e.title
    tag = self.tagname(e)
    self.generate_simple_inline_tag(e, tag, self.common_attributes(e) | link_attrs)
|
|
|
|
|
|
|
|
def generate_OrderedList(self, e: OrderedList):
    """Write an ordered list with ``start`` and ``type`` attributes as needed.

    ``start`` is emitted only when the list starts at something other than
    1. ``type`` is emitted for list styles that have an HTML counterpart;
    other styles (such as ``DefaultStyle``) leave the browser default.
    """
    attributes = {}
    if e.start and e.start != 1:
        attributes["start"] = str(e.start)

    html_styles = {
        "Decimal": "1",
        "LowerRoman": "i",
        "UpperRoman": "I",  # the key used to carry a stray ':' and never matched
        "LowerAlpha": "a",
        "UpperAlpha": "A",
    }
    # .get(): a style without an entry is skipped instead of raising KeyError.
    html_type = html_styles.get(e.style)
    if html_type is not None:
        attributes["type"] = html_type
    # FIXME: Delimiter styles: 1. 1) (1)
    self.generate_simple_block_tag(e, self.tagname(e), self.common_attributes(e) | attributes)
|
|
|
|
|
|
|
|
def generate_TableCell(self, e: TableCell):
    """Write a table cell with span and alignment attributes as needed.

    ``colspan``/``rowspan`` are emitted only when they differ from 1, and a
    ``text-align`` style is appended for any alignment other than
    ``AlignDefault``.
    """
    attrs = self.common_attributes(e)
    for span_name in ("colspan", "rowspan"):
        span = getattr(e, span_name)
        if span != 1:
            attrs[span_name] = str(span)

    css_align = {
        "AlignLeft": "left",
        "AlignRight": "right",
        "AlignCenter": "center",
    }
    alignment = e.alignment
    if alignment and alignment != "AlignDefault":
        # Append to any style already present rather than overwriting it.
        attrs["style"] = attrs.get("style", "") + f"text-align: {css_align[alignment]};"
    self.generate_simple_block_tag(e, self.tagname(e), attrs)
|
|
|
|
|
|
|
|
# These are also disabled in pandoc so they shouldn't appear in the AST at all.
def generate_Citation(self, e: Citation):
    """Write a placeholder HTML comment; citations are not implemented."""
    placeholder = "<!-- FIXME: Citations not implemented -->"
    self.writeln(placeholder)
|
|
|
|
|
|
|
|
def generate_Cite(self, e: Cite):
    """Write a placeholder HTML comment; cites are not implemented."""
    placeholder = "<!-- FIXME: Cites not implemented -->"
    self.writeln(placeholder)
|
|
|
|
|
|
|
|
def generate_Definition(self, e: Definition):
    """Write a placeholder HTML comment; definitions are not implemented."""
    placeholder = "<!-- FIXME: Definitions not implemented -->"
    self.writeln(placeholder)
|
|
|
|
|
|
|
|
def generate_DefinitionItem(self, e: DefinitionItem):
    """Write a placeholder HTML comment; definition items are not implemented."""
    placeholder = "<!-- FIXME: DefinitionItems not implemented -->"
    self.writeln(placeholder)
|
|
|
|
|
|
|
|
def generate_DefinitionList(self, e: DefinitionList):
    """Write a placeholder HTML comment; definition lists are not implemented."""
    placeholder = "<!-- FIXME: DefinitionLists not implemented -->"
    self.writeln(placeholder)
|