# 379 lines, 13 KiB, Python
import html
import io
import os
import warnings
from typing import Union

from panflute import Cite, Code, Emph, Image, LineBreak, Link, Math, Note, RawInline, SmallCaps, Str, Strikeout, Subscript, Superscript, Underline
from panflute import BulletList, Citation, CodeBlock, Definition, DefinitionItem, DefinitionList, Header, HorizontalRule, LineBlock, LineItem, ListItem, Null, OrderedList, Para, Plain, RawBlock, TableBody, TableFoot, TableHead
from panflute import TableRow, TableCell, Caption, Doc
from panflute import ListContainer, Element
from pygments import highlight
from pygments.lexers import get_lexer_by_name
from pygments.formatters import HtmlFormatter
from pygments.util import ClassNotFound

from .whitespace import NBSP
from .context import Group, BlockGroup, InlineGroup
from .output_generator import OutputGenerator
from .katex import KatexClient
from .images import ImageProcessor, ImageProcessorNamespaceSearcher
from .util import inlinify
from .elements import FileLink
|
||
|
||
|
||
class HTMLGenerator(OutputGenerator):
    """Output generator that renders panflute elements as HTML.

    Math rendering is delegated to ``katexClient`` and image/file lookup and
    conversion to ``imageProcessor``. The writing primitives used below
    (``write``, ``writeln``, ``writeraw``, ``endln``, ``indent_more``,
    ``indent_less``, ``generate_simple_tag``, ``get_image_processor_args``)
    and ``self.context`` are not defined in this class; they are expected to
    come from ``OutputGenerator``.
    """

    imageProcessor: ImageProcessor
    katexClient: KatexClient

    # Exact element type -> HTML tag name. Built once here instead of on every
    # tagname() call. Types missing from the table fall back to the lowercased
    # class name.
    _TAG_NAMES = {
        BulletList: "ul",
        Doc: "main",
        Emph: "em",
        Caption: "figcaption",
        Para: "p",
        LineBlock: "p",
        LineBreak: "br",
        Link: "a",
        ListItem: "li",
        HorizontalRule: "hr",
        OrderedList: "ol",
        SmallCaps: "span",
        Strikeout: "strike",
        Subscript: "sub",
        Superscript: "sup",
        Underline: "u",
        TableBody: "tbody",
        TableHead: "thead",
        TableFoot: "tfoot",
        TableRow: "tr",
        TableCell: "td",
        InlineGroup: "span",
        BlockGroup: "div",
    }

    def __init__(self, output_file, katexClient: KatexClient, imageProcessor: ImageProcessor, *args, **kwargs):
        """Store the KaTeX client and image processor, then initialise the base generator.

        ``output_file`` and any extra positional/keyword arguments are passed
        unchanged to ``OutputGenerator.__init__``.
        """
        self.katexClient = katexClient
        self.imageProcessor = imageProcessor
        super().__init__(output_file, *args, **kwargs)

    def generate(self, e: Union[Element, ListContainer]):
        """Generate ``e`` unless it is restricted to another output format.

        An element carrying an ``only`` attribute whose value is not ``"html"``
        is skipped entirely.
        """
        if hasattr(e, "attributes") and "only" in e.attributes and e.attributes["only"] != "html":
            return
        super().generate(e)

    def escape_special_chars(self, text: str) -> str:
        """Return ``text`` with ``&``, ``<``, ``>``, ``"`` and ``'`` replaced by HTML entities.

        No-break spaces are intentionally left as the raw Unicode character
        rather than being turned into an entity.
        """
        return html.escape(text, quote=True)

    def start_tag(self, tag: str, attributes: Union[dict[str, str], None] = None) -> str:
        """Build an opening tag such as ``<a href="...">``.

        Attributes whose value is ``None`` are omitted; all other values are
        converted to ``str`` and escaped.
        """
        words = [tag]
        for key, value in (attributes or {}).items():
            if value is not None:
                words.append(f"{key}=\"{self.escape_special_chars(str(value))}\"")
        return "<" + " ".join(words) + ">"

    def end_tag(self, tag: str, attributes: Union[dict[str, str], None] = None) -> str:
        """Build a closing tag; ``attributes`` is accepted for signature symmetry and ignored."""
        return "</" + tag + ">"

    def single_tag(self, tag: str, attributes: Union[dict[str, str], None] = None) -> str:
        """Build a tag without a closing counterpart (e.g. ``<img>``); same output as ``start_tag``."""
        return self.start_tag(tag, attributes)

    def tagname(self, e) -> str:
        """Return the HTML tag name used for element ``e``.

        Headers map to ``h<level>``; other elements are looked up by their
        exact type, falling back to the lowercased class name.
        """
        if isinstance(e, Header):
            return "h" + str(e.level)
        return self._TAG_NAMES.get(type(e), type(e).__name__.lower())

    def common_attributes(self, e) -> dict[str, str]:
        """Collect the ``id`` and ``class`` attributes from ``e``'s identifier and classes, if any."""
        attributes = {}
        if hasattr(e, "identifier") and e.identifier != "":
            attributes["id"] = e.identifier
        if hasattr(e, "classes") and len(e.classes) != 0:
            attributes["class"] = " ".join(e.classes)
        return attributes

    def generate_NBSP(self, e: NBSP):
        """Write a no-break space."""
        # Written as an escape so the character stays visible in the source and
        # cannot be silently normalised into an ordinary space by an editor.
        self.write("\u00a0")  # Unicode no-break space, because we trust unicode?

    def generate_Null(self, e: Null):
        """Null elements produce no output."""
        pass

    #def generate_Doc(self, e: Doc):
    #    formatter = HtmlFormatter(style=e.get_metadata("highlight-style") if e.get_metadata("highlight-style") is not None else "default")
    #    self.generate_simple_tag(tag="style", attributes={}, content=formatter.get_style_defs(".highlight"))
    #    self.generate_simple_tag(e, tag="main")

    def generate_CodeBlock(self, e: CodeBlock):
        """Write a code block, syntax-highlighted with pygments when requested.

        Highlighting is attempted only when the block has classes and its
        ``highlight`` attribute is ``True``/``'True'``. The first class that
        names a known pygments lexer wins; if none does, a ``UserWarning`` is
        issued and the block is written as an escaped ``<pre>``.
        """
        lexer = None
        if e.classes and (e.attributes.get("highlight", False) in [True, 'True']):
            # Syntax highlighting using pygments
            for cl in e.classes:
                try:
                    lexer = get_lexer_by_name(cl)
                except ClassNotFound:
                    continue
                break
            else:
                # Reached only when the loop finished without finding a lexer.
                warnings.warn(f"Syntax highligher does not have lexer for element with these classes: {e.classes}", UserWarning)

        if lexer:
            # noclasses=True makes pygments emit inline styles.
            formatter = HtmlFormatter(style=e.attributes.get("style", self.context.get_metadata("highlight-style")), noclasses=True)
            result = highlight(e.text, lexer, formatter)
            self.writeraw(result)
        else:
            # NOTE(review): this rewrites e.text in place, so generating the same
            # element twice would escape it twice - confirm that cannot happen.
            e.text = self.escape_special_chars(e.text)
            self.generate_simple_tag(e, tag="pre")

    def generate_Code(self, e: Code):
        """Write inline code with its text HTML-escaped (``e.text`` is modified in place)."""
        e.text = self.escape_special_chars(e.text)
        self.generate_simple_tag(e)

    def generate_Image(self, e: Image):
        """Process an image through the image processor and write an ``<img>`` tag.

        The image is converted/copied by ``imageProcessor``, optionally
        accompanied by downscaled ``srcset`` variants (PNG/JPEG only, unless
        the ``no-srcset`` attribute is set), published, and written either as
        a bare ``<img>`` (``no-img-link`` attribute set) or wrapped in a link
        to the full image.
        """
        url = e.url

        additional_args = self.get_image_processor_args(e.attributes)
        additional_args["context"] = self.context

        # The directory of the current file relative to the current working directory
        source_dir = self.context.dir
        # The directory of the current file relative to the md file we were called on
        rel_dir = self.context.rel_dir

        searcher = self.imageProcessor.get_searcher_by_path(url, rel_dir, source_dir)
        url = self.imageProcessor.get_path_without_namespace(url)

        _, ext = os.path.splitext(url)
        ext = ext[1:]

        # Conversions between various formats.
        if ext in ["svg", "png", "jpeg", "gif"]:
            # Even supported elements have to be 'converted' because the
            # processing contains finding and moving them to the output
            # directory.
            target_format = ext
        elif ext in ["pdf", "epdf", "asy"]:
            # Only relevant for when these were PNGs, leaving this here for future reference.
            # if not "dpi" in additional_args:
            #     additional_args["dpi"] = 300
            target_format = "svg"
        elif ext == "jpg":
            target_format = "jpeg"
        else:
            target_format = "png"
        url = self.imageProcessor.process_image(url, target_format, searcher, **additional_args)

        # Srcset generation - multiple alternative sizes of images browsers can
        # choose from.
        _, ext = os.path.splitext(url)
        ext = ext[1:]
        srcset = []
        if ext in ["png", "jpeg"] and e.attributes.get("no-srcset", False) in (False, 'False'):
            # This is inspired by @vojta001's blogPhoto shortcode he made for
            # patek.cz:
            # https://gitlab.com/patek-devs/patek.cz/-/blob/master/themes/patek/layouts/shortcodes/blogPhoto.html
            width, height = self.imageProcessor.get_image_size(searcher.find_image_in_dir(url, searcher.get_cache_dir()))
            sizes = [(640, 360, 85), (1280, 720, 85), (1920, 1080, 90)]  # (width, height, quality)
            for size in sizes:
                if width <= size[0] and height <= size[1]:
                    # The original already fits this size: list it and stop.
                    srcset.append((f'{searcher.get_web_path()}/{url}', f'{width}w'))
                    break
                quality = size[2] if ext == "jpeg" else None
                cache_img = self.imageProcessor.process_image(url, ext, searcher.get_cache_searcher(), width=size[0], height=size[1], quality=quality)
                searcher.publish_image(cache_img)
                srcset.append((f'{searcher.get_web_path()}/{cache_img}', f'{size[0]}w'))

        searcher.publish_image(url)
        url = searcher.get_web_path() + "/" + url

        attributes = self.common_attributes(e)
        for name in ("width", "height", "title"):
            if name in e.attributes:
                attributes[name] = e.attributes[name]

        if e.title:
            attributes["alt"] = e.title
        else:
            # Render the image's inline content with a throwaway generator and
            # use the resulting markup as the alt text.
            fake_out = io.StringIO()
            HTMLGenerator(fake_out, self.katexClient, self.imageProcessor).generate(e.content)
            attributes["alt"] = fake_out.getvalue()

        if len(srcset) > 1:
            attributes["src"] = srcset[-1][0]
            attributes["srcset"] = ", ".join([" ".join(src) for src in srcset])
        else:
            attributes["src"] = url

        # .get(): the attribute is optional, plain indexing raised KeyError
        # for every image that did not set it.
        if e.attributes.get("no-img-link"):
            self.write(self.single_tag("img", attributes))
            return

        img = RawInline(self.single_tag("img", attributes))
        link = Link(img, url=url)

        self.generate(link)

    def generate_FileLink(self, e: FileLink):
        """Publish the linked file through the image processor and write a link to it."""
        url = e.url

        # The directory of the current file relative to the current working directory
        source_dir = self.context.dir
        # The directory of the current file relative to the md file we were called on
        rel_dir = self.context.rel_dir

        searcher = self.imageProcessor.get_searcher_by_path(url, rel_dir, source_dir)
        url = self.imageProcessor.get_path_without_namespace(url)

        url = self.imageProcessor.process_image(url, "", searcher, self.context)
        searcher.publish_image(url)
        url = searcher.get_web_path() + "/" + url

        self.generate_Link(Link(*e.content, url=url))

    def generate_InlineGroup(self, e: InlineGroup):
        """Inline groups are handled by ``generate_Group``."""
        self.generate_Group(e)

    def generate_BlockGroup(self, e: BlockGroup):
        """Block groups are handled by ``generate_Group``."""
        self.generate_Group(e)

    def generate_Group(self, e: Group):
        """Write a group element with a ``lang`` attribute, inside a KaTeX begingroup/endgroup pair."""
        self.katexClient.begingroup()
        self.generate_simple_tag(e, attributes=self.common_attributes(e) | {"lang": self.context.get_metadata("lang")})
        self.katexClient.endgroup()

    def generate_Plain(self, e: Plain):
        """Write the content of a Plain element without any wrapping tag."""
        self.generate(e.content)

    def generate_LineItem(self, e: LineItem):
        """Write one line of a line block followed by ``<br>`` and a line end."""
        self.generate(e.content)
        self.write("<br>")
        self.endln()

    # Footnotes are placed into parentheses. (And not footnotes (This is how KSP did it before))
    def generate_Note(self, e: Note):
        """Write a note as parenthesised content inside its element.

        When ``inlinify`` can reduce the note to inline content it is written
        on one line; otherwise the block content is written indented. In both
        cases the parentheses sit inside the element.
        """
        inline = inlinify(e)
        tag = self.tagname(e)
        if inline is not None:
            self.write(self.start_tag(tag)+" (")
            self.generate(inline)
            self.write(") "+self.end_tag(tag))
        else:
            self.writeln(self.start_tag(tag) + "(")
            self.indent_more()
            self.generate(e.content)
            self.indent_less()
            # Close the parenthesis before the end tag so it pairs with the
            # opening one written inside the element above.
            self.writeln(")" + self.end_tag(tag))

    def generate_Math(self, e: Math):
        """Render math through the KaTeX client; display math is written raw, inline math inline."""
        formats = {
            "DisplayMath": True,
            "InlineMath": False
        }
        rawhtml = self.katexClient.render(e.text, {"displayMode": formats[e.format]})
        if e.format == "InlineMath":
            self.write(rawhtml)
        else:
            self.writeraw(rawhtml)

    def generate_RawInline(self, e: RawInline):
        """Pass raw inline content through only when its format is ``html``."""
        if e.format == "html":
            self.write(e.text)

    def generate_RawBlock(self, e: RawBlock):
        """Pass raw block content through only when its format is ``html``."""
        if e.format == "html":
            self.writeraw(e.text)

    def generate_Link(self, e: Link):
        """Write a link with ``href`` and, when present, ``title``."""
        attributes = {"href": e.url}
        if e.title:
            attributes["title"] = e.title
        self.generate_simple_tag(e, attributes=self.common_attributes(e) | attributes)

    def generate_OrderedList(self, e: OrderedList):
        """Write an ordered list, mapping its start number and number style to ``<ol>`` attributes.

        Styles without an HTML ``type`` equivalent are written without a
        ``type`` attribute instead of raising.
        """
        attributes = {}
        if e.start and e.start != 1:
            attributes["start"] = str(e.start)
        html_styles = {
            "Decimal": "1",
            "LowerRoman": "i",
            "UpperRoman": "I",
            "LowerAlpha": "a",
            "UpperAlpha": "A"
        }
        if e.style and e.style != "DefaultStyle":
            list_type = html_styles.get(e.style)
            if list_type is not None:
                attributes["type"] = list_type
        # FIXME: Delimiter styles: 1. 1) (1)
        self.generate_simple_tag(e, attributes=self.common_attributes(e) | attributes)

    def generate_TableCell(self, e: TableCell):
        """Write a table cell with ``colspan``/``rowspan`` and a ``text-align`` style when not default."""
        attributes = self.common_attributes(e)
        if e.colspan != 1:
            attributes["colspan"] = str(e.colspan)
        if e.rowspan != 1:
            attributes["rowspan"] = str(e.rowspan)
        aligns = {
            "AlignLeft": "left",
            "AlignRight": "right",
            "AlignCenter": "center"
        }
        if e.alignment and e.alignment != "AlignDefault":
            attributes["style"] = attributes.get("style", "")+f"text-align: {aligns[e.alignment]};"
        self.generate_simple_tag(e, attributes=attributes)

    def generate_Cite(self, e: Cite):
        """Write a citation as a link to ``#ref-<id>`` of its first citation."""
        self.generate_simple_tag(e, tag="a", attributes=self.common_attributes(e) | {"href": f"#ref-{e.citations[0].id}"})

    # These are also disabled in pandoc so they shouldn't appear in the AST at all.
    def generate_Citation(self, e: Citation):
        """Not implemented; writes a FIXME HTML comment."""
        self.writeln("<!-- FIXME: Citations not implemented -->")

    def generate_Definition(self, e: Definition):
        """Not implemented; writes a FIXME HTML comment."""
        self.writeln("<!-- FIXME: Definitions not implemented -->")

    def generate_DefinitionItem(self, e: DefinitionItem):
        """Not implemented; writes a FIXME HTML comment."""
        self.writeln("<!-- FIXME: DefinitionItems not implemented -->")

    def generate_DefinitionList(self, e: DefinitionList):
        """Not implemented; writes a FIXME HTML comment."""
        self.writeln("<!-- FIXME: DefinitionLists not implemented -->")

    def generate_SmallCaps(self, e: SmallCaps):
        """Write small caps as an element styled with ``font-variant: small-caps``."""
        self.generate_simple_tag(e, attributes=self.common_attributes(e) | {"style": "font-variant: small-caps;"})
|
||
|
||
|
||
class StandaloneHTMLGenerator(HTMLGenerator):
    """HTML generator that wraps the document in a complete HTML page.

    Adds the doctype, ``<html>``, a ``<head>`` (charset, viewport, KaTeX
    stylesheet, optional title and extra head content) and ``<body>`` around
    whatever the inherited ``generate_Doc`` writes.
    """

    def generate_Doc(self, e: Doc):
        """Write the page skeleton and delegate the document body to the parent class.

        Metadata read from ``e``: ``lang`` (``<html lang>``; omitted when the
        lookup returns ``None``), ``title`` (``<title>``) and
        ``html-head-includes`` (generated inside ``<head>``).
        """
        self.writeraw("<!DOCTYPE html>")
        self.writeln(self.start_tag("html", attributes={"lang": e.get_metadata("lang", None, True)}))

        self.writeln(self.start_tag("head"))
        self.indent_more()
        self.writeln(self.single_tag("meta", attributes={"charset": "utf-8"}))
        # The viewport must be given as name/content; a bare `viewport`
        # attribute on <meta> is not recognised by browsers.
        self.writeln(self.single_tag("meta", attributes={"name": "viewport", "content": "width=device-width, initial-scale=1.0"}))
        self.writeln(self.single_tag("link", attributes={"rel": "stylesheet", "href": "https://cdn.jsdelivr.net/npm/katex@0.16.4/dist/katex.min.css", "integrity":"sha384-vKruj+a13U8yHIkAyGgK1J3ArTLzrFGBbBc0tDp4ad/EyewESeXE/Iv67Aj8gKZ0", "crossorigin":"anonymous"}))
        if "title" in e.metadata:
            self.write(self.start_tag("title"))
            self.generate(e.metadata["title"])
            self.write(self.end_tag("title"))
            self.endln()
        if "html-head-includes" in e.metadata:
            self.generate(e.metadata["html-head-includes"])
        self.indent_less()
        self.writeln(self.end_tag("head"))

        self.writeln(self.start_tag("body"))
        self.indent_more()
        super().generate_Doc(e)
        self.indent_less()
        self.writeln(self.end_tag("body"))
        self.writeln(self.end_tag("html"))
|
||
|
||
|
||
|
||
|
||
|