You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
379 lines
13 KiB
379 lines
13 KiB
from panflute import Cite, Code, Emph, Image, LineBreak, Link, Math, Note, RawInline, SmallCaps, Str, Strikeout, Subscript, Superscript, Underline
|
|
from panflute import BulletList, Citation, CodeBlock, Definition, DefinitionItem, DefinitionList, Header, HorizontalRule, LineBlock, LineItem, ListItem, Null, OrderedList, Para, Plain, RawBlock, TableBody, TableFoot, TableHead
|
|
from panflute import TableRow, TableCell, Caption, Doc
|
|
from panflute import ListContainer, Element
|
|
from typing import Union
|
|
|
|
import os
|
|
import io
|
|
import warnings
|
|
|
|
from pygments import highlight
|
|
from pygments.lexers import get_lexer_by_name
|
|
from pygments.formatters import HtmlFormatter
|
|
from pygments.util import ClassNotFound
|
|
|
|
from .whitespace import NBSP
|
|
from .context import Group, BlockGroup, InlineGroup
|
|
from .output_generator import OutputGenerator
|
|
from .katex import KatexClient
|
|
from .images import ImageProcessor, ImageProcessorNamespaceSearcher
|
|
from .util import inlinify
|
|
from .elements import FileLink
|
|
|
|
|
|
class HTMLGenerator(OutputGenerator):
    """Output generator that serializes a panflute element tree as HTML.

    Math is rendered through ``katexClient``; images and linked files are
    converted and published through ``imageProcessor``.  The writing
    primitives used below (``write``, ``writeln``, ``writeraw``, ``endln``,
    ``indent_more``/``indent_less``, ``generate_simple_tag``,
    ``get_image_processor_args``) and ``self.context`` are not defined in
    this class; they are expected to be provided by ``OutputGenerator``.
    """

    imageProcessor: ImageProcessor
    katexClient: KatexClient

    # Element type -> HTML tag name.  Types missing from this table fall back
    # to their lower-cased class name (see ``tagname``).
    _TAG_NAMES = {
        BulletList: "ul",
        Doc: "main",
        Emph: "em",
        Caption: "figcaption",
        Para: "p",
        LineBlock: "p",
        LineBreak: "br",
        Link: "a",
        ListItem: "li",
        HorizontalRule: "hr",
        OrderedList: "ol",
        SmallCaps: "span",
        Strikeout: "strike",
        Subscript: "sub",
        Superscript: "sup",
        Underline: "u",
        TableBody: "tbody",
        TableHead: "thead",
        TableFoot: "tfoot",
        TableRow: "tr",
        TableCell: "td",
        InlineGroup: "span",
        BlockGroup: "div",
    }

    def __init__(self, output_file, katexClient: KatexClient, imageProcessor: ImageProcessor, *args, **kwargs):
        """Remember the KaTeX client and image processor, then run the parent initialiser.

        ``output_file`` and any extra positional/keyword arguments are
        forwarded unchanged to the parent ``__init__``.
        """
        self.katexClient = katexClient
        self.imageProcessor = imageProcessor
        super().__init__(output_file, *args, **kwargs)

    def generate(self, e: Union[Element, ListContainer]):
        """Generate ``e`` unless it is restricted to another output format.

        Elements carrying an ``only`` attribute with a value other than
        ``"html"`` are skipped entirely.
        """
        if hasattr(e, "attributes") and "only" in e.attributes and e.attributes["only"] != "html":
            return
        super().generate(e)

    def escape_special_chars(self, text: str) -> str:
        """Return ``text`` with HTML-significant characters replaced by entities."""
        # "&" has to be handled first, otherwise the ampersands introduced by
        # the later replacements would be escaped a second time.
        replacements = (
            ("&", "&amp;"),
            ("<", "&lt;"),
            (">", "&gt;"),
            ("\"", "&quot;"),
            ("'", "&#39;"),
        )
        for char, entity in replacements:
            text = text.replace(char, entity)
        # No-break spaces are deliberately left as Unicode characters instead
        # of being turned into an HTML entity.
        return text

    def start_tag(self, tag: str, attributes: Union[dict[str, str], None] = None) -> str:
        """Build an opening tag such as ``<a href="...">``.

        Attribute values are escaped; attributes whose value is ``None`` are
        omitted.  Attribute names are emitted as given.
        """
        words = [tag]
        for key, value in (attributes or {}).items():
            if value is not None:
                words.append(f'{key}="{self.escape_special_chars(value)}"')
        return "<" + " ".join(words) + ">"

    def end_tag(self, tag: str, attributes: Union[dict[str, str], None] = None) -> str:
        """Build the closing tag for ``tag``; ``attributes`` is accepted but unused."""
        return "</" + tag + ">"

    def single_tag(self, tag: str, attributes: Union[dict[str, str], None] = None) -> str:
        """Build a tag without a closing counterpart (identical to ``start_tag``)."""
        return self.start_tag(tag, attributes)

    def tagname(self, e) -> str:
        """Return the HTML tag name used for element ``e``.

        Headers map to ``h<level>``; other known types use the class-level
        lookup table; anything else uses its lower-cased class name.
        """
        if isinstance(e, Header):
            return "h" + str(e.level)
        try:
            # Exact type lookup on purpose: subclasses are not matched.
            return self._TAG_NAMES[type(e)]
        except KeyError:
            return type(e).__name__.lower()

    def common_attributes(self, e) -> dict[str, str]:
        """Collect the ``id`` and ``class`` HTML attributes of ``e``, if it has any."""
        attributes = {}
        if hasattr(e, "identifier") and e.identifier != "":
            attributes["id"] = e.identifier
        if hasattr(e, "classes") and len(e.classes) != 0:
            attributes["class"] = " ".join(e.classes)
        return attributes

    def generate_NBSP(self, e: NBSP):
        """Write a no-break space."""
        # Written as the Unicode character (spelled as an escape so it stays
        # visible in the source) rather than as an HTML entity.
        self.write("\u00a0")

    def generate_Null(self, e: Null):
        """Null elements produce no output."""
        pass

    # Disabled Doc handler, kept for reference: it emitted the pygments style
    # definitions into a <style> tag before generating the document.
    #def generate_Doc(self, e: Doc):
    #    formatter = HtmlFormatter(style=e.get_metadata("highlight-style") if e.get_metadata("highlight-style") is not None else "default")
    #    self.generate_simple_tag(tag="style", attributes={}, content=formatter.get_style_defs(".highlight"))
    #    self.generate_simple_tag(e, tag="main")

    def generate_CodeBlock(self, e: CodeBlock):
        """Write a code block, syntax-highlighted with pygments when requested.

        Highlighting is attempted only when the block has classes and its
        ``highlight`` attribute is ``True`` or ``'True'``.  The first class
        that names a pygments lexer wins; if none does, a ``UserWarning`` is
        issued and the block is written as an escaped ``<pre>``.
        """
        lexer = None
        if e.classes and e.attributes.get("highlight", False) in [True, 'True']:
            # Syntax highlighting using pygments
            for cl in e.classes:
                try:
                    lexer = get_lexer_by_name(cl)
                except ClassNotFound:
                    continue
                break
            else:
                # Loop finished without ``break``: no class matched a lexer.
                warnings.warn(f"Syntax highligher does not have lexer for element with these classes: {e.classes}", UserWarning)

        if lexer:
            # noclasses=True makes pygments emit inline styles.
            formatter = HtmlFormatter(style=e.attributes.get("style", self.context.get_metadata("highlight-style")), noclasses=True)
            result = highlight(e.text, lexer, formatter)
            self.writeraw(result)
        else:
            # NOTE: the element's text is escaped in place.
            e.text = self.escape_special_chars(e.text)
            self.generate_simple_tag(e, tag="pre")

    def generate_Code(self, e: Code):
        """Write inline code with its text HTML-escaped (the element is modified in place)."""
        e.text = self.escape_special_chars(e.text)
        self.generate_simple_tag(e)

    def generate_Image(self, e: Image):
        """Convert, publish and write an image.

        The image is processed into a web-friendly format chosen from its
        file extension, optionally accompanied by downscaled ``srcset``
        variants, and written as an ``<img>`` tag.  Unless the
        ``no-img-link`` attribute is set, the tag is wrapped in a link to the
        published image.
        """
        url = e.url

        additional_args = self.get_image_processor_args(e.attributes)
        additional_args["context"] = self.context

        # The directory of the current file relative to the current working directory
        source_dir = self.context.dir
        # The directory of the current file relative to the md file we were called on
        rel_dir = self.context.rel_dir

        searcher = self.imageProcessor.get_searcher_by_path(url, rel_dir, source_dir)
        url = self.imageProcessor.get_path_without_namespace(url)

        _, ext = os.path.splitext(url)
        ext = ext[1:]  # drop the leading dot

        # Conversions between various formats.
        if ext in ["svg", "png", "jpeg", "gif"]:
            # Even supported elements have to be 'converted' because the
            # processing contains finding and moving them to the output
            # directory.
            target_format = ext
        elif ext in ["pdf", "epdf", "asy"]:
            # Only relevant for when these were PNGs, leaving this here for future reference.
            # if not "dpi" in additional_args:
            #     additional_args["dpi"] = 300
            target_format = "svg"
        elif ext == "jpg":
            target_format = "jpeg"
        else:
            target_format = "png"
        url = self.imageProcessor.process_image(url, target_format, searcher, **additional_args)

        # Srcset generation - multiple alternative sizes of images browsers can
        # choose from.
        _, ext = os.path.splitext(url)
        ext = ext[1:]
        srcset = []
        srcset_enabled = (
            "no-srcset" not in e.attributes
            or e.attributes["no-srcset"] == False
            or e.attributes["no-srcset"] == 'False'
        )
        if ext in ["png", "jpeg"] and srcset_enabled:
            # This is inspired by @vojta001's blogPhoto shortcode he made for
            # patek.cz:
            # https://gitlab.com/patek-devs/patek.cz/-/blob/master/themes/patek/layouts/shortcodes/blogPhoto.html
            width, height = self.imageProcessor.get_image_size(searcher.find_image_in_dir(url, searcher.get_cache_dir()))
            sizes = [(640, 360, 85), (1280, 720, 85), (1920, 1080, 90)]  # (width, height, quality)
            for max_width, max_height, jpeg_quality in sizes:
                if width <= max_width and height <= max_height:
                    # The image already fits this size: offer the original
                    # and stop generating larger variants.
                    srcset.append((f'{searcher.get_web_path()}/{url}', f'{width}w'))
                    break
                quality = jpeg_quality if ext == "jpeg" else None
                cache_img = self.imageProcessor.process_image(url, ext, searcher.get_cache_searcher(), width=max_width, height=max_height, quality=quality)
                searcher.publish_image(cache_img)
                srcset.append((f'{searcher.get_web_path()}/{cache_img}', f'{max_width}w'))

        searcher.publish_image(url)
        url = searcher.get_web_path() + "/" + url

        attributes = self.common_attributes(e)
        # Pass selected element attributes straight through to the <img> tag.
        for name in ("width", "height", "title"):
            if name in e.attributes:
                attributes[name] = e.attributes[name]

        if e.title:
            attributes["alt"] = e.title
        else:
            # No title: render the image's content with a throw-away
            # generator and use the result as the alt text.
            fake_out = io.StringIO()
            HTMLGenerator(fake_out, self.katexClient, self.imageProcessor).generate(e.content)
            attributes["alt"] = fake_out.getvalue()

        if len(srcset) > 1:
            attributes["src"] = srcset[-1][0]
            attributes["srcset"] = ", ".join([" ".join(src) for src in srcset])
        else:
            attributes["src"] = url

        # .get() so that images without the attribute are linked instead of
        # raising KeyError.
        # NOTE(review): a string value such as 'False' is still truthy here,
        # unlike the "no-srcset" check above -- confirm whether that is intended.
        if e.attributes.get("no-img-link"):
            self.write(self.single_tag("img", attributes))
            return

        img = RawInline(self.single_tag("img", attributes))
        link = Link(img, url=url)

        self.generate(link)

    def generate_FileLink(self, e: FileLink):
        """Publish the linked file through the image processor and write a link to it."""
        url = e.url

        # The directory of the current file relative to the current working directory
        source_dir = self.context.dir
        # The directory of the current file relative to the md file we were called on
        rel_dir = self.context.rel_dir

        searcher = self.imageProcessor.get_searcher_by_path(url, rel_dir, source_dir)
        url = self.imageProcessor.get_path_without_namespace(url)

        # An empty target format is passed here, unlike in generate_Image.
        url = self.imageProcessor.process_image(url, "", searcher, self.context)
        searcher.publish_image(url)
        url = searcher.get_web_path() + "/" + url

        self.generate_Link(Link(*e.content, url=url))

    def generate_InlineGroup(self, e: InlineGroup):
        """Write an inline group; see ``generate_Group``."""
        self.generate_Group(e)

    def generate_BlockGroup(self, e: BlockGroup):
        """Write a block group; see ``generate_Group``."""
        self.generate_Group(e)

    def generate_Group(self, e: Group):
        """Write a group as a tag carrying the context's ``lang`` metadata.

        The output is bracketed by ``begingroup``/``endgroup`` calls on the
        KaTeX client.
        """
        self.katexClient.begingroup()
        self.generate_simple_tag(e, attributes=self.common_attributes(e) | {"lang": self.context.get_metadata("lang")})
        self.katexClient.endgroup()

    def generate_Plain(self, e: Plain):
        """Write the content of a Plain element without any wrapping tag."""
        self.generate(e.content)

    def generate_LineItem(self, e: LineItem):
        """Write one line of a line block followed by ``<br>``."""
        self.generate(e.content)
        self.write("<br>")
        self.endln()

    # Footnotes are placed into parentheses. (And not footnotes (This is how KSP did it before))
    def generate_Note(self, e: Note):
        """Write a note as parenthesised text inside its tag.

        When ``inlinify`` can reduce the note to inline content it is written
        on the current line; otherwise the block content is written indented
        on separate lines.
        """
        inline = inlinify(e)
        tag = self.tagname(e)
        if inline is not None:
            self.write(self.start_tag(tag) + " (")
            self.generate(inline)
            self.write(") " + self.end_tag(tag))
        else:
            self.writeln(self.start_tag(tag) + "(")
            self.indent_more()
            self.generate(e.content)
            self.indent_less()
            # The closing parenthesis belongs inside the element, mirroring
            # the opening one written right after the start tag.
            self.writeln(")" + self.end_tag(tag))

    def generate_Math(self, e: Math):
        """Render math through KaTeX and write the resulting HTML.

        Raises ``KeyError`` for a math format other than ``DisplayMath`` or
        ``InlineMath``.
        """
        formats = {
            "DisplayMath": True,
            "InlineMath": False
        }
        rawhtml = self.katexClient.render(e.text, {"displayMode": formats[e.format]})
        if e.format == "InlineMath":
            self.write(rawhtml)
        else:
            self.writeraw(rawhtml)

    def generate_RawInline(self, e: RawInline):
        """Write raw inline content verbatim if its format is HTML; drop it otherwise."""
        if e.format == "html":
            self.write(e.text)

    def generate_RawBlock(self, e: RawBlock):
        """Write a raw block verbatim if its format is HTML; drop it otherwise."""
        if e.format == "html":
            self.writeraw(e.text)

    def generate_Link(self, e: Link):
        """Write a link with its ``href`` and, when present, ``title``."""
        attributes = {"href": e.url}
        if e.title:
            attributes["title"] = e.title
        self.generate_simple_tag(e, attributes=self.common_attributes(e) | attributes)

    def generate_OrderedList(self, e: OrderedList):
        """Write an ordered list, translating its start number and numbering style."""
        attributes = {}
        if e.start and e.start != 1:
            attributes["start"] = str(e.start)
        html_styles = {
            "Decimal": "1",
            "LowerRoman": "i",
            "UpperRoman": "I",
            "LowerAlpha": "a",
            "UpperAlpha": "A"
        }
        if e.style and e.style != "DefaultStyle":
            # Styles without an HTML ``type`` counterpart keep the browser default.
            list_type = html_styles.get(e.style)
            if list_type is not None:
                attributes["type"] = list_type
        # FIXME: Delimeter styles: 1. 1) (1)
        self.generate_simple_tag(e, attributes=self.common_attributes(e) | attributes)

    def generate_TableCell(self, e: TableCell):
        """Write a table cell with its span and text-alignment attributes."""
        attributes = self.common_attributes(e)
        if e.colspan != 1:
            attributes["colspan"] = str(e.colspan)
        if e.rowspan != 1:
            attributes["rowspan"] = str(e.rowspan)
        aligns = {
            "AlignLeft": "left",
            "AlignRight": "right",
            "AlignCenter": "center"
        }
        if e.alignment and e.alignment != "AlignDefault":
            attributes["style"] = attributes.get("style", "") + f"text-align: {aligns[e.alignment]};"
        self.generate_simple_tag(e, attributes=attributes)

    def generate_Cite(self, e: Cite):
        """Write a citation as a link to ``#ref-<id>`` of its first citation."""
        self.generate_simple_tag(e, tag="a", attributes=self.common_attributes(e) | {"href": f"#ref-{e.citations[0].id}"})

    # These are also disabled in pandoc so they shouldn't appear in the AST at all.
    def generate_Citation(self, e: Citation):
        """Not implemented: writes an HTML comment placeholder."""
        self.writeln("<!-- FIXME: Citations not implemented -->")

    def generate_Definition(self, e: Definition):
        """Not implemented: writes an HTML comment placeholder."""
        self.writeln("<!-- FIXME: Definitions not implemented -->")

    def generate_DefinitionItem(self, e: DefinitionItem):
        """Not implemented: writes an HTML comment placeholder."""
        self.writeln("<!-- FIXME: DefinitionItems not implemented -->")

    def generate_DefinitionList(self, e: DefinitionList):
        """Not implemented: writes an HTML comment placeholder."""
        self.writeln("<!-- FIXME: DefinitionLists not implemented -->")

    def generate_SmallCaps(self, e: SmallCaps):
        """Write small caps as a tag styled with ``font-variant: small-caps``."""
        self.generate_simple_tag(e, attributes=self.common_attributes(e) | {"style": "font-variant: small-caps;"})
|
|
|
|
|
|
class StandaloneHTMLGenerator(HTMLGenerator):
    """HTML generator that wraps the document in a complete HTML page."""

    def generate_Doc(self, e: Doc):
        """Write the doctype, ``<head>`` and ``<body>`` around the document.

        The head contains the charset and viewport meta tags, the KaTeX
        stylesheet link, the document title (when ``title`` metadata exists)
        and the ``html-head-includes`` metadata (when present).  The body is
        produced by the parent ``generate_Doc``.
        """
        self.writeraw("<!DOCTYPE html>")
        self.writeln(self.start_tag("html", attributes={"lang": e.get_metadata("lang", None, True)}))
        self.writeln(self.start_tag("head"))
        self.indent_more()
        self.writeln(self.single_tag("meta", attributes={"charset": "utf-8"}))
        # The viewport settings must be given as name/content; a bare
        # ``viewport="..."`` attribute is not recognised by browsers.
        self.writeln(self.single_tag("meta", attributes={"name": "viewport", "content": "width=device-width, initial-scale=1.0"}))
        self.writeln(self.single_tag("link", attributes={"rel": "stylesheet", "href": "https://cdn.jsdelivr.net/npm/katex@0.16.4/dist/katex.min.css", "integrity":"sha384-vKruj+a13U8yHIkAyGgK1J3ArTLzrFGBbBc0tDp4ad/EyewESeXE/Iv67Aj8gKZ0", "crossorigin":"anonymous"}))
        if "title" in e.metadata:
            self.write(self.start_tag("title"))
            self.generate(e.metadata["title"])
            self.write(self.end_tag("title"))
            self.endln()
        if "html-head-includes" in e.metadata:
            self.generate(e.metadata["html-head-includes"])
        self.indent_less()
        self.writeln(self.end_tag("head"))
        self.writeln(self.start_tag("body"))
        self.indent_more()
        super().generate_Doc(e)
        self.indent_less()
        self.writeln(self.end_tag("body"))
        self.writeln(self.end_tag("html"))
|
|
|
|
|
|
|
|
|
|
|
|
|