You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

317 lines
11 KiB

from panflute import Cite, Code, Emph, Image, LineBreak, Link, Math, Note, RawInline, SmallCaps, Str, Strikeout, Subscript, Superscript, Underline
from panflute import BulletList, Citation, CodeBlock, Definition, DefinitionItem, DefinitionList, Header, HorizontalRule, LineBlock, LineItem, ListItem, Null, OrderedList, Para, Plain, RawBlock, TableBody, TableFoot, TableHead
from panflute import TableRow, TableCell, Caption, Doc
from panflute import ListContainer, Element
11 months ago
from typing import Union
import os
import io
import warnings
from pygments import highlight
from pygments.lexers import get_lexer_by_name
from pygments.formatters import HtmlFormatter
from pygments.util import ClassNotFound
from .whitespace import NBSP
from .context import Group, BlockGroup, InlineGroup
from .output_generator import OutputGenerator
from .katex import KatexClient
from .images import ImageProcessor, ImageProcessorNamespaceSearcher
from .util import inlinify
# Output generator whose methods build HTML tags and escape text for HTML.
class HTMLGenerator(OutputGenerator):
# Image processor passed to the constructor; generate_Image uses it to
# process image files and query their sizes.
imageProcessor: ImageProcessor
# KaTeX client passed to the constructor; generate_Math uses it to render math.
katexClient: KatexClient
def __init__(self, output_file, katexClient: KatexClient, imageProcessor: ImageProcessor, *args, **kwargs):
    """Store the rendering helpers, then initialise the parent generator.

    ``output_file`` and any additional positional or keyword arguments are
    forwarded unchanged to the parent class initialiser.
    """
    self.katexClient, self.imageProcessor = katexClient, imageProcessor
    super().__init__(output_file, *args, **kwargs)
def generate(self, e: Union[Element, ListContainer]):
if hasattr(e, "attributes") and "only" in e.attributes and e.attributes["only"] != "html":
def escape_special_chars(self, text: str) -> str:
    """Return *text* with HTML-significant characters replaced by entities.

    ``&``, ``<``, ``>``, ``"`` and ``'`` become ``&amp;``, ``&lt;``,
    ``&gt;``, ``&quot;`` and ``&#39;`` respectively.
    """
    # The ampersand has to be handled first; otherwise the ampersands of the
    # entities produced below would themselves be escaped.
    text = text.replace("&", "&amp;")
    text = text.replace("<", "&lt;")
    text = text.replace(">", "&gt;")
    text = text.replace("\"", "&quot;")
    text = text.replace("'", "&#39;")
    # No-break spaces are deliberately left as literal Unicode characters
    # instead of being turned into an HTML entity.
    return text
11 months ago
def start_tag(self, tag: str, attributes: dict[str,str]={}) -> str:
    """Build the opening tag ``<tag key="value" ...>``.

    Attributes whose value is ``None`` are omitted. Every other value is
    passed through ``escape_special_chars`` and wrapped in double quotes.
    The default ``attributes`` dict is only read, never mutated.
    """
    words = [tag]
    for key, value in attributes.items():
        if value is not None:
            # Escaping keeps quotes and angle brackets inside a value from
            # terminating the attribute or the tag.
            words.append(f'{key}="{self.escape_special_chars(value)}"')
    return "<" + " ".join(words) + ">"
11 months ago
def end_tag(self, tag: str, attributes: dict[str,str]={}) -> str:
    """Return the closing tag for *tag*; ``attributes`` is accepted but unused."""
    return "".join(("</", tag, ">"))
11 months ago
def single_tag(self, tag: str, attributes: dict[str,str]={}) -> str:
    """Return the markup for a tag with no content or closing tag.

    The result is exactly what ``start_tag`` produces for the same arguments.
    """
    markup = self.start_tag(tag, attributes)
    return markup
def tagname(self, e) -> str:
    """Return the HTML tag name used for element *e*.

    Headers map to ``h<level>``. Other known element types are looked up in
    a fixed table; a type missing from the table falls back to its
    lower-cased class name.
    """
    if isinstance(e, Header):
        return "h" + str(e.level)
    tags = {
        BulletList: "ul",
        Doc: "main",
        Emph: "em",
        Caption: "figcaption",
        Para: "p",
        LineBlock: "p",
        LineBreak: "br",
        Link: "a",
        ListItem: "li",
        HorizontalRule: "hr",
        OrderedList: "ol",
        SmallCaps: "span",
        Strikeout: "strike",
        Subscript: "sub",
        Superscript: "sup",
        Underline: "u",
        TableBody: "tbody",
        TableHead: "thead",
        TableFoot: "tfoot",
        TableRow: "tr",
        TableCell: "td",
        InlineGroup: "span",
        BlockGroup: "div",
    }
    try:
        # Exact-type lookup: subclasses of the listed types are not matched.
        return tags[type(e)]
    except KeyError:
        return type(e).__name__.lower()
11 months ago
def common_attributes(self, e) -> dict[str,str]:
    """Collect the ``id`` and ``class`` HTML attributes carried by *e*.

    ``id`` is taken from a non-empty ``identifier``; ``class`` is the
    space-joined ``classes`` when that collection is non-empty. Elements
    lacking either attribute simply contribute nothing for it.
    """
    collected = {}
    if hasattr(e, "identifier"):
        identifier = e.identifier
        if identifier != "":
            collected["id"] = identifier
    if hasattr(e, "classes"):
        class_names = e.classes
        if len(class_names) != 0:
            collected["class"] = " ".join(class_names)
    return collected
def generate_NBSP(self, e: NBSP):
    """Write a no-break space for an NBSP element."""
    # Written as an explicit escape so the character cannot be confused with,
    # or silently converted to, an ordinary space. It is emitted as a raw
    # Unicode character rather than as an HTML entity.
    self.write("\u00a0")
def generate_Null(self, e: Null):
#def generate_Doc(self, e: Doc):
# formatter = HtmlFormatter(style=e.get_metadata("highlight-style") if e.get_metadata("highlight-style") is not None else "default")
# self.generate_simple_tag(tag="style", attributes={}, content=formatter.get_style_defs(".highlight"))
#self.generate_simple_tag(e, tag="main")
# Render a code block, optionally syntax-highlighted with pygments.
# NOTE(review): e.attributes["highlight"] and e.attributes["style"] are indexed
# directly, so a missing key raises KeyError unless defaults are set elsewhere — confirm.
def generate_CodeBlock(self, e: CodeBlock):
lexer = None
# Highlighting is only attempted when the block has classes and its
# "highlight" attribute is True or the string 'True'.
if e.classes and len(e.classes) > 0 and (e.attributes["highlight"] == True or e.attributes["highlight"] == 'True'):
# Syntax highlighting using pygments
# Each class is tried as a pygments lexer name.
for cl in e.classes:
lexer = get_lexer_by_name(cl)
except ClassNotFound:
warnings.warn(f"Syntax highligher does not have lexer for element with these classes: {e.classes}", UserWarning)
if lexer:
# noclasses=True asks pygments for inline styles instead of CSS classes.
formatter = HtmlFormatter(style=e.attributes["style"], noclasses=True)
result = highlight(e.text, lexer, formatter)
# The code text is HTML-escaped in place before being emitted as a <pre> tag.
e.text = self.escape_special_chars(e.text)
self.generate_simple_tag(e, tag="pre")
# Inline code: the element's text is HTML-escaped in place (the element is mutated).
def generate_Code(self, e: Code):
e.text = self.escape_special_chars(e.text)
# Render an image: run the file through the image processor, optionally build
# a srcset of alternative sizes, then assemble an <img> tag and a Link around it.
def generate_Image(self, e: Image):
url = e.url
additional_args = self.get_image_processor_args(e.attributes)
# The directory of the current file relative to the current working directory
source_dir = self.context.dir
# The directory of the current file relative to the md file we were called on
rel_dir = self.context.rel_dir
searcher = self.imageProcessor.get_searcher_by_path(url, rel_dir, source_dir)
# File extension without the leading dot.
_, ext = os.path.splitext(url)
ext = ext[1:]
# Conversions between various formats.
if ext in ["svg", "png", "jpeg", "gif"]:
# Even supported elements have to be 'converted' because the
# processing contains finding and moving them to the output
# directory.
url = self.imageProcessor.process_image(url, ext, searcher, **additional_args)
# Vector/document inputs are requested as SVG.
elif ext in ["pdf", "epdf","asy"]:
# Only relevant for when these were PNGs, leaving this here for future reference.
# if not "dpi" in additional_args:
# additional_args["dpi"] = 300
url = self.imageProcessor.process_image(url, "svg", searcher, **additional_args)
# "jpg" is requested under the "jpeg" name.
elif ext in ["jpg"]:
url = self.imageProcessor.process_image(url, "jpeg", searcher, **additional_args)
# NOTE(review): presumably the fallback for every other extension (convert to PNG) — confirm.
url = self.imageProcessor.process_image(url, "png", searcher, **additional_args)
# Srcset generation - multiple alternative sizes of images browsers can
# choose from.
# The extension is re-read from the processed url, which may differ from the input's.
_, ext = os.path.splitext(url)
ext = ext[1:]
srcset = []
# Only PNG/JPEG results get a srcset, and only when "no-srcset" is absent,
# False, or the string 'False'.
if ext in ["png", "jpeg"] and (not "no-srcset" in e.attributes or e.attributes["no-srcset"] == False or e.attributes["no-srcset"] == 'False'):
# This is inspired by @vojta001's blogPhoto shortcode he made for
width, height = self.imageProcessor.get_image_size(searcher.find_image_in_dir(url, searcher.get_cache_dir()))
sizes = [(640, 360, 85), (1280, 720, 85), (1920, 1080, 90)] # (width, height, quality)
for size in sizes:
# The image already fits within this size: offer it at its own width.
# NOTE(review): presumably no larger variants are generated after this — confirm.
if width <= size[0] and height <= size[1]:
srcset.append((f'{searcher.get_web_path()}/{url}', f'{width}w'))
# A quality value is only passed for JPEG output.
quality = size[2] if ext == "jpeg" else None
cache_img = self.imageProcessor.process_image(url, ext, searcher.get_cache_searcher(), width=size[0], height=size[1], quality=quality)
srcset.append((f'{searcher.get_web_path()}/{cache_img}', f'{size[0]}w'))
# From here on url is the web-facing path of the processed image.
url = searcher.get_web_path() + "/" + url
attributes = self.common_attributes(e)
if "width" in e.attributes:
attributes["width"] = e.attributes["width"]
# alt text comes from the title when one is set.
if e.title:
attributes["alt"] = e.title
# Otherwise (presumably — confirm) the image's content is rendered by a nested
# HTMLGenerator into an in-memory buffer and that output becomes the alt text.
fake_out = io.StringIO()
HTMLGenerator(fake_out, self.katexClient, self.imageProcessor).generate(e.content)
attributes["alt"] = fake_out.getvalue()
# With a srcset, the last entry appended doubles as the plain src.
if len(srcset) != 0:
attributes["src"] = srcset[-1][0]
attributes["srcset"] = ", ".join([" ".join(src) for src in srcset])
# NOTE(review): presumably the no-srcset case — confirm.
attributes["src"] = url
# The finished <img> markup is wrapped as raw inline content inside a Link to url.
img = RawInline(self.single_tag("img", attributes))
link = Link(img, url=url)
def generate_InlineGroup(self, e: InlineGroup):
def generate_BlockGroup(self, e: BlockGroup):
def generate_Group(self, e: Group):
    """Emit the group as a tag with its common attributes plus ``lang``.

    The ``lang`` value is read from the current context's metadata.
    """
    attributes = self.common_attributes(e)
    language = self.context.get_metadata("lang")
    self.generate_simple_tag(e, attributes=attributes | {"lang": language})
def generate_Plain(self, e: Plain):
def generate_LineItem(self, e: LineItem):
# Footnotes are rendered in place, wrapped in parentheses, rather than as real
# footnotes (this is how KSP did it before).
def generate_Note(self, e: Note):
# NOTE(review): inlinify() presumably returns None when the note cannot be
# represented as inline content — confirm against .util.
inline = inlinify(e)
tag = self.tagname(e)
if inline is not None:
# Inline form: written with write(), with both parentheses inside the tag.
self.write(self.start_tag(tag)+" (")
self.write(") "+self.end_tag(tag))
# Presumably the non-inline form, written line by line with writeln().
# NOTE(review): here ")" is emitted after the end tag while "(" is inside the
# start tag, unlike the inline form above — confirm this is intended.
self.writeln(self.start_tag(tag) + "(")
self.writeln(self.end_tag(tag) + ")")
# Render a math element to HTML through the KaTeX client.
def generate_Math(self, e: Math):
# Maps the element's format to the value sent as KaTeX's "displayMode" option.
formats = {
"DisplayMath": True,
"InlineMath": False
# A format other than the two keys above raises KeyError in the lookup below.
rawhtml = self.katexClient.render(e.text, {"displayMode": formats[e.format]})
if (e.format == "InlineMath"):
def generate_RawInline(self, e: RawInline):
if e.format == "html":
def generate_RawBlock(self, e: RawBlock):
if e.format == "html":
def generate_Link(self, e: Link):
    """Emit the link as a tag with ``href`` and, when set, ``title``.

    Link-specific attributes take precedence over the common ones on a
    key clash because they are merged in last.
    """
    link_attributes = {"href": e.url}
    if e.title:
        link_attributes["title"] = e.title
    merged = self.common_attributes(e) | link_attributes
    self.generate_simple_tag(e, attributes=merged)
def generate_OrderedList(self, e: OrderedList):
    """Emit an ordered list, translating its start number and numbering style.

    ``start`` is written only when it differs from 1. The list style is
    mapped to the HTML ``type`` attribute; styles without an HTML
    counterpart leave ``type`` unset.
    """
    attributes = {}
    if e.start and e.start != 1:
        attributes["start"] = str(e.start)
    html_styles = {
        "Decimal": "1",
        "LowerRoman": "i",
        "UpperRoman": "I",
        "LowerAlpha": "a",
        "UpperAlpha": "A",
    }
    # .get() yields None for "DefaultStyle" and for any other style that has
    # no entry in the table, so those never raise KeyError.
    list_type = html_styles.get(e.style)
    if list_type is not None:
        attributes["type"] = list_type
    # FIXME: Delimiter styles: 1. 1) (1)
    self.generate_simple_tag(e, attributes=self.common_attributes(e) | attributes)
def generate_TableCell(self, e: TableCell):
    """Emit a table cell with its span and alignment attributes.

    ``colspan`` and ``rowspan`` are written only when they differ from 1.
    A non-default alignment is appended to the ``style`` attribute as a
    ``text-align`` declaration.
    """
    attributes = self.common_attributes(e)
    if e.colspan != 1:
        attributes["colspan"] = str(e.colspan)
    if e.rowspan != 1:
        attributes["rowspan"] = str(e.rowspan)
    aligns = {
        "AlignLeft": "left",
        "AlignRight": "right",
        "AlignCenter": "center",
    }
    if e.alignment and e.alignment != "AlignDefault":
        # Appended rather than assigned so an existing style value is kept.
        attributes["style"] = attributes.get("style", "")+f"text-align: {aligns[e.alignment]};"
    self.generate_simple_tag(e, attributes=attributes)
def generate_Cite(self, e: Cite):
    """Emit the citation as an ``a`` tag pointing at ``#ref-<id>``.

    The id is taken from the first entry of ``e.citations``.
    """
    attributes = self.common_attributes(e)
    first_id = e.citations[0].id
    self.generate_simple_tag(e, tag="a", attributes=attributes | {"href": f"#ref-{first_id}"})
# These are also disabled in pandoc so they shouldn't appear in the AST at all.
def generate_Citation(self, e: Citation):
    """Placeholder: writes an HTML comment marking citations as unimplemented."""
    placeholder = "<!-- FIXME: Citations not implemented -->"
    self.writeln(placeholder)
def generate_Definition(self, e: Definition):
    """Placeholder: writes an HTML comment marking definitions as unimplemented."""
    placeholder = "<!-- FIXME: Definitions not implemented -->"
    self.writeln(placeholder)
def generate_DefinitionItem(self, e: DefinitionItem):
    """Placeholder: writes an HTML comment marking definition items as unimplemented."""
    placeholder = "<!-- FIXME: DefinitionItems not implemented -->"
    self.writeln(placeholder)
def generate_DefinitionList(self, e: DefinitionList):
    """Placeholder: writes an HTML comment marking definition lists as unimplemented."""
    placeholder = "<!-- FIXME: DefinitionLists not implemented -->"
    self.writeln(placeholder)