formatitko/src/formatitko/html_generator.py


								from panflute import Cite, Code, Emph, Image, LineBreak, Link, Math, Note, RawInline, SmallCaps, Str, Strikeout, Subscript, Superscript, Underline

								from panflute import BulletList, Citation, CodeBlock, Definition, DefinitionItem, DefinitionList, Header, HorizontalRule, LineBlock, LineItem, ListItem, Null, OrderedList, Para, Plain, RawBlock, TableBody, TableFoot, TableHead

								from panflute import TableRow, TableCell, Caption, Doc

								from panflute import ListContainer, Element

								from typing import Union


								import os

								import io

								import warnings


								from pygments import highlight

								from pygments.lexers import get_lexer_by_name

								from pygments.formatters import HtmlFormatter

								from pygments.util import ClassNotFound


								from .whitespace import NBSP

								from .context import Group, BlockGroup, InlineGroup

								from .output_generator import OutputGenerator

								from .katex import KatexClient

								from .images import ImageProcessor, ImageProcessorNamespaceSearcher

								from .util import inlinify


								class HTMLGenerator(OutputGenerator):

									imageProcessor: ImageProcessor

									katexClient: KatexClient


									def __init__(self, output_file, katexClient: KatexClient, imageProcessor: ImageProcessor, *args, **kwargs):
										"""Create an HTML generator writing to ``output_file``.

										The KaTeX client and the image processor are stored on the instance
										before the base-class initializer runs. ``output_file`` and every
										remaining positional or keyword argument are forwarded unchanged to
										``OutputGenerator.__init__``.
										"""
										self.katexClient = katexClient
										self.imageProcessor = imageProcessor
										super().__init__(output_file, *args, **kwargs)


									def generate(self, e: Union[Element, ListContainer]):

										if hasattr(e, "attributes") and "only" in e.attributes and e.attributes["only"] != "html":

											return

										super().generate(e)


									def escape_special_chars(self, text: str) -> str:
										"""Return ``text`` with HTML-significant characters replaced by entities.

										``&``, ``<``, ``>``, ``"`` and ``'`` are escaped, which makes the
										result usable both as element content and inside a quoted attribute
										value. No-break spaces are intentionally passed through as Unicode
										characters instead of being rewritten to ``&nbsp;``.
										"""
										entities = {
											"&": "&amp;",
											"<": "&lt;",
											">": "&gt;",
											"\"": "&quot;",
											"'": "&#39;",
										}
										# One translation pass: ampersands introduced by the entities
										# themselves are never escaped a second time.
										return text.translate(str.maketrans(entities))


									def start_tag(self, tag: str, attributes: Union[dict[str,str], None]=None) -> str:
										"""Build the opening tag ``<tag key="value" ...>``.

										Args:
											tag: Element name.
											attributes: Attribute names mapped to their values. Values are
												escaped with ``escape_special_chars``; entries whose value
												is ``None`` are left out. ``None`` (the default) or an
												empty mapping produces a tag without attributes.

										Returns:
											The opening tag as a string.
										"""
										words = [tag]
										if attributes:
											words.extend(
												f'{key}="{self.escape_special_chars(value)}"'
												for key, value in attributes.items()
												if value is not None
											)
										return "<" + " ".join(words) + ">"


									def end_tag(self, tag: str, attributes: dict[str,str]={}) -> str:
										"""Return the closing tag ``</tag>``.

										``attributes`` is accepted so the signature parallels ``start_tag``;
										it is never read.
										"""
										return "".join(("</", tag, ">"))


									def single_tag(self, tag: str, attributes: dict[str,str]={}) -> str:
										"""Return a tag that has no matching end tag (e.g. ``<img ...>``).

										The result is exactly what ``start_tag`` returns for the same
										arguments; no self-closing slash is added.
										"""
										return self.start_tag(tag, attributes)


									def tagname(self, e) -> str:
										"""Return the HTML tag name used for element ``e``.

										Headers map to ``h<level>``. Every other element is looked up by its
										exact type (subclasses of a listed type do not match); a type
										without an entry falls back to its lower-cased class name.
										"""
										if isinstance(e, Header):
											return f"h{e.level!s}"
										kind = type(e)
										known_tags = {
											BulletList: "ul",
											Doc: "main",
											Emph: "em",
											Caption: "figcaption",
											Para: "p",
											LineBlock: "p",
											LineBreak: "br",
											Link: "a",
											ListItem: "li",
											HorizontalRule: "hr",
											OrderedList: "ol",
											SmallCaps: "span",
											Strikeout: "strike",
											Subscript: "sub",
											Superscript: "sup",
											Underline: "u",
											TableBody: "tbody",
											TableHead: "thead",
											TableFoot: "tfoot",
											TableRow: "tr",
											TableCell: "td",
											InlineGroup: "span",
											BlockGroup: "div",
										}
										tag = known_tags.get(kind)
										if tag is None:
											# No explicit mapping: derive the tag from the class name.
											return kind.__name__.lower()
										return tag


									def common_attributes(self, e) -> dict[str,str]:
										"""Collect the ``id`` and ``class`` HTML attributes of ``e``.

										``id`` comes from a non-empty ``identifier`` and ``class`` is the
										space-joined ``classes``. An element that lacks one of these
										attributes, or has it empty, does not contribute that key.
										"""
										collected = {}
										identifier = getattr(e, "identifier", "")
										if identifier != "":
											collected["id"] = identifier
										class_names = getattr(e, "classes", ())
										if len(class_names) != 0:
											collected["class"] = " ".join(class_names)
										return collected


									def generate_NBSP(self, e: NBSP):

										self.write(" ") # Unicode no-break space, because we trust unicode?


									def generate_Null(self, e: Null):

										pass


									#def generate_Doc(self, e: Doc):

										# formatter = HtmlFormatter(style=e.get_metadata("highlight-style") if e.get_metadata("highlight-style") is not None else "default")

										# self.generate_simple_tag(tag="style", attributes={}, content=formatter.get_style_defs(".highlight"))

										#self.generate_simple_tag(e, tag="main")


									def generate_CodeBlock(self, e: CodeBlock):

										lexer = None

										if e.classes and len(e.classes) > 0 and (e.attributes["highlight"] == True or e.attributes["highlight"] == 'True'):

											# Syntax highlighting using pygments

											for cl in e.classes:

												try:

													lexer = get_lexer_by_name(cl)

												except ClassNotFound:

													continue

												break

											else:

												warnings.warn(f"Syntax highligher does not have lexer for element with these classes: {e.classes}", UserWarning)


										if lexer:

											formatter = HtmlFormatter(style=e.attributes["style"], noclasses=True)

											result = highlight(e.text, lexer, formatter)

											self.writeraw(result)

										else:

											e.text = self.escape_special_chars(e.text)

											self.generate_simple_tag(e, tag="pre")


									def generate_Code(self, e: Code):

										e.text = self.escape_special_chars(e.text)

										self.generate_simple_tag(e)


									def generate_Image(self, e: Image):
										"""Process an image, publish it and write it as a linked ``<img>``.

										Steps, in order:

										1. Look up a searcher for ``e.url`` and strip the namespace from
										   the URL (both via the image processor).
										2. Run the image through ``process_image`` with a target format
										   chosen from the file extension: ``svg``/``png``/``jpeg``/``gif``
										   keep their format, ``pdf``/``epdf``/``asy`` become ``svg``,
										   ``jpg`` becomes ``jpeg`` and anything else becomes ``png``.
										3. For ``png``/``jpeg`` results, unless the ``no-srcset`` attribute
										   is present with a value other than ``False``/``'False'``, build
										   a ``srcset`` from up to three bounded sizes.
										4. Publish the processed image and assemble the ``<img>``
										   attributes: common attributes, optional ``width``/``height``,
										   ``alt``, and ``src`` (plus ``srcset`` when one was built).
										5. Wrap the raw ``<img>`` tag in a ``Link`` to the published image
										   and generate that link.
										"""
										url = e.url

										additional_args = self.get_image_processor_args(e.attributes)

										# The directory of the current file relative to the current working directory
										source_dir = self.context.dir
										# The directory of the current file relative to the md file we were called on
										rel_dir = self.context.rel_dir

										searcher = self.imageProcessor.get_searcher_by_path(url, rel_dir, source_dir)
										url = self.imageProcessor.get_path_without_namespace(url)

										# Extension without the leading dot; compared case-sensitively below.
										_, ext = os.path.splitext(url)
										ext = ext[1:]

										# Conversions between various formats.
										if ext in ["svg", "png", "jpeg", "gif"]:
											# Even supported elements have to be 'converted' because the
											# processing contains finding and moving them to the output
											# directory.
											url = self.imageProcessor.process_image(url, ext, searcher, **additional_args)
										elif ext in ["pdf", "epdf","asy"]:
											# Only relevant for when these were PNGs, leaving this here for future reference.
											# if not "dpi" in additional_args:
											#	additional_args["dpi"] = 300
											url = self.imageProcessor.process_image(url, "svg", searcher, **additional_args)
										elif ext in ["jpg"]:
											url = self.imageProcessor.process_image(url, "jpeg", searcher, **additional_args)
										else:
											# Any other extension is converted to PNG.
											url = self.imageProcessor.process_image(url, "png", searcher, **additional_args)

										# Srcset generation - multiple alternative sizes of images browsers can
										# choose from.
										# Re-read the extension: process_image returned a new path above.
										_, ext = os.path.splitext(url)
										ext = ext[1:]
										srcset = []
										if ext in ["png", "jpeg"] and (not "no-srcset" in e.attributes or e.attributes["no-srcset"] == False or e.attributes["no-srcset"] == 'False'):
											# This is inspired by @vojta001's blogPhoto shortcode he made for
											# patek.cz:
											# https://gitlab.com/patek-devs/patek.cz/-/blob/master/themes/patek/layouts/shortcodes/blogPhoto.html
											width, height = self.imageProcessor.get_image_size(searcher.find_image_in_dir(url, searcher.get_cache_dir()))
											sizes = [(640, 360, 85), (1280, 720, 85), (1920, 1080, 90)] # (width, height, quality)
											for size in sizes:
												# The image already fits this size: offer it unscaled and stop.
												if width <= size[0] and height <= size[1]:
													srcset.append((f'{searcher.get_web_path()}/{url}', f'{width}w'))
													break
												# A quality value is passed only for JPEG output.
												quality = size[2] if ext == "jpeg" else None
												cache_img = self.imageProcessor.process_image(url, ext, searcher.get_cache_searcher(), width=size[0], height=size[1], quality=quality)
												searcher.publish_image(cache_img)
												srcset.append((f'{searcher.get_web_path()}/{cache_img}', f'{size[0]}w'))

										searcher.publish_image(url)
										url = searcher.get_web_path() + "/" + url

										attributes = self.common_attributes(e)
										if "width" in e.attributes:
											attributes["width"] = e.attributes["width"]
										if "height" in e.attributes:
											attributes["height"] = e.attributes["height"]

										if e.title:
											attributes["alt"] = e.title
										else:
											# No title: use the HTML generated from the image's content,
											# rendered into an in-memory buffer by a separate generator.
											fake_out = io.StringIO()
											HTMLGenerator(fake_out, self.katexClient, self.imageProcessor).generate(e.content)
											attributes["alt"] = fake_out.getvalue()

										if len(srcset) != 0:
											# The last entry is either the unscaled image or the largest
											# generated variant.
											attributes["src"] = srcset[-1][0]
											attributes["srcset"] = ", ".join([" ".join(src) for src in srcset])
										else:
											attributes["src"] = url

										img = RawInline(self.single_tag("img", attributes))
										link = Link(img, url=url)

										self.generate(link)


									def generate_InlineGroup(self, e: InlineGroup):
										"""Write an inline group; all work is delegated to ``generate_Group``."""
										self.generate_Group(e)


									def generate_BlockGroup(self, e: BlockGroup):
										"""Write a block group; all work is delegated to ``generate_Group``."""
										self.generate_Group(e)


									def generate_Group(self, e: Group):

										self.katexClient.begingroup()

										self.generate_simple_tag(e, attributes=self.common_attributes(e) | {"lang":self.context.get_metadata("lang")})

										self.katexClient.endgroup()


									def generate_Plain(self, e: Plain):
										"""Write the content of a ``Plain`` element without any wrapping tag."""
										self.generate(e.content)


									def generate_LineItem(self, e: LineItem):
										"""Write one line item: its content followed by ``<br>``, then ``endln()``."""
										self.generate(e.content)
										self.write("<br>")
										self.endln()


									# Footnotes are placed into parentheses. (And not footnotes (This is how KSP did it before))

									def generate_Note(self, e: Note):
										"""Write a note in place, wrapped in parentheses.

										When ``inlinify(e)`` yields an inline element, the note is written
										on the current line as ``<tag> (...) </tag>``. Otherwise the note's
										block content is written, indented, between a ``<tag>(`` line and a
										``)</tag>`` line. In both forms the parentheses are inside the
										element, so they always travel with the note.
										"""
										inline = inlinify(e)
										tag = self.tagname(e)
										if inline is not None:
											self.write(self.start_tag(tag)+" (")
											self.generate(inline)
											self.write(") "+self.end_tag(tag))
											return

										self.writeln(self.start_tag(tag) + "(")
										self.indent_more()
										self.generate(e.content)
										self.indent_less()
										# Close the parenthesis before the end tag, mirroring the opening
										# line and the inline form.
										self.writeln(")" + self.end_tag(tag))


									def generate_Math(self, e: Math):

										formats = {

											"DisplayMath": True,

											"InlineMath": False

										}

										rawhtml = self.katexClient.render(e.text, {"displayMode": formats[e.format]})

										if (e.format == "InlineMath"):

											self.write(rawhtml)

										else:

											self.writeraw(rawhtml)


									def generate_RawInline(self, e: RawInline):

										if e.format == "html":

											self.write(e.text)


									def generate_RawBlock(self, e: RawBlock):

										if e.format == "html":

											self.writeraw(e.text)


									def generate_Link(self, e: Link):

										attributes = {}

										attributes["href"] = e.url

										if e.title:

											attributes["title"] = e.title

										self.generate_simple_tag(e, attributes=self.common_attributes(e) | attributes)


									def generate_OrderedList(self, e: OrderedList):

										attributes = {}

										if e.start and e.start != 1:

											attributes["start"] = str(e.start)

										html_styles = {

											"Decimal": "1",

											"LowerRoman": "i",

											"UpperRoman:": "I",

											"LowerAlpha": "a",

											"UpperAlpha": "A"

										}

										if e.style and e.style != "DefaultStyle":

											attributes["type"] = html_styles[e.style]

										# FIXME: Delimeter styles: 1. 1) (1)

										self.generate_simple_tag(e, attributes=self.common_attributes(e) | attributes)


									def generate_TableCell(self, e: TableCell):

										attributes = self.common_attributes(e)

										if e.colspan != 1:

											attributes["colspan"] = str(e.colspan)

										if e.rowspan != 1:

											attributes["rowspan"] = str(e.rowspan)

										aligns = {

											"AlignLeft": "left",

											"AlignRight": "right",

											"AlignCenter": "center"

										}

										if e.alignment and e.alignment != "AlignDefault":

											attributes["style"] = attributes.get("style", "")+f"text-align: {aligns[e.alignment]};"

										self.generate_simple_tag(e, attributes=attributes)


									def generate_Cite(self, e: Cite):

										self.generate_simple_tag(e, tag="a", attributes=self.common_attributes(e) | {"href": f"#ref-{e.citations[0].id}"})


									# These are also disabled in pandoc so they shouldn't appear in the AST at all.

									def generate_Citation(self, e: Citation):
										"""Placeholder: writes an HTML comment line instead of the citation."""
										self.writeln("<!-- FIXME: Citations not implemented -->")


									def generate_Definition(self, e: Definition):
										"""Placeholder: writes an HTML comment line instead of the definition."""
										self.writeln("<!-- FIXME: Definitions not implemented -->")


									def generate_DefinitionItem(self, e: DefinitionItem):
										"""Placeholder: writes an HTML comment line instead of the definition item."""
										self.writeln("<!-- FIXME: DefinitionItems not implemented -->")


									def generate_DefinitionList(self, e: DefinitionList):
										"""Placeholder: writes an HTML comment line instead of the definition list."""
										self.writeln("<!-- FIXME: DefinitionLists not implemented -->")