formatitko/src/formatitko/html.py

from panflute import *
from pygments import highlight
from pygments.lexers import get_lexer_by_name
from pygments.formatters import HtmlFormatter
from pygments.util import ClassNotFound
import os

from .whitespace import NBSP
from .transform import FQuoted
from .katex import KatexClient
from .util import inlinify
from .context import Group
from .images import ImageProcessor

def _codeblock_html(e: Element) -> str:
	"""Render a CodeBlock, syntax-highlighted with pygments when possible.

	Highlighting is attempted only when the block has at least one class and
	its `highlight` attribute is `True` or `'True'`. The first class for which
	pygments knows a lexer is used. When highlighting is disabled, or no class
	maps to a lexer, the code is emitted as an HTML-escaped `<pre>` block.
	"""
	# Imported locally: at module level the name `html` is the renderer
	# function below, so the stdlib helper is pulled in under its own name.
	from html import escape as escape_html

	lexer = None
	if len(e.classes) > 0 and e.attributes.get("highlight") in (True, "True"):
		for cl in e.classes:
			try:
				lexer = get_lexer_by_name(cl)
			except ClassNotFound:
				continue
			break
		else:
			print(f"WARN: Syntax highligher does not have lexer for element with these classes: {e.classes}")

	if lexer is None:
		# Plain fallback. The text has to be escaped, otherwise code
		# containing `<` or `&` would be interpreted as markup.
		return f'<pre>{escape_html(e.text, quote=False)}</pre>'

	formatter = HtmlFormatter(style=e.attributes.get("style", "default"))
	return highlight(e.text, lexer, formatter)


def _image_html(e: Element, k: KatexClient, i: ImageProcessor) -> str:
	"""Render an Image: process the file through `i` and build the `<img>` tag.

	The `file-width`, `file-height`, `file-quality` and `file-dpi` attributes
	are converted to ints and forwarded to `i.process_image`. For png and jpeg
	results a srcset with several sizes is generated unless the `no-srcset`
	attribute is set to something other than `False`/`'False'`.
	"""
	url = e.url

	# Attributes → image processor args
	additional_args = {}
	for attr_name, arg_name in (("file-width", "width"), ("file-height", "height"), ("file-quality", "quality"), ("file-dpi", "dpi")):
		if attr_name in e.attributes:
			additional_args[arg_name] = int(e.attributes[attr_name])

	# The directory of the current file, will also look for images there.
	# NOTE(review): presumably always set by an earlier processing step; a
	# missing `source_dir` attribute raises KeyError here — confirm.
	source_dir = e.attributes["source_dir"]

	ext = os.path.splitext(url)[1][1:]

	# Conversions between various formats. Even supported formats have to be
	# 'converted', because the processing contains finding them and moving
	# them to the output directory.
	if ext in ["svg", "png", "jpeg", "gif"]:
		target_format = ext
	elif ext in ["pdf", "epdf"]:
		additional_args.setdefault("dpi", 300)
		target_format = "png"
	elif ext == "jpg":
		target_format = "jpeg"
	else:
		target_format = "png"
	url = i.process_image(url, target_format, source_dir, **additional_args)

	# Srcset generation - multiple alternative sizes of images browsers can
	# choose from.
	ext = os.path.splitext(url)[1][1:]
	srcset = []
	srcset_allowed = "no-srcset" not in e.attributes or e.attributes["no-srcset"] in (False, "False")
	if ext in ["png", "jpeg"] and srcset_allowed:
		# This is inspired by @vojta001's blogPhoto shortcode he made for
		# patek.cz:
		# https://gitlab.com/patek-devs/patek.cz/-/blob/master/themes/patek/layouts/shortcodes/blogPhoto.html
		width, height = i.get_image_size(url, [i.public_dir])
		sizes = [(640, 360, 85), (1280, 720, 85), (1920, 1080, 90)] # (width, height, quality)
		for max_width, max_height, size_quality in sizes:
			if width <= max_width and height <= max_height:
				# The image already fits into this size: offer it as it is
				# and stop generating larger variants.
				srcset.append((f'{i.web_path}/{url}', f'{width}w'))
				break
			quality = size_quality if ext == "jpeg" else None
			resized = i.process_image(url, ext, i.public_dir, width=max_width, height=max_height, quality=quality)
			srcset.append((f'{i.web_path}/{resized}', f'{max_width}w'))

	url = i.web_path + "/" + url

	style_attr = " style=width:" + e.attributes["width"] if "width" in e.attributes else ""
	alt_text = e.title or html(e.content, k, i, 0, "")
	attributes = f'{style_attr} alt="{alt_text}"'
	if len(srcset) != 0:
		srcset_attr = ", ".join(" ".join(src) for src in srcset)
		return f'<a href="{url}"><img src="{srcset[-1][0]}" srcset="{srcset_attr}"{attributes}></a>'
	return f'<img src="{url}"{attributes}>'


def _quote_marks(style: str, quote_type: str):
	"""Return the `(opening, closing)` quote marks for an FQuoted element.

	Returns `None` for the "cs" and "en" styles when `quote_type` is neither
	"SingleQuote" nor "DoubleQuote"; the caller then falls back to the generic
	element template. Any other style always yields ASCII quotes.
	"""
	if style == "cs":
		return {"SingleQuote": ("‚", "‘"), "DoubleQuote": ("„", "“")}.get(quote_type)
	if style == "en":
		return {"SingleQuote": ("‘", "’"), "DoubleQuote": ("“", "”")}.get(quote_type)
	if quote_type == "SingleQuote":
		return ("'", "'")
	return ('"', '"')


def _ordered_list_attributes(e: Element) -> str:
	"""Build the `start` and `type` HTML attributes of an OrderedList."""
	attributes = ""
	if e.start and e.start != 1:
		attributes += f' start="{e.start}"'
	html_styles = {
		"Decimal": "1",
		"LowerRoman": "i",
		"UpperRoman": "I",
		"LowerAlpha": "a",
		"UpperAlpha": "A"
	}
	# Styles without an HTML counterpart (e. g. "DefaultStyle") simply get no
	# `type` attribute.
	if e.style in html_styles:
		attributes += f' type="{html_styles[e.style]}"'
	# FIXME: Delimiter styles
	return attributes


def html(e: Element, k: KatexClient, i: ImageProcessor, indent_level: int=0, indent_str: str="\t") -> str:
	"""Recursively render a panflute element (or ListContainer) as HTML.

	Parameters:
		e: the element or ListContainer of elements to render.
		k: KaTeX client; used to render Math (`k.render`) and to delimit
			Groups (`k.begingroup` / `k.endgroup`).
		i: image processor; used to convert images and to look up their size
			and their public/web paths.
		indent_level: how many times `indent_str` is prepended to block-level
			lines.
		indent_str: the string used for one level of indentation.

	Returns:
		The HTML source as a string; an empty string for elements whose `only`
		attribute is set to anything other than "html" and for raw elements
		of non-html formats.
	"""

	# `only` attribute which makes transformed elements appear only in tex
	# output or html output
	if hasattr(e, "attributes") and "only" in e.attributes and e.attributes["only"] != "html":
		return ""

	if isinstance(e, ListContainer):
		return ''.join([html(child, k, i, indent_level, indent_str) for child in e])

	indent = indent_level*indent_str

	# Bits from which the final element output is built at the end of this
	# function. Most elements override this by returning their own output.
	tag = e.tag.lower()
	attributes = ""
	content_foot = ""
	content_head = ""

	if isinstance(e, Str):
		# NOTE(review): the replaced character is presumably a non-breaking
		# space (U+00A0) — confirm against the file's bytes.
		# NOTE(review): the text is emitted without HTML escaping.
		return e.text.replace(" ", "&nbsp;")

	# Most elements fit the general template at the end of the function, just
	# need their html tag specified. (Header is handled further down, because
	# its tag depends on the level.)
	tags = {
		BulletList: "ul",
		Doc: "main",
		Emph: "em",
		Caption: "figcaption",
		Para: "p",
		LineBlock: "p",
		ListItem: "li",
		SmallCaps: "span",
		Strikeout: "strike",
		Subscript: "sub",
		Superscript: "sup",
		Underline: "u",
		TableBody: "tbody",
		TableHead: "thead",
		TableFoot: "tfoot",
		TableRow: "tr",
		TableCell: "td",
	}
	if type(e) in tags:
		tag = tags[type(e)]

	# These are also disabled in pandoc so they shouldn't appear in the AST at all.
	not_implemented = (Citation, Cite, Definition, DefinitionItem, DefinitionList)
	if type(e) in not_implemented:
		return f'<!-- FIXME: {type(e)}s not implemented -->'

	# Elements which can be represented by a simple string
	simple_string = {
		NBSP: "&nbsp;",
		Space: " ",
		Null: "",
		LineBreak: f"\n{indent}<br>\n{indent}",
		SoftBreak: " ",
		HorizontalRule: f"{indent}<hr>\n"
	}
	if type(e) in simple_string:
		return simple_string[type(e)]

	if hasattr(e, "identifier") and e.identifier != "":
		attributes += f' id="{e.identifier}"'

	if hasattr(e, "classes") and len(e.classes) != 0:
		attributes += f' class="{" ".join(e.classes)}"'

	# Attributes are only passed down manually, because we use them internally.
	# Maybe this should be a blocklist instead of an allowlist?

	# Overriding elements with their own returns
	if isinstance(e, CodeBlock):
		return _codeblock_html(e)

	if isinstance(e, Doc):
		# The stylesheet for highlighted code blocks goes to the start of the
		# document.
		highlight_style = e.get_metadata("highlight-style")
		if highlight_style is None:
			highlight_style = "default"
		formatter = HtmlFormatter(style=highlight_style)
		content_head = f'<style>{formatter.get_style_defs(".highlight")}</style>'

	if isinstance(e, Image):
		return _image_html(e, k, i)

	# See https://pandoc.org/MANUAL.html#line-blocks
	if isinstance(e, LineItem):
		return indent + html(e.content, k, i) + "<br>\n"

	# Footnotes are placed into parentheses. (And not footnotes (This is how KSP did it before me))
	if isinstance(e, Note):
		content_head = "("
		content_foot = ")"
		inline_content = inlinify(e)
		if inline_content is not None:
			return f' <note>({html(inline_content, k, i, 0, "")})</note>'

	if isinstance(e, FQuoted):
		marks = _quote_marks(e.style, e.quote_type)
		if marks is not None:
			opening, closing = marks
			return f'{opening}{html(e.content, k, i, 0, "")}{closing}'

	if isinstance(e, Group):
		k.begingroup()
		ret = html(e.content, k, i, indent_level, indent_str)
		k.endgroup()
		return ret

	if isinstance(e, Math):
		formats = {
			"DisplayMath": True,
			"InlineMath": False
		}
		return indent + k.render(e.text, {"displayMode": formats[e.format]})

	# Raw elements are passed through verbatim, but only the html ones.
	if isinstance(e, RawInline):
		return e.text if e.format == "html" else ""

	if isinstance(e, RawBlock):
		return f'{e.text}\n' if e.format == "html" else ""

	# Non-overriding elements, they get generated using the template at the end
	# of this function
	if isinstance(e, Header):
		tag = "h"+str(e.level)

	if isinstance(e, Figure):
		content_foot = html(e.caption, k, i, indent_level+1, indent_str)

	if isinstance(e, Link):
		tag = "a"
		attributes += f' href="{e.url}"'
		if e.title:
			attributes += f' title="{e.title}"'

	if isinstance(e, OrderedList):
		tag = "ol"
		attributes += _ordered_list_attributes(e)

	if isinstance(e, Table):
		content_head = html(e.head, k, i, indent_level+1, indent_str)
		content_foot = html(e.foot, k, i, indent_level+1, indent_str)
		# FIXME: Fancy pandoc tables, using colspec

	if isinstance(e, TableCell):
		if e.colspan != 1:
			attributes += f' colspan="{e.colspan}"'
		if e.rowspan != 1:
			attributes += f' rowspan="{e.rowspan}"'
		aligns = {
			"AlignLeft": "left",
			"AlignRight": "right",
			"AlignCenter": "center"
		}
		if e.alignment and e.alignment != "AlignDefault":
			attributes += f' style="text-align: {aligns[e.alignment]}"'

	# The default which all non-overriding elements get generated by. This
	# includes elements, which were not explicitly mentioned in this function,
	# e. g. Strong

	# Inline elements are put on a single line without any indentation.
	if isinstance(e, Inline):
		inner = html(e.content, k, i, 0, "") if hasattr(e, "_content") else ""
		text = e.text if hasattr(e, "text") else ""
		return f'<{tag}{attributes}>{content_head}{inner}{text}{content_foot}</{tag}>'

	# Block elements: opening tag, indented content and closing tag, each on
	# its own line. Plain has no tag of its own, only its content is emitted.
	out_str = ""
	if not isinstance(e, Plain):
		out_str += f"{indent}<{tag}{attributes}>\n"
	out_str += content_head
	if hasattr(e, "_content"):
		# Inline children don't indent themselves, so the line they start
		# has to be indented here.
		if len(e.content) > 0 and isinstance(e.content[0], Inline):
			out_str += (indent_level+1)*indent_str
		out_str += html(e.content, k, i, indent_level+1, indent_str)
	if hasattr(e, "text"):
		out_str += e.text
	out_str += f"{content_foot}\n"
	if not isinstance(e, Plain):
		out_str += f"{indent}</{tag}>\n"

	return out_str