# formatitko/html.py

from panflute import *
from pygments import highlight
from pygments.lexers import get_lexer_by_name
from pygments.formatters import HtmlFormatter
from pygments.util import ClassNotFound

from whitespace import NBSP
from transform import FQuoted
from katex import KatexClient

def _escape(text: str, quote: bool=False) -> str:
	"""Escape ``&``, ``<`` and ``>`` (and ``"`` when *quote* is set) for HTML.

	Hand-rolled rather than taken from the standard library's ``html`` module
	because this module defines its own function named ``html``, so that name
	cannot refer to the standard-library module here.
	"""
	# "&" has to be replaced first, otherwise the entities produced by the
	# later replacements would get escaped a second time.
	text = text.replace("&", "&amp;").replace("<", "&lt;").replace(">", "&gt;")
	if quote:
		text = text.replace('"', "&quot;")
	return text


def _find_lexer(classes):
	"""Return a pygments lexer for the first class that names one, else None."""
	for cl in classes:
		try:
			return get_lexer_by_name(cl)
		except ClassNotFound:
			continue
	return None


def html(e: Element, k: KatexClient, indent_level: int=0, indent_str: str="\t") -> str:
	"""Render a panflute element, or a ListContainer of elements, as HTML.

	The function recurses over the element tree. Inline elements are rendered
	on a single line; every other element is wrapped in its own opening and
	closing tag lines, indented by ``indent_level`` copies of ``indent_str``.

	Literal text (``Str``, code, attribute values) is HTML-escaped. Only
	``RawInline``/``RawBlock`` elements whose format is ``"html"`` are passed
	through verbatim; raw elements of any other format render as ``""``.

	Args:
		e: The element (or ListContainer) to render.
		k: KaTeX client; only ``k.render(text, options)`` is called, for
			``Math`` elements.
		indent_level: Nesting depth used for indentation of block output.
		indent_str: String repeated ``indent_level`` times to form the indent.

	Returns:
		The HTML source for ``e`` and everything below it.
	"""
	if isinstance(e, ListContainer):
		return ''.join(html(child, k, indent_level, indent_str) for child in e)

	if isinstance(e, Str):
		# U+00A0 (no-break space) is written as an escape so that it cannot be
		# mistaken for an ordinary space when reading this file.
		return _escape(e.text).replace("\u00a0", "&nbsp;")

	indent = indent_level*indent_str
	content_head = ""
	content_foot = ""

	# Default tag is the lowercased element name; the table overrides it for
	# exact types. Header, Caption and TableCell are handled by isinstance
	# branches further down.
	tags = {
		BulletList: "ul",
		Doc: "main",
		Emph: "em",
		Para: "p",
		LineBlock: "p",
		ListItem: "li",
		SmallCaps: "span",
		Strikeout: "strike",
		Subscript: "sub",
		Superscript: "sup",
		Underline: "u",
		TableBody: "tbody",
		TableHead: "thead",
		TableFoot: "tfoot",
		TableRow: "tr",
	}
	tag = tags.get(type(e), e.tag.lower())

	if type(e) in (Citation, Cite, Definition, DefinitionItem, DefinitionList):
		return f'<!-- FIXME: {type(e)}s not implemented -->'

	# Elements that render to a fixed string (exact type match only).
	simple_string = {
		NBSP: "&nbsp;",
		Space: " ",
		Null: "",
		LineBreak: f"\n{indent}<br>\n{indent}",
		SoftBreak: " ",
		HorizontalRule: f"{indent}<hr>\n"
	}
	if type(e) in simple_string:
		return simple_string[type(e)]

	attributes = ""
	if hasattr(e, "identifier") and e.identifier != "":
		attributes += f' id="{_escape(e.identifier, quote=True)}"'

	if hasattr(e, "classes") and len(e.classes) != 0:
		class_list = _escape(" ".join(e.classes), quote=True)
		attributes += f' class="{class_list}"'

	if isinstance(e, CodeBlock):
		lexer = None
		# .get(): a code block without a "highlight" attribute is simply not
		# highlighted instead of raising KeyError.
		if e.attributes.get("highlight") in (True, "True"):
			lexer = _find_lexer(e.classes)
		if lexer is None:
			# Highlighting not requested, or none of the classes names a
			# lexer pygments knows: emit the code as escaped plain text.
			return f'<pre>{_escape(e.text)}</pre>'
		formatter = HtmlFormatter(style=e.attributes.get("style", "default"))
		result = highlight(e.text, lexer, formatter)
		style = formatter.get_style_defs(".highlight")
		return f'<style>{style}</style>{result}'

	if isinstance(e, Figure):
		content_foot = html(e.caption, k, indent_level+1, indent_str)

	if isinstance(e, Caption):
		tag = "figcaption"

	if isinstance(e, Image):
		# TODO: Image processing
		if e.title:
			alt = _escape(e.title, quote=True)
		else:
			# The rendered content already has its text escaped; only a bare
			# double quote could still terminate the attribute early.
			alt = html(e.content, k, 0, "").replace('"', "&quot;")
		return f'<img src="{_escape(e.url, quote=True)}" alt="{alt}">'

	if isinstance(e, Header):
		tag = "h"+str(e.level)

	if isinstance(e, Link):
		tag = "a"
		attributes += f' href="{_escape(e.url, quote=True)}"'
		if e.title:
			attributes += f' title="{_escape(e.title, quote=True)}"'

	if isinstance(e, LineItem):
		return indent + html(e.content, k) + "<br>\n"

	if isinstance(e, Note):
		content_head = "("
		content_foot = ")"
		# A note holding a single paragraph is flattened to its inline content.
		if len(e.content) == 1 and isinstance(e.content[0], Para):
			return f' <note>({html(e.content[0].content, k, 0, "")})</note>'

	if isinstance(e, OrderedList):
		tag = "ol"
		if e.start and e.start != 1:
			attributes += f' start="{e.start}"'
		html_styles = {
			"Decimal": "1",
			"LowerRoman": "i",
			"UpperRoman": "I",
			"LowerAlpha": "a",
			"UpperAlpha": "A"
		}
		# Styles missing from the table (e.g. "DefaultStyle") get no type
		# attribute rather than raising KeyError.
		list_type = html_styles.get(e.style)
		if list_type is not None:
			attributes += f' type="{list_type}"'
		# FIXME: Delimiter styles

	if isinstance(e, Table):
		content_head = html(e.head, k, indent_level+1, indent_str)
		content_foot = html(e.foot, k, indent_level+1, indent_str)
		# FIXME: Fancy pandoc tables, using colspec

	if isinstance(e, TableCell):
		tag = "td"
		if e.colspan != 1:
			attributes += f' colspan="{e.colspan}"'
		if e.rowspan != 1:
			attributes += f' rowspan="{e.rowspan}"'
		aligns = {
			"AlignLeft": "left",
			"AlignRight": "right",
			"AlignCenter": "center"
		}
		if e.alignment and e.alignment != "AlignDefault":
			attributes += f' style="text-align: {aligns[e.alignment]}"'

	if isinstance(e, FQuoted):
		# (opening, closing) marks per language style and quote type.
		quote_marks = {
			"cs": {"SingleQuote": ("‚", "‘"), "DoubleQuote": ("„", "“")},
			"en": {"SingleQuote": ("‘", "’"), "DoubleQuote": ("“", "”")},
		}
		if e.style in quote_marks:
			# An unrecognised quote type yields None here and the element
			# falls through to the generic rendering below.
			marks = quote_marks[e.style].get(e.quote_type)
		elif e.quote_type == "SingleQuote":
			marks = ("'", "'")
		else:
			marks = ('"', '"')
		if marks is not None:
			return f'{marks[0]}{html(e.content, k, 0, "")}{marks[1]}'

	if isinstance(e, Math):
		formats = {
			"DisplayMath": True,
			"InlineMath": False
		}
		# FIXME: Currently, all bits of math are isolated from each other, this
		# means that \defs and alike work only inside a single math block
		# and are forgotten in the next one.
		return indent + k.render(e.text, {"displayMode": formats[e.format]})

	if isinstance(e, RawInline):
		return e.text if e.format == "html" else ""

	if isinstance(e, RawBlock):
		return f'{e.text}\n' if e.format == "html" else ""

	if isinstance(e, Inline):
		if hasattr(e, "text") and not hasattr(e, "_content"):
			# Inline elements that carry plain text instead of child elements
			# (presumably inline Code) have no content to recurse into.
			inner = _escape(e.text)
		else:
			inner = html(e.content, k, 0, '')
		return f"<{tag}{attributes}>{content_head}{inner}{content_foot}</{tag}>"

	# Everything else is laid out as a block: opening tag line, indented
	# children, closing tag line. Plain contributes no wrapper tag of its own.
	wrap = not isinstance(e, Plain)
	pieces = []
	if wrap:
		pieces.append(f"{indent}<{tag}{attributes}>\n")
	pieces.append(content_head)
	if hasattr(e, "_content"):
		# Inline children do not indent themselves, so indent their line here.
		if len(e.content) > 0 and isinstance(e.content[0], Inline):
			pieces.append((indent_level+1)*indent_str)
		pieces.append(html(e.content, k, indent_level+1, indent_str))
	if hasattr(e, "text"):
		pieces.append(_escape(e.text))
	pieces.append(f"{content_foot}\n")
	if wrap:
		pieces.append(f"{indent}</{tag}>\n")

	return "".join(pieces)