formatitko/src/formatitko/html_generator.py

from panflute import Cite, Emph, Image, LineBreak, Link, Math, Note, RawInline, SmallCaps, Str, Strikeout, Subscript, Superscript, Underline
from panflute import BulletList, Citation, CodeBlock, Definition, DefinitionItem, DefinitionList, Header, HorizontalRule, LineBlock, LineItem, ListItem, Null, OrderedList, Para, Plain, RawBlock, TableBody, TableFoot, TableHead
from panflute import TableRow, TableCell, Caption, Doc
from panflute import ListContainer, Element
from typing import Union, Dict

import re
import os
import io

from pygments import highlight
from pygments.lexers import get_lexer_by_name
from pygments.formatters import HtmlFormatter
from pygments.util import ClassNotFound

from .whitespace import NBSP
from .context import Group
from .output_generator import Output_generator
from .katex import KatexClient
from .images import ImageProcessor
from .util import inlinify

class HTML_generator(Output_generator):
	def __init__(self, output_file, katex_client: KatexClient, image_processor:ImageProcessor, *args, **kwargs):
		"""Remember the KaTeX client and image processor, then initialise the base generator.

		output_file and any additional positional or keyword arguments are
		forwarded unchanged to the parent class initialiser.
		"""
		# Both collaborators are stored before the parent initialiser runs.
		self.katex_client, self.image_processor = katex_client, image_processor
		super().__init__(output_file, *args, **kwargs)

	def generate(self, e: Union[Element, ListContainer]):
		if hasattr(e, "attributes") and "only" in e.attributes and e.attributes["only"] != "html":
			return
		super().generate(e)

	def htmlescapespecialchars(self, text: str) -> str:
		"""Escape the characters that are special in HTML text and attribute values.

		Replaces & < > " ' with &amp; &lt; &gt; &quot; &#39; respectively and
		returns the result. No-break spaces are deliberately left untouched
		(they are emitted as plain Unicode characters).
		"""
		# The ampersand must be handled first, otherwise the ampersands of the
		# entities inserted by the later replacements would be escaped again.
		replacements = (
			("&", "&amp;"),
			("<", "&lt;"),
			(">", "&gt;"),  # previously "&rt;", which is not an HTML entity
			("\"", "&quot;"),
			("'", "&#39;"),
		)
		for char, entity in replacements:
			text = text.replace(char, entity)
		return text
	
	def stag(self, tag: str, attributes: Dict[str,str]={}) -> str:
		words = [tag]
		for key, value in attributes.items():
			words.append(f"{key}=\"{self.htmlescapespecialchars(value)}\"")
		return "<" + " ".join(words) + ">"

	def etag(self, tag: str, attributes: Dict[str,str]={}) -> str:
		"""Return the closing tag for tag; attributes is accepted but not used."""
		return "".join(("</", tag, ">"))

	def ntag(self, tag: str, attributes: Dict[str,str]={}) -> str:
		return self.stag(tag, attributes)

	def tagname(self, e) -> str:
		"""Return the HTML tag name used for element e.

		Headers map to h<level>. Other element types are looked up in a fixed
		table; a type missing from the table yields its lower-cased class name.
		"""
		if isinstance(e, Header):
			return "h" + str(e.level)

		element_type = type(e)
		known_tags = {
			BulletList: "ul",
			Doc: "main",
			Emph: "em",
			Caption: "figcaption",
			Para: "p",
			LineBlock: "p",
			LineBreak: "br",
			Link: "a",
			ListItem: "li",
			HorizontalRule: "hr",
			OrderedList: "ol",
			SmallCaps: "span",
			Strikeout: "strike",
			Subscript: "sub",
			Superscript: "sup",
			Underline: "u",
			TableBody: "tbody",
			TableHead: "thead",
			TableFoot: "tfoot",
			TableRow: "tr",
			TableCell: "td",
		}
		if element_type in known_tags:
			return known_tags[element_type]
		# Fallback for types without an explicit entry.
		return element_type.__name__.lower()

	def common_attributes(self, e) -> Dict[str,str]:
		"""Collect the id and class HTML attributes of e.

		"id" is set from e.identifier when that attribute exists and is not the
		empty string; "class" is the space-joined e.classes when that attribute
		exists and is non-empty. Returns a new dict each call.
		"""
		result = {}
		has_identifier = hasattr(e, "identifier")
		if has_identifier and e.identifier != "":
			result["id"] = e.identifier
		has_classes = hasattr(e, "classes")
		if has_classes and len(e.classes) != 0:
			result["class"] = " ".join(e.classes)
		return result

	def generate_Str(self, e: Str):
		self.write(self.htmlescapespecialchars(e.text))

	def generate_NBSP(self, e: NBSP):
		self.write(" ") # Unicode no-break space, because we trust unicode?

	def generate_Null(self, e: Null):
		pass

	def generate_Doc(self, e: Doc):
		"""Write the pygments stylesheet followed by the document wrapped in <main>.

		The pygments style name is read from the "highlight-style" metadata
		entry; "default" is used when that entry is None.
		"""
		style_name = e.get_metadata("highlight-style")
		if style_name is None:
			style_name = "default"
		css_source = HtmlFormatter(style=style_name)

		self.writeln("<style>")
		self.writeraw(css_source.get_style_defs(".highlight"))
		self.writeln("</style>")

		self.generate_simple_block_tag(e, "main", self.common_attributes(e))

	def generate_CodeBlock(self, e: CodeBlock):
		"""Write a code block, syntax-highlighted with pygments when requested and possible.

		Highlighting is attempted only when e has classes and its "highlight"
		attribute equals True or 'True'. The first class that pygments accepts
		as a lexer name selects the lexer. When no class is accepted a warning
		is printed and the block is written as a plain <pre> instead, the same
		as when highlighting was not requested.
		"""
		lexer = None
		if e.classes and len(e.classes) > 0 and (e.attributes["highlight"] == True or e.attributes["highlight"] == 'True'):
			# Syntax highlighting using pygments
			for cl in e.classes:
				try:
					lexer = get_lexer_by_name(cl)
				except ClassNotFound:
					continue
				break
			else:
				print(f"WARN: Syntax highligher does not have lexer for element with these classes: {e.classes}")

		if lexer is not None:
			formatter = HtmlFormatter(style=e.attributes["style"])
			result = highlight(e.text, lexer, formatter)
			self.writeraw(result)
		else:
			# Either highlighting was not requested or no lexer was found;
			# calling highlight() without a lexer would fail.
			self.generate_raw_block_tag(e, "pre", self.common_attributes(e))

	def generate_Image(self, e: Image):
		"""Process the image file of e and write an <img> wrapped in a link to it.

		The file is handed to the image processor (possibly converting its
		format), optional smaller variants are produced for a srcset, and the
		resulting <img> tag is emitted inside a Link pointing at the processed
		file.
		"""
		url = e.url

		# Attributes → image processor args; each "file-*" attribute that is
		# present is converted to int and passed on as a keyword argument.
		additional_args = {}
		if "file-width" in e.attributes:
			additional_args["width"] = int(e.attributes["file-width"])
		if "file-height" in e.attributes:
			additional_args["height"] = int(e.attributes["file-height"])
		if "file-quality" in e.attributes:
			additional_args["quality"] = int(e.attributes["file-quality"])
		if "file-dpi" in e.attributes:
			additional_args["dpi"] = int(e.attributes["file-dpi"])

		# The directory of the current file, will also look for images there.
		# NOTE(review): indexed directly, so the attribute is assumed to always
		# be present — confirm where it is set.
		source_dir = e.attributes["source_dir"]

		# File extension of the source image without the leading dot.
		_, ext = os.path.splitext(url)
		ext = ext[1:]

		# Conversions between various formats.
		if ext in ["svg", "png", "jpeg", "gif"]:
			# Even supported elements have to be 'converted' because the
			# processing contains finding and moving them to the output
			# directory.
			url = self.image_processor.process_image(url, ext, source_dir, **additional_args)
		elif ext in ["pdf", "epdf"]:
			# PDFs are converted to png; 300 dpi is used unless "file-dpi" was given.
			if not "dpi" in additional_args:
				additional_args["dpi"] = 300
			url = self.image_processor.process_image(url, "png", source_dir, **additional_args)
		elif ext in ["jpg"]:
			# "jpg" is requested from the processor under the name "jpeg".
			url = self.image_processor.process_image(url, "jpeg", source_dir, **additional_args)
		else:
			# Any other extension is converted to png.
			url = self.image_processor.process_image(url, "png", source_dir, **additional_args)

		# Srcset generation - multiple alternative sizes of images browsers can
		# choose from. Done only for png/jpeg results and only when the
		# "no-srcset" attribute is absent, False or 'False'.
		_, ext = os.path.splitext(url)
		ext = ext[1:]
		srcset = []
		if ext in ["png", "jpeg"] and (not "no-srcset" in e.attributes or e.attributes["no-srcset"] == False or e.attributes["no-srcset"] == 'False'):
			# This is inspired by @vojta001's blogPhoto shortcode he made for
			# patek.cz:
			# https://gitlab.com/patek-devs/patek.cz/-/blob/master/themes/patek/layouts/shortcodes/blogPhoto.html
			width, height = self.image_processor.get_image_size(url, [self.image_processor.public_dir])
			sizes = [(640, 360, 85), (1280, 720, 85), (1920, 1080, 90)] # (width, height, quality)
			for size in sizes:
				# Once the image fits within a size, add the image itself as the
				# last candidate and stop; larger variants are not generated.
				if width <= size[0] and height <= size[1]:
					srcset.append((f'{self.image_processor.web_path}/{url}', f'{width}w'))
					break
				# The quality value is passed only for jpeg; other formats get None.
				quality = size[2] if ext == "jpeg" else None
				srcset.append((f'{self.image_processor.web_path}/{self.image_processor.process_image(url, ext, self.image_processor.public_dir, width=size[0], height=size[1], quality=quality)}', f'{size[0]}w'))

		# From here on url is the web path of the processed image.
		url = self.image_processor.web_path + "/" + url

		attributes = self.common_attributes(e)
		if "width" in e.attributes:
			attributes["width"] = e.attributes["width"]

		# Alt text: the title when there is one, otherwise the image's content
		# rendered by a separate HTML_generator into an in-memory buffer.
		if e.title:
			attributes["alt"] = e.title
		else:
			fake_out = io.StringIO()
			HTML_generator(fake_out, self.katex_client, self.image_processor).generate(e.content)
			attributes["alt"] = fake_out.getvalue()

		# With a srcset, src is its last (most recently appended) entry.
		if len(srcset) != 0:
			attributes["src"] = srcset[-1][0]
			attributes["srcset"] = ", ".join([" ".join(src) for src in srcset])
		else:
			attributes["src"] = url

		# The <img> is emitted as raw HTML inside a link to the processed image.
		img = RawInline(self.ntag("img", attributes))
		link = Link(img, url=url)

		self.generate(link)

	def generate_Group(self, e: Group):
		self.katex_client.begingroup()
		self.generate(e.content)
		self.katex_client.endgroup()

	def generate_Plain(self, e: Plain):
		self.generate(e.content)

	def generate_LineItem(self, e: LineItem):
		self.generate(e.content)
		self.write("<br>")
		self.endln()
	
	# Footnotes are placed into parentheses. (And not footnotes (This is how KSP did it before))
	def generate_Note(self, e: Note):
		"""Write a footnote in place, enclosed in parentheses inside its tag.

		When inlinify(e) returns something other than None, that result is
		written on the current line. Otherwise the note's content is generated
		as an indented block between the opening and closing lines.
		"""
		inline = inlinify(e)
		tag = self.tagname(e)
		if inline is not None:
			self.write(self.stag(tag)+" (")
			self.generate(inline)
			self.write(") "+self.etag(tag))
		else:
			self.writeln(self.stag(tag) + "(")
			self.iup()
			self.generate(e.content)
			self.ido()
			# The closing parenthesis belongs inside the tag, matching the
			# opening one above and the inline branch.
			self.writeln(")" + self.etag(tag))

	def generate_Math(self, e: Math):
		formats = {
			"DisplayMath": True,
			"InlineMath": False
		}
		self.writeln(self.katex_client.render(e.text, {"displayMode": formats[e.format]}))

	def generate_RawInline(self, e: RawInline):
		if e.format == "html":
			self.write(e.text)

	def generate_RawBlock(self, e: RawBlock):
		if e.format == "html":
			self.writeraw(e.text)

	def generate_Link(self, e: Link):
		attributes = {}
		attributes["href"] = e.url
		if e.title:
			attributes["title"] = e.title
		self.generate_simple_inline_tag(e, self.tagname(e), self.common_attributes(e) | attributes)

	def generate_OrderedList(self, e: OrderedList):
		attributes = {}
		if e.start and e.start != 1:
			attributes["start"] = str(e.start)
		html_styles = {
			"Decimal": "1",
			"LowerRoman": "i",
			"UpperRoman:": "I",
			"LowerAlpha": "a",
			"UpperAlpha": "A"
		}
		if e.style and e.style != "DefaultStyle":
			attributes["type"] = html_styles[e.style]
		# FIXME: Delimeter styles: 1. 1) (1)
		self.generate_simple_block_tag(e, self.tagname(e), self.common_attributes(e) | attributes)

	def generate_TableCell(self, e: TableCell):
		attributes = self.common_attributes(e)
		if e.colspan != 1:
			attributes["colspan"] = str(e.colspan)
		if e.rowspan != 1:
			attributes["rowspan"] = str(e.rowspan)
		aligns = {
			"AlignLeft": "left",
			"AlignRight": "right",
			"AlignCenter": "center"
		}
		if e.alignment and e.alignment != "AlignDefault":
			attributes["style"] = attributes.get("style", "")+f"text-align: {aligns[e.alignment]};"
		self.generate_simple_block_tag(e, self.tagname(e), attributes)

	# These are also disabled in pandoc so they shouldn't appear in the AST at all.
	def generate_Citation(self, e: Citation):
		self.writeln("<!-- FIXME: Citations not implemented -->")

	def generate_Cite(self, e: Cite):
		self.writeln("<!-- FIXME: Cites not implemented -->")

	def generate_Definition(self, e: Definition):
		self.writeln("<!-- FIXME: Definitions not implemented -->")

	def generate_DefinitionItem(self, e: DefinitionItem):
		self.writeln("<!-- FIXME: DefinitionItems not implemented -->")

	def generate_DefinitionList(self, e: DefinitionList):
		self.writeln("<!-- FIXME: DefinitionLists not implemented -->")
#13: Předěláno generování HTML na nový systém. Resolves #10. Pracuje se s tím podstatně líp. 11 months ago			`from panflute import Cite, Emph, Image, LineBreak, Link, Math, Note, RawInline, SmallCaps, Str, Strikeout, Subscript, Superscript, Underline`
			`from panflute import BulletList, Citation, CodeBlock, Definition, DefinitionItem, DefinitionList, Header, HorizontalRule, LineBlock, LineItem, ListItem, Null, OrderedList, Para, Plain, RawBlock, TableBody, TableFoot, TableHead`
			`from panflute import TableRow, TableCell, Caption, Doc`
			`from panflute import ListContainer, Element`
			`from typing import Union, Dict`

			`import re`
			`import os`
			`import io`

			`from pygments import highlight`
			`from pygments.lexers import get_lexer_by_name`
			`from pygments.formatters import HtmlFormatter`
			`from pygments.util import ClassNotFound`

			`from .whitespace import NBSP`
			`from .context import Group`
			`from .output_generator import Output_generator`
			`from .katex import KatexClient`
			`from .images import ImageProcessor`
			`from .util import inlinify`

			`class HTML_generator(Output_generator):`
			`def __init__(self, output_file, katex_client: KatexClient, image_processor:ImageProcessor, args, *kwargs):`
			`self.katex_client = katex_client`
			`self.image_processor = image_processor`
			`super().__init__(output_file, args, *kwargs)`

			`def generate(self, e: Union[Element, ListContainer]):`
			`if hasattr(e, "attributes") and "only" in e.attributes and e.attributes["only"] != "html":`
			`return`
			`super().generate(e)`

			`def htmlescapespecialchars(self, text: str) -> str:`
			`text = re.sub(re.compile(r"&"), "&", text)`
			`text = re.sub(re.compile(r"<"), "<", text)`
			`text = re.sub(re.compile(r">"), "&rt;", text)`
			`text = re.sub(re.compile(r"\""), """, text)`
			`text = re.sub(re.compile(r"'"), "'", text)`
			`# text = re.sub(re.compile(r" '), " ", text) # Don't replace no-break spaces with HTML escapes, because we trust unicode?`
			`return text`

			`def stag(self, tag: str, attributes: Dict[str,str]={}) -> str:`
			`words = [tag]`
			`for key, value in attributes.items():`
			`words.append(f"{key}=\"{self.htmlescapespecialchars(value)}\"")`
			`return "<" + " ".join(words) + ">"`

			`def etag(self, tag: str, attributes: Dict[str,str]={}) -> str:`
			`return "</" + tag + ">"`

			`def ntag(self, tag: str, attributes: Dict[str,str]={}) -> str:`
			`return self.stag(tag, attributes)`

			`def tagname(self, e) -> str:`
			`if isinstance(e, Header):`
			`return "h" + str(e.level)`
			`try:`
			`return {`
			`BulletList: "ul",`
			`Doc: "main",`
			`Emph: "em",`
			`Caption: "figcaption",`
			`Para: "p",`
			`LineBlock: "p",`
			`LineBreak: "br",`
			`Link: "a",`
			`ListItem: "li",`
			`HorizontalRule: "hr",`
			`OrderedList: "ol",`
			`SmallCaps: "span",`
			`Strikeout: "strike",`
			`Subscript: "sub",`
			`Superscript: "sup",`
			`Underline: "u",`
			`TableBody: "tbody",`
			`TableHead: "thead",`
			`TableFoot: "tfoot",`
			`TableRow: "tr",`
			`TableCell: "td",`
			`}[type(e)]`
			`except KeyError:`
			`return type(e).__name__.lower()`

			`def common_attributes(self, e) -> Dict[str,str]:`
			`attributes = {}`
			`if hasattr(e, "identifier") and e.identifier != "":`
			`attributes["id"] = e.identifier`
			`if hasattr(e, "classes") and len(e.classes) != 0:`
			`attributes["class"] = " ".join(e.classes)`
			`return attributes`

			`def generate_Str(self, e: Str):`
			`self.write(self.htmlescapespecialchars(e.text))`

			`def generate_NBSP(self, e: NBSP):`
			`self.write(" ") # Unicode no-break space, because we trust unicode?`

			`def generate_Null(self, e: Null):`
			`pass`

			`def generate_Doc(self, e: Doc):`
			`formatter = HtmlFormatter(style=e.get_metadata("highlight-style") if e.get_metadata("highlight-style") is not None else "default")`
			`self.writeln("<style>")`
			`self.writeraw(formatter.get_style_defs(".highlight"))`
			`self.writeln("</style>")`
			`self.generate_simple_block_tag(e, "main", self.common_attributes(e))`

			`def generate_CodeBlock(self, e: CodeBlock):`
			`if e.classes and len(e.classes) > 0 and (e.attributes["highlight"] == True or e.attributes["highlight"] == 'True'):`
			`# Syntax highlighting using pygments`
			`for cl in e.classes:`
			`try:`
			`lexer = get_lexer_by_name(cl)`
			`except ClassNotFound:`
			`continue`
			`break`
			`else:`
			`lexer = None`
			`print(f"WARN: Syntax highligher does not have lexer for element with these classes: {e.classes}")`
			`formatter = HtmlFormatter(style=e.attributes["style"])`
			`result = highlight(e.text, lexer, formatter)`
			`self.writeraw(result)`
			`else:`
			`self.generate_raw_block_tag(e, "pre", self.common_attributes(e))`

			`def generate_Image(self, e: Image):`
			`url = e.url`

			`# Attributes → image processor args`
			`additional_args = {}`
			`if "file-width" in e.attributes:`
			`additional_args["width"] = int(e.attributes["file-width"])`
			`if "file-height" in e.attributes:`
			`additional_args["height"] = int(e.attributes["file-height"])`
			`if "file-quality" in e.attributes:`
			`additional_args["quality"] = int(e.attributes["file-quality"])`
			`if "file-dpi" in e.attributes:`
			`additional_args["dpi"] = int(e.attributes["file-dpi"])`

			`# The directory of the current file, will also look for images there.`
			`source_dir = e.attributes["source_dir"]`

			`_, ext = os.path.splitext(url)`
			`ext = ext[1:]`

			`# Conversions between various formats.`
			`if ext in ["svg", "png", "jpeg", "gif"]:`
			`# Even supported elements have to be 'converted' because the`
			`# processing contains finding and moving them to the output`
			`# directory.`
			`url = self.image_processor.process_image(url, ext, source_dir, **additional_args)`
			`elif ext in ["pdf", "epdf"]:`
			`if not "dpi" in additional_args:`
			`additional_args["dpi"] = 300`
			`url = self.image_processor.process_image(url, "png", source_dir, **additional_args)`
			`elif ext in ["jpg"]:`
			`url = self.image_processor.process_image(url, "jpeg", source_dir, **additional_args)`
			`else:`
			`url = self.image_processor.process_image(url, "png", source_dir, **additional_args)`

			`# Srcset generation - multiple alternative sizes of images browsers can`
			`# choose from.`
			`_, ext = os.path.splitext(url)`
			`ext = ext[1:]`
			`srcset = []`
			`if ext in ["png", "jpeg"] and (not "no-srcset" in e.attributes or e.attributes["no-srcset"] == False or e.attributes["no-srcset"] == 'False'):`
			`# This is inspired by @vojta001's blogPhoto shortcode he made for`
			`# patek.cz:`
			`# https://gitlab.com/patek-devs/patek.cz/-/blob/master/themes/patek/layouts/shortcodes/blogPhoto.html`
			`width, height = self.image_processor.get_image_size(url, [self.image_processor.public_dir])`
			`sizes = [(640, 360, 85), (1280, 720, 85), (1920, 1080, 90)] # (widht, height, quality)`
			`for size in sizes:`
			`if width <= size[0] and height <= size[1]:`
			`srcset.append((f'{self.image_processor.web_path}/{url}', f'{width}w'))`
			`break`
			`quality = size[2] if ext == "jpeg" else None`
			`srcset.append((f'{self.image_processor.web_path}/{self.image_processor.process_image(url, ext, self.image_processor.public_dir, width=size[0], height=size[1], quality=quality)}', f'{size[0]}w'))`

			`url = self.image_processor.web_path + "/" + url`

			`attributes = self.common_attributes(e)`
			`if "width" in e.attributes:`
			`attributes["width"] = e.attributes["width"]`

			`if e.title:`
			`attributes["alt"] = e.title`
			`else:`
			`fake_out = io.StringIO()`
			`HTML_generator(fake_out, self.katex_client, self.image_processor).generate(e.content)`
			`attributes["alt"] = fake_out.getvalue()`

			`if len(srcset) != 0:`
			`attributes["src"] = srcset[-1][0]`
			`attributes["srcset"] = ", ".join([" ".join(src) for src in srcset])`
			`else:`
			`attributes["src"] = url`

			`img = RawInline(self.ntag("img", attributes))`
			`link = Link(img, url=url)`

			`self.generate(link)`

			`def generate_Group(self, e: Group):`
			`self.katex_client.begingroup()`
			`self.generate(e.content)`
			`self.katex_client.endgroup()`

			`def generate_Plain(self, e: Plain):`
			`self.generate(e.content)`

			`def generate_LineItem(self, e: LineItem):`
			`self.generate(e.content)`
			`self.write("<br>")`
			`self.endln()`

			`# Footnotes are placed into parentheses. (And not footnotes (This is how KSP did it before))`
			`def generate_Note(self, e: Note):`
			`inline = inlinify(e)`
			`tag = self.tagname(e)`
			`if inline is not None:`
			`self.write(self.stag(tag)+" (")`
			`self.generate(inline)`
			`self.write(") "+self.etag(tag))`
			`else:`
			`self.writeln(self.stag(tag) + "(")`
			`self.iup()`
			`self.generate(e.content)`
			`self.ido()`
			`self.writeln(self.etag(tag) + ")")`

			`def generate_Math(self, e: Math):`
			`formats = {`
			`"DisplayMath": True,`
			`"InlineMath": False`
			`}`
			`self.writeln(self.katex_client.render(e.text, {"displayMode": formats[e.format]}))`

			`def generate_RawInline(self, e: RawInline):`
			`if e.format == "html":`
			`self.write(e.text)`

			`def generate_RawBlock(self, e: RawBlock):`
			`if e.format == "html":`
			`self.writeraw(e.text)`

			`def generate_Link(self, e: Link):`
			`attributes = {}`
			`attributes["href"] = e.url`
			`if e.title:`
			`attributes["title"] = e.title`
			`self.generate_simple_inline_tag(e, self.tagname(e), self.common_attributes(e) \| attributes)`

			`def generate_OrderedList(self, e: OrderedList):`
			`attributes = {}`
			`if e.start and e.start != 1:`
			`attributes["start"] = str(e.start)`
			`html_styles = {`
			`"Decimal": "1",`
			`"LowerRoman": "i",`
			`"UpperRoman:": "I",`
			`"LowerAlpha": "a",`
			`"UpperAlpha": "A"`
			`}`
			`if e.style and e.style != "DefaultStyle":`
			`attributes["type"] = html_styles[e.style]`
			`# FIXME: Delimeter styles: 1. 1) (1)`
			`self.generate_simple_block_tag(e, self.tagname(e), self.common_attributes(e) \| attributes)`

			`def generate_TableCell(self, e: TableCell):`
			`attributes = self.common_attributes(e)`
			`if e.colspan != 1:`
			`attributes["colspan"] = str(e.colspan)`
			`if e.rowspan != 1:`
			`attributes["rowspan"] = str(e.rowspan)`
			`aligns = {`
			`"AlignLeft": "left",`
			`"AlignRight": "right",`
			`"AlignCenter": "center"`
			`}`
			`if e.alignment and e.alignment != "AlignDefault":`
			`attributes["style"] = attributes.get("style", "")+f"text-align: {aligns[e.alignment]};"`
			`self.generate_simple_block_tag(e, self.tagname(e), attributes)`

			`# These are also disabled in pandoc so they shouldn't appear in the AST at all.`
			`def generate_Citation(self, e: Citation):`
			`self.writeln("<!-- FIXME: Citations not implemented -->")`

			`def generate_Cite(self, e: Cite):`
			`self.writeln("<!-- FIXME: Cites not implemented -->")`

			`def generate_Definition(self, e: Definition):`
			`self.writeln("<!-- FIXME: Definitions not implemented -->")`

			`def generate_DefinitionItem(self, e: DefinitionItem):`
			`self.writeln("<!-- FIXME: DefinitionItems not implemented -->")`

			`def generate_DefinitionList(self, e: DefinitionList):`
			`self.writeln("<!-- FIXME: DefinitionLists not implemented -->")`