formatitko/tex.py

from panflute import *
import os

from whitespace import NBSP
from transform import FQuoted
from util import inlinify
from context import Group
from images import ImageProcessor

# Heavily inspired by: git://git.ucw.cz/labsconf2022.git
# Elements that are also disabled in pandoc, so they shouldn't appear in the
# AST at all. If one does, a TeX comment is emitted in its place.
_NOT_IMPLEMENTED = (Citation, Cite, Definition, DefinitionItem, DefinitionList)

# Elements which can be represented by a simple string. Looked up by exact
# type (not isinstance).
_SIMPLE_STRINGS = {
	NBSP: "~",
	Space: " ",
	Null: "",
	LineBreak: "\\\\",
	SoftBreak: " ",
	HorizontalRule: "\\hr\n\n",
}

# Opening and closing quotation marks for FQuoted, by style and quote type.
_QUOTE_MARKS = {
	"cs": {"SingleQuote": ("‚", "‘"), "DoubleQuote": ("„", "“")},
	"en": {"SingleQuote": ("‘", "’"), "DoubleQuote": ("“", "”")},
}
# Marks used for every style not listed in _QUOTE_MARKS.
_PLAIN_QUOTE_MARKS = {"SingleQuote": ("'", "'"), "DoubleQuote": ('"', '"')}

# \halign preamble templates, one per pandoc column alignment.
_TABLE_ALIGNS = {
	"AlignLeft": "\\quad#\\quad\\hfil",
	"AlignRight": "\\quad\\hfil#\\quad",
	"AlignCenter": "\\quad\\hfil#\\hfil\\quad",
	"AlignDefault": "\\quad#\\quad\\hfil",
}

# Ordered list number style → the letter used in the \list argument.
_LIST_NUMBER_STYLES = {
	"DefaultStyle": "n",
	"Example": "n",
	"Decimal": "n",
	"LowerRoman": "i",
	"UpperRoman": "I",
	"LowerAlpha": "a",
	"UpperAlpha": "A",
}

# Ordered list delimiter → format template wrapped around the style letter.
_LIST_DELIMITERS = {
	"DefaultDelim": "{}.",
	"Period": "{}.",
	"OneParen": "{})",
	"TwoParens": "({})",
}


def _image_tex(e: Image, i: ImageProcessor) -> str:
	"""Run an Image through the image processor and return its TeX macro call."""
	# Attributes → image processor args
	additional_args = {
		key: int(e.attributes["file-" + key])
		for key in ("width", "height", "quality", "dpi")
		if "file-" + key in e.attributes
	}

	# The directory of the current file, will also look for images there.
	source_dir = e.attributes["source_dir"]

	ext = os.path.splitext(e.url)[1][1:]

	# Conversions between various formats. Even supported formats have to be
	# 'converted' because the processing contains finding and moving them to
	# the output directory.
	if ext in ("pdf", "png", "jpeg"):
		target = ext
	elif ext == "jpg":
		target = "jpeg"
	else:
		# svg, epdf and anything unrecognised is converted to pdf.
		target = "pdf"
	url = i.process_image(e.url, target, source_dir, relative=False, **additional_args)

	# An empty width attribute is treated the same as a missing one.
	width = e.attributes["width"] if "width" in e.attributes else ""
	if width:
		# 50% → 0.5\hsize
		if width.endswith("%"):
			width = str(int(width[:-1])/100) + "\\hsize"
		width = "width " + width
	return f'\\image{{{width}}}{{{url}}}'


def _quoted_tex(e: FQuoted, i: ImageProcessor) -> str:
	"""Wrap the content of an FQuoted in the quotation marks of its style."""
	marks = _QUOTE_MARKS.get(e.style, _PLAIN_QUOTE_MARKS)
	# Any quote type other than SingleQuote is rendered with double quotes.
	opening, closing = marks.get(e.quote_type, marks["DoubleQuote"])
	return f'{opening}{tex(e.content, i, 0, "")}{closing}'


def _table_tex(e: Table, i: ImageProcessor) -> str:
	"""Render a Table as an \\halign with rules between head, bodies and foot."""
	preamble = "&".join(_TABLE_ALIGNS[col[0]] for col in e.colspec)
	text = "\\strut" + preamble + "\\cr\n"
	text += tex(e.head.content, i, 0, "")
	text += "\\noalign{\\hrule}\n"
	# Rows of every table body, in order (a table may have several or none).
	text += "".join(tex(body.content, i, 0, "") for body in e.content)
	text += "\\noalign{\\hrule}\n"
	text += tex(e.foot.content, i, 0, "")
	# FIXME: Implement rowspan
	return "\\vskip1em\n\\halign{" + text + "}\n\\vskip1em\n"


def _ordered_list_arguments(e: OrderedList) -> str:
	"""Return the braced \\list argument describing numbering style and delimiter."""
	# Unknown styles/delimiters fall back to the default "n." numbering.
	style = _LIST_NUMBER_STYLES.get(e.style, "n")
	marker = _LIST_DELIMITERS.get(e.delimiter, "{}.").format(style)
	# FIXME: Starting number of list
	return f"{{{marker}}}"


def tex(e: Element, i: ImageProcessor, indent_level: int=0, indent_str: str="\t") -> str:
	"""Render a panflute element, or a list of elements, as TeX source.

	The element tree is walked recursively. Most element types produce their
	own output and return early. The remaining ones (lists, list items, links,
	groups and every element not mentioned explicitly, e. g. Strong, Emph...)
	only adjust the pieces of a generic template, a backslash followed by
	tag, arguments, opening, content and closing, which is filled in at the
	end of the function.

	Args:
		e: Element or ListContainer to render.
		i: Image processor used to find and convert files referenced by
			Image elements.
		indent_level: Nesting depth handed down the recursion. It is not
			currently used to format the output.
		indent_str: Indentation unit handed down the recursion. It is not
			currently used to format the output.

	Returns:
		The TeX source for `e`. An empty string is returned for elements
		whose `only` attribute is set to anything other than "tex".
	"""

	# `only` attribute which makes transformed elements appear only in tex
	# output or html output
	if hasattr(e, "attributes") and "only" in e.attributes and e.attributes["only"] != "tex":
		return ""

	if isinstance(e, ListContainer):
		return ''.join(tex(child, i, indent_level, indent_str) for child in e)

	# Pieces of the generic template used at the end of this function. Most
	# elements never get there because they return their own output.
	arguments = ""
	opening = "{"
	closing = "}"

	tag = e.tag.lower()
	if type(e) is Header:
		# Level 1 → hA, level 2 → hB, ...
		tag = "h" + chr(64 + e.level)

	if type(e) in _NOT_IMPLEMENTED:
		return f'% FIXME: {type(e)}s not implemented \n'

	if type(e) in _SIMPLE_STRINGS:
		return _SIMPLE_STRINGS[type(e)]

	# Simplest basic elements
	if isinstance(e, Str):
		# A no-break space inside the text becomes a TeX tie. It is written as
		# an escape so that it cannot be mistaken for an ordinary space.
		# NOTE(review): the original literal was visually indistinguishable
		# from a plain space; confirm it really was U+00A0.
		return e.text.replace("\u00a0", "~")

	if isinstance(e, Para):
		return tex(e.content, i, 0, "") + "\n\n"

	if isinstance(e, (Span, Plain)):
		return tex(e.content, i, 0, "")

	# Overriding elements with their own returns
	if isinstance(e, Image):
		return _image_tex(e, i)

	if isinstance(e, FQuoted):
		return _quoted_tex(e, i)

	if isinstance(e, Code):
		# A backtick would terminate the \verb`...` argument, so it is replaced
		# by the literal word "backtick".
		return f'\\verb`{e.text.replace("`", "backtick")}`'

	if isinstance(e, Figure):
		return f'\\figure{{{tex(e.content, i, indent_level+1, indent_str)}}}{{{tex(e.caption, i, indent_level+1, indent_str)}}}\n\n'

	# Figure caption; captions which cannot be inlinified fall through to the
	# generic template.
	if isinstance(e, Caption):
		if inlinify(e) is not None:
			return f'\\figcaption{{{tex(e.content, i, 0, "")}}}'

	if isinstance(e, Math):
		if e.format == "DisplayMath":
			return f'$${e.text}$$\n'
		return f'${e.text}$'

	# Footnote; notes which cannot be inlinified fall through to the generic
	# template with the `fn` tag.
	if isinstance(e, Note):
		tag = "fn"
		inlined = inlinify(e)
		if inlined is not None:
			return f'\\fn{{{tex(inlined, i, 0, "")}}}'

	if isinstance(e, Table):
		return _table_tex(e, i)

	if isinstance(e, TableRow):
		cells = [
			("\\multispan" + str(cell.colspan) + " " if cell.colspan > 1 else "") + tex(cell.content, i, 0, "")
			for cell in e.content
		]
		return "&".join(cells) + "\\cr\n"

	# Raw elements are passed through only when they are meant for TeX.
	if isinstance(e, RawInline):
		return e.text if e.format == "tex" else ""

	if isinstance(e, RawBlock):
		return f'{e.text}\n' if e.format == "tex" else ""

	# See https://pandoc.org/MANUAL.html#line-blocks
	if isinstance(e, LineBlock):
		return f'{tex(e.content, i, indent_level+1, indent_str)}\n'

	if isinstance(e, LineItem):
		# Every line except the last one ends with a forced line break.
		return tex(e.content, i, 0, "") + ("\\\\\n" if e.next else "\n")

	# Exact type check: only a plain Div is transparent, not its subclasses.
	if type(e) is Div:
		return f'{tex(e.content, i, indent_level+1, indent_str)}'

	if isinstance(e, Doc):
		return tex(e.content, i, indent_level, indent_str) + "\n\\bye" # Is having the \bye a bad idea here?

	# Non-overriding elements, they get generated using the template at the end
	# of this function
	if isinstance(e, BulletList):
		tag = "list"
		opening = ""
		arguments = "{o}"
		closing = "\\endlist"

	elif isinstance(e, OrderedList):
		tag = "list"
		opening = ""
		arguments = _ordered_list_arguments(e)
		closing = "\\endlist"

	elif isinstance(e, ListItem):
		tag = ":"

	elif isinstance(e, Link):
		# A link whose text is its own URL needs no separate URL argument.
		if len(e.content) == 1 and isinstance(e.content[0], Str) and e.content[0].text == e.url:
			tag = "url"
		else:
			tag = "linkurl"
			arguments = f'{{{e.url}}}'

	elif isinstance(e, Group):
		tag = "begingroup"
		opening = ""
		if "language" in e.metadata and e.metadata["language"] is not None:
			opening = "\\language" + e.metadata["language"]
		closing = "\\endgroup"

	# The default which all non-overriding elements get generated by. This
	# includes elements, which were not explicitly mentioned in this function,
	# e. g. Strong, Emph...
	if isinstance(e, Inline):
		content = tex(e.content, i, 0, "") if hasattr(e, "_content") else ""
		text = e.text if hasattr(e, "text") else ""
		return f'\\{tag}{arguments}{opening}{content}{text}{closing}'

	# Block elements get their opening and closing on separate lines.
	parts = [f"\\{tag}{arguments}{opening}\n"]
	if hasattr(e, "_content"):
		parts.append(tex(e.content, i, indent_level+1, indent_str))
	if hasattr(e, "text"):
		parts.append(e.text)
	parts.append(f"\n{closing}\n\n")
	return "".join(parts)