formatitko/html.py

212 lines
5.6 KiB
Python
Raw Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

from panflute import *
from pygments import highlight
from pygments.lexers import get_lexer_by_name
from pygments.formatters import HtmlFormatter
from pygments.util import ClassNotFound
from whitespace import NBSP
from transform import FQuoted
from katex import KatexClient
def html(e: Element, k: KatexClient, indent_level: int=0, indent_str: str="\t") -> str:
if isinstance(e, ListContainer):
return ''.join([html(child, k, indent_level, indent_str) for child in e])
tag = e.tag.lower()
attributes = ""
content_foot = ""
content_head = ""
if isinstance(e, Str):
return e.text.replace(" ", " ")
tags = {
BulletList: "ul",
Doc: "main",
Emph: "em",
Caption: "figcaption",
Para: "p",
Header: "h"+str(e.level) if hasattr(e, "level") else "",
LineBlock: "p",
ListItem: "li",
SmallCaps: "span",
Strikeout: "strike",
Subscript: "sub",
Superscript: "sup",
Underline: "u",
TableBody: "tbody",
TableHead: "thead",
TableFoot: "tfoot",
TableRow: "tr",
TableCell: "td",
}
if type(e) in tags:
tag = tags[type(e)]
not_implemented = {
Citation: True,
Cite: True,
Definition: True,
DefinitionItem: True,
DefinitionList: True
}
if type(e) in not_implemented:
return f'<!-- FIXME: {type(e)}s not implemented -->'
simple_string = {
NBSP: "&nbsp;",
Space: " ",
Null: "",
LineBreak: f"\n{indent_level*indent_str}<br>\n{indent_level*indent_str}",
SoftBreak: f" ",
HorizontalRule: f"{indent_level*indent_str}<hr>\n"
}
if type(e) in simple_string:
return simple_string[type(e)]
if hasattr(e, "identifier") and e.identifier != "":
attributes += f' id="{e.identifier}"'
if hasattr(e, "classes") and len(e.classes) != 0:
attributes += f' class="{" ".join(e.classes)}"'
if isinstance(e, CodeBlock):
if e.attributes["highlight"] == True or e.attributes["highlight"] == 'True':
for cl in e.classes:
try:
lexer = get_lexer_by_name(cl)
except ClassNotFound:
continue
break
formatter = HtmlFormatter(style=e.attributes["style"])
result = highlight(e.text, lexer, formatter)
style = formatter.get_style_defs(".highlight")
return f'<style>{style}</style>{result}'
else:
return f'<pre>{e.text}</pre>'
if isinstance(e, Figure):
content_foot = html(e.caption, k, indent_level+1, indent_str)
if isinstance(e, Caption):
tag = "figcaption"
if isinstance(e, Image):
# TODO: Image processing
return f'<img src="{e.url}" alt="{e.title or html(e.content, k, 0, "")}">'
if isinstance(e, Header):
tag = "h"+str(e.level)
if isinstance(e, Link):
tag = "a"
attributes += f' href="{e.url}"'
if e.title:
attributes += f' title="{e.title}"'
if isinstance(e, LineItem):
return indent_level*indent_str + html(e.content, k) + "<br>\n"
if isinstance(e, Note):
content_head = "("
content_foot = ")"
if len(e.content) == 1 and isinstance(e.content[0], Para):
return f' <note>({html(e.content[0].content, k, 0, "")})</note>'
if isinstance(e, OrderedList):
tag = "ol"
if e.start and e.start != 1:
attributes += f' start="{e.start}"'
html_styles = {
"Decimal": "1",
"LowerRoman": "i",
"UpperRoman:": "I",
"LowerAlpha": "a",
"UpperAlpha": "A"
}
if e.style and e.style != "DefaultStyle":
attributes += f' type="{html_styles[e.style]}"'
# FIXME: Delimeter styles
if isinstance(e, Table):
content_head = html(e.head, k, indent_level+1, indent_str)
content_foot = html(e.foot, k, indent_level+1, indent_str)
# FIXME: Fancy pandoc tables, using colspec
if isinstance(e, TableCell):
tag = "td"
if e.colspan != 1:
attributes += f' colspan="{e.colspan}"'
if e.rowspan != 1:
attributes += f' rowspan="{e.rowspan}"'
aligns = {
"AlignLeft": "left",
"AlignRight": "right",
"AlignCenter": "center"
}
if e.alignment and e.alignment != "AlignDefault":
attributes += f' style="text-align: {aligns[e.alignment]}"'
if isinstance(e, FQuoted):
if e.style == "cs":
if e.quote_type == "SingleQuote":
return f'{html(e.content, k, 0, "")}'
elif e.quote_type == "DoubleQuote":
return f'{html(e.content, k, 0, "")}'
elif e.style == "en":
if e.quote_type == "SingleQuote":
return f'{html(e.content, k, 0, "")}'
elif e.quote_type == "DoubleQuote":
return f'{html(e.content, k, 0, "")}'
else:
if e.quote_type == "SingleQuote":
return f'\'{html(e.content, k, 0, "")}\''
elif e.quote_type == "DoubleQuote":
return f'"{html(e.content, k, 0, "")}"'
else:
return f'"{html(e.content, k, 0, "")}"'
if isinstance(e, Math):
formats = {
"DisplayMath": True,
"InlineMath": False
}
# FIXME: Currently, all bits of math are isolated from each other, this
# means that \defs and and alike work only inside a single math block
# and are forgotten in the next one.
return indent_level*indent_str + k.render(e.text, {"displayMode": formats[e.format]})
if isinstance(e, RawInline):
if e.format == "html":
return e.text
else:
return ""
if isinstance(e, RawBlock):
if e.format == "html":
return f'{e.text}\n'
else:
return ""
if isinstance(e, Inline):
return f"<{tag}{attributes}>{content_head}{html(e.content, k, 0, '')}{content_foot}</{tag}>"
out_str = ""
if not isinstance(e, Plain):
out_str += f"{indent_level*indent_str}<{tag}{attributes}>\n"
out_str += content_head
if hasattr(e, "_content"):
if len(e.content) > 0 and isinstance(e.content[0], Inline):
out_str += (indent_level+1)*indent_str
out_str += html(e.content, k, indent_level+1, indent_str)
if hasattr(e, "text"):
out_str += e.text
out_str += f"{content_foot}\n"
if not isinstance(e, Plain):
out_str += f"{indent_level*indent_str}</{tag}>\n"
return out_str