from panflute import *
from pygments import highlight
from pygments.lexers import get_lexer_by_name
from pygments.formatters import HtmlFormatter
from pygments.util import ClassNotFound
import os

from whitespace import NBSP
from transform import FQuoted
from katex import KatexClient
from util import inlinify
from group import Group
from images import ImageProcessor

# NOTE(review): the copy of this file that was reviewed had its newlines
# collapsed and the HTML markup inside several string literals stripped (the
# Image branch was left with an unterminated f-string).  The literals marked
# "reconstructed" below were rebuilt from the surrounding code -- please
# confirm them against version control.

# Element types that fit the generic "<tag>content</tag>" template at the end
# of html() and only need their tag name chosen.  Looked up by exact type.
_SIMPLE_TAGS = {
    BulletList: "ul",
    Doc: "main",
    Emph: "em",
    Caption: "figcaption",
    Para: "p",
    LineBlock: "p",
    ListItem: "li",
    SmallCaps: "span",
    Strikeout: "strike",
    Subscript: "sub",
    Superscript: "sup",
    Underline: "u",
    TableBody: "tbody",
    TableHead: "thead",
    TableFoot: "tfoot",
    TableRow: "tr",
    TableCell: "td",
}

# Element types this renderer has no output for.  Looked up by exact type.
_NOT_IMPLEMENTED = frozenset(
    {Citation, Cite, Definition, DefinitionItem, DefinitionList}
)

# Ordered-list style name -> value of the HTML `type` attribute of <ol>.
_ORDERED_LIST_TYPES = {
    "Decimal": "1",
    "LowerRoman": "i",
    "UpperRoman": "I",  # was "UpperRoman:" (stray colon) -> KeyError
    "LowerAlpha": "a",
    "UpperAlpha": "A",
}

# Table-cell alignment name -> CSS `text-align` value.
_CELL_ALIGNMENTS = {
    "AlignLeft": "left",
    "AlignRight": "right",
    "AlignCenter": "center",
}

# (FQuoted.style, FQuoted.quote_type) -> (opening mark, closing mark).
_QUOTE_MARKS = {
    ("cs", "SingleQuote"): ("‚", "‘"),
    ("cs", "DoubleQuote"): ("„", "“"),
    ("en", "SingleQuote"): ("‘", "’"),
    ("en", "DoubleQuote"): ("“", "”"),
}


def _render_code_block(e) -> str:
    """Render a CodeBlock, highlighted with Pygments when requested.

    Highlighting is used when the `highlight` attribute is True / "True" and
    one of the block's classes names a Pygments lexer; otherwise the text is
    emitted in a plain <pre>.
    """
    if e.attributes.get("highlight") in (True, "True"):
        lexer = None
        for class_name in e.classes:
            try:
                lexer = get_lexer_by_name(class_name)
            except ClassNotFound:
                continue
            break
        # Previously `lexer` was left unbound when no class matched, which
        # crashed in highlight(); now we fall through to the plain rendering.
        if lexer is not None:
            formatter = HtmlFormatter(style=e.attributes.get("style", "default"))
            result = highlight(e.text, lexer, formatter)
            style = formatter.get_style_defs(".highlight")
            # Reconstructed: `style` was computed but unused in the stripped
            # copy, so it presumably accompanied the markup in a <style> tag.
            return f'<style>{style}</style>{result}'
    # Reconstructed <pre> wrapper.
    # NOTE(review): e.text is not HTML-escaped here, so code containing "<"
    # or "&" ends up as raw markup -- consider escaping.
    return f'<pre>{e.text}</pre>'


def _render_image(e, k, i) -> str:
    """Run the image through the ImageProcessor and return an <img> tag."""
    url = e.url
    ext = os.path.splitext(url)[1][1:]
    if ext in ("svg", "png", "jpeg", "gif"):
        url = i.process_image(url, ext)
    elif ext in ("pdf", "epdf"):
        url = i.process_image(url, "png", dpi=300)
    elif ext == "jpg":
        url = i.process_image(url, "jpeg")
    else:
        # NOTE(review): every other branch passes the format without a
        # leading dot; ".png" looks like a typo for "png" -- confirm against
        # ImageProcessor.process_image before changing.
        url = i.process_image(url, ".png")
    alt = e.title or html(e.content, k, i, 0, "")
    # Reconstructed <img> tag (the original literal was truncated).
    return f'<img src="{url}" alt="{alt}">'


def _render_quoted(e, k, i) -> str:
    """Wrap the content of an FQuoted in the quote marks for its style."""
    # Unknown styles use straight quotes; an unknown quote_type (for any
    # style) is rendered with straight double quotes.
    fallback = ("'", "'") if e.quote_type == "SingleQuote" else ('"', '"')
    opening, closing = _QUOTE_MARKS.get((e.style, e.quote_type), fallback)
    return f'{opening}{html(e.content, k, i, 0, "")}{closing}'


def html(e: Element, k: KatexClient, i: ImageProcessor, indent_level: int=0, indent_str: str="\t") -> str:
    """Render a panflute element (or ListContainer of elements) as HTML.

    Args:
        e: The element or ListContainer to render.
        k: KaTeX client; used to render Math and to open/close a group
            around the content of a Group element.
        i: Image processor; Image URLs are passed through process_image().
        indent_level: Nesting depth used to indent block-level output.
        indent_str: String repeated `indent_level` times for indentation.

    Returns:
        The HTML for `e`.  Elements carrying an `only` attribute whose value
        is not "html" render as an empty string, as do raw inlines/blocks
        whose format is not "html".
    """
    if hasattr(e, "attributes") and "only" in e.attributes and e.attributes["only"] != "html":
        return ""

    if isinstance(e, ListContainer):
        return ''.join(html(child, k, i, indent_level, indent_str) for child in e)

    # Pieces of the generic output assembled at the end of the function; most
    # special cases below return their own string instead.
    tag = e.tag.lower()
    attributes = ""
    content_foot = ""
    content_head = ""

    if isinstance(e, Str):
        # Reconstructed: the stripped copy showed a no-op replace; presumably
        # a literal no-break space is turned into its entity.
        # NOTE(review): text is otherwise emitted unescaped.
        return e.text.replace("\u00a0", "&nbsp;")

    if type(e) in _SIMPLE_TAGS:
        tag = _SIMPLE_TAGS[type(e)]

    if type(e) in _NOT_IMPLEMENTED:
        # Reconstructed: the stripped copy returned an empty f-string,
        # presumably an HTML comment that was removed.
        return f'<!-- FIXME: {type(e).__name__}s not implemented -->'

    indent = indent_level * indent_str

    # Elements that are fully represented by a fixed string.
    simple_string = {
        NBSP: "&nbsp;",  # reconstructed entity
        Space: " ",
        Null: "",
        LineBreak: f"\n{indent}<br>\n{indent}",  # reconstructed <br>
        SoftBreak: " ",
        HorizontalRule: f"{indent}<hr>\n",  # reconstructed <hr>
    }
    if type(e) in simple_string:
        return simple_string[type(e)]

    if hasattr(e, "identifier") and e.identifier != "":
        attributes += f' id="{e.identifier}"'

    if hasattr(e, "classes") and len(e.classes) != 0:
        attributes += f' class="{" ".join(e.classes)}"'

    # TODO: Pass attributes down to HTML too

    if isinstance(e, CodeBlock):
        return _render_code_block(e)

    if isinstance(e, Figure):
        content_foot = html(e.caption, k, i, indent_level+1, indent_str)

    if isinstance(e, Caption):
        tag = "figcaption"

    if isinstance(e, Image):
        return _render_image(e, k, i)

    if isinstance(e, Header):
        tag = "h"+str(e.level)

    if isinstance(e, Link):
        tag = "a"
        attributes += f' href="{e.url}"'
        if e.title:
            attributes += f' title="{e.title}"'

    if isinstance(e, LineItem):
        # Reconstructed <br> at the end of each line of a line block.
        return indent + html(e.content, k, i) + "<br>\n"

    if isinstance(e, Note):
        content_head = "("
        content_foot = ")"
        inlined = inlinify(e)
        if inlined is not None:
            return f' ({html(inlined, k, i, 0, "")})'

    if isinstance(e, OrderedList):
        tag = "ol"
        if e.start and e.start != 1:
            attributes += f' start="{e.start}"'
        if e.style and e.style != "DefaultStyle":
            # Styles without an HTML counterpart are skipped instead of
            # raising KeyError.
            list_type = _ORDERED_LIST_TYPES.get(e.style)
            if list_type is not None:
                attributes += f' type="{list_type}"'
        # FIXME: Delimiter styles

    if isinstance(e, Table):
        content_head = html(e.head, k, i, indent_level+1, indent_str)
        content_foot = html(e.foot, k, i, indent_level+1, indent_str)
        # FIXME: Fancy pandoc tables, using colspec

    if isinstance(e, TableCell):
        tag = "td"
        if e.colspan != 1:
            attributes += f' colspan="{e.colspan}"'
        if e.rowspan != 1:
            attributes += f' rowspan="{e.rowspan}"'
        if e.alignment and e.alignment != "AlignDefault":
            attributes += f' style="text-align: {_CELL_ALIGNMENTS[e.alignment]}"'

    if isinstance(e, FQuoted):
        return _render_quoted(e, k, i)

    if isinstance(e, Group):
        k.begingroup()
        try:
            return html(e.content, k, i, indent_level, indent_str)
        finally:
            # Close the group even if rendering the content raises.
            k.endgroup()

    if isinstance(e, Math):
        formats = {
            "DisplayMath": True,
            "InlineMath": False
        }
        # FIXME: Currently, all bits of math are isolated from each other, this
        # means that \defs and alike work only inside a single math block
        # and are forgotten in the next one.
        return indent + k.render(e.text, {"displayMode": formats[e.format]})

    if isinstance(e, RawInline):
        return e.text if e.format == "html" else ""

    if isinstance(e, RawBlock):
        return f'{e.text}\n' if e.format == "html" else ""

    if isinstance(e, Inline):
        inner = html(e.content, k, i, 0, "") if hasattr(e, "_content") else ""
        text = e.text if hasattr(e, "text") else ""
        # Reconstructed closing tag (the opening tag had no counterpart).
        return f'<{tag}{attributes}>{content_head}{inner}{text}{content_foot}</{tag}>'

    # Generic block element.  Plain is emitted without a wrapping tag.
    wrap = not isinstance(e, Plain)
    parts = []
    if wrap:
        parts.append(f"{indent}<{tag}{attributes}>\n")
    parts.append(content_head)
    if hasattr(e, "_content"):
        # Inline children do not indent themselves, so indent the first one.
        if len(e.content) > 0 and isinstance(e.content[0], Inline):
            parts.append((indent_level+1)*indent_str)
        parts.append(html(e.content, k, i, indent_level+1, indent_str))
    if hasattr(e, "text"):
        parts.append(e.text)
    parts.append(f"{content_foot}\n")
    if wrap:
        # Reconstructed closing tag.
        parts.append(f"{indent}</{tag}>\n")

    return "".join(parts)