from panflute import *
from pygments import highlight
from pygments.lexers import get_lexer_by_name
from pygments.formatters import HtmlFormatter
from pygments.util import ClassNotFound
import os

from whitespace import NBSP
from transform import FQuoted
from katex import KatexClient
from util import inlinify
from context import Group
from images import ImageProcessor


def _code_block_html(e) -> str:
    """Render a CodeBlock, highlighted with pygments when requested and possible.

    Highlighting is attempted only when the block has classes and its
    ``highlight`` attribute is ``True``/``"True"``. The first class that names
    a pygments lexer wins. If none does, a warning is printed and the block is
    emitted as a plain ``<pre>`` instead of crashing on an unbound lexer.
    """
    wants_highlight = e.attributes.get("highlight") in (True, "True")
    if len(e.classes) > 0 and wants_highlight:
        lexer = None
        for cl in e.classes:
            try:
                lexer = get_lexer_by_name(cl)
            except ClassNotFound:
                continue
            break
        if lexer is not None:
            formatter = HtmlFormatter(style=e.attributes.get("style", "default"))
            return highlight(e.text, lexer, formatter)
        print(f"WARN: Syntax highligher does not have lexer for element with these classes: {e.classes}")
    # NOTE(review): the code text is inserted unescaped; `<` or `&` in the
    # source will be interpreted as markup by browsers.
    return f'<pre>{e.text}</pre>'


def _image_html(e, k: KatexClient, i: ImageProcessor) -> str:
    """Process an Image through the image processor and return its ``<img>`` tag.

    The ``file-width``/``file-height``/``file-quality``/``file-dpi`` attributes
    are forwarded to ``i.process_image`` as integer keyword arguments. For png
    and jpeg results a ``srcset`` with several sizes is generated unless the
    ``no-srcset`` attribute is set to something other than ``False``/``"False"``.
    """
    url = e.url

    # Attributes → image processor args
    additional_args = {
        name: int(e.attributes[f"file-{name}"])
        for name in ("width", "height", "quality", "dpi")
        if f"file-{name}" in e.attributes
    }

    # The directory of the current file, will also look for images there.
    source_dir = e.attributes["source_dir"]

    ext = os.path.splitext(url)[1][1:]

    # Conversions between various formats.
    if ext in ("svg", "png", "jpeg", "gif"):
        # Even supported elements have to be 'converted' because the
        # processing contains finding and moving them to the output
        # directory.
        url = i.process_image(url, ext, source_dir, **additional_args)
    elif ext in ("pdf", "epdf"):
        additional_args.setdefault("dpi", 300)
        url = i.process_image(url, "png", source_dir, **additional_args)
    elif ext == "jpg":
        url = i.process_image(url, "jpeg", source_dir, **additional_args)
    else:
        url = i.process_image(url, "png", source_dir, **additional_args)

    # Srcset generation - multiple alternative sizes of images browsers can
    # choose from.
    ext = os.path.splitext(url)[1][1:]
    srcset = []
    if ext in ("png", "jpeg") and e.attributes.get("no-srcset", False) in (False, "False"):
        # This is inspired by @vojta001's blogPhoto shortcode he made for
        # patek.cz:
        # https://gitlab.com/patek-devs/patek.cz/-/blob/master/themes/patek/layouts/shortcodes/blogPhoto.html
        width, height = i.get_image_size(url, [i.public_dir])
        sizes = [(640, 360, 85), (1280, 720, 85), (1920, 1080, 90)]  # (width, height, quality)
        for max_width, max_height, size_quality in sizes:
            if width <= max_width and height <= max_height:
                # The original already fits this size; offer it as the
                # largest candidate and stop generating bigger ones.
                srcset.append((f'{i.web_path}/{url}', f'{width}w'))
                break
            quality = size_quality if ext == "jpeg" else None
            resized = i.process_image(url, ext, i.public_dir, width=max_width, height=max_height, quality=quality)
            srcset.append((f'{i.web_path}/{resized}', f'{max_width}w'))

    url = i.web_path + "/" + url

    # This deliberately replaces (not extends) any id/class attributes: only
    # the width style and the alt text are emitted for images.
    attributes = f'{" style=width:"+e.attributes["width"] if "width" in e.attributes else ""} alt="{e.title or html(e.content, k, i, 0, "")}"'

    # NOTE(review): the exact shape of the emitted tag (e.g. whether the image
    # should additionally be wrapped in a link to the full-size file) should be
    # confirmed against the site's templates/CSS.
    if len(srcset) != 0:
        candidates = ", ".join(f"{src} {descriptor}" for src, descriptor in srcset)
        return f'<img src="{url}" srcset="{candidates}"{attributes}>'
    return f'<img src="{url}"{attributes}>'


def html(e: Element, k: KatexClient, i: ImageProcessor, indent_level: int=0, indent_str: str="\t") -> str:
    """Recursively render a panflute element (or ListContainer) as an HTML string.

    Args:
        e: The element or ``ListContainer`` to render.
        k: KaTeX client; used to render ``Math`` (``k.render``) and to scope
            ``Group`` elements (``k.begingroup`` / ``k.endgroup``).
        i: Image processor; used to convert images and to build ``srcset``s.
        indent_level: Current nesting depth, used to indent block elements.
        indent_str: String repeated ``indent_level`` times as indentation.

    Returns:
        The HTML for ``e``, or ``""`` for elements that are skipped (``only``
        attribute set to something other than ``html``, unimplemented
        elements, raw content in a non-html format).
    """
    # `only` attribute which makes transformed elements appear only in tex
    # output or html output
    if hasattr(e, "attributes") and "only" in e.attributes and e.attributes["only"] != "html":
        return ""

    if isinstance(e, ListContainer):
        return ''.join(html(child, k, i, indent_level, indent_str) for child in e)

    # Bits from which the final element output is built at the end of this
    # function. Most elements override this by returning their own output.
    tag = e.tag.lower()
    attributes = ""
    content_foot = ""
    content_head = ""
    indent = indent_level * indent_str

    if isinstance(e, Str):
        # Keep non-breaking spaces explicit in the generated markup.
        return e.text.replace("\u00a0", "&nbsp;")

    # Most elements fit the general template at the end of the function, just
    # need their html tag specified. (Header is handled further below because
    # its tag depends on the heading level.)
    tags = {
        BulletList: "ul",
        Doc: "main",
        Emph: "em",
        Caption: "figcaption",
        Para: "p",
        LineBlock: "p",
        ListItem: "li",
        SmallCaps: "span",
        Strikeout: "strike",
        Subscript: "sub",
        Superscript: "sup",
        Underline: "u",
        TableBody: "tbody",
        TableHead: "thead",
        TableFoot: "tfoot",
        TableRow: "tr",
        TableCell: "td",
    }
    if type(e) in tags:
        tag = tags[type(e)]

    # These are also disabled in pandoc so they shouldn't appear in the AST at all.
    not_implemented = {Citation, Cite, Definition, DefinitionItem, DefinitionList}
    if type(e) in not_implemented:
        return ""

    # Elements which can be represented by a simple string
    simple_string = {
        NBSP: "&nbsp;",
        Space: " ",
        Null: "",
        LineBreak: f"\n{indent}<br>\n{indent}",
        SoftBreak: " ",
        HorizontalRule: f"{indent}<hr>\n",
    }
    if type(e) in simple_string:
        return simple_string[type(e)]

    if hasattr(e, "identifier") and e.identifier != "":
        attributes += f' id="{e.identifier}"'

    if hasattr(e, "classes") and len(e.classes) != 0:
        attributes += f' class="{" ".join(e.classes)}"'

    # Attributes are only passed down manually, because we use them internally.
    # Maybe this should be a blocklist instead of an allowlist?

    # Overriding elements with their own returns

    if isinstance(e, CodeBlock):
        return _code_block_html(e)

    if isinstance(e, Doc):
        # Ship the pygments stylesheet with the document so highlighted code
        # blocks are actually coloured.
        # NOTE(review): ".highlight" is pygments' default wrapper class;
        # confirm the selector if the formatter's cssclass is customised.
        highlight_style = e.get_metadata("highlight-style")
        formatter = HtmlFormatter(style=highlight_style if highlight_style is not None else "default")
        content_head = f'<style>{formatter.get_style_defs(".highlight")}</style>'

    if isinstance(e, Image):
        return _image_html(e, k, i)

    # See https://pandoc.org/MANUAL.html#line-blocks
    if isinstance(e, LineItem):
        return indent + html(e.content, k, i) + "<br>\n"

    # Footnotes are placed into parentheses. (And not footnotes (This is how KSP did it before me))
    if isinstance(e, Note):
        content_head = "("
        content_foot = ")"
        inlined = inlinify(e)
        if inlined is not None:
            return f' ({html(inlined, k, i, 0, "")})'

    if isinstance(e, FQuoted):
        quote_marks = {
            "cs": {"SingleQuote": ("‚", "‘"), "DoubleQuote": ("„", "“")},
            "en": {"SingleQuote": ("‘", "’"), "DoubleQuote": ("“", "”")},
        }
        if e.style in quote_marks:
            # An unknown quote type in a known style falls through to the
            # generic template below.
            marks = quote_marks[e.style].get(e.quote_type)
        elif e.quote_type == "SingleQuote":
            marks = ("'", "'")
        else:
            marks = ('"', '"')
        if marks is not None:
            return f'{marks[0]}{html(e.content, k, i, 0, "")}{marks[1]}'

    if isinstance(e, Group):
        k.begingroup()
        ret = html(e.content, k, i, indent_level, indent_str)
        k.endgroup()
        return ret

    if isinstance(e, Math):
        formats = {
            "DisplayMath": True,
            "InlineMath": False,
        }
        return indent + k.render(e.text, {"displayMode": formats[e.format]})

    if isinstance(e, RawInline):
        return e.text if e.format == "html" else ""

    if isinstance(e, RawBlock):
        return f'{e.text}\n' if e.format == "html" else ""

    # Non-overriding elements, they get generated using the template at the end
    # of this function

    if isinstance(e, Header):
        tag = "h" + str(e.level)

    if isinstance(e, Figure):
        content_foot = html(e.caption, k, i, indent_level+1, indent_str)

    if isinstance(e, Caption):
        tag = "figcaption"

    if isinstance(e, Link):
        tag = "a"
        attributes += f' href="{e.url}"'
        if e.title:
            attributes += f' title="{e.title}"'

    if isinstance(e, OrderedList):
        tag = "ol"
        if e.start and e.start != 1:
            attributes += f' start="{e.start}"'
        html_styles = {
            "Decimal": "1",
            "LowerRoman": "i",
            "UpperRoman": "I",
            "LowerAlpha": "a",
            "UpperAlpha": "A",
        }
        # Styles without an HTML equivalent (DefaultStyle, Example, ...) keep
        # the browser default instead of raising KeyError.
        if e.style in html_styles:
            attributes += f' type="{html_styles[e.style]}"'
        # FIXME: Delimiter styles

    if isinstance(e, Table):
        content_head = html(e.head, k, i, indent_level+1, indent_str)
        content_foot = html(e.foot, k, i, indent_level+1, indent_str)
        # FIXME: Fancy pandoc tables, using colspec

    if isinstance(e, TableCell):
        tag = "td"
        if e.colspan != 1:
            attributes += f' colspan="{e.colspan}"'
        if e.rowspan != 1:
            attributes += f' rowspan="{e.rowspan}"'
        aligns = {
            "AlignLeft": "left",
            "AlignRight": "right",
            "AlignCenter": "center",
        }
        if e.alignment in aligns:
            attributes += f' style="text-align: {aligns[e.alignment]}"'

    # The default which all non-overriding elements get generated by. This
    # includes elements, which were not explicitly mentioned in this function,
    # e. g. Strong

    if isinstance(e, Inline):
        inner = html(e.content, k, i, 0, "") if hasattr(e, "_content") else ""
        text = e.text if hasattr(e, "text") else ""
        return f'<{tag}{attributes}>{content_head}{inner}{text}{content_foot}</{tag}>'

    # Block elements: opening tag, content and closing tag each on their own
    # line. Plain is transparent and emits no tags of its own.
    is_plain = isinstance(e, Plain)
    out_str = ""
    if not is_plain:
        out_str += f"{indent}<{tag}{attributes}>\n"
    out_str += content_head
    if hasattr(e, "_content"):
        # Inline children do not indent themselves, so indent their first line here.
        if len(e.content) > 0 and isinstance(e.content[0], Inline):
            out_str += (indent_level+1)*indent_str
        out_str += html(e.content, k, i, indent_level+1, indent_str)
    if hasattr(e, "text"):
        out_str += e.text
    out_str += f"{content_foot}\n"
    if not is_plain:
        out_str += f"{indent}</{tag}>\n"

    return out_str