Jan Černohorský
1 year ago
3 changed files with 476 additions and 127 deletions
@ -0,0 +1,299 @@ |
|||||
|
from panflute import Cite, Emph, Image, LineBreak, Link, Math, Note, RawInline, SmallCaps, Str, Strikeout, Subscript, Superscript, Underline |
||||
|
from panflute import BulletList, Citation, CodeBlock, Definition, DefinitionItem, DefinitionList, Header, HorizontalRule, LineBlock, LineItem, ListItem, Null, OrderedList, Para, Plain, RawBlock, TableBody, TableFoot, TableHead |
||||
|
from panflute import TableRow, TableCell, Caption, Doc |
||||
|
from panflute import ListContainer, Element |
||||
|
from typing import Union, Dict |
||||
|
|
||||
|
import re |
||||
|
import os |
||||
|
import io |
||||
|
|
||||
|
from pygments import highlight |
||||
|
from pygments.lexers import get_lexer_by_name |
||||
|
from pygments.formatters import HtmlFormatter |
||||
|
from pygments.util import ClassNotFound |
||||
|
|
||||
|
from .whitespace import NBSP |
||||
|
from .context import Group |
||||
|
from .output_generator import Output_generator |
||||
|
from .katex import KatexClient |
||||
|
from .images import ImageProcessor |
||||
|
from .util import inlinify |
||||
|
|
||||
|
class HTML_generator(Output_generator): |
||||
|
def __init__(self, output_file, katex_client: KatexClient, image_processor: ImageProcessor, *args, **kwargs):
    """Create an HTML generator.

    Keeps references to the KaTeX client and the image processor on the
    instance and forwards ``output_file`` together with every remaining
    positional and keyword argument to the ``Output_generator`` initialiser.
    """
    # The collaborators are stored before the base initialiser runs.
    self.katex_client, self.image_processor = katex_client, image_processor
    super().__init__(output_file, *args, **kwargs)
||||
|
|
||||
|
def generate(self, e: Union[Element, ListContainer]):
    """Generate output for ``e`` unless it is restricted to another format.

    An element that has an ``attributes`` mapping containing an ``only`` key
    whose value is not ``"html"`` is skipped entirely. Everything else is
    passed on to the base class ``generate``.
    """
    if hasattr(e, "attributes"):
        # A missing "only" attribute behaves like "html": nothing is skipped.
        target_format = e.attributes.get("only", "html")
        if target_format != "html":
            return
    super().generate(e)
||||
|
|
||||
|
def htmlescapespecialchars(self, text: str) -> str:
    """Escape the characters that are special in HTML text and attribute values.

    ``&``, ``<``, ``>``, ``"`` and ``'`` are replaced by ``&amp;``, ``&lt;``,
    ``&gt;``, ``&quot;`` and ``&#39;``. The replacement happens in a single
    pass, so the ampersands introduced by one escape are never escaped again.

    :param text: raw text to be embedded into HTML
    :return: the escaped text
    """
    escapes = str.maketrans({
        "&": "&amp;",
        "<": "&lt;",
        ">": "&gt;",  # was "&rt;", which is not an HTML entity
        '"': "&quot;",
        "'": "&#39;",
    })
    # No-break spaces are deliberately left as literal Unicode characters
    # instead of being rewritten to an entity.
    return text.translate(escapes)
||||
|
|
||||
|
def stag(self, tag: str, attributes: Union[Dict[str, str], None] = None) -> str:
    """Build an opening tag such as ``<a href="...">``.

    :param tag: element name
    :param attributes: attribute names mapped to their unescaped values;
        ``None`` (the default) means no attributes
    :return: the opening tag; attribute values are passed through
        ``htmlescapespecialchars``, attribute names are emitted verbatim
    """
    # A None default replaces the former shared mutable ``{}`` default.
    words = [tag]
    for key, value in (attributes or {}).items():
        words.append(f'{key}="{self.htmlescapespecialchars(value)}"')
    return "<" + " ".join(words) + ">"
||||
|
|
||||
|
def etag(self, tag: str, attributes: Union[Dict[str, str], None] = None) -> str:
    """Build the closing tag ``</tag>``.

    :param tag: element name
    :param attributes: accepted so the signature parallels ``stag``; unused
    :return: the closing tag
    """
    # A None default replaces the former shared mutable ``{}`` default.
    return f"</{tag}>"
||||
|
|
||||
|
def ntag(self, tag: str, attributes: Union[Dict[str, str], None] = None) -> str:
    """Build a tag that has no closing counterpart (used for ``<img>``).

    The output is exactly what ``stag`` produces for the same arguments.

    :param tag: element name
    :param attributes: attribute names mapped to their unescaped values;
        ``None`` (the default) means no attributes
    :return: the tag as a string
    """
    # A None default replaces the former shared mutable ``{}`` default; it is
    # normalised here so ``stag`` always receives a mapping.
    return self.stag(tag, {} if attributes is None else attributes)
||||
|
|
||||
|
def tagname(self, e) -> str:
    """Return the HTML tag name used for element ``e``.

    Headers map to ``h<level>``. Element types listed in the table below use
    the name given there; the lookup is by exact type, so subclasses are not
    matched. Any other element uses its lower-cased class name.
    """
    if isinstance(e, Header):
        return f"h{e.level}"

    fixed_names = {
        # Block-level elements
        Doc: "main",
        Para: "p",
        LineBlock: "p",
        HorizontalRule: "hr",
        Caption: "figcaption",
        BulletList: "ul",
        OrderedList: "ol",
        ListItem: "li",
        # Tables
        TableHead: "thead",
        TableBody: "tbody",
        TableFoot: "tfoot",
        TableRow: "tr",
        TableCell: "td",
        # Inline elements
        Emph: "em",
        LineBreak: "br",
        Link: "a",
        SmallCaps: "span",
        # NOTE(review): <strike> is obsolete in HTML5; kept unchanged here.
        Strikeout: "strike",
        Subscript: "sub",
        Superscript: "sup",
        Underline: "u",
    }
    kind = type(e)
    if kind in fixed_names:
        return fixed_names[kind]
    return kind.__name__.lower()
||||
|
|
||||
|
def common_attributes(self, e) -> Dict[str, str]:
    """Collect the ``id`` and ``class`` HTML attributes of element ``e``.

    ``id`` is taken from ``e.identifier`` when that attribute exists and is
    not the empty string; ``class`` is the space-joined ``e.classes`` when
    that attribute exists and is non-empty.

    :return: a new dict containing only the attributes that apply
    """
    collected = {}

    identifier = getattr(e, "identifier", "")
    if identifier != "":
        collected["id"] = identifier

    class_names = getattr(e, "classes", ())
    if len(class_names) != 0:
        collected["class"] = " ".join(class_names)

    return collected
||||
|
|
||||
|
def generate_Str(self, e: Str):
    """Write the text of a ``Str`` element with HTML special characters escaped."""
    escaped = self.htmlescapespecialchars(e.text)
    self.write(escaped)
||||
|
|
||||
|
def generate_NBSP(self, e: NBSP):
    """Write a no-break space for an ``NBSP`` element.

    The literal Unicode character U+00A0 is emitted rather than an HTML
    entity. It is spelled as an escape sequence so that editors and
    copy-paste cannot silently turn it into an ordinary space.
    """
    self.write("\u00a0")
||||
|
|
||||
|
def generate_Null(self, e: Null):
    """Produce no output for a ``Null`` element."""
    return None
||||
|
|
||||
|
def generate_Doc(self, e: Doc):
    """Write the document: a Pygments stylesheet followed by ``<main>``.

    The Pygments style is read from the ``highlight-style`` metadata entry
    and falls back to ``"default"`` when the entry is absent. Its CSS rules,
    scoped to the ``.highlight`` selector, are written inside a ``<style>``
    element before the document body.
    """
    style_name = e.get_metadata("highlight-style")
    if style_name is None:
        style_name = "default"
    css_source = HtmlFormatter(style=style_name)

    self.writeln("<style>")
    self.writeraw(css_source.get_style_defs(".highlight"))
    self.writeln("</style>")

    self.generate_simple_block_tag(e, "main", self.common_attributes(e))
||||
|
|
||||
|
def generate_CodeBlock(self, e: CodeBlock):
    """Write a code block, syntax-highlighted with Pygments when requested.

    Highlighting is attempted only when the block has at least one class and
    its ``highlight`` attribute is ``True`` or ``'True'``. The first class
    that names a known Pygments lexer selects the language. The block is
    written as a plain ``<pre>`` element when highlighting is not requested
    (including when the ``highlight`` attribute is missing) or when none of
    the classes names a lexer; the latter case also prints a warning.
    """
    lexer = None
    wants_highlight = e.attributes.get("highlight") in (True, "True")
    if e.classes and wants_highlight:
        for cl in e.classes:
            try:
                lexer = get_lexer_by_name(cl)
            except ClassNotFound:
                continue
            break
        else:
            print(f"WARN: Syntax highligher does not have lexer for element with these classes: {e.classes}")

    if lexer is None:
        # Previously a missing lexer was still handed to highlight(), which
        # cannot work without one; fall back to an unhighlighted block.
        self.generate_raw_block_tag(e, "pre", self.common_attributes(e))
        return

    # Syntax highlighting using pygments. A missing "style" attribute falls
    # back to the "default" Pygments style instead of raising KeyError.
    formatter = HtmlFormatter(style=e.attributes.get("style", "default"))
    self.writeraw(highlight(e.text, lexer, formatter))
||||
|
|
||||
|
def generate_Image(self, e: Image):
    """Write an image as an ``<img>`` wrapped in a link to the image file.

    Steps, in order:

    1. ``file-width``, ``file-height``, ``file-quality`` and ``file-dpi``
       attributes are converted to ints and passed to the image processor.
    2. The image is run through ``image_processor.process_image`` with a
       target format chosen from the file extension; the returned value
       replaces the URL.
    3. For ``png`` and ``jpeg`` results a ``srcset`` with up to three sizes
       is built, unless the ``no-srcset`` attribute is present and is
       neither ``False`` nor ``'False'``.
    4. The ``alt`` text is the image title, or the image's content rendered
       by a temporary ``HTML_generator`` when there is no title.
    """
    processor = self.image_processor
    url = e.url

    # Attributes → image processor args
    processor_args = {}
    for attr_name, arg_name in (
        ("file-width", "width"),
        ("file-height", "height"),
        ("file-quality", "quality"),
        ("file-dpi", "dpi"),
    ):
        if attr_name in e.attributes:
            processor_args[arg_name] = int(e.attributes[attr_name])

    # The directory of the current file, will also look for images there.
    source_dir = e.attributes["source_dir"]

    ext = os.path.splitext(url)[1][1:]

    # Conversions between various formats. Even supported formats have to be
    # 'converted' because the processing contains finding and moving them to
    # the output directory.
    if ext in ("svg", "png", "jpeg", "gif"):
        target_format = ext
    elif ext in ("pdf", "epdf"):
        processor_args.setdefault("dpi", 300)
        target_format = "png"
    elif ext == "jpg":
        target_format = "jpeg"
    else:
        target_format = "png"
    url = processor.process_image(url, target_format, source_dir, **processor_args)

    # Srcset generation - multiple alternative sizes of images browsers can
    # choose from.
    # This is inspired by @vojta001's blogPhoto shortcode he made for
    # patek.cz:
    # https://gitlab.com/patek-devs/patek.cz/-/blob/master/themes/patek/layouts/shortcodes/blogPhoto.html
    ext = os.path.splitext(url)[1][1:]
    srcset = []
    srcset_allowed = e.attributes.get("no-srcset", False) in (False, "False")
    if ext in ("png", "jpeg") and srcset_allowed:
        width, height = processor.get_image_size(url, [processor.public_dir])
        # (width, height, quality)
        candidate_sizes = ((640, 360, 85), (1280, 720, 85), (1920, 1080, 90))
        for max_width, max_height, jpeg_quality in candidate_sizes:
            if width <= max_width and height <= max_height:
                # The image already fits this size: list it as is and stop.
                srcset.append((f'{processor.web_path}/{url}', f'{width}w'))
                break
            quality = jpeg_quality if ext == "jpeg" else None
            web_path = processor.web_path
            resized = processor.process_image(url, ext, processor.public_dir, width=max_width, height=max_height, quality=quality)
            srcset.append((f'{web_path}/{resized}', f'{max_width}w'))

    url = processor.web_path + "/" + url

    attributes = self.common_attributes(e)
    if "width" in e.attributes:
        attributes["width"] = e.attributes["width"]

    if e.title:
        attributes["alt"] = e.title
    else:
        # Render the image's content into a throwaway buffer to get alt text.
        fake_out = io.StringIO()
        HTML_generator(fake_out, self.katex_client, processor).generate(e.content)
        attributes["alt"] = fake_out.getvalue()

    if srcset:
        # The last srcset entry doubles as the plain ``src``.
        attributes["src"] = srcset[-1][0]
        attributes["srcset"] = ", ".join(" ".join(candidate) for candidate in srcset)
    else:
        attributes["src"] = url

    img = RawInline(self.ntag("img", attributes))
    self.generate(Link(img, url=url))
||||
|
|
||||
|
def generate_Group(self, e: Group):
    """Generate the content of a ``Group`` inside a KaTeX group.

    ``katex_client.begingroup()`` is called before the content is generated
    and ``katex_client.endgroup()`` afterwards. The group is closed even
    when generating the content raises, so that the client is not left with
    an unbalanced open group.
    """
    self.katex_client.begingroup()
    try:
        self.generate(e.content)
    finally:
        self.katex_client.endgroup()
||||
|
|
||||
|
def generate_Plain(self, e: Plain):
    """Generate the content of a ``Plain`` element without any wrapping tag."""
    children = e.content
    self.generate(children)
||||
|
|
||||
|
def generate_LineItem(self, e: LineItem):
    """Generate one line of a line block, terminated by ``<br>``."""
    line_break = "<br>"
    self.generate(e.content)
    self.write(line_break)
    self.endln()
||||
|
|
||||
|
# Footnotes are rendered in place, wrapped in parentheses, instead of as real footnotes (this is how KSP did it before).
||||
|
def generate_Note(self, e: Note):
    """Write a footnote in place, wrapped in parentheses.

    ``inlinify(e)`` is tried first: when it returns something other than
    ``None`` the note is written on the current line as
    ``<tag> (...) </tag>``. Otherwise the note's content is generated as an
    indented block between ``<tag>(`` and ``)</tag>``.
    """
    inline = inlinify(e)
    tag = self.tagname(e)
    if inline is not None:
        self.write(self.stag(tag) + " (")
        self.generate(inline)
        self.write(") " + self.etag(tag))
    else:
        self.writeln(self.stag(tag) + "(")
        self.iup()
        self.generate(e.content)
        self.ido()
        # The closing parenthesis belongs inside the element, mirroring the
        # opening one; it used to be written after the closing tag.
        self.writeln(")" + self.etag(tag))
||||
|
|
||||
|
def generate_Math(self, e: Math):
    """Render a math element through the KaTeX client and write the result.

    ``DisplayMath`` is rendered with KaTeX's ``displayMode`` option set to
    true, ``InlineMath`` with it set to false. Any other format raises
    ``KeyError``.
    """
    display_mode_by_format = {
        "InlineMath": False,
        "DisplayMath": True,
    }
    katex_options = {"displayMode": display_mode_by_format[e.format]}
    rendered = self.katex_client.render(e.text, katex_options)
    self.writeln(rendered)
||||
|
|
||||
|
def generate_RawInline(self, e: RawInline):
    """Write raw inline content verbatim, but only when its format is ``html``."""
    if e.format != "html":
        # Raw content meant for other output formats is dropped.
        return
    self.write(e.text)
||||
|
|
||||
|
def generate_RawBlock(self, e: RawBlock):
    """Write a raw block verbatim, but only when its format is ``html``."""
    if e.format != "html":
        # Raw content meant for other output formats is dropped.
        return
    self.writeraw(e.text)
||||
|
|
||||
|
def generate_Link(self, e: Link):
    """Write a link as an inline tag carrying ``href`` and, if set, ``title``.

    The link-specific attributes are merged over the element's common
    attributes, so they win on a key clash.
    """
    link_attributes = {"href": e.url}
    if e.title:
        link_attributes["title"] = e.title
    merged = {**self.common_attributes(e), **link_attributes}
    self.generate_simple_inline_tag(e, self.tagname(e), merged)
||||
|
|
||||
|
def generate_OrderedList(self, e: OrderedList):
    """Write an ordered list, including its start number and numbering type.

    ``start`` is emitted only when the list starts at something other than
    1. ``type`` is emitted for the numbering styles that have an HTML
    equivalent (decimal, lower/upper roman, lower/upper alpha); any other
    style, including ``DefaultStyle``, leaves the attribute out.
    """
    attributes = {}
    if e.start and e.start != 1:
        attributes["start"] = str(e.start)

    html_styles = {
        "Decimal": "1",
        "LowerRoman": "i",
        "UpperRoman": "I",  # the key used to carry a stray trailing colon
        "LowerAlpha": "a",
        "UpperAlpha": "A",
    }
    # .get() instead of indexing: a style without an HTML counterpart must
    # not abort generation with KeyError.
    list_type = html_styles.get(e.style)
    if list_type is not None:
        attributes["type"] = list_type

    # FIXME: Delimiter styles: 1. 1) (1)
    self.generate_simple_block_tag(e, self.tagname(e), self.common_attributes(e) | attributes)
||||
|
|
||||
|
def generate_TableCell(self, e: TableCell):
    """Write a table cell with its span and alignment attributes.

    ``colspan`` and ``rowspan`` are emitted only when they differ from 1.
    An alignment other than ``AlignDefault`` is appended to the ``style``
    attribute as a ``text-align`` declaration.
    """
    attributes = self.common_attributes(e)

    for span_name in ("colspan", "rowspan"):
        span = getattr(e, span_name)
        if span != 1:
            attributes[span_name] = str(span)

    css_alignment = {
        "AlignLeft": "left",
        "AlignRight": "right",
        "AlignCenter": "center",
    }
    if e.alignment and e.alignment != "AlignDefault":
        existing_style = attributes.get("style", "")
        attributes["style"] = existing_style + f"text-align: {css_alignment[e.alignment]};"

    self.generate_simple_block_tag(e, self.tagname(e), attributes)
||||
|
|
||||
|
# These are also disabled in pandoc so they shouldn't appear in the AST at all. |
||||
|
def generate_Citation(self, e: Citation):
    """Placeholder: write an HTML comment saying citations are unsupported."""
    placeholder = "<!-- FIXME: Citations not implemented -->"
    self.writeln(placeholder)
||||
|
|
||||
|
def generate_Cite(self, e: Cite):
    """Placeholder: write an HTML comment saying cites are unsupported."""
    placeholder = "<!-- FIXME: Cites not implemented -->"
    self.writeln(placeholder)
||||
|
|
||||
|
def generate_Definition(self, e: Definition):
    """Placeholder: write an HTML comment saying definitions are unsupported."""
    placeholder = "<!-- FIXME: Definitions not implemented -->"
    self.writeln(placeholder)
||||
|
|
||||
|
def generate_DefinitionItem(self, e: DefinitionItem):
    """Placeholder: write an HTML comment saying definition items are unsupported."""
    placeholder = "<!-- FIXME: DefinitionItems not implemented -->"
    self.writeln(placeholder)
||||
|
|
||||
|
def generate_DefinitionList(self, e: DefinitionList):
    """Placeholder: write an HTML comment saying definition lists are unsupported."""
    placeholder = "<!-- FIXME: DefinitionLists not implemented -->"
    self.writeln(placeholder)
Loading…
Reference in new issue