Jan Černohorský
1 year ago
3 changed files with 476 additions and 127 deletions
@ -0,0 +1,299 @@ |
|||
from panflute import Cite, Emph, Image, LineBreak, Link, Math, Note, RawInline, SmallCaps, Str, Strikeout, Subscript, Superscript, Underline |
|||
from panflute import BulletList, Citation, CodeBlock, Definition, DefinitionItem, DefinitionList, Header, HorizontalRule, LineBlock, LineItem, ListItem, Null, OrderedList, Para, Plain, RawBlock, TableBody, TableFoot, TableHead |
|||
from panflute import TableRow, TableCell, Caption, Doc |
|||
from panflute import ListContainer, Element |
|||
from typing import Union, Dict |
|||
|
|||
import re |
|||
import os |
|||
import io |
|||
|
|||
from pygments import highlight |
|||
from pygments.lexers import get_lexer_by_name |
|||
from pygments.formatters import HtmlFormatter |
|||
from pygments.util import ClassNotFound |
|||
|
|||
from .whitespace import NBSP |
|||
from .context import Group |
|||
from .output_generator import Output_generator |
|||
from .katex import KatexClient |
|||
from .images import ImageProcessor |
|||
from .util import inlinify |
|||
|
|||
class HTML_generator(Output_generator): |
|||
def __init__(self, output_file, katex_client: KatexClient, image_processor:ImageProcessor, *args, **kwargs):
    """Store the rendering helpers and initialise the base generator.

    ``katex_client`` is used to render math and ``image_processor`` to
    convert and place images; ``output_file`` and any remaining arguments
    are passed through unchanged to the base class initialiser.
    """
    # Both helpers are attached before the base initialiser runs, exactly
    # as before; their relative order does not matter.
    self.image_processor = image_processor
    self.katex_client = katex_client
    super().__init__(output_file, *args, **kwargs)
|||
|
|||
def generate(self, e: Union[Element, ListContainer]):
    """Generate output for ``e`` unless it is restricted to another format.

    An element whose ``attributes`` contain an ``only`` key with a value
    other than ``"html"`` is skipped entirely; everything else is handed
    to the base class ``generate``.
    """
    skip = False
    if hasattr(e, "attributes"):
        attrs = e.attributes
        skip = "only" in attrs and attrs["only"] != "html"
    if skip:
        return
    super().generate(e)
|||
|
|||
def htmlescapespecialchars(self, text: str) -> str:
    """Escape the characters that are special in HTML.

    Replaces ``&``, ``<``, ``>``, ``"`` and ``'`` with the corresponding
    character references so that ``text`` can be placed safely both in
    element content and inside a double-quoted attribute value.

    Returns the escaped string; an empty string is returned unchanged.
    """
    # The ampersand must be handled first, otherwise the ampersands
    # introduced by the later replacements would be escaped a second time.
    text = text.replace("&", "&amp;")
    text = text.replace("<", "&lt;")
    text = text.replace(">", "&gt;")
    text = text.replace("\"", "&quot;")
    text = text.replace("'", "&#39;")
    # No-break spaces are deliberately left as literal Unicode characters
    # instead of being turned into an HTML entity.
    return text
|||
|
|||
def stag(self, tag: str, attributes: Dict[str,str]={}) -> str:
    """Build an opening tag such as ``<a href="...">``.

    Attribute values are passed through ``htmlescapespecialchars``;
    attribute names are emitted as given. Attributes appear in the
    iteration order of ``attributes``.
    """
    rendered = [
        f"{name}=\"{self.htmlescapespecialchars(val)}\""
        for name, val in attributes.items()
    ]
    return "<" + " ".join([tag, *rendered]) + ">"
|||
|
|||
def etag(self, tag: str, attributes: Dict[str,str]={}) -> str:
    """Build the closing tag for ``tag``.

    ``attributes`` is accepted so the signature mirrors ``stag`` but is
    not used.
    """
    return "".join(("</", tag, ">"))
|||
|
|||
def ntag(self, tag: str, attributes: Dict[str,str]={}) -> str: |
|||
return self.stag(tag, attributes) |
|||
|
|||
def tagname(self, e) -> str:
    """Return the HTML tag name used for element ``e``.

    Headers map to ``h<level>``. Other element types are looked up by
    their exact type in a fixed table; a type that is not listed falls
    back to its lower-cased class name.
    """
    if isinstance(e, Header):
        return "h" + str(e.level)

    fixed_names = {
        BulletList: "ul",
        Doc: "main",
        Emph: "em",
        Caption: "figcaption",
        Para: "p",
        LineBlock: "p",
        LineBreak: "br",
        Link: "a",
        ListItem: "li",
        HorizontalRule: "hr",
        OrderedList: "ol",
        SmallCaps: "span",
        Strikeout: "strike",
        Subscript: "sub",
        Superscript: "sup",
        Underline: "u",
        TableBody: "tbody",
        TableHead: "thead",
        TableFoot: "tfoot",
        TableRow: "tr",
        TableCell: "td",
    }
    kind = type(e)
    if kind in fixed_names:
        return fixed_names[kind]
    return kind.__name__.lower()
|||
|
|||
def common_attributes(self, e) -> Dict[str,str]:
    """Collect the ``id`` and ``class`` HTML attributes of ``e``.

    ``id`` is taken from a non-empty ``identifier``; ``class`` is the
    space-joined ``classes`` when there is at least one. Elements lacking
    either attribute simply contribute nothing for it.
    """
    collected = {}

    identifier = getattr(e, "identifier", "")
    if identifier != "":
        collected["id"] = identifier

    classes = getattr(e, "classes", ())
    if len(classes) != 0:
        collected["class"] = " ".join(classes)

    return collected
|||
|
|||
def generate_Str(self, e: Str):
    """Write the text of ``e`` with HTML special characters escaped."""
    escaped = self.htmlescapespecialchars(e.text)
    self.write(escaped)
|||
|
|||
def generate_NBSP(self, e: NBSP):
    """Write a no-break space.

    The character is emitted as literal Unicode (U+00A0) rather than as
    an HTML entity.
    """
    # Spelled as an escape sequence so the character cannot be silently
    # turned into an ordinary space by an editor or a copy/paste.
    self.write("\u00a0")
|||
|
|||
def generate_Null(self, e: Null):
    """Produce no output for a ``Null`` element."""
    return None
|||
|
|||
def generate_Doc(self, e: Doc):
    """Write the highlighting stylesheet followed by the document body.

    The pygments style is read from the ``highlight-style`` metadata
    field and defaults to ``"default"`` when that field is not set. The
    style rules are scoped to ``.highlight`` and emitted inside a
    ``<style>`` element; the document itself is wrapped in ``<main>``.
    """
    style_name = e.get_metadata("highlight-style")
    if style_name is None:
        style_name = "default"
    formatter = HtmlFormatter(style=style_name)

    self.writeln("<style>")
    self.writeraw(formatter.get_style_defs(".highlight"))
    self.writeln("</style>")

    self.generate_simple_block_tag(e, "main", self.common_attributes(e))
|||
|
|||
def generate_CodeBlock(self, e: CodeBlock):
    """Write a code block, syntax-highlighted with pygments when requested.

    Highlighting is attempted when the block has at least one class and
    its ``highlight`` attribute is ``True`` or ``'True'``. The first class
    for which pygments knows a lexer decides the language, and the
    pygments style is taken from the ``style`` attribute (``"default"``
    when absent).

    If highlighting is not requested, or no class matches a lexer, the
    block is written as a plain ``<pre>`` element instead. The latter
    case also prints a warning.
    """
    if e.classes and e.attributes.get("highlight") in (True, "True"):
        # Syntax highlighting using pygments
        lexer = None
        for cl in e.classes:
            try:
                lexer = get_lexer_by_name(cl)
            except ClassNotFound:
                continue
            break

        if lexer is not None:
            formatter = HtmlFormatter(style=e.attributes.get("style", "default"))
            self.writeraw(highlight(e.text, lexer, formatter))
            return

        # highlight() cannot work without a lexer, so warn and fall
        # through to the plain rendering rather than crash.
        print(f"WARN: Syntax highligher does not have lexer for element with these classes: {e.classes}")

    self.generate_raw_block_tag(e, "pre", self.common_attributes(e))
|||
|
|||
def generate_Image(self, e: Image):
    """Process the image file and write an ``<img>`` wrapped in a link.

    The ``file-width``, ``file-height``, ``file-quality`` and ``file-dpi``
    attributes are converted to integers and passed to the image
    processor. The ``source_dir`` attribute is required and is passed to
    the image processor together with the image URL. For PNG and JPEG
    results a ``srcset`` with several sizes is generated unless the
    ``no-srcset`` attribute disables it.
    The ``alt`` text is the image title, or the generated image content
    when there is no title.
    """
    url = e.url

    # Attributes → image processor args
    additional_args = {}
    for arg_name in ("width", "height", "quality", "dpi"):
        attr_key = "file-" + arg_name
        if attr_key in e.attributes:
            additional_args[arg_name] = int(e.attributes[attr_key])

    # The directory of the current file, will also look for images there.
    source_dir = e.attributes["source_dir"]

    ext = os.path.splitext(url)[1][1:]

    # Choose the target format. Even formats that are used as they are
    # have to go through the processor, because processing also finds
    # the file and moves it to the output directory.
    if ext in ("svg", "png", "jpeg", "gif"):
        target_format = ext
    elif ext in ("pdf", "epdf"):
        additional_args.setdefault("dpi", 300)
        target_format = "png"
    elif ext == "jpg":
        target_format = "jpeg"
    else:
        target_format = "png"
    url = self.image_processor.process_image(url, target_format, source_dir, **additional_args)

    # Srcset generation - multiple alternative sizes of images browsers can
    # choose from.
    ext = os.path.splitext(url)[1][1:]
    srcset = []
    if ext in ("png", "jpeg") and e.attributes.get("no-srcset", False) in (False, "False"):
        # This is inspired by @vojta001's blogPhoto shortcode he made for
        # patek.cz:
        # https://gitlab.com/patek-devs/patek.cz/-/blob/master/themes/patek/layouts/shortcodes/blogPhoto.html
        width, height = self.image_processor.get_image_size(url, [self.image_processor.public_dir])
        sizes = [(640, 360, 85), (1280, 720, 85), (1920, 1080, 90)]  # (width, height, quality)
        for max_width, max_height, jpeg_quality in sizes:
            web_path = self.image_processor.web_path
            if width <= max_width and height <= max_height:
                # The image already fits this size: offer it as it is
                # and stop, larger variants would only be upscaled.
                srcset.append((f'{web_path}/{url}', f'{width}w'))
                break
            quality = jpeg_quality if ext == "jpeg" else None
            resized = self.image_processor.process_image(url, ext, self.image_processor.public_dir, width=max_width, height=max_height, quality=quality)
            srcset.append((f'{web_path}/{resized}', f'{max_width}w'))

    url = self.image_processor.web_path + "/" + url

    attributes = self.common_attributes(e)
    if "width" in e.attributes:
        attributes["width"] = e.attributes["width"]

    if e.title:
        attributes["alt"] = e.title
    else:
        # Render the image content with a throw-away generator writing
        # into memory and use the result as the alternative text.
        # NOTE(review): that output is already HTML and stag() escapes
        # attribute values again - confirm the double escaping is intended.
        fake_out = io.StringIO()
        HTML_generator(fake_out, self.katex_client, self.image_processor).generate(e.content)
        attributes["alt"] = fake_out.getvalue()

    if srcset:
        # The last candidate is used as the default source.
        attributes["src"] = srcset[-1][0]
        attributes["srcset"] = ", ".join(" ".join(candidate) for candidate in srcset)
    else:
        attributes["src"] = url

    img = RawInline(self.ntag("img", attributes))
    self.generate(Link(img, url=url))
|||
|
|||
def generate_Group(self, e: Group):
    """Generate the content of ``e`` inside its own KaTeX group.

    ``begingroup`` is called on the KaTeX client before the content is
    generated and ``endgroup`` afterwards.
    """
    self.katex_client.begingroup()
    try:
        self.generate(e.content)
    finally:
        # Close the group even when generating the content fails, so the
        # begin/end calls stay balanced for whatever is rendered next.
        self.katex_client.endgroup()
|||
|
|||
def generate_Plain(self, e: Plain):
    """Generate the content of ``e`` without any wrapping tag."""
    inner = e.content
    self.generate(inner)
|||
|
|||
def generate_LineItem(self, e: LineItem):
    """Generate one line of a line block, terminated by ``<br>``."""
    line_content = e.content
    self.generate(line_content)
    # Explicit HTML line break, then end the current output line.
    self.write("<br>")
    self.endln()
|||
|
|||
# Footnotes are rendered in place, in parentheses, rather than as real footnotes (this is how KSP did it before).
|||
def generate_Note(self, e: Note):
    """Write a footnote in place, wrapped in parentheses.

    When ``inlinify`` returns something other than ``None`` for the note,
    that result is generated on the current line. Otherwise the note
    content is generated as an indented block. In both cases the
    parentheses are inside the element produced for the note.
    """
    inline = inlinify(e)
    tag = self.tagname(e)
    if inline is not None:
        self.write(self.stag(tag)+" (")
        self.generate(inline)
        self.write(") "+self.etag(tag))
    else:
        self.writeln(self.stag(tag) + "(")
        self.iup()
        self.generate(e.content)
        self.ido()
        # The parenthesis is closed before the tag, mirroring the opening
        # line where it is opened inside the tag.
        self.writeln(")" + self.etag(tag))
|||
|
|||
def generate_Math(self, e: Math):
    """Render the math in ``e`` through the KaTeX client and write it.

    ``DisplayMath`` is rendered with ``displayMode`` on and ``InlineMath``
    with it off; any other format raises ``KeyError``.
    """
    display_mode = {
        "DisplayMath": True,
        "InlineMath": False
    }[e.format]
    rendered = self.katex_client.render(e.text, {"displayMode": display_mode})
    self.writeln(rendered)
|||
|
|||
def generate_RawInline(self, e: RawInline):
    """Write raw inline content verbatim when its format is ``html``.

    Raw content in any other format produces no output.
    """
    if e.format != "html":
        return
    self.write(e.text)
|||
|
|||
def generate_RawBlock(self, e: RawBlock):
    """Write a raw block verbatim when its format is ``html``.

    Raw blocks in any other format produce no output.
    """
    if e.format != "html":
        return
    self.writeraw(e.text)
|||
|
|||
def generate_Link(self, e: Link):
    """Write a link as an inline tag with ``href`` and optional ``title``.

    The link's own attributes take precedence over the common ``id`` and
    ``class`` attributes when the two are merged.
    """
    link_attributes = {"href": e.url}
    if e.title:
        link_attributes["title"] = e.title

    tag = self.tagname(e)
    merged = {**self.common_attributes(e), **link_attributes}
    self.generate_simple_inline_tag(e, tag, merged)
|||
|
|||
def generate_OrderedList(self, e: OrderedList):
    """Write an ordered list with its start number and numbering type.

    ``start`` is emitted only when it differs from 1. The list style is
    translated to the HTML ``type`` attribute; styles that have no entry
    in the table (such as ``DefaultStyle``) leave the attribute out so
    that the list uses the default numbering.
    """
    attributes = {}
    if e.start and e.start != 1:
        attributes["start"] = str(e.start)

    html_styles = {
        "Decimal": "1",
        "LowerRoman": "i",
        "UpperRoman": "I",
        "LowerAlpha": "a",
        "UpperAlpha": "A"
    }
    # An unknown or default style must not abort the whole output, so a
    # missing entry just means "no type attribute".
    list_type = html_styles.get(e.style) if e.style else None
    if list_type is not None:
        attributes["type"] = list_type

    # FIXME: Delimiter styles: 1. 1) (1)
    self.generate_simple_block_tag(e, self.tagname(e), self.common_attributes(e) | attributes)
|||
|
|||
def generate_TableCell(self, e: TableCell):
    """Write a table cell with its spans and text alignment.

    ``colspan`` and ``rowspan`` are emitted only when they differ from 1.
    An alignment other than ``AlignDefault`` is appended to the ``style``
    attribute as a ``text-align`` declaration.
    """
    attributes = self.common_attributes(e)

    for span_name in ("colspan", "rowspan"):
        span = getattr(e, span_name)
        if span != 1:
            attributes[span_name] = str(span)

    css_alignment = {
        "AlignLeft": "left",
        "AlignRight": "right",
        "AlignCenter": "center"
    }
    alignment = e.alignment
    if alignment and alignment != "AlignDefault":
        existing_style = attributes.get("style", "")
        attributes["style"] = existing_style + f"text-align: {css_alignment[alignment]};"

    self.generate_simple_block_tag(e, self.tagname(e), attributes)
|||
|
|||
# These are also disabled in pandoc so they shouldn't appear in the AST at all. |
|||
def generate_Citation(self, e: Citation):
    """Write a placeholder HTML comment; citations are not implemented."""
    placeholder = "<!-- FIXME: Citations not implemented -->"
    self.writeln(placeholder)
|||
|
|||
def generate_Cite(self, e: Cite):
    """Write a placeholder HTML comment; cites are not implemented."""
    placeholder = "<!-- FIXME: Cites not implemented -->"
    self.writeln(placeholder)
|||
|
|||
def generate_Definition(self, e: Definition):
    """Write a placeholder HTML comment; definitions are not implemented."""
    placeholder = "<!-- FIXME: Definitions not implemented -->"
    self.writeln(placeholder)
|||
|
|||
def generate_DefinitionItem(self, e: DefinitionItem):
    """Write a placeholder HTML comment; definition items are not implemented."""
    placeholder = "<!-- FIXME: DefinitionItems not implemented -->"
    self.writeln(placeholder)
|||
|
|||
def generate_DefinitionList(self, e: DefinitionList):
    """Write a placeholder HTML comment; definition lists are not implemented."""
    placeholder = "<!-- FIXME: DefinitionLists not implemented -->"
    self.writeln(placeholder)
Loading…
Reference in new issue