diff --git a/.gitignore b/.gitignore
index 8aa3d05..5663c89 100644
--- a/.gitignore
+++ b/.gitignore
@@ -9,3 +9,4 @@ public/
*.pdf
*.jpeg
*.svg
+!test/1px.png
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..9bdc0ff
--- /dev/null
+++ b/README.md
@@ -0,0 +1,376 @@
+---
+language: en
+highlight-style: native
+---
+
+# Formátítko 2.0
+A python program based on [pandoc](https://pandoc.org/) and its python library
+[panflute](http://scorreia.com/software/panflute) for converting from markdown
+to TeX and HTML with added fancy features like image processing, python-based
+macros and much more.
+
+## Requirements
+This project requires `panflute 2.3.0` that itself requires `pandoc 3.0`. If the
+version of `pandoc` doesn't match, very weird things can happen. ImageMagick and
+Inkscape are used for image processing. Nodejs is used for KaTeX.
+
+## Usage
+```
+usage: formatitko.py [-h] [-l IMG_LOOKUP_DIRS [IMG_LOOKUP_DIRS ...]] [-p IMG_PUBLIC_DIR] [-i IMG_WEB_PATH] [-w OUTPUT_HTML] [-t OUTPUT_TEX] input_filename
+
+positional arguments:
+ input_filename The markdown file to process.
+
+options:
+ -h, --help show this help message and exit
+ -l IMG_LOOKUP_DIRS [IMG_LOOKUP_DIRS ...], --img-lookup-dirs IMG_LOOKUP_DIRS [IMG_LOOKUP_DIRS ...]
+ Image lookup directories. When processing images, the program will try to find the image in them first. Always looks for images in the same folder as the markdown
+ file. (default: [])
+ -p IMG_PUBLIC_DIR, --img-public-dir IMG_PUBLIC_DIR
+ Directory to put processed images into. The program will not overwrite existing images. (default: public)
+ -i IMG_WEB_PATH, --img-web-path IMG_WEB_PATH
+ Path where the processed images are available on the website. (default: /)
+ -w OUTPUT_HTML, --output-html OUTPUT_HTML
+ The HTML file (for Web) to write into. (default: output.html)
+ -t OUTPUT_TEX, --output-tex OUTPUT_TEX
+ The TEX file to write into. (default: output.tex)
+```
+
+## Format
+Formátítko uses all the default pandoc markdown extensions except for
+definition lists and citations. It also adds its own custom features.
+
+## Features
+
+### Hiding and showing elements based on flags
+
+Flags can be set in the Front Matter or with python code. Then, elements with
+the `if` attribute will only be shown if the flag is set to True and elements
+with the `ifn` attribute will only be shown if the flag is not set to True.
+
+**Example:**
+
+```markdown {.group}
+---
+flags:
+ foo: true
+---
+[This will be shown]{if=foo}
+
+[This will not be shown]{if=bar}
+
+[This will be shown]{ifn=bar}
+```
+
+### Including other files
+
+There are two ways of including files.
+
+#### Importing
+The first is importing, which only takes the state (defined commands, metadata,
+etc.) from the file and any content is omitted. This is useful for creating
+libraries of commands. The syntax is as follows:
+
+[#test/empty.md]{}
+
+The curly braces are required for pandoc to parse the import properly and should
+be left empty.
+
+#### Partials
+Partials are the very opposite of imports, they have their own context, which
+inherits everything from the context they're included in, but gets reset after
+the file ends.
+
+:::{partial=test/empty.md}
+:::
+
+If the `untrusted` attribute is set to True, the partial and all its children
+will not be able to define commands or run inline blocks (but it will be able to
+run commands defined in the parent). ^[Please don't trust this for any security
+though, we're playing with *eval* fire, this will never be secure.]
+
+You can also import raw HTML and TeX if you set the `type` attribute of the
+partial to `tex` or `html`.
+
+### Groups
+
+Groups are pieces of markdown with their own sandboxed context, in other words,
+inline partials. They function exactly the same as partials, namely can have
+their own front matter.
+
+```markdown {.group}
+---
+language: cs
+---
+OOOoo český mód
+```
+
+If you need to nest groups or have code blocks inside groups, you can increase
+the amount of backticks around the outer block:
+
+````markdown {.group}
+```go
+fmt.Println("owo")
+```
+````
+
+Groups and partials are also enclosed in `\begingroup` and `\endgroup` in the
+output TeX.
+
+### Raw HTML and TeX ^[This is a pandoc feature]
+If raw HTML or TeX is included in the markdown file, it will automagically pop
+out into the respective output file.
+
+red text
+
+\vskip1em
+
+This has the advantage and disadvantage of being very *"automagic"*, which means
+that for instance markdown inside HTML will still get interpreted as markdown.
+It is however very very unreliable, so in most cases, you should use explicit
+raw blocks with the unnamed attribute set to either `html` or `tex`. ^[Still a
+pandoc feature.]
+
+``` {=html}
+red text
+```
+
+### Running python code
+
+Formátítko allows you to run Python code directly from your MD file. Any
+`python` code block with the class `run` will be executed:
+
+#### Context
+
+You can access the current context using the `ctx` variable. The context
+provides read/write access to the FrontMatter metadata. The context has the
+following methods:
+
+`ctx.get_metadata(key: str, simple: bool=True, immediate: bool=False)`
+
+- `key`: The key of the metadatum you want to get. Separate child keys with
+ dots: `ctx.get_metadata("flags.foo")`
+- `simple`: Whether to use python's simple builtin types or panflute's
+ MetaValues. MetaValues can contain formatted text, simple values lose all
+ formatting.
+- `immediate`: Only get metadatum from the current context, not from its
+ parents.
+
+`ctx.set_metadata(key: str, value)`
+
+- `key`: The key of the metadatum you want to set. Separate child keys with
+ dots: `ctx.set_metadata("flags.foo", True)`
+- `value`: Any value you want to assign to the metadatum
+
+`ctx.unset_metadata(key: str)`
+
+Delete the metadatum in the current context and allow it to inherit the value
+from the parent context.
+
+- `key`: The key of the metadatum you want to unset. Separate child keys with
+ dots: `ctx.unset_metadata("flags.foo")`
+
+Helper functions for flags exist which work the same as for metadata:
+
+`ctx.is_flag_set(flag: str) -> bool`
+
+`ctx.set_flag(flag: str, val: bool)`
+
+`ctx.unset_flag(flag: str)`
+
+#### Writing output
+
+There are two modes of writing output, plaintext and element-based.
+
+Plaintext mode uses the `print(text: str)` and `println(text: str)` functions,
+that append text to a buffer which is then interpreted as markdown input.
+
+Element-based mode uses the `appendChild(element: pf.Element)` and
+`appendChildren(*elements: List[pf.Element])` functions which allow you to
+append `panflute` elements to a list which is then again interpreted as input.
+The `panflute` library is available as `pf`.
+
+When one of these functions is called, the mode is set and functions from the
+other mode cannot be called within the same block of code.
+
+**Examples:**
+
+````markdown {.group}
+---
+title: Foo
+---
+```python {.run}
+println("*wooo*")
+println()
+println("The title of this file is: " + ctx.get_metadata("title"))
+```
+````
+
+```python {.run}
+appendChild(pf.Para(pf.Strong(pf.Str("foo"))))
+```
+
+### Defining and running commands
+
+Code blocks can be also saved and executed later. Defining is done using the
+`define` attribute:
+
+**Example:**
+
+```python {define=commandname}
+print("foo")
+```
+
+If you try to define the same command twice, you will get an error. To redefine
+a command, use the `redefine` attribute instead of `define`.
+
+### Running defined commands
+
+There are multiple ways of running commands. There is the shorthand way:
+
+[!commandname]{}
+
+Or using the `c` attribute on a span or a div:
+
+[Some content]{c=commandname}
+
+:::{c=commandname}
+Some content
+:::
+
+To access the content or attributes of the div or span the command has been
+called on, the `element` variable is available, which contains the `panflute`
+representation of the element.
+
+**Example:**
+
+```python {define=index}
+appendChild(element.content[int(element.attributes["i"])])
+```
+
+[Pick the third element from this span]{c=index i=2}
+
+### Direct metadata print
+Metadata can be printed directly using a shorthand. The advantage of this is that it
+keeps the formatting from the metadatum's definition.
+
+```markdown {.group}
+---
+a:
+ b: some text with **strong**
+---
+[$a.b]{}
+```
+
+### Syntax highlighting
+Formátítko uses [pygments](https://pygments.org/) to highlight syntax in code
+blocks. To turn it off for a single block, don't specify a language or set the
+`highlight` attribute to `False`. You can also set the metadatum `highlight` to
+`false` in the FrontMatter to disable it in a given Group. To change the [highlighting
+style](https://pygments.org/styles/), you have to set the `highlight-style`
+metadatum in the **top-level document**; this is to prevent the need for many
+inline style definitions.
+
+**Examples:**
+```python
+print("cool")
+```
+
+```zsh {highlight=False}
+./formatitko.py README.md
+```
+
+### Language awareness
+Formátítko is language aware, this means that the `language` metadatum is
+somewhat special. When set using the front matter, it is also popped out to TeX
+as a `\languagexx` macro. Currently supported values are `cs` and `en` for
+internal uses but can be set to anything.
+
+### NBSP
+Formátítko automatically inserts no-break spaces according to its sorta smart
+rules. (See the `whitespace.py` file for more info) These rules **depend on the
+chosen language**. (`cs` has some additional rules)
+
+To insert a literal no-break space, you can either insert the unicode no-break
+space or use the html escape.
+
+Enforcing a breakable space is not as painless, you should insert a zero-width
+space beside the normal space.
+
+### Smart quotes
+Quotes get automatically converted to the slanted ones according to the current
+language.
+
+**Examples:**
+
+```markdown {.group}
+---
+language: cs
+---
+"Uvozovky se v českém testu píší 'jinak' než v angličtině."
+```
+
+"In Czech texts, quotes are written 'differently' than in English"
+
+### Math
+Math blocks get automatically converted to HTML using $Ka\TeX$ and fall out
+unchanged into TeX output.
+
+To make KaTeX as consistent with TeX as possible, the `\begingroup` and
+`\endgroup` that are produced by [Groups](#groups) are also emulated in the
+KaTeX environment, so macro definitions should be isolated as you expect.
+
+### Images
+
+#### Figures
+Pandoc's [implicit
+figures](https://pandoc.org/MANUAL.html#extension-implicit_figures) are enabled,
+so images which are alone in a paragraph are automatically converted to figures:
+
+![A single pixel image, wow!](test/1px.png "This is the alt text shown to screen readers (it defaults to the caption)"){width=10em}
+
+To prevent this, add a backslash at the end of the line with the image:
+
+![A single pixel image, wow!](test/1px.png "This is the alt text shown to screen readers"){width=10em}\
+
+#### Image gathering
+Images are automatically searched for in the directory where each markdown file is
+(including partials) and also in directories listed in the `--img-lookup-dirs`
+command line parameter. After processing, they're all put into the folder
+specified with `--img-public-dir`.
+
+#### Image processing
+Images are automatically processed so that they can be successfully used in both
+output formats. This includes generating multiple sizes and providing a
+[srcset](https://developer.mozilla.org/en-US/docs/Learn/HTML/Multimedia_and_embedding/Responsive_images).
+
+To customize this, the `file-width`, `file-height`, `file-dpi`, `file-quality`
+and `no-srcset` attributes are available. All but the last one should be
+integers.
+
+Keep in mind that the processing tries to be as lazy as possible, so it never
+overwrites any files and if it finds the right format or resolution (only
+judging by the filenames) in the lookup directories it will just copy that. This
+means that any automatic attempts at conversion can be overridden by converting
+the file yourself, naming it accordingly and placing it either in the public or
+one of the lookup directories.
+
+## Working with the produced output
+
+### HTML
+The HTML should be almost usable as-is. The styles for syntax highlighting are
+added automatically. The styles for KaTeX however are not and should be added in
+your `<head>`^[This is taken directly from [KaTeX's docs](https://katex.org/docs/browser.html)]:
+
+```html
+
+```
+
+Also the output HTML is not intended as a standalone file but should be included
+as part of a larger template. (That includes a doctype, other css, etc.)
+
+### TeX
+The TeX output is not usable as is. Many of the elements are just converted to
+macros, which you have to define yourself. There is an example implementation in
+`formatitko.tex`, which uses LuaTeX and the ucwmac package, but you should
+customize it to your needs (and to the context in which the output is used).
diff --git a/command.py b/command.py
index 358d4a2..1128a17 100644
--- a/command.py
+++ b/command.py
@@ -9,6 +9,8 @@ from mj_show import show
class Command:
pass
+# This distinction is needed because while transforming the tree, inline
+# elements cannot be replaced with block ones
class InlineCommand(Span, Command):
def replaceSelf(self, content: List[Element]) -> Span:
try:
@@ -25,7 +27,8 @@ class BlockCommand(Div, Command):
return Div(*content)
pass
-
+# This function is called in transform.py, defining a command which can be
+# called later using the function below
def handle_command_define(e: Element, c: Context):
if "define" in e.attributes:
if not c.get_command(e.attributes["define"]):
@@ -38,6 +41,23 @@ def handle_command_define(e: Element, c: Context):
return nullify(e)
return e
+# This function executes commands and inline runnable code blocks (see
+# transform.py for their syntax). Context can be accessed using `ctx` and there
+# are four functions available to create output from these commands and the
+# element the command has been called on (including its .content) can be
+# accessed using `element`. Arguments can be passed down to the command using
+# the element's attributes.
+#
+# print and println append text to a buffer which is then interpreted as
+# markdown with the current context.
+#
+# appendChild and appendChildren append panflute elements to a list which is
+# then transformed. A command which does nothing looks like this:
+# ```python {define=nop}
+# appendChildren(element.content)
+# ```
+#
+# These two types, appending and printing, cannot be mixed.
def executeCommand(source, element: Element, ctx: Context) -> List[Element]:
mode = 'empty'
@@ -64,6 +84,7 @@ def executeCommand(source, element: Element, ctx: Context) -> List[Element]:
for e in l:
appendChild(e)
+ import panflute as pf
exec(source)
if mode == 'text':
diff --git a/context.py b/context.py
index 89b8b9b..2f48823 100644
--- a/context.py
+++ b/context.py
@@ -1,12 +1,26 @@
-from panflute import Doc
+from panflute import Doc, Div
+from typing import Dict
import os
+
+# This class is used to keep state while transforming the document using
+# transform.py. For the context to be available to the html and TeX generators,
+# individual keys must be manually assigned to the individual elements. This is
+# done in transform.py.
+#
+# The context is also aware of its parent contexts and relevant data (such as
+# metadata and commands) can be read from the closest parent context. Writing
+# only happens to the current one.
+#
+# This class is basically an extension to panflute's doc, this is why metadata
+# is read directly from it.
class Context:
- def __init__(self, doc: Doc, path: str, parent: 'Context'=None):
+ def __init__(self, doc: Doc, path: str, parent: 'Context'=None, trusted: bool=True):
self.parent = parent
self._commands = {}
self.doc = doc
+ self.trusted = trusted
self.path = path
self.dir = os.path.dirname(path) if os.path.dirname(path) != "" else "."
self.filename = os.path.basename(path)
@@ -41,10 +55,10 @@ class Context:
def set_flag(self, flag: str, val: bool):
self.set_metadata("flags."+flag, val)
- def unset_flag(self, flag):
+ def unset_flag(self, flag: str):
self.unset_metadata("flags."+flag)
- def get_metadata(self, key, simple=True, immediate=False):
+ def get_metadata(self, key: str, simple: bool=True, immediate: bool=False):
value = self.doc.get_metadata(key, None, simple)
if value is not None:
return value
@@ -53,7 +67,7 @@ class Context:
else:
return None
- def set_metadata(self, key, value):
+ def set_metadata(self, key: str, value):
if key == "language":
print("WARN: Setting language this way doesn't propagate to TeX. Either use the Front Matter or specify it additionally using the \\languagexx macro.")
meta = self.doc.metadata
@@ -62,10 +76,19 @@ class Context:
meta = meta[k]
meta[key[-1]] = value
- def unset_metadata(self, key):
+ def unset_metadata(self, key: str):
meta = self.doc.metadata
key = key.split(".")
for k in key[:-1]:
meta = meta[k]
del meta.content[key[-1]] # A hack because MetaMap doesn't have a __delitem__
+
+# This is a custom element which creates \begingroup \endgroup groups in TeX
+# and also causes KaTeX math blocks to be isolated in a similar way.
+#
+# Whenever a new context is created, its content should be enclosed in a group and vice-versa.
+class Group(Div):
+ def __init__(self, *args, metadata={}, **kwargs):
+ self.metadata = metadata
+ super().__init__(*args, **kwargs)
diff --git a/formatitko.py b/formatitko.py
index 94dfc04..5d50bd8 100755
--- a/formatitko.py
+++ b/formatitko.py
@@ -9,8 +9,7 @@ import os
# Import local files
from transform import transform
from util import *
-from context import Context
-from group import Group
+from context import Context, Group
from katex import KatexClient
from html import html
from tex import tex
@@ -18,26 +17,46 @@ from images import ImageProcessor
from mj_show import show
-parser = argparse.ArgumentParser()
-parser.add_argument("-l", "--img-lookup-dirs", help="Image lookup directories. When processing images, the program will try to find the image in them first. By default contains the directory of each MarkDown file.", nargs="+", default=[])
-parser.add_argument("-p", "--img-public-dir", help="Directory to put processed images into. The program will not overwrite existing images.", nargs=1, default="public")
-parser.add_argument("-i", "--img-web-path", help="Path where the processed images are available on the website.", nargs=1, default="/")
-parser.add_argument("-w", "--output-html", help="The HTML file (for Web) to write into.", nargs=1, default="output.html")
-parser.add_argument("-t", "--output-tex", help="The TEX file to write into.", nargs=1, default="output.tex")
-parser.add_argument("input_filename", help="The MarkDown file to process.")
+# Initialize command line arguments
+parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
+parser.add_argument("-l", "--img-lookup-dirs", help="Image lookup directories. When processing images, the program will try to find the image in them first. Always looks for images in the same folder as the markdown file.", nargs="+", default=[])
+parser.add_argument("-p", "--img-public-dir", help="Directory to put processed images into. The program will not overwrite existing images.", default="public")
+parser.add_argument("-i", "--img-web-path", help="Path where the processed images are available on the website.", default="/")
+parser.add_argument("-w", "--output-html", help="The HTML file (for Web) to write into.", default="output.html")
+parser.add_argument("-t", "--output-tex", help="The TEX file to write into.", default="output.tex")
+parser.add_argument("input_filename", help="The markdown file to process.")
+parser.add_argument("--debug", action='store_true')
args = parser.parse_args()
+# TODO: Accept path to unix socket for katexClient, then don't init our own,
+# just connect to an existing one. For formátíking many files in a row.
+# Use panflute to parse the input MD file
doc = import_md(open(args.input_filename, "r").read())
+if args.debug:
+ print(show(doc))
+
+# The language metadatum is important, so it's read before transformation and
+# then attached to a group inside the Doc
language = doc.get_metadata("language", None, True)
context = Context(doc, args.input_filename)
+# Transform the document. This includes all the fancy formatting this software does.
doc = doc.walk(transform, context)
+# Now wrap the document contents in a group, which is able to pop its language
+# setting out to TeX
doc.content = [Group(*doc.content, metadata={"language":language})]
+# Initialize KaTeX client (this runs the node app and connects to a unix socket)
katexClient = KatexClient()
+# Initialize the image processor (this just keeps some basic state)
imageProcessor = ImageProcessor(args.img_public_dir, args.img_web_path, *args.img_lookup_dirs)
+# Generate HTML and TeX out of the transformed document
open(args.output_html, "w").write(html(doc, katexClient, imageProcessor))
open(args.output_tex, "w").write(tex(doc, imageProcessor))
+
+if args.debug:
+ print(show(doc))
+
diff --git a/formatitko.tex b/formatitko.tex
index a5de47d..e48135a 100644
--- a/formatitko.tex
+++ b/formatitko.tex
@@ -21,24 +21,19 @@
\fncount=1
\def\fnmark{\superscript{\the\fncount}}
\def\fn#1{\footnote\fnmark{#1}\advance\fncount by 1}
-
\def\hA#1{{\parskip1em\settextsize{14}\bf #1}}
\def\hB#1{{\parskip1em\settextsize{12}\bf #1}}
\def\hC#1{{\parskip1em\settextsize{10}\bf #1}}
+\def\hD#1{{\parskip1em\settextsize{10}\bi #1}}
\def\hr{{\vskip5pt\hrule\vskip5pt}}
\long\def\blockquote#1{\vskip\lineskip\vskip\parskip\hbox{\vrule\hskip5pt\vbox{#1}}}
-\def\code#1{{\tt #1}}
+\let\code\verbatim
\let\codeblock\verbatim
\def\subscript#1{\leavevmode\lower1pt\hbox{\fiverm#1}}
\def\strikeout#1{FIXME: Strikeout not implemented}
\def\underline#1{FIXME: Underline not implemented}
\def\figure#1#2{\vskip5pt\centerline{#1}\centerline{#2}\vskip5pt}
-\def\caption#1{{\it #1}}
+\def\figcaption#1{{\it #1}}
\let\image\putimage
\def\languagecs{} % KSP should define this to \cze probably
\def\languageen{} % KSP should define this to \eng probably
-\def\table#1{#1}
-\def\tablebody#1{#1}
-\def\tablerow#1{#1}
-\def\tablehead#1{#1}
-\def\tablecell#1{#1}
diff --git a/group.py b/group.py
deleted file mode 100644
index 4e1a203..0000000
--- a/group.py
+++ /dev/null
@@ -1,8 +0,0 @@
-from panflute import Block
-from typing import Dict
-
-class Group(Block):
- def __init__(self, *args, identifier='', classes=[], attributes={}, metadata={}):
- self._set_ica(identifier, classes, attributes)
- self._set_content(args, Block)
- self.metadata = metadata
diff --git a/html.py b/html.py
index 11dde85..17267ed 100644
--- a/html.py
+++ b/html.py
@@ -9,17 +9,21 @@ from whitespace import NBSP
from transform import FQuoted
from katex import KatexClient
from util import inlinify
-from group import Group
+from context import Group
from images import ImageProcessor
def html(e: Element, k: KatexClient, i: ImageProcessor, indent_level: int=0, indent_str: str="\t") -> str:
+ # `only` attribute which makes transformed elements appear only in tex
+ # output or html output
if hasattr(e, "attributes") and "only" in e.attributes and e.attributes["only"] != "html":
return ""
if isinstance(e, ListContainer):
return ''.join([html(child, k, i, indent_level, indent_str) for child in e])
+ # Bits from which the final element output is built at the end of this
+ # function. Most elements override this by returning their own output.
tag = e.tag.lower()
attributes = ""
content_foot = ""
@@ -28,6 +32,8 @@ def html(e: Element, k: KatexClient, i: ImageProcessor, indent_level: int=0, ind
if isinstance(e, Str):
return e.text.replace(" ", " ")
+ # Most elements fit the general template at the end of the function, just
+ # need their html tag specified.
tags = {
BulletList: "ul",
Doc: "main",
@@ -51,6 +57,7 @@ def html(e: Element, k: KatexClient, i: ImageProcessor, indent_level: int=0, ind
if type(e) in tags:
tag = tags[type(e)]
+ # These are also disabled in pandoc so they shouldn't appear in the AST at all.
not_implemented = {
Citation: True,
Cite: True,
@@ -61,6 +68,7 @@ def html(e: Element, k: KatexClient, i: ImageProcessor, indent_level: int=0, ind
if type(e) in not_implemented:
return f''
+ # Elements which can be represented by a simple string
simple_string = {
NBSP: " ",
Space: " ",
@@ -78,44 +86,68 @@ def html(e: Element, k: KatexClient, i: ImageProcessor, indent_level: int=0, ind
if hasattr(e, "classes") and len(e.classes) != 0:
attributes += f' class="{" ".join(e.classes)}"'
- # TODO: Pass attributes down to HTML too
+ # Attributes are only passed down manually, because we use them internally.
+ # Maybe this should be a blocklist instead of an allowlist?
+ # Overriding elements with their own returns
if isinstance(e, CodeBlock):
- if e.attributes["highlight"] == True or e.attributes["highlight"] == 'True':
+ if len(e.classes) > 0 and (e.attributes["highlight"] == True or e.attributes["highlight"] == 'True'):
+ # Syntax highlighting using pygments
for cl in e.classes:
try:
lexer = get_lexer_by_name(cl)
except ClassNotFound:
continue
break
+ else:
+ print(f"WARN: Syntax highligher does not have lexer for element with these classes: {e.classes}")
formatter = HtmlFormatter(style=e.attributes["style"])
result = highlight(e.text, lexer, formatter)
- style = formatter.get_style_defs(".highlight")
- return f'{result}'
-
+ return f'{result}'
else:
return f'
{e.text}
'
- if isinstance(e, Figure):
- content_foot = html(e.caption, k, i, indent_level+1, indent_str)
-
- if isinstance(e, Caption):
- tag = "figcaption"
+ if isinstance(e, Doc):
+ formatter = HtmlFormatter(style=e.get_metadata("highlight-style") if e.get_metadata("highlight-style") is not None else "default")
+ content_head = f''
if isinstance(e, Image):
url = e.url
+
+ # Attributes → image processor args
+ additional_args = {}
+ if "file-width" in e.attributes:
+ additional_args["width"] = int(e.attributes["file-width"])
+ if "file-height" in e.attributes:
+ additional_args["height"] = int(e.attributes["file-height"])
+ if "file-quality" in e.attributes:
+ additional_args["quality"] = int(e.attributes["file-quality"])
+ if "file-dpi" in e.attributes:
+ additional_args["dpi"] = int(e.attributes["file-dpi"])
+
+ # The directory of the current file, will also look for images there.
source_dir = e.attributes["source_dir"]
+
_, ext = os.path.splitext(url)
ext = ext[1:]
+
+ # Conversions between various formats.
if ext in ["svg", "png", "jpeg", "gif"]:
- url = i.process_image(url, ext, source_dir)
+ # Even supported elements have to be 'converted' because the
+ # processing contains finding and moving them to the output
+ # directory.
+ url = i.process_image(url, ext, source_dir, **additional_args)
elif ext in ["pdf", "epdf"]:
- url = i.process_image(url, "png", source_dir, dpi=300)
+ if not "dpi" in additional_args:
+ additional_args["dpi"] = 300
+ url = i.process_image(url, "png", source_dir, **additional_args)
elif ext in ["jpg"]:
- url = i.process_image(url, "jpeg", source_dir)
+ url = i.process_image(url, "jpeg", source_dir, **additional_args)
else:
- url = i.process_image(url, "png", source_dir)
+ url = i.process_image(url, "png", source_dir, **additional_args)
+ # Srcset generation - multiple alternative sizes of images browsers can
+ # choose from.
_, ext = os.path.splitext(url)
ext = ext[1:]
srcset = []
@@ -123,14 +155,14 @@ def html(e: Element, k: KatexClient, i: ImageProcessor, indent_level: int=0, ind
# This is inspired by @vojta001's blogPhoto shortcode he made for
# patek.cz:
# https://gitlab.com/patek-devs/patek.cz/-/blob/master/themes/patek/layouts/shortcodes/blogPhoto.html
- width, height = i.get_image_size(url, [source_dir, i.public_dir])
+ width, height = i.get_image_size(url, [i.public_dir])
sizes = [(640, 360, 85), (1280, 720, 85), (1920, 1080, 90)] # (widht, height, quality)
for size in sizes:
if width <= size[0] and height <= size[1]:
srcset.append((f'{i.web_path}/{url}', f'{width}w'))
break
quality = size[2] if ext == "jpeg" else None
- srcset.append((f'{i.web_path}/{i.process_image(url, ext, source_dir, width=size[0], height=size[1], quality=quality)}', f'{size[0]}w'))
+ srcset.append((f'{i.web_path}/{i.process_image(url, ext, i.public_dir, width=size[0], height=size[1], quality=quality)}', f'{size[0]}w'))
url = i.web_path + "/" + url
@@ -140,58 +172,17 @@ def html(e: Element, k: KatexClient, i: ImageProcessor, indent_level: int=0, ind
else:
return f''
- if isinstance(e, Header):
- tag = "h"+str(e.level)
-
- if isinstance(e, Link):
- tag = "a"
- attributes += f' href="{e.url}"'
- if e.title:
- attributes += f' title="{e.title}"'
-
+ # See https://pandoc.org/MANUAL.html#line-blocks
if isinstance(e, LineItem):
return indent_level*indent_str + html(e.content, k, i) + " \n"
+ # Footnotes are placed into parentheses. (And not footnotes (This is how KSP did it before me))
if isinstance(e, Note):
content_head = "("
content_foot = ")"
if inlinify(e) is not None:
return f' ({html(inlinify(e), k, i, 0, "")})'
- if isinstance(e, OrderedList):
- tag = "ol"
- if e.start and e.start != 1:
- attributes += f' start="{e.start}"'
- html_styles = {
- "Decimal": "1",
- "LowerRoman": "i",
- "UpperRoman:": "I",
- "LowerAlpha": "a",
- "UpperAlpha": "A"
- }
- if e.style and e.style != "DefaultStyle":
- attributes += f' type="{html_styles[e.style]}"'
- # FIXME: Delimeter styles
-
- if isinstance(e, Table):
- content_head = html(e.head, k, i, indent_level+1, indent_str)
- content_foot = html(e.foot, k, i, indent_level+1, indent_str)
- # FIXME: Fancy pandoc tables, using colspec
-
- if isinstance(e, TableCell):
- tag = "td"
- if e.colspan != 1:
- attributes += f' colspan="{e.colspan}"'
- if e.rowspan != 1:
- attributes += f' rowspan="{e.rowspan}"'
- aligns = {
- "AlignLeft": "left",
- "AlignRight": "right",
- "AlignCenter": "center"
- }
- if e.alignment and e.alignment != "AlignDefault":
- attributes += f' style="text-align: {aligns[e.alignment]}"'
-
if isinstance(e, FQuoted):
if e.style == "cs":
if e.quote_type == "SingleQuote":
@@ -222,9 +213,6 @@ def html(e: Element, k: KatexClient, i: ImageProcessor, indent_level: int=0, ind
"DisplayMath": True,
"InlineMath": False
}
- # FIXME: Currently, all bits of math are isolated from each other, this
- # means that \defs and and alike work only inside a single math block
- # and are forgotten in the next one.
return indent_level*indent_str + k.render(e.text, {"displayMode": formats[e.format]})
if isinstance(e, RawInline):
@@ -239,6 +227,62 @@ def html(e: Element, k: KatexClient, i: ImageProcessor, indent_level: int=0, ind
else:
return ""
+
+ # Non-overriding elements, they get generated using the template at the end
+ # of this function
+ if isinstance(e, Header):
+ tag = "h"+str(e.level)
+
+ if isinstance(e, Figure):
+ content_foot = html(e.caption, k, i, indent_level+1, indent_str)
+
+ if isinstance(e, Caption):
+ tag = "figcaption"
+
+ if isinstance(e, Link):
+ tag = "a"
+ attributes += f' href="{e.url}"'
+ if e.title:
+ attributes += f' title="{e.title}"'
+
+ if isinstance(e, OrderedList):
+ tag = "ol"
+ if e.start and e.start != 1:
+ attributes += f' start="{e.start}"'
+ html_styles = {
+ "Decimal": "1",
+ "LowerRoman": "i",
+ "UpperRoman:": "I",
+ "LowerAlpha": "a",
+ "UpperAlpha": "A"
+ }
+ if e.style and e.style != "DefaultStyle":
+ attributes += f' type="{html_styles[e.style]}"'
+ # FIXME: Delimeter styles
+
+ if isinstance(e, Table):
+ content_head = html(e.head, k, i, indent_level+1, indent_str)
+ content_foot = html(e.foot, k, i, indent_level+1, indent_str)
+ # FIXME: Fancy pandoc tables, using colspec
+
+ if isinstance(e, TableCell):
+ tag = "td"
+ if e.colspan != 1:
+ attributes += f' colspan="{e.colspan}"'
+ if e.rowspan != 1:
+ attributes += f' rowspan="{e.rowspan}"'
+ aligns = {
+ "AlignLeft": "left",
+ "AlignRight": "right",
+ "AlignCenter": "center"
+ }
+ if e.alignment and e.alignment != "AlignDefault":
+ attributes += f' style="text-align: {aligns[e.alignment]}"'
+
+ # The default which all non-overriding elements get generated by. This
+ # includes elements, which were not explicitly mentioned in this function,
+ # e. g. Strong
+
if isinstance(e, Inline):
return f'<{tag}{attributes}>{content_head}{html(e.content, k, i, 0, "") if hasattr(e, "_content") else ""}{e.text if hasattr(e, "text") else ""}{content_foot}{tag}>'
diff --git a/images.py b/images.py
index 132912a..aee0b4a 100644
--- a/images.py
+++ b/images.py
@@ -2,6 +2,7 @@ from typing import List
import os
import shutil
import subprocess
+from PIL import Image
class ImageProcessor:
def __init__(self, public_dir: str, web_path: str, *lookup_dirs: List[str]):
@@ -18,7 +19,8 @@ class ImageProcessor:
full_path = self.find_image(input_filename, [source_dir])
if full_path is None:
raise FileNotFoundError(f'Image {input_filename} not found.')
-
+
+ # Generate filename from arguments
suffix = ""
geometry = None
if width is not None or height is not None:
@@ -28,24 +30,32 @@ class ImageProcessor:
suffix += f'_q{quality}'
target_name = base+suffix+"."+format
target_path = self.public_dir + "/" + target_name
-
+
+ # Only regenerate if the file doesn't already exist.
if not os.path.isfile(target_path):
- if (((ext == format and width)
+
+ # If the format is the same or it is just a different extension for
+ # the same format, just copy it.
+ if (((ext == format)
or (ext == "epdf" and format == "pdf")
or (ext == "jpg" and format == "jpeg"))
and width is None and height is None and quality is None and dpi is None):
shutil.copyfile(full_path, target_path)
+ # Try to find the converted filename in lookup_dirs; if it is found,
+ # don't convert, just copy it.
elif self.find_image(target_name, [source_dir]):
shutil.copyfile(self.find_image(target_name, [source_dir]), target_path)
+ # Convert SVGs using inkscape
elif ext == "svg":
width_arg = ['--export-width', str(width)] if width is not None else []
height_arg = ['--export-height', str(height)] if height is not None else []
dpi_arg = ['--export-dpi', str(dpi)] if dpi is not None else []
if subprocess.run(['inkscape', full_path, '-o', target_path, *width_arg, *height_arg, *dpi_arg]).returncode != 0:
raise Exception(f"Could not convert '{full_path}' to '{format}'")
-
+
+ # Convert everything else using ImageMagick.
else:
resize_arg = ['-resize', str(geometry)] if geometry is not None else []
density_arg = ['-density', str(dpi)] if dpi is not None else []
@@ -60,7 +70,8 @@ class ImageProcessor:
full_path = self.find_image(input_filename, additional_dirs)
if full_path is None:
raise FileNotFoundError(f'Image {input_filename} not found.')
- return (int(x) for x in subprocess.run(['convert', full_path, '-print', '%w %h\\n', '/dev/null'], capture_output=True).stdout.split(b" "))
+ # Getting the image size using ImageMagick is VERY slow, so use Pillow.
+ return Image.open(full_path).size
def find_image(self, input_filename: str, additional_dirs: List[str]=[]) -> str:
diff --git a/katex-server/index.mjs b/katex-server/index.mjs
index 141e5a0..6beddf2 100644
--- a/katex-server/index.mjs
+++ b/katex-server/index.mjs
@@ -78,10 +78,16 @@ function socketWrite(socket, data) {
async function handleClient(client) {
const rl = readline.createInterface({ input: client })
+ /* Added by GS: A stack of katex's `macros` objects, each group inherits
+ * the one from the parent group and can add its own stuff without
+ * affecting the parent.
+ */
const macroStack = [{}]
for await (const line of rl) {
try {
+ // The custom commands for pushing and popping the macro stack.
if (line === "begingroup") {
+ // Copy the current state of macros and push it onto the stack.
macroStack.push({...macroStack.slice(-1)[0]})
continue
} else if (line === "endgroup") {
@@ -92,12 +98,16 @@ async function handleClient(client) {
const results = []
for (const input of query.formulas) {
const options = input.options ?? query.options ?? defaultOptions
+ // Add macros from the macros option
if (options.macros) {
for (const macro of Object.keys(options.macros)) {
macroStack.slice(-1)[macro] = options.macros[macro]
}
}
options.macros = macroStack.slice(-1)[0]
+ // Enforce globalGroup option, katex then saves created macros
+ // into the options.macros object.
+ options.globalGroup = true
try {
const html = katex.renderToString(input.tex, options)
results.push({ html })
diff --git a/katex.py b/katex.py
index 7879e7e..c0aadea 100644
--- a/katex.py
+++ b/katex.py
@@ -4,6 +4,7 @@ import tempfile
import json
import os
from typing import Dict
+import time
class KatexError(Exception):
@@ -11,26 +12,37 @@ class KatexError(Exception):
class KatexClient:
def __init__(self):
- self._client = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
+ # Create temporary directory for socket
self._temp_dir = tempfile.TemporaryDirectory(prefix='formatitko')
self._socket_file = self._temp_dir.name + "/katex-socket"
+
self._server_process = subprocess.Popen(["node", os.path.dirname(os.path.realpath(__file__)) + "/katex-server/index.mjs", self._socket_file])
+
+ self._client = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
+
+ # Wait for the node program to create the socket file
while not os.path.exists(self._socket_file):
- pass
+ time.sleep(0.01)
+
+ # Wait for the node program to start accepting connections
while True:
try:
self._client.connect(self._socket_file)
+ time.sleep(0.01)
except ConnectionRefusedError:
continue
break
def render(self, tex: str, options: Dict={}):
- options["globalGroup"] = True
+ # Send formulas to translate
self._client.sendall((json.dumps({"formulas":[{"tex":tex}], "options":options})+"\n").encode("utf-8"))
- data = self._client.recv(1024)
+
+ # Receive response
+ data = self._client.recv(4096)
while data[-1] != 0x0a:
data += self._client.recv(128)
response = json.loads(data)
+
if "error" in response:
raise Exception(response["error"])
if "error" in response["results"][0]:
@@ -38,6 +50,7 @@ class KatexClient:
else:
return response["results"][0]["html"]
+ # Special commands implemented in the JS file for grouping defs together.
def begingroup(self):
self._client.sendall("begingroup\n".encode("utf-8"))
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..250abf8
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,4 @@
+Pygments==2.14.0
+panflute==2.3.0
+fontTools==4.38.0
+Pillow==9.4.0
diff --git a/test.md b/test.md
deleted file mode 100644
index 692aaa1..0000000
--- a/test.md
+++ /dev/null
@@ -1,162 +0,0 @@
----
-title: 'Wooooo a title'
-subtitle: 'A subtitle'
-are_we_there_yet: False
-language: "en"
----
-[#test-import.md]{}
-
-# Hello world!
-
-This is an *example* **yay**!
-
-This is *very **strongly** emphasised*
-
-Příliš žluťoučký kůň pěl dábelské ódy. *Příliš žluťoučký kůň pěl dábelské ódy.* **Příliš žluťoučký kůň pěl dábelské ódy.** ***Příliš žluťoučký kůň pěl dábelské ódy.***
-
-
-:::{partial=test-partial.md}
-:::
-
-:::{if=cat}
-This should only be shown to cats
-:::
-
-
-```python {.run}
-ctx.set_flag("cat", True)
-```
-
-```python {.run}
-println(f"The main document's title is '{ctx.get_metadata('title')}'")
-ctx.set_metadata("a", {})
-ctx.set_metadata("a.b", {})
-ctx.set_metadata("a.b.c", "Bruh **bruh** bruh")
-```
-
-```python {style=native}
-def bruh(no):
- wat
-```
-
-Inline `code`
-
-::::{if=cat}
-This should only be shown to cats the second time
-::::
-
-# [$are_we_there_yet]{}
-
-![This is a figure, go figure...](/tmp/logo.pdf)
-
-![This is a figure, go figure...](/tmp/logo.jpg){width=10em}
-
-![This is a figure, go figure...](/tmp/logo.png){width=10em}
-
-![Fakt epesní reproduktor](/tmp/reproduktor.jpeg){width=10em}
-
-```python {.run}
-ctx.set_metadata("language", "cs")
-```
-[!opendatatask]{}
-```python {.run}
-ctx.set_metadata("language","en")
-```
-[This too!]{if=cat}
-
-[What]{.co}
-
-[An inline command with contents and **bold** and another [!nop]{} inside!]{c=nop}
-
-[!nop]{a=b}
-
-> OOO a blockquote mate init
->
->> Nesting??
->> Woah
-
-A non-breakable space bro
-
-A lot of spaces
-
-A text with some inline math: $\sum_{i=1}^nn^2$. Plus some display math:
-
-A link with the link in the link:
-
-H~2~O is a liquid. 2^10^ is 1024.
-
-[Underline]{.underline}
-
-:::{only=html}
-$$
-\def\eqalign#1{\begin{align*}#1\end{align*}}
-$$
-:::
-
-$$
-\eqalign{
- 2 x_2 + 6 x_3 &= 14 \cr
- x_1 - 3 x_2 + 2 x_3 &= 5 \cr
- -x_1 + 4 x_2 + \phantom{1} x_3 &= 2
-}
-$$
-
-:::{partial=test-partial.md}
-:::
-
----
-
-This should be seen by all.^[This is a footnote]
-
-| Matematicko-fyzikální fakulta University Karlovy
-| Malostranské nám. 2/25
-| 118 00 Praha 1
-
-More footnotes.^[I am a foot]
-
-To Do:
-
-- buy eggs
-- buy milk
-- ???
-- profit
- - also create sublists preferrably
-
-1. Woah
-2. Wooo
-3. no
-
-4) WOO
-
-``` {=html}
-
-```
-
-#. brum
-#. BRUHHH
-#. woah
-
-i. bro
-ii. wym bro
-
-
-+---------------------+-----------------------+
-| Location | Temperature 1961-1990 |
-| | in degree Celsius |
-+---------------------+-------+-------+-------+
-| | min | mean | max |
-+=====================+=======+=======+======:+
-| Antarctica | -89.2 | N/A | 19.8 |
-+---------------------+-------+-------+-------+
-| Earth | -89.2 | 14 | 56.7 |
-+---------------------+-------+-------+-------+
-
-------- ------ ---------- -------
- 12 12 12 12
- 123 123 123 123
- 1 1 1 1
-------- ------ ---------- -------
-
diff --git a/test/1px.png b/test/1px.png
new file mode 100644
index 0000000..4edadd3
Binary files /dev/null and b/test/1px.png differ
diff --git a/test/Makefile b/test/Makefile
index 455c41e..58842e5 100644
--- a/test/Makefile
+++ b/test/Makefile
@@ -1,10 +1,10 @@
-all: test.pdf public/test.html
+all: test.pdf public/index.html
output.tex output.html:
../formatitko.py test.md
-public/test.html: output.html
- cat test-top.html output.html > public/test.html
+public/index.html: output.html
+ cat test-top.html output.html > public/index.html
test.tex: output.tex
cat test-top.tex output.tex > test.tex
diff --git a/test/empty.md b/test/empty.md
new file mode 100644
index 0000000..8d1c8b6
--- /dev/null
+++ b/test/empty.md
@@ -0,0 +1 @@
+
diff --git a/test/test-files/evil.md b/test/test-files/evil.md
new file mode 100644
index 0000000..a27c03d
--- /dev/null
+++ b/test/test-files/evil.md
@@ -0,0 +1,8 @@
+---
+title: "I am a little evil md file hehe"
+---
+```python {.run}
+import sys
+sys.exit(666)
+```
+I am very innocent wym bro :(
\ No newline at end of file
diff --git a/test/test-files/test-partial.md b/test/test-files/test-partial.md
index bdf1c1e..6450237 100644
--- a/test/test-files/test-partial.md
+++ b/test/test-files/test-partial.md
@@ -1,6 +1,5 @@
---
title: A subfile!
-language: "cs"
---
I am a little piece of content
@@ -23,21 +22,18 @@ println()
println(f"The subdocument's subtitle is \n\n## {ctx.get_metadata('subtitle')}")
```
-``` {.python .run}
-ctx.set_metadata("language", "cs")
-```
-
+```markdown {.group}
+---
+language: "cs"
+---
Tak toto je "v prádelně" pánové!
-
-``` {.python .run}
-ctx.set_metadata("language", "en")
```
+```markdown {.group}
+---
+language: "en"
+---
This is "in a laundry room" gentlemen!
-
-
-``` {.python .run}
-ctx.unset_metadata("language")
```
I am a duck.
@@ -63,5 +59,5 @@ $$
![Fakt epesní reproduktor](reproduktor.jpeg){width=10em}
-![Fakt epesní reproduktor](reproduktor.png){width=10em}
+![Fakt epesní reproduktor](reproduktor.png "Hodně rozpixelovaný obrázek reproduktoru"){width=10em file-width=1000}
diff --git a/test/test.md b/test/test.md
index b82831e..39b587e 100644
--- a/test/test.md
+++ b/test/test.md
@@ -14,6 +14,8 @@ This is *very **strongly** emphasised*
Příliš žluťoučký kůň pěl dábelské ódy. *Příliš žluťoučký kůň pěl dábelské ódy.* **Příliš žluťoučký kůň pěl dábelské ódy.** ***Příliš žluťoučký kůň pěl dábelské ódy.***
+:::{partial=test-files/evil.md untrusted=True}
+:::
:::{partial=test-files/test-partial.md}
:::
@@ -22,7 +24,6 @@ Příliš žluťoučký kůň pěl dábelské ódy. *Příliš žluťoučký ků
This should only be shown to cats
:::
-
```python {.run}
ctx.set_flag("cat", True)
```
@@ -47,13 +48,18 @@ This should only be shown to cats the second time
# [$are_we_there_yet]{}
-```python {.run}
-ctx.set_metadata("language", "cs")
-```
+```markdown {.group}
+---
+language: cs
+---
+V pravém jízdním bruhu.
+V pravém jízdním bruhu.
+V pravém jízdním bruhu.
+V pravém jízdním bruhu.
+
[!opendatatask]{}
-```python {.run}
-ctx.set_metadata("language","en")
```
+
[This too!]{if=cat}
[What]{.co}
diff --git a/tex.py b/tex.py
index fb97d8f..238255e 100644
--- a/tex.py
+++ b/tex.py
@@ -4,18 +4,22 @@ import os
from whitespace import NBSP
from transform import FQuoted
from util import inlinify
-from group import Group
+from context import Group
from images import ImageProcessor
# Heavily inspired by: git://git.ucw.cz/labsconf2022.git
def tex(e: Element, i: ImageProcessor, indent_level: int=0, indent_str: str="\t") -> str:
-
+
+ # `only` attribute which makes transformed elements appear only in tex
+ # output or html output
if hasattr(e, "attributes") and "only" in e.attributes and e.attributes["only"] != "tex":
return ""
if isinstance(e, ListContainer):
return ''.join([tex(child, i, indent_level, indent_str) for child in e])
+ # Bits from which the final element output is built at the end of this
+ # function. Most elements override this by returning their own output.
content_foot = ""
content_head = ""
@@ -31,7 +35,7 @@ def tex(e: Element, i: ImageProcessor, indent_level: int=0, indent_str: str="\t"
if type(e) in tags:
tag = tags[type(e)]
-
+ # These are also disabled in pandoc so they shouldn't appear in the AST at all.
not_implemented = {
Citation: True,
Cite: True,
@@ -41,7 +45,8 @@ def tex(e: Element, i: ImageProcessor, indent_level: int=0, indent_str: str="\t"
}
if type(e) in not_implemented:
return f'% FIXME: {type(e)}s not implemented \n'
-
+
+ # Elements which can be represented by a simple string
simple_string = {
NBSP: "~",
Space: " ",
@@ -53,12 +58,61 @@ def tex(e: Element, i: ImageProcessor, indent_level: int=0, indent_str: str="\t"
if type(e) in simple_string:
return simple_string[type(e)]
+ # Simplest basic elements
if isinstance(e, Str):
- return e.text.replace(" ", "~").replace(" ", "~")
+ return e.text.replace(" ", "~")
if isinstance(e, Para):
return tex(e.content, i, 0, "")+"\n\n"
+ if isinstance(e, Span) or isinstance(e, Plain):
+ return tex(e.content, i, 0, "")
+
+ # Overriding elements with their own returns
+ if isinstance(e, Image):
+ url = e.url
+
+ # Attributes → image processor args
+ additional_args = {}
+ if "file-width" in e.attributes:
+ additional_args["width"] = int(e.attributes["file-width"])
+ if "file-height" in e.attributes:
+ additional_args["height"] = int(e.attributes["file-height"])
+ if "file-quality" in e.attributes:
+ additional_args["quality"] = int(e.attributes["file-quality"])
+ if "file-dpi" in e.attributes:
+ additional_args["dpi"] = int(e.attributes["file-dpi"])
+
+ # The directory of the current file, will also look for images there.
+ source_dir = e.attributes["source_dir"]
+
+ _, ext = os.path.splitext(url)
+ ext = ext[1:]
+
+ # Conversions between various formats.
+ if ext in ["pdf", "png", "jpeg"]:
+ # Even supported elements have to be 'converted' because the
+ # processing contains finding and moving them to the output
+ # directory.
+ url = i.process_image(url, ext, source_dir, relative=False, **additional_args)
+ elif ext in ["svg"]:
+ url = i.process_image(url, "pdf", source_dir, relative=False, **additional_args)
+ elif ext in ["epdf"]:
+ url = i.process_image(url, "pdf", source_dir, relative=False, **additional_args)
+ elif ext in ["jpg"]:
+ url = i.process_image(url, "jpeg", source_dir, relative=False, **additional_args)
+ else:
+ url = i.process_image(url, "pdf", source_dir, relative=False, **additional_args)
+
+ width = ""
+ if "width" in e.attributes:
+ width = e.attributes["width"]
+ # 50% → 0.5\hsize
+ if e.attributes["width"][-1] == "%":
+ width = str(int(e.attributes["width"][:-1])/100) + "\\hsize"
+ width = "width " + width
+ return f'\\image{{{width}}}{{{url}}}'
+
if isinstance(e, FQuoted):
if e.style == "cs":
if e.quote_type == "SingleQuote":
@@ -78,81 +132,24 @@ def tex(e: Element, i: ImageProcessor, indent_level: int=0, indent_str: str="\t"
else:
return f'"{tex(e.content, i, 0, "")}"'
- if isinstance(e, BulletList):
- tag = "list"
- open = ""
- arguments = "{o}"
- close = "\\endlist"
-
- if isinstance(e, OrderedList):
- tag = "list"
- open = ""
- styles = {
- "DefaultStyle": "n",
- "Decimal": "n",
- "LowerRoman": "i",
- "UpperRoman:": "I",
- "LowerAlpha": "a",
- "UpperAlpha": "A"
- }
- style = styles[e.style]
- delimiters = {
- "DefaultDelim": f"{style}.",
- "Period": f"{style}.",
- "OneParen": f"{style})",
- "TwoParens": f"({style})"
- }
- style = delimiters[e.delimiter]
- arguments = f"{{{style}}}"
- close = "\\endlist"
- # FIXME: Starting number of list
-
- if isinstance(e, Image):
- url = e.url
- source_dir = e.attributes["source_dir"]
- _, ext = os.path.splitext(url)
- ext = ext[1:]
- if ext in ["pdf", "png", "jpeg"]:
- url = i.process_image(url, ext, source_dir, relative=False)
- elif ext in ["svg"]:
- url = i.process_image(url, "pdf", source_dir, relative=False)
- elif ext in ["epdf"]:
- url = i.process_image(url, "pdf", source_dir, relative=False)
- elif ext in ["jpg"]:
- url = i.process_image(url, "jpeg", source_dir, relative=False)
- else:
- url = i.process_image(url, "pdf", source_dir, relative=False)
- width = ""
- if "width" in e.attributes:
- width = e.attributes["width"]
- if e.attributes["width"][-1] == "%":
- width = str(int(e.attributes["width"][:-1])/100) + "\\hsize"
- width = "width " + width
- return f'\\image{{{width}}}{{{url}}}'
+ if isinstance(e, Code):
+ return f'\\verb`{e.text.replace("`", "backtick")}`'
if isinstance(e, Figure):
return f'\\figure{{{tex(e.content, i, indent_level+1, indent_str)}}}{{{tex(e.caption, i, indent_level+1, indent_str)}}}\n\n'
-
+
+ # Figure caption
if isinstance(e, Caption):
if inlinify(e) is not None:
- return f'\\caption{{{tex(e.content, i, 0, "")}}}'
-
- if isinstance(e, ListItem):
- tag = ":"
-
- if isinstance(e, Link):
- if len(e.content) == 1 and isinstance(e.content[0], Str) and e.content[0].text == e.url:
- tag = "url"
- else:
- tag = "linkurl"
- arguments = f'{{{e.url}}}'
+ return f'\\figcaption{{{tex(e.content, i, 0, "")}}}'
if isinstance(e, Math):
if e.format == "DisplayMath":
return f'$${e.text}$$\n'
else:
return f'${e.text}$'
-
+
+ # Footnote
if isinstance(e, Note):
tag = "fn"
if inlinify(e) is not None:
@@ -189,27 +186,71 @@ def tex(e: Element, i: ImageProcessor, indent_level: int=0, indent_str: str="\t"
else:
return ""
- if isinstance(e, Span) or isinstance(e, Plain):
- return tex(e.content, i, 0, "")
+ # See https://pandoc.org/MANUAL.html#line-blocks
+ if isinstance(e, LineBlock):
+ return f'{tex(e.content, i, indent_level+1, indent_str)}\n'
if isinstance(e, LineItem):
return tex(e.content, i, 0, "") + ("\\\\\n" if e.next else "\n")
- if isinstance(e, LineBlock):
- return f'{tex(e.content, i, indent_level+1, indent_str)}\n'
+ if type(e) is Div:
+ return f'{tex(e.content, i, indent_level+1, indent_str)}'
+
+ if isinstance(e, Doc):
+ return tex(e.content, i, indent_level, indent_str)+"\n\\bye" # Is having the \bye a bad idea here?
- if isinstance(e, Group):
+
+ # Non-overriding elements, they get generated using the template at the end
+ # of this function
+ if isinstance(e, BulletList):
+ tag = "list"
+ open = ""
+ arguments = "{o}"
+ close = "\\endlist"
+
+ elif isinstance(e, OrderedList):
+ tag = "list"
+ open = ""
+ styles = {
+ "DefaultStyle": "n",
+ "Decimal": "n",
+ "LowerRoman": "i",
+ "UpperRoman:": "I",
+ "LowerAlpha": "a",
+ "UpperAlpha": "A"
+ }
+ style = styles[e.style]
+ delimiters = {
+ "DefaultDelim": f"{style}.",
+ "Period": f"{style}.",
+ "OneParen": f"{style})",
+ "TwoParens": f"({style})"
+ }
+ style = delimiters[e.delimiter]
+ arguments = f"{{{style}}}"
+ close = "\\endlist"
+ # FIXME: Starting number of list
+
+ elif isinstance(e, ListItem):
+ tag = ":"
+
+ elif isinstance(e, Link):
+ if len(e.content) == 1 and isinstance(e.content[0], Str) and e.content[0].text == e.url:
+ tag = "url"
+ else:
+ tag = "linkurl"
+ arguments = f'{{{e.url}}}'
+
+ elif isinstance(e, Group):
tag = "begingroup"
open = ""
if "language" in e.metadata and e.metadata["language"] is not None:
open = "\\language"+e.metadata["language"]
close = "\\endgroup"
-
- if isinstance(e, Div):
- return f'{tex(e.content, i, indent_level+1, indent_str)}'
-
- if isinstance(e, Doc):
- return tex(e.content, i, indent_level, indent_str)+"\n\\bye"
+
+ # The default which all non-overriding elements get generated by. This
+ # includes elements, which were not explicitly mentioned in this function,
+ # e. g. Strong, Emph...
if isinstance(e, Inline):
return f'\\{tag}{arguments}{open}{content_head}{tex(e.content, i, 0, "") if hasattr(e, "_content") else ""}{e.text if hasattr(e, "text") else ""}{content_foot}{close}'
diff --git a/transform.py b/transform.py
index 56843fa..b13f565 100644
--- a/transform.py
+++ b/transform.py
@@ -6,16 +6,22 @@ from whitespace import *
from command import *
from util import *
from context import *
-from group import Group
+
+# This is a small extension to panflute's Quoted element which allows
+# language-aware quotation marks.
class FQuoted(Quoted):
def __init__(self, *args, **kwargs):
self.style = kwargs["style"]
del kwargs["style"]
super().__init__(*args, **kwargs)
-def transform(e: Element, c: Context) -> Element: # Returns next sibling element to transform
- """Transform the AST, making format-agnostic changes."""
+
+# This is where the magic happens. This function transforms a single element;
+# to transform the entire tree, panflute's walk should be used.
+def transform(e: Element, c: Context) -> Element:
+
+ # Determine if this space should be non-breakable. See whitespace.py.
if isinstance(e, Whitespace) and bavlna(e, c):
e = NBSP()
@@ -29,24 +35,48 @@ def transform(e: Element, c: Context) -> Element: # Returns next sibling element
if c.is_flag_set(e.attributes["ifn"]):
return nullify(e)
- # `c` attribute. Execute a command with the name saved in this attribute.
+ # There are multiple ways to call a command so we turn it into a
+ # unified element first and then call it at the end. This handles the
+ # []{c=commandname} and
+ # :::{c=commandname}
+ # :::
+ # syntax.
if (isinstance(e, Div) or isinstance(e, Span)) and "c" in e.attributes:
if isinstance(e, Div):
e = BlockCommand(*e.content, identifier=e.identifier, classes=e.classes, attributes=e.attributes)
else:
e = InlineCommand(*e.content, identifier=e.identifier, classes=e.classes, attributes=e.attributes)
- # `partial` attribute.
- # This is for including content from files with their own flags and
- # commands without affecting the state of the current document.
- if (isinstance(e, Div)) and "partial" in e.attributes:
- includedDoc = import_md(open(c.dir + "/" + e.attributes["partial"], "r").read())
- nContext = Context(includedDoc, e.attributes["partial"], c)
- language = includedDoc.get_metadata("language")
- includedDoc = includedDoc.walk(transform, nContext)
- e = Group(*includedDoc.content, metadata={"language": language})
-
-
+ # Isolated subdocuments using Group and a different Context. Can be
+ # separate files (using attribute `partial`) or be inline using the
+ # following syntax:
+ # ```markdown {.group}
+ # * file content *
+ # ```
+ # Both can contain their own metadata in a FrontMatter (YAML header)
+ if (isinstance(e, Div) and "partial" in e.attributes)\
+ or (isinstance(e, CodeBlock) and "markdown" in e.classes and "group" in e.classes):
+ if isinstance(e, Div):
+ text = open(c.dir + "/" + e.attributes["partial"], "r").read()
+ path = c.dir + "/" + e.attributes["partial"]
+ else:
+ text = e.text
+ path = c.path
+ if "type" in e.attributes and e.attributes["type"] in ["tex", "html"]:
+ e = RawBlock(text, e.attributes["type"])
+ else:
+ includedDoc = import_md(text)
+ trusted = True
+ if "untrusted" in e.attributes and (e.attributes["untrusted"] == True or e.attributes["untrusted"] == 'True'):
+ trusted = False
+ if not c.trusted:
+ trusted = False
+ nContext = Context(includedDoc, path, c, trusted=trusted)
+ language = includedDoc.get_metadata("language")
+ includedDoc = includedDoc.walk(transform, nContext)
+ e = Group(*includedDoc.content, metadata={"language": language})
+
+ # Transform panflute's Quoted to custom FQuoted, see above.
if isinstance(e, Quoted):
quote_styles = {
"cs": "cs",
@@ -55,45 +85,66 @@ def transform(e: Element, c: Context) -> Element: # Returns next sibling element
None: None
}
e = FQuoted(*e.content, quote_type=e.quote_type, style=quote_styles[c.get_metadata("language")])
-
+
if isinstance(e, Image):
+ # Pass down the directory of the current source file for finding image
+ # files.
e.attributes["source_dir"] = c.dir
+ # Pass the "no-srcset" metadatum down to images as an attribute.
if not "no-srcset" in e.attributes:
e.attributes["no-srcset"] = c.get_metadata("no-srcset") if c.get_metadata("no-srcset") is not None else False
- # Execute python code inside source code block
+ # Pass down metadata 'highlight' and 'highlight-style' as attributes to CodeBlocks
+ if isinstance(e, CodeBlock):
+ if not "highlight" in e.attributes:
+ e.attributes["highlight"] = c.get_metadata("highlight") if c.get_metadata("highlight") is not None else True
+ if not "style" in e.attributes:
+ e.attributes["style"] = c.get_metadata("highlight-style") if c.get_metadata("highlight-style") is not None else "default"
+ e.attributes["noclasses"] = False
+ else:
+ e.attributes["noclasses"] = True
+
+ # Execute python code inside source code block. Works the same as commands.
+ # Syntax:
+ # ```python {.run}
+ # print("woo")
+ # ```
if isinstance(e, CodeBlock) and hasattr(e, "classes") and "python" in e.classes and "run" in e.classes:
+ if not c.trusted:
+ return nullify(e)
e = Div(*executeCommand(e.text, None, c))
e = e.walk(transform, c)
- ## Command defines
- # possible TODO: def/longdef?
+ # Command defines for calling using BlockCommand and InlineCommand. If
+ # redefine is used instead of define, the program doesn't check if the
+ # command already exists.
+ # Syntax:
+ # ```python {define=commandname}
+ # print(wooo)
+ # ```
if isinstance(e, CodeBlock) and hasattr(e, "classes") and "python" in e.classes and hasattr(e, "attributes")\
and ("define" in e.attributes or "redefine" in e.attributes):
+ if not c.trusted:
+ return nullify(e)
e = handle_command_define(e, c)
- # Pass down metadata 'highlight' and 'highlight_style' as attribute to CodeBlocks
- if isinstance(e, CodeBlock):
- if not "highlight" in e.attributes:
- e.attributes["highlight"] = c.get_metadata("highlight") if c.get_metadata("highlight") is not None else True
- if not "style" in e.attributes:
- e.attributes["style"] = c.get_metadata("highlight_style") if c.get_metadata("highlight_style") is not None else "default"
-
## Shorthands
+ # Shorter (and sometimes the only) forms of certain features
if isinstance(e, Span) and len(e.content) == 1 and isinstance(e.content[0], Str):
## Handle special command shorthand [!commandname]{}
if re.match(r"^![\w]+$", e.content[0].text):
e = InlineCommand(identifier=e.identifier, classes=e.classes, attributes={**e.attributes, "c": e.content[0].text[1:]})
## Handle import [#path/file.md]{}
- # This is the exact opposite of include. We take the commands
- # and flags but drop the content.
+ # This is the exact opposite of partials. We take the commands, flags
+ # and metadata but drop the content.
elif re.match(r"^#.+$", e.content[0].text):
importedDoc = import_md(open(c.dir + "/" + e.content[0].text[1:], "r").read())
importedDoc.walk(transform, c)
return nullify(e)
- ## Handle metadata print [$something.something]{}
+ ## Handle metadata print [$key1.key2]{}
+ # This is a shorthand for just printing the content of some metadata.
elif re.match(r"^\$[\w.]+$", e.content[0].text):
val = c.get_metadata(e.content[0].text[1:], False)
if isinstance(val, MetaInlines):
@@ -107,8 +158,9 @@ def transform(e: Element, c: Context) -> Element: # Returns next sibling element
raise TypeError(f"Cannot print value of metadatum '{e.content[0].text[1:]}' of type '{type(val)}'")
## Execute commands
- # panflute's walk transforms the children first, then the root element, so
- # the content of the element the command receives is already transformed.
+ # panflute's walk function transforms the children first, then the root
+ # element, so the content the command receives is already transformed.
+ # The output from the command is then transformed manually again.
if isinstance(e, Command):
if not c.get_command(e.attributes["c"]):
raise NameError(f"Command not defined '{e.attributes['c']}'.")
diff --git a/util.py b/util.py
index 683c519..aa14151 100644
--- a/util.py
+++ b/util.py
@@ -1,23 +1,25 @@
from panflute import Element, Block, Inline, Null, Str, Doc, convert_text, Para, Plain
import re
+# It sometimes happens that an element contains a single paragraph or even a
+# single plaintext line. It can sometimes be useful to extract the inline
+# content of that single paragraph.
def inlinify(e: Element) -> Element:
if len(e.content) == 1 and (isinstance(e.content[0], Para) or isinstance(e.content[0], Plain)):
return e.content[0].content
-def replaceEl(e: Element, r: Element) -> Element:
- parent = e.parent
- parent.content[e.index] = r
- r.parent = parent
- return r
-def deleteEl(e: Element):
- del e.parent.content[e.index]
-
+# In transform, inline elements cannot be replaced with Block ones and also
+# cannot be removed from the tree entirely, because that would mess up the
+# iteration process through the tree. We replace them with null elements
+# instead which never make it to the output.
def nullify(e: Element):
if isinstance(e, Inline):
return Str("")
elif isinstance(e, Block):
return Null()
+# A helper function to import markdown using panflute (which calls pandoc). If
+# we ever want to disable or enable some of pandoc's markdown extensions,
+# this is the place to do it.
def import_md(s: str, standalone: bool=True) -> Doc:
- return convert_text(s, standalone=standalone)
+ return convert_text(s, standalone=standalone, input_format="markdown-definition_lists-citations")
diff --git a/whitespace.py b/whitespace.py
index 28a5336..928f94b 100644
--- a/whitespace.py
+++ b/whitespace.py
@@ -9,11 +9,14 @@ Whitespace = Union[Space,SoftBreak]
class NBSP(Space):
pass
+# This function tries to determine if a space should be non-breaking. It is
+# language-aware and tries to be sort-of smart about its decisions.
def bavlna(e: Whitespace, c: Context) -> bool:
- """Determine if given piece of whitespace should be non-breakable."""
-
-
+
if c.get_metadata("language") == "cs":
+ # Add no-break space after single letter prepositions and conjunctions.
+ # Also tries to find them inside elements, for instance
+ # `V [odevzdávátku]()` should get correctly detected.
prev = e.prev if isinstance(e.prev, Str) else (e.prev.content[-1] if hasattr(e.prev, "content") and len(e.prev.content) != 0 else None)
next = e.next if isinstance(e.next, Str) else (e.next.content[0] if hasattr(e.next, "content") and len(e.next.content) != 0 else None)
if isinstance(prev, Str) and isinstance(next, Str):
@@ -21,6 +24,7 @@ def bavlna(e: Whitespace, c: Context) -> bool:
return True
if isinstance(e.prev, Str) and isinstance(e.next, Str):
+ # Add no-break space between numbers or numbers and operators.
prevC = e.prev.text[-1]
nextC = e.next.text[0]
numbers = ["0123456789"]
@@ -33,6 +37,7 @@ def bavlna(e: Whitespace, c: Context) -> bool:
return True
if isinstance(e.prev, Math) or isinstance(e.next, Math):
+ # Add no-break spaces around TeX math.
return True