From 0a37253b049552d7e642eec22e57e60ed6869c87 Mon Sep 17 00:00:00 2001 From: Gunnar Kreitz Date: Mon, 2 Feb 2026 09:00:06 +0100 Subject: [PATCH] Support emojis in sample (with caveats) #378 This is a pretty ugly workaround to get emojis almost working in sample data when rendering PDFs. There is one annoying issue remaining - the Unicode fallback font we load does not have the same character width as the monospace font, which leads to emojis being rendered over adjacent characters. --- problemtools/template.py | 22 +++++++++++++++++++++ problemtools/templates/latex/problemset.cls | 14 ++++++++----- problemtools/templates/latex/template.tex | 15 ++++++++++++++ 3 files changed, 46 insertions(+), 5 deletions(-) diff --git a/problemtools/template.py b/problemtools/template.py index 6b844692..f055a60d 100644 --- a/problemtools/template.py +++ b/problemtools/template.py @@ -63,6 +63,8 @@ def __init__(self, problem_root: Path, texfile: Path, language: str, ignore_pare else: self.samples = [] + self.non_ws_unicode_in_sample = self._non_ws_unicode_in_sample(sample_dir) + problemset_cls_parent = problem_root.parent / 'problemset.cls' if not ignore_parent_cls and problemset_cls_parent.is_file(): print(f'{problemset_cls_parent} exists, using it -- in case of weirdness this is likely culprit') @@ -84,6 +86,7 @@ def __enter__(self): 'statement_directory': self.statement_directory.as_posix(), 'statement_filename': self.statement_filename, 'language': self.language, + 'non_ws_unicode_in_sample': self.non_ws_unicode_in_sample, } for line in templin: try: @@ -104,3 +107,22 @@ def __exit__(self, exc_type, exc_value, exc_traceback): def get_file_name(self) -> Path: assert self.texfile and self.texfile.is_file() return self.texfile + + # To work around limitations in listings (which we use to render samples), we need to + # provide a list of all "unknown" characters to avoid it completely messing up the output. + # Hopefully we can replace listings at some point to avoid this. 
+    def _non_ws_unicode_in_sample(self, sample_dir: Path) -> str:
+        """Return the distinct non-ASCII, non-whitespace characters of the sample files, sorted."""
+        if not sample_dir.is_dir():
+            return ''
+        res = set()
+        for file in sample_dir.iterdir():
+            if file.is_file() and file.suffix in ('.in', '.ans', '.interaction'):
+                try:
+                    with open(file, 'r', encoding='utf-8') as f:
+                        for line in f:
+                            res.update(c for c in line if not c.isascii() and not c.isspace())
+                except (UnicodeDecodeError, IOError):
+                    # Best effort: skip the rest of a sample that cannot be read or decoded.
+                    pass
+        return ''.join(sorted(res))
diff --git a/problemtools/templates/latex/problemset.cls b/problemtools/templates/latex/problemset.cls
index 10325463..94d09c3a 100644
--- a/problemtools/templates/latex/problemset.cls
+++ b/problemtools/templates/latex/problemset.cls
@@ -42,7 +42,7 @@
 \RequirePackage{wrapfig} % Illustrations
 \RequirePackage{import} % Proper file inclusion
 \RequirePackage{fancyvrb} %
-\RequirePackage{listingsutf8} % For samples
+\RequirePackage{listings} % For samples
 \RequirePackage[left=1in,right=1in,top=0.75in,bottom=0.75in]{geometry}
 %\RequirePackage{fullpage} % Set up margins for full page
 \RequirePackage{url} % Urls
@@ -63,9 +63,13 @@
 % uses colors.
\RequirePackage{fontspec} \RequirePackage{luacode} + +\IfFontExistsTF{NotoColorEmoji}{ + \directlua{luaotfload.add_fallback("myfallback", {"NotoColorEmoji:mode=harf;"})} + \setmonofont{lmmono10-regular}[RawFeature={fallback=myfallback}] +}{} \IfFontExistsTF{CMU Serif}{ \IfFontExistsTF{NotoColorEmoji}{ - \directlua{luaotfload.add_fallback("myfallback", {"NotoColorEmoji:mode=harf;"})} \setmainfont{CMU Serif}[RawFeature={fallback=myfallback}] }{ \setmainfont{CMU Serif} @@ -341,8 +345,8 @@ \newcommand{\sampletable}[4]{ % First find widths of the two files - \savebox{\PS@sampleinbox}{\lstinputlisting[inputencoding=utf8,basicstyle=\ttfamily]{#2}} - \savebox{\PS@sampleoutbox}{\lstinputlisting[inputencoding=utf8,basicstyle=\ttfamily]{#4}} + \savebox{\PS@sampleinbox}{\lstinputlisting[inputencoding=utf8,basicstyle=\ttfamily,extendedchars=true]{#2}} + \savebox{\PS@sampleoutbox}{\lstinputlisting[inputencoding=utf8,basicstyle=\ttfamily,extendedchars=true]{#4}} \settowidth{\PS@sampleoutwidth}{\usebox{\PS@sampleoutbox}} \settowidth{\PS@sampleinwidth}{\usebox{\PS@sampleinbox}} \setlength{\PS@sampletotwidth}{\PS@sampleinwidth} @@ -419,7 +423,7 @@ \hline \parbox[t]{0.55\textwidth}{ \vspace{-0.49cm} - \lstinputlisting[inputencoding=utf8,basicstyle=\ttfamily]{\jobname.pstmp} + \lstinputlisting[inputencoding=utf8,basicstyle=\ttfamily,extendedchars=true]{\jobname.pstmp} \vspace{-0.21cm} }\\ \hline diff --git a/problemtools/templates/latex/template.tex b/problemtools/templates/latex/template.tex index 5f7005b1..9e7d5ee2 100644 --- a/problemtools/templates/latex/template.tex +++ b/problemtools/templates/latex/template.tex @@ -6,6 +6,21 @@ \statementdirectory{%(statement_directory)s} \statementfilename{%(statement_filename)s} +%% This is to work around listings thinking all unknown characters (like emojis) are whitespace (and for some reason, +%% it floats unknown whitespace to the previous non-whitespace line). 
If you see examples of this workaround, you will typically +%% see a list starting with ^^80-^^ff before adding any characters (as 0x80-0xff are the default extended characters). +%% Here, we don't do that: listings also misbehaves if a character is listed twice, and we know exactly which extended +%% characters occur. +%% It would be very nice to replace listings with something better to avoid having to do this. +\makeatletter +\lst@InputCatcodes +\def\lst@DefEC{%% + \lst@CCECUse \lst@ProcessLetter + %(non_ws_unicode_in_sample)s%% + ^^00} +\lst@RestoreCatcodes +\makeatother + \begin{document} \includeproblem{%(directory)s}