From 0a37253b049552d7e642eec22e57e60ed6869c87 Mon Sep 17 00:00:00 2001
From: Gunnar Kreitz <gkreitz@kattis.com>
Date: Mon, 2 Feb 2026 09:00:06 +0100
Subject: [PATCH] Support emojis in sample (with caveats) #378

This is a pretty ugly workaround to get emojis almost working in sample
data when rendering PDFs. There is one annoying issue remaining: the
Unicode fallback font we load does not have the same glyph width as the
monospace font, which leads to emojis being rendered over adjacent characters.
---
 problemtools/template.py                    | 22 +++++++++++++++++++++
 problemtools/templates/latex/problemset.cls | 14 ++++++++-----
 problemtools/templates/latex/template.tex   | 15 ++++++++++++++
 3 files changed, 46 insertions(+), 5 deletions(-)

diff --git a/problemtools/template.py b/problemtools/template.py
index 6b844692..f055a60d 100644
--- a/problemtools/template.py
+++ b/problemtools/template.py
@@ -63,6 +63,8 @@ def __init__(self, problem_root: Path, texfile: Path, language: str, ignore_pare
         else:
             self.samples = []
 
+        self.non_ws_unicode_in_sample = self._non_ws_unicode_in_sample(sample_dir)
+
         problemset_cls_parent = problem_root.parent / 'problemset.cls'
         if not ignore_parent_cls and problemset_cls_parent.is_file():
             print(f'{problemset_cls_parent} exists, using it -- in case of weirdness this is likely culprit')
@@ -84,6 +86,7 @@ def __enter__(self):
                 'statement_directory': self.statement_directory.as_posix(),
                 'statement_filename': self.statement_filename,
                 'language': self.language,
+                'non_ws_unicode_in_sample': self.non_ws_unicode_in_sample,
             }
             for line in templin:
                 try:
@@ -104,3 +107,22 @@ def __exit__(self, exc_type, exc_value, exc_traceback):
     def get_file_name(self) -> Path:
         assert self.texfile and self.texfile.is_file()
         return self.texfile
+
+    def _non_ws_unicode_in_sample(self, sample_dir: Path) -> str:
+        """Return the sorted, distinct non-ASCII, non-whitespace characters found in the samples.
+
+        Works around listings (used to render samples), which garbles characters it was not told
+        about. Unreadable or non-UTF-8 files are skipped. Hopefully listings can be replaced one day.
+        """
+        if not sample_dir.is_dir():
+            return ''
+        res: set[str] = set()
+        for file in sample_dir.iterdir():
+            if not file.is_file() or file.suffix not in ('.in', '.ans', '.interaction'):
+                continue
+            try:
+                text = file.read_text(encoding='utf-8')
+            except (UnicodeDecodeError, OSError):
+                continue
+            res.update(char for char in text if not char.isascii() and not char.isspace())
+        return ''.join(sorted(res))
diff --git a/problemtools/templates/latex/problemset.cls b/problemtools/templates/latex/problemset.cls
index 10325463..94d09c3a 100644
--- a/problemtools/templates/latex/problemset.cls
+++ b/problemtools/templates/latex/problemset.cls
@@ -42,7 +42,7 @@
 \RequirePackage{wrapfig}          % Illustrations
 \RequirePackage{import}           % Proper file inclusion
 \RequirePackage{fancyvrb}         %
-\RequirePackage{listingsutf8}     % For samples
+\RequirePackage{listings}         % For samples
 \RequirePackage[left=1in,right=1in,top=0.75in,bottom=0.75in]{geometry}
 %\RequirePackage{fullpage}        % Set up margins for full page
 \RequirePackage{url}              % Urls
@@ -63,9 +63,13 @@
 % uses colors.
 \RequirePackage{fontspec}
 \RequirePackage{luacode}
+
+\IfFontExistsTF{NotoColorEmoji}{
+    \directlua{luaotfload.add_fallback("myfallback", {"NotoColorEmoji:mode=harf;"})}
+    \setmonofont{lmmono10-regular}[RawFeature={fallback=myfallback}]
+}{}
 \IfFontExistsTF{CMU Serif}{
     \IfFontExistsTF{NotoColorEmoji}{
-        \directlua{luaotfload.add_fallback("myfallback", {"NotoColorEmoji:mode=harf;"})}
         \setmainfont{CMU Serif}[RawFeature={fallback=myfallback}]
     }{
         \setmainfont{CMU Serif}
@@ -341,8 +345,8 @@
 
 \newcommand{\sampletable}[4]{
   % First find widths of the two files
-  \savebox{\PS@sampleinbox}{\lstinputlisting[inputencoding=utf8,basicstyle=\ttfamily]{#2}}
-  \savebox{\PS@sampleoutbox}{\lstinputlisting[inputencoding=utf8,basicstyle=\ttfamily]{#4}}
+  \savebox{\PS@sampleinbox}{\lstinputlisting[inputencoding=utf8,basicstyle=\ttfamily,extendedchars=true]{#2}}
+  \savebox{\PS@sampleoutbox}{\lstinputlisting[inputencoding=utf8,basicstyle=\ttfamily,extendedchars=true]{#4}}
   \settowidth{\PS@sampleoutwidth}{\usebox{\PS@sampleoutbox}}
   \settowidth{\PS@sampleinwidth}{\usebox{\PS@sampleinbox}}
   \setlength{\PS@sampletotwidth}{\PS@sampleinwidth}
@@ -419,7 +423,7 @@
         \hline
         \parbox[t]{0.55\textwidth}{
           \vspace{-0.49cm}
-          \lstinputlisting[inputencoding=utf8,basicstyle=\ttfamily]{\jobname.pstmp}
+          \lstinputlisting[inputencoding=utf8,basicstyle=\ttfamily,extendedchars=true]{\jobname.pstmp}
           \vspace{-0.21cm}
         }\\
         \hline
diff --git a/problemtools/templates/latex/template.tex b/problemtools/templates/latex/template.tex
index 5f7005b1..9e7d5ee2 100644
--- a/problemtools/templates/latex/template.tex
+++ b/problemtools/templates/latex/template.tex
@@ -6,6 +6,21 @@
 \statementdirectory{%(statement_directory)s}
 \statementfilename{%(statement_filename)s}
 
+%% Work around listings treating every unknown character (such as an emoji) as whitespace (and, for some reason,
+%% floating that unknown whitespace up to the previous non-whitespace line). Examples of this workaround typically
+%% start the list with ^^80-^^ff before adding any characters, since 0x80-0xff are the default extended characters.
+%% We do not do that here: listings also misbehaves if a character is listed twice, and we know exactly which
+%% extended characters occur in the samples.
+%% It would be very nice to replace listings with something better to avoid having to do this.
+\makeatletter
+\lst@InputCatcodes
+\def\lst@DefEC{%%
+ \lst@CCECUse \lst@ProcessLetter
+  %(non_ws_unicode_in_sample)s%%
+  ^^00}
+\lst@RestoreCatcodes
+\makeatother
+
 \begin{document}
 
 \includeproblem{%(directory)s}