Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 16 additions & 0 deletions HISTORY.rst
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,22 @@
Release History
---------------

1.1.3 (2025-12-xx)
++++++++++++++++++

**Updates**

- None

**Fixes**

- Fix color parsing errors on invalid color values: unparseable colors now fall back to black and log a warning. | `dfop02 <https://github.com/dfop02>`_

**New Features**

- None


1.1.2 (2025-12-07)
++++++++++++++++++

Expand Down
42 changes: 26 additions & 16 deletions html4docx/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -188,27 +188,37 @@ def is_color(color: str) -> bool:
return is_rgb or is_hex or is_keyword or is_color_name


def parse_color(original_color: str, return_hex: bool = False):
    """
    Parse a color string into RGB components.

    Supported inputs are ``rgb(...)``, ``rgba(...)``, hex strings
    (``#rgb`` or ``#rrggbb``) and names that are members of ``Color``.
    The alpha channel of ``rgba(...)`` is dropped because opacity is not
    supported by python-docx; a warning is logged when that happens.

    Any value that cannot be parsed (unknown name, wrong number of
    components, channel above 255, malformed hex) falls back to black and
    logs a warning instead of raising.

    Args:
        original_color: Raw color value. It is stripped, lower-cased and
            passed through ``remove_important_from_style`` before parsing.
        return_hex: When True, the parsed components are passed through
            ``rgb_to_hex`` and that result is returned instead.

    Returns:
        The RGB components (three values), or the ``rgb_to_hex`` result
        when ``return_hex`` is True.
    """
    color = remove_important_from_style(original_color.strip().lower())

    try:
        if "rgb" in color:
            has_alpha = "rgba" in color
            # Only digits and separators survive, so "0.5" becomes "05":
            # harmless for the alpha slot, which is discarded below.
            colors = [int(x) for x in re.sub(r"[^0-9,]", "", color).split(",")]
            if has_alpha:
                if len(colors) < 3:
                    raise ValueError(f"Invalid RGBA color: {original_color}")
            elif len(colors) != 3:
                raise ValueError(f"Invalid RGB color: {original_color}")
            colors = colors[:3]  # remove opacity because it's not supported by python-docx
            # Reject out-of-range channels here so the failure is handled by
            # the fallback below rather than surfacing later in the caller.
            if any(channel > 255 for channel in colors):
                raise ValueError(f"Invalid RGB color: {original_color}")
            if has_alpha:
                logging.warning("RGBA color is not supported by python-docx. Opacity will be ignored.")
        elif color.startswith("#"):
            color = color.lstrip("#")
            if len(color) == 3:
                color = "".join(x + x for x in color)  # convert short hex to full hex
            # Check the length explicitly: slicing-based parsing could
            # otherwise accept some strings that are not six digits long.
            if len(color) != 6:
                raise ValueError(f"Invalid hex color: {original_color}")
            colors = RGBColor.from_string(color)
        elif color in Color.__members__:
            colors = Color[color].value
        else:
            raise ValueError(f"Unknown color: {original_color}")
    except Exception:
        # Deliberate best-effort: an unparseable color must never abort the
        # conversion, so every failure takes the same fallback path.
        colors = [0, 0, 0]  # Default to black for unexpected colors
        logging.warning(
            "Could not parse color '%s': Invalid color value. Fallback to black.",
            original_color,
        )

    return rgb_to_hex(colors) if return_hex else colors

Expand Down
27 changes: 27 additions & 0 deletions tests/test_h4d.py
Original file line number Diff line number Diff line change
Expand Up @@ -2545,6 +2545,33 @@ def test_none_style_map(self):

self.assertEqual(len(doc.paragraphs), 1)

def test_invalid_color_fallback_to_black(self):
    """Invalid colors fall back to black and each one logs a warning.

    The first paragraph uses a valid ``rgba(...)`` value, so it keeps its
    red color (only the opacity is ignored); the remaining four are
    invalid and are expected to be rendered in black.
    """
    self.document.add_heading("Test: Test invalid color fallback to black", level=1)
    html = '''
<p style="color: rgba(255, 0, 0, 0.5)">Test Unsupported RGBA Color with opacity Fallback to Black</p>
<p style="color: rgba(A, B, C, D, E)">Test Invalid RGBA Color with letters Fallback to Black</p>
<p style="color: rgb(255, 0, 0, 0)">Test Invalid RGB Color with extra value Fallback to Black</p>
<p style="color: invalidcolorname">Test Invalid Color Name Fallback to Black</p>
<p style="color: #F7272626161">Test Invalid Hex Color with extra value Fallback to Black</p>
'''

    # Expected warnings, in the order the paragraphs are parsed.
    expected_messages = (
        "RGBA color is not supported by python-docx. Opacity will be ignored.",
        "Could not parse color 'rgba(a, b, c, d, e)': Invalid color value. Fallback to black.",
        "Could not parse color 'rgb(255, 0, 0, 0)': Invalid color value. Fallback to black.",
        "Could not parse color 'invalidcolorname': Invalid color value. Fallback to black.",
        "Could not parse color '#f7272626161': Invalid color value. Fallback to black.",
    )

    scratch_doc = Document()
    converter = HtmlToDocx()
    # First pass goes into the shared test document, outside the log capture.
    converter.add_html_to_document(html, self.document)
    # Second pass goes into a fresh document; only its warnings are captured.
    with self.assertLogs(level='WARNING') as log:
        converter.add_html_to_document(html, scratch_doc)

    red = RGBColor(*Color["red"].value)
    self.assertEqual(scratch_doc.paragraphs[1].runs[0].font.color.rgb, red)
    for paragraph in scratch_doc.paragraphs[2:]:
        self.assertEqual(paragraph.runs[0].font.color.rgb, RGBColor(*Color["black"].value))

    self.assertEqual(len(log.records), 5)
    for expected, actual in zip(expected_messages, log.output):
        self.assertIn(expected, actual)


if __name__ == "__main__":
unittest.main()