diff --git a/HISTORY.rst b/HISTORY.rst
index e4bd155..da9f43c 100644
--- a/HISTORY.rst
+++ b/HISTORY.rst
@@ -3,6 +3,22 @@
 Release History
 ---------------
 
+1.1.3 (2025-12-xx)
+++++++++++++++++++
+
+**Updates**
+
+- None
+
+**Fixes**
+
+- Fix color parsing failing due to invalid colors, falling back to black. | `dfop02 `_
+
+**New Features**
+
+- None
+
+
 1.1.2 (2025-12-07)
 ++++++++++++++++++
 
diff --git a/html4docx/utils.py b/html4docx/utils.py
index 1b30805..01b72fb 100644
--- a/html4docx/utils.py
+++ b/html4docx/utils.py
@@ -188,27 +188,41 @@ def is_color(color: str) -> bool:
     return is_rgb or is_hex or is_keyword or is_color_name
 
 
-def parse_color(color: str, return_hex: bool = False):
+def parse_color(original_color: str, return_hex: bool = False):
     """
     Parses a color string into a tuple of RGB values.
     Supports RGB, hex, and color name strings.
     Returns a tuple of RGB values by default, or a hex string if return_hex is True.
     """
-    color = remove_important_from_style(color.strip().lower())
-
-    if "rgb" in color:
-        color = re.sub(r"[^0-9,]", "", color)
-        colors = [int(x) for x in color.split(",")]
-    elif color.startswith("#"):
-        color = color.lstrip("#")
-        color = (
-            "".join([x + x for x in color]) if len(color) == 3 else color
-        )  # convert short hex to full hex
-        colors = RGBColor.from_string(color)
-    elif color in Color.__members__:
-        colors = Color[color].value
-    else:
-        colors = [0, 0, 0]  # Default to black for unexpected colors
+    color = remove_important_from_style(original_color.strip().lower())
+
+    try:
+        if "rgba" in color:
+            color = re.sub(r"[^0-9,]", "", color)
+            colors = [int(x) for x in color.split(",")]
+            # rgba() needs three channels plus an optional alpha; any other count is malformed.
+            if len(colors) not in (3, 4):
+                raise ValueError(f"Invalid RGBA color: {original_color}")
+            colors = colors[:3]  # remove opacity because it's not supported by python-docx
+            logging.warning("RGBA color is not supported by python-docx. Opacity will be ignored.")
+        elif "rgb" in color:
+            color = re.sub(r"[^0-9,]", "", color)
+            colors = [int(x) for x in color.split(",")]
+            # Require exactly three channels: too few is as invalid as too many (e.g. "rgb(255 0 0)" collapses to one number).
+            if len(colors) != 3:
+                raise ValueError(f"Invalid RGB color: {original_color}")
+        elif color.startswith("#"):
+            color = color.lstrip("#")
+            color = ("".join([x + x for x in color]) if len(color) == 3 else color)  # convert short hex to full hex
+            colors = RGBColor.from_string(color)
+        elif color in Color.__members__:
+            colors = Color[color].value
+        else:
+            colors = [0, 0, 0]  # Default to black for unexpected colors
+            logging.warning(f"Could not parse color '{original_color}': Invalid color value. Fallback to black.")
+    except Exception:
+        colors = [0, 0, 0]  # Default to black for errors
+        logging.warning(f"Could not parse color '{original_color}': Invalid color value. Fallback to black.")
 
     return rgb_to_hex(colors) if return_hex else colors
 
diff --git a/tests/test_h4d.py b/tests/test_h4d.py
index cdb472b..8b510a9 100644
--- a/tests/test_h4d.py
+++ b/tests/test_h4d.py
@@ -2545,6 +2545,33 @@ def test_none_style_map(self):
 
         self.assertEqual(len(doc.paragraphs), 1)
 
+    def test_invalid_color_fallback_to_black(self):
+        """Test with invalid color fallback to black"""
+        self.document.add_heading("Test: Test invalid color fallback to black", level=1)
+        html = '''
+

Test Unsupported RGBA Color with opacity Fallback to Black

+

Test Invalid RGBA Color with letters Fallback to Black

+

Test Invalid RGB Color with extra value Fallback to Black

+

Test Invalid Color Name Fallback to Black

+

Test Invalid Hex Color with extra value Fallback to Black

+ ''' + + doc = Document() + parser = HtmlToDocx() + parser.add_html_to_document(html, self.document) + with self.assertLogs(level='WARNING') as log: + parser.add_html_to_document(html, doc) + + self.assertEqual(doc.paragraphs[1].runs[0].font.color.rgb, RGBColor(*Color["red"].value)) + for paragraph in doc.paragraphs[2:]: + self.assertEqual(paragraph.runs[0].font.color.rgb, RGBColor(*Color["black"].value)) + + self.assertEqual(len(log.records), 5) + self.assertIn('RGBA color is not supported by python-docx. Opacity will be ignored.', log.output[0]) + self.assertIn('Could not parse color \'rgba(a, b, c, d, e)\': Invalid color value. Fallback to black.', log.output[1]) + self.assertIn('Could not parse color \'rgb(255, 0, 0, 0)\': Invalid color value. Fallback to black.', log.output[2]) + self.assertIn('Could not parse color \'invalidcolorname\': Invalid color value. Fallback to black.', log.output[3]) + self.assertIn('Could not parse color \'#f7272626161\': Invalid color value. Fallback to black.', log.output[4]) if __name__ == "__main__": unittest.main()