From 17a5169284f7908c6b8e98281155b458bfe5ce6a Mon Sep 17 00:00:00 2001
From: MikoMikarro <lopezcuestam@gmail.com>
Date: Wed, 1 Jan 2025 19:24:13 +0100
Subject: [PATCH 01/14] basic UI integration

---
 src/develop/blend.h     |   6 ++
 src/develop/blend_gui.c | 128 ++++++++++++++++++++++++++++++++++++++++
 src/dtgtk/paint.c       |  24 ++++++++
 src/dtgtk/paint.h       |   2 +
 4 files changed, 160 insertions(+)

diff --git a/src/develop/blend.h b/src/develop/blend.h
index cc8a690903e2..a9680ff67ba9 100644
--- a/src/develop/blend.h
+++ b/src/develop/blend.h
@@ -97,6 +97,7 @@ typedef enum dt_develop_mask_mode_t
   DEVELOP_MASK_MASK = 1 << 1,                                                        // drawn mask
   DEVELOP_MASK_CONDITIONAL = 1 << 2,                                                 // parametric mask
   DEVELOP_MASK_RASTER = 1 << 3,                                                      // raster mask
+  DEVELOP_MASK_AI = 1 << 4,
   DEVELOP_MASK_MASK_CONDITIONAL = (DEVELOP_MASK_MASK | DEVELOP_MASK_CONDITIONAL)     // drawn & parametric
 } dt_develop_mask_mode_t;
 
@@ -294,6 +295,7 @@ typedef struct dt_iop_gui_blend_data_t
   gboolean masks_support;
   gboolean masks_inited;
   gboolean raster_inited;
+  gboolean ai_masks_inited;
 
   dt_develop_blend_colorspace_t csp;
   dt_iop_module_t *module;
@@ -308,6 +310,7 @@ typedef struct dt_iop_gui_blend_data_t
   GtkBox *blendif_box;
   GtkBox *masks_box;
   GtkBox *raster_box;
+  GtkBox *ai_box;
 
   GtkWidget *selected_mask_mode;
   GtkWidget *colorpicker;
@@ -351,6 +354,9 @@ typedef struct dt_iop_gui_blend_data_t
   GtkWidget *raster_combo;
   GtkWidget *raster_polarity;
 
+  GtkWidget *ai_threshold;
+  GtkWidget *execute_ai;
+
   int control_button_pressed;
   dt_pthread_mutex_t lock;
 } dt_iop_gui_blend_data_t;
diff --git a/src/develop/blend_gui.c b/src/develop/blend_gui.c
index b5cf3202b031..b2f0c8dd1ed9 100644
--- a/src/develop/blend_gui.c
+++ b/src/develop/blend_gui.c
@@ -160,6 +160,8 @@ const dt_introspection_type_enum_tuple_t dt_develop_mask_mode_names[]
           DEVELOP_MASK_CONDITIONAL | DEVELOP_MASK_ENABLED },
         { N_("raster mask"),
           DEVELOP_MASK_RASTER | DEVELOP_MASK_ENABLED },
+        { N_("AI mask"),
+          DEVELOP_MASK_AI | DEVELOP_MASK_ENABLED },
         { N_("drawn & parametric mask"),
           DEVELOP_MASK_MASK_CONDITIONAL | DEVELOP_MASK_ENABLED },
         { } };
@@ -690,6 +692,9 @@ static void _blendop_masks_mode_callback(const dt_develop_mask_mode_t mask_mode,
   _box_set_visible(data->raster_box,
                    data->raster_inited && (mask_mode & DEVELOP_MASK_RASTER));
 
+  _box_set_visible(data->ai_box,
+                   data->ai_masks_inited && (mask_mode & DEVELOP_MASK_AI));
+
   if(data->blendif_inited && (mask_mode & DEVELOP_MASK_CONDITIONAL))
   {
     _box_set_visible(data->blendif_box, TRUE);
@@ -1534,6 +1539,14 @@ static gboolean _blendop_masks_modes_raster_toggled(GtkToggleButton *button,
                                      DEVELOP_MASK_ENABLED | DEVELOP_MASK_RASTER);
 }
 
+/* "AI mask" mode toggle: delegates to the shared mode toggle with the AI mask mode bits */
+static gboolean _blendop_masks_modes_ai_toggled(GtkToggleButton *button,
+                                                GdkEventButton *event,
+                                                dt_iop_module_t *module)
+{
+  return _blendop_masks_modes_toggle(button, module, DEVELOP_MASK_ENABLED | DEVELOP_MASK_AI);
+}
+
 static gboolean _blendop_blendif_suppress_toggled(GtkToggleButton *togglebutton,
                                                   GdkEventButton *event,
                                                   dt_iop_module_t *module)
@@ -2910,6 +2923,63 @@ static void _raster_combo_populate(GtkWidget *w,
   }
 }
 
+static void _masks_ai_execute(GtkButton *button,
+                                      GdkEventButton *event,
+                                      dt_iop_module_t *module)
+{
+  if(event->button != 1
+     && event->button != 2)
+    return;
+  // only buttons 1 and 2 get here; the mask generation is still a stub that prints a debug line
+  printf("Executing AI\n");
+  // clear the active state again (NOTE(review): assumes `button` is a GtkDarktableButton - confirm)
+  dtgtk_button_set_active(DTGTK_BUTTON(button), FALSE);
+  // NOTE(review): takes a GdkEventButton; confirm dt_iop_button_new() connects a matching signal
+}
+
+static void _masks_ai_threshold_update(GtkWidget *slider,
+                                                   dt_iop_gui_blend_data_t *data)
+{
+  if(darktable.gui->reset
+     || !data
+     || !data->ai_masks_inited)
+    return;
+  // "value-changed" handler of the AI threshold slider; so far it only prints the new value
+  //dt_develop_blend_params_t *bp = data->module->blend_params;
+  //const int tab = data->tab;
+
+  const float value = dt_bauhaus_slider_get(slider);
+  printf("%f\n", value);
+  /* disabled draft working on the blendif parameters and boost factors - nothing below is compiled
+  for(int in_out = 1; in_out >= 0; in_out--)
+  {
+    const int ch = data->channel[tab].param_channels[in_out];
+    float off = 0.0f;
+    if(data->csp == DEVELOP_BLEND_CS_LAB
+       && (ch == DEVELOP_BLENDIF_A_in || ch == DEVELOP_BLENDIF_A_out
+        || ch == DEVELOP_BLENDIF_B_in || ch == DEVELOP_BLENDIF_B_out))
+    {
+      off = 0.5f;
+    }
+    const float new_value = value + data->channel[tab].boost_factor_offset;
+    const float old_value = bp->blendif_boost_factors[ch];
+    const float factor = exp2f(old_value) / exp2f(new_value);
+    float *parameters = &(bp->blendif_parameters[4 * ch]);
+    if(parameters[0] > 0.0f) parameters[0] = CLIP((parameters[0] - off) * factor + off);
+    if(parameters[1] > 0.0f) parameters[1] = CLIP((parameters[1] - off) * factor + off);
+    if(parameters[2] < 1.0f) parameters[2] = CLIP((parameters[2] - off) * factor + off);
+    if(parameters[3] < 1.0f) parameters[3] = CLIP((parameters[3] - off) * factor + off);
+    if(parameters[1] == 0.0f && parameters[2] == 1.0f)
+      bp->blendif &= ~(1 << ch);
+    bp->blendif_boost_factors[ch] = new_value;
+  }
+  _blendop_blendif_update_tab(data->module, tab);
+
+  dt_dev_add_history_item(darktable.develop, data->module, TRUE);
+  */
+}
+
+
 static void _raster_value_changed_callback(GtkWidget *widget,
                                            dt_iop_module_t *module)
 {
@@ -3020,6 +3090,48 @@ void dt_iop_gui_init_raster(GtkWidget *blendw, dt_iop_module_t *module)
   }
 }
 
+void dt_iop_gui_init_ai_mask(GtkWidget *blendw, dt_iop_module_t *module)
+{
+  dt_iop_gui_blend_data_t *bd = module->blend_data;
+  // the AI mask section lives in its own vertical box, added to blendw via _add_wrapped_box()
+  bd->ai_box = GTK_BOX(gtk_box_new(GTK_ORIENTATION_VERTICAL, 0));
+  _add_wrapped_box(blendw, bd->ai_box, "mask_ai");
+
+  /* the AI mask widgets are only created if the module supports masks;
+   * otherwise ai_box stays empty and ai_masks_inited is not set here */
+  if(bd->masks_support)
+  {
+    GtkWidget *hbox2 = gtk_box_new(GTK_ORIENTATION_HORIZONTAL, 0);
+    gtk_box_pack_start(GTK_BOX(hbox2), dt_ui_label_new(_("AI Mask")), TRUE, TRUE, 0);
+    dt_gui_add_class(hbox2, "dt_section_label");
+    // `box` collects the controls: the button is handed `box` on creation, the slider is packed below
+    GtkWidget *box = gtk_box_new(GTK_ORIENTATION_VERTICAL, 0);
+
+    bd->execute_ai = dt_iop_button_new(module, N_("Generate mask"),
+                                      G_CALLBACK(_masks_ai_execute), FALSE, 0, 0,
+                                      NULL, 0, box);
+
+    bd->ai_threshold =
+      dt_bauhaus_slider_new_with_range(module, 0.0f, 100.0f, 0, 0.0f, 1);
+    dt_bauhaus_slider_set_format(bd->ai_threshold, _(" %"));
+    dt_bauhaus_widget_set_label(bd->ai_threshold,
+                                N_("blend"), N_("threshold"));
+    // dt_bauhaus_slider_set_soft_range(bd->ai_threshold, 0.0, 3.0);
+    gtk_widget_set_tooltip_text(bd->ai_threshold,
+                                _("adjust threshold of the mask"));
+    gtk_widget_set_sensitive(bd->ai_threshold, TRUE);
+    // the handler gets the blend data (bd), not the module, as its user data
+    g_signal_connect(G_OBJECT(bd->ai_threshold), "value-changed",
+                     G_CALLBACK(_masks_ai_threshold_update), bd);
+
+    gtk_box_pack_start(GTK_BOX(box), GTK_WIDGET(bd->ai_threshold), TRUE, FALSE, 0);
+
+    gtk_box_pack_start(GTK_BOX(bd->ai_box), GTK_WIDGET(hbox2), TRUE, TRUE, 0);
+    gtk_box_pack_start(GTK_BOX(bd->ai_box), GTK_WIDGET(box), TRUE, TRUE, 0);
+
+    bd->ai_masks_inited = TRUE;
+  }
+}
 void dt_iop_gui_cleanup_blending(dt_iop_module_t *module)
 {
   if(!module->blend_data) return;
@@ -3342,6 +3454,7 @@ void dt_iop_gui_update_blending(dt_iop_module_t *module)
   }
 
   _box_set_visible(bd->raster_box, bd->raster_inited && (mask_mode & DEVELOP_MASK_RASTER));
+  _box_set_visible(bd->ai_box, bd->ai_masks_inited && (mask_mode & DEVELOP_MASK_AI));
 
   if(bd->blendif_inited && (mask_mode & DEVELOP_MASK_CONDITIONAL))
   {
@@ -3528,6 +3641,19 @@ void dt_iop_gui_init_blending(GtkWidget *iopw,
       bd->masks_modes_toggles = g_list_append(bd->masks_modes_toggles, GTK_WIDGET(but));
     }
 
+    if (bd->masks_support){
+      but = dt_iop_togglebutton_new(module, "blend`masks",
+                                    N_("AI mask"), NULL,
+                                    G_CALLBACK(_blendop_masks_modes_ai_toggled),
+                                    FALSE, 0, 0,
+                                    dtgtk_cairo_paint_masks_ai, NULL);
+      bd->masks_modes
+          = g_list_append(bd->masks_modes,
+                          GUINT_TO_POINTER(DEVELOP_MASK_ENABLED | DEVELOP_MASK_AI));
+      bd->masks_modes_toggles = g_list_append(bd->masks_modes_toggles, GTK_WIDGET(but));
+
+    }
+
     GtkWidget *presets_button = dtgtk_button_new(dtgtk_cairo_paint_presets, 0, NULL);
     gtk_widget_set_tooltip_text(presets_button, _("blending options"));
     if(bd->blendif_support)
@@ -3732,6 +3858,7 @@ void dt_iop_gui_init_blending(GtkWidget *iopw,
     dt_iop_gui_init_masks(iopw, module);
     dt_iop_gui_init_raster(iopw, module);
     dt_iop_gui_init_blendif(iopw, module);
+    dt_iop_gui_init_ai_mask(iopw, module);
 
     bd->bottom_box = GTK_BOX(gtk_box_new(GTK_ORIENTATION_VERTICAL, 0));
     gtk_box_pack_start(GTK_BOX(bd->bottom_box),
@@ -3755,6 +3882,7 @@ void dt_iop_gui_init_blending(GtkWidget *iopw,
     gtk_widget_set_name(GTK_WIDGET(bd->top_box), "blending-box");
     gtk_widget_set_name(GTK_WIDGET(bd->masks_box), "blending-box");
     gtk_widget_set_name(GTK_WIDGET(bd->raster_box), "blending-box");
+    gtk_widget_set_name(GTK_WIDGET(bd->ai_box), "blending-box");
     gtk_widget_set_name(GTK_WIDGET(bd->blendif_box), "blending-box");
     gtk_widget_set_name(GTK_WIDGET(bd->bottom_box), "blending-box");
     gtk_widget_set_name(GTK_WIDGET(iopw), "blending-wrapper");
diff --git a/src/dtgtk/paint.c b/src/dtgtk/paint.c
index f500fdcb3091..395c22fb1b47 100644
--- a/src/dtgtk/paint.c
+++ b/src/dtgtk/paint.c
@@ -846,6 +846,30 @@ void dtgtk_cairo_paint_masks_raster(cairo_t *cr, gint x, gint y, gint w, gint h,
   FINISH
 }
 
+void dtgtk_cairo_paint_masks_ai(cairo_t *cr, gint x, gint y, gint w, gint h, gint flags, void *data)
+{
+  PREAMBLE(0.8, 2, 0, 0)
+  // icon of the AI mask mode: the letters "A" and "I", each part stroked on its own
+  // letter A: both legs as one path from (0,1) via (0.5,0) to (1,1)
+  cairo_move_to(cr, 0.0, 1.0);
+  cairo_line_to(cr, 0.5, 0.0);
+  cairo_line_to(cr, 1.0, 1.0);
+  cairo_stroke(cr);
+  // crossbar of the A at y = 0.5
+  cairo_move_to(cr, 0.25, 0.5);
+  cairo_line_to(cr, 0.75, 0.5);
+  cairo_stroke(cr);
+
+  // letter I: a straight bar at x = 1.0 from y = 0 to y = 1
+  // NOTE(review): the bar ends at (1,1), the same point where the second leg of the A ends - intended?
+  cairo_move_to(cr, 1.0, 0.0);
+  cairo_line_to(cr, 1.0, 1.0);
+  cairo_stroke(cr);
+
+
+  FINISH
+}
+
 void dtgtk_cairo_paint_masks_multi(cairo_t *cr, gint x, gint y, gint w, gint h, gint flags, void *data)
 {
   PREAMBLE(1, 1, 0, 0)
diff --git a/src/dtgtk/paint.h b/src/dtgtk/paint.h
index ee918b22d261..7be9bf24151f 100644
--- a/src/dtgtk/paint.h
+++ b/src/dtgtk/paint.h
@@ -279,6 +279,8 @@ void dtgtk_cairo_paint_masks_parametric(cairo_t *cr, gint x, gint y, gint w, gin
 void dtgtk_cairo_paint_masks_drawn_and_parametric(cairo_t *cr, gint x, gint y, gint w, gint h, gint flags, void *data);
 /** paint a raster mask icon */
 void dtgtk_cairo_paint_masks_raster(cairo_t *cr, gint x, gint y, gint w, gint h, gint flags, void *data);
+/** paint an AI mask icon */
+void dtgtk_cairo_paint_masks_ai(cairo_t *cr, gint x, gint y, gint w, gint h, gint flags, void *data);
 /** paint a mask brush icon */
 void dtgtk_cairo_paint_masks_brush(cairo_t *cr, gint x, gint y, gint w, gint h, gint flags, void *data);
 /** Paint a vertical gradient icon for masks selection */

From 6f5608f690bae97ffaec77ef275d979ddc2501a0 Mon Sep 17 00:00:00 2001
From: MikoMikarro <lopezcuestam@gmail.com>
Date: Wed, 1 Jan 2025 21:50:06 +0100
Subject: [PATCH 02/14] new mask as source of coordinates, created new brush

---
 po/POTFILES.in            |    1 +
 src/CMakeLists.txt        |    1 +
 src/develop/blend.h       |    1 +
 src/develop/blend_gui.c   |   40 +-
 src/develop/masks.h       |    4 +-
 src/develop/masks/masks.c |   14 +-
 src/develop/masks/point.c | 1534 +++++++++++++++++++++++++++++++++++++
 7 files changed, 1591 insertions(+), 4 deletions(-)
 create mode 100644 src/develop/masks/point.c

diff --git a/po/POTFILES.in b/po/POTFILES.in
index 2ee8977ae638..e53b1a5491ca 100644
--- a/po/POTFILES.in
+++ b/po/POTFILES.in
@@ -151,6 +151,7 @@ src/develop/imageop_gui.c
 src/develop/lightroom.c
 src/develop/masks/brush.c
 src/develop/masks/circle.c
+src/develop/masks/point.c
 src/develop/masks/ellipse.c
 src/develop/masks/gradient.c
 src/develop/masks/group.c
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 6c4e4e4b1b23..0455a1e1993d 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -111,6 +111,7 @@ FILE(GLOB SOURCE_FILES
   "develop/lightroom.c"
   "develop/masks/brush.c"
   "develop/masks/circle.c"
+  "develop/masks/point.c"
   "develop/masks/ellipse.c"
   "develop/masks/gradient.c"
   "develop/masks/group.c"
diff --git a/src/develop/blend.h b/src/develop/blend.h
index a9680ff67ba9..872c97514154 100644
--- a/src/develop/blend.h
+++ b/src/develop/blend.h
@@ -356,6 +356,7 @@ typedef struct dt_iop_gui_blend_data_t
 
   GtkWidget *ai_threshold;
   GtkWidget *execute_ai;
+  GtkWidget *ai_cursor_add;
 
   int control_button_pressed;
   dt_pthread_mutex_t lock;
diff --git a/src/develop/blend_gui.c b/src/develop/blend_gui.c
index b2f0c8dd1ed9..578c076ed63e 100644
--- a/src/develop/blend_gui.c
+++ b/src/develop/blend_gui.c
@@ -1669,6 +1669,34 @@ static gboolean _blendop_masks_add_shape(GtkWidget *widget,
   return TRUE;
 }
 
+
+static gboolean _blendop_masks_add_cursor(GtkWidget *widget,
+                                         GdkEventButton *event,
+                                         dt_iop_module_t *self)
+{
+  if(darktable.gui->reset
+     || event->button != GDK_BUTTON_PRIMARY)
+    return TRUE;
+  // primary button only: start the creation of a DT_MASKS_POINT form; TRUE is returned in any case
+  dt_iop_gui_blend_data_t *bd = self->blend_data;
+
+  // the mask mode is not changed here: the switch to DEVELOP_MASK_MASK below is commented out
+  // (NOTE(review): presumably on purpose so that the AI mask mode stays selected - confirm)
+  // _blendop_masks_modes_toggle(NULL, self, DEVELOP_MASK_MASK);
+
+  // we want to be sure that the iop has focus
+  dt_iop_request_focus(self);
+  dt_iop_color_picker_reset(self, FALSE);
+  bd->masks_shown = DT_MASKS_EDIT_FULL;
+  // we create the new form
+  dt_masks_form_t *form = dt_masks_create(DT_MASKS_POINT);
+  dt_masks_change_form_gui(form);
+  darktable.develop->form_gui->creation_module = self;
+
+  dt_control_queue_redraw_center();
+
+  return TRUE;
+}
+
 static gboolean _blendop_masks_show_and_edit(GtkWidget *widget,
                                              GdkEventButton *event,
                                              dt_iop_module_t *self)
@@ -3111,8 +3139,7 @@ void dt_iop_gui_init_ai_mask(GtkWidget *blendw, dt_iop_module_t *module)
                                       G_CALLBACK(_masks_ai_execute), FALSE, 0, 0,
                                       NULL, 0, box);
 
-    bd->ai_threshold =
-      dt_bauhaus_slider_new_with_range(module, 0.0f, 100.0f, 0, 0.0f, 1);
+    bd->ai_threshold = dt_bauhaus_slider_new_with_range(module, 0.0f, 100.0f, 0, 0.0f, 1);
     dt_bauhaus_slider_set_format(bd->ai_threshold, _(" %"));
     dt_bauhaus_widget_set_label(bd->ai_threshold,
                                 N_("blend"), N_("threshold"));
@@ -3126,6 +3153,15 @@ void dt_iop_gui_init_ai_mask(GtkWidget *blendw, dt_iop_module_t *module)
 
     gtk_box_pack_start(GTK_BOX(box), GTK_WIDGET(bd->ai_threshold), TRUE, FALSE, 0);
 
+
+
+    bd->ai_cursor_add = dt_iop_togglebutton_new(module, "blend`shapes",
+                                                  N_("add cursor"),
+                                                  NULL,
+                                                  G_CALLBACK(_blendop_masks_add_cursor),// G_CALLBACK(_blendop_masks_add_shape),
+                                                  FALSE, 0, 0,
+                                                  dtgtk_cairo_paint_masks_circle, box);
+
     gtk_box_pack_start(GTK_BOX(bd->ai_box), GTK_WIDGET(hbox2), TRUE, TRUE, 0);
     gtk_box_pack_start(GTK_BOX(bd->ai_box), GTK_WIDGET(box), TRUE, TRUE, 0);
 
diff --git a/src/develop/masks.h b/src/develop/masks.h
index 4ca3de551772..fda1f7fb4ab3 100644
--- a/src/develop/masks.h
+++ b/src/develop/masks.h
@@ -44,7 +44,8 @@ typedef enum dt_masks_type_t
   DT_MASKS_GRADIENT = 1 << 4,
   DT_MASKS_ELLIPSE = 1 << 5,
   DT_MASKS_BRUSH = 1 << 6,
-  DT_MASKS_NON_CLONE = 1 << 7
+  DT_MASKS_NON_CLONE = 1 << 7,
+  DT_MASKS_POINT = 1 << 8,
 } dt_masks_type_t;
 
 /**masts states */
@@ -441,6 +442,7 @@ extern const dt_masks_functions_t dt_masks_functions_brush;
 extern const dt_masks_functions_t dt_masks_functions_path;
 extern const dt_masks_functions_t dt_masks_functions_gradient;
 extern const dt_masks_functions_t dt_masks_functions_group;
+extern const dt_masks_functions_t dt_masks_functions_point;
 
 /** init dt_masks_form_gui_t struct with default values */
 void dt_masks_init_form_gui(dt_masks_form_gui_t *gui);
diff --git a/src/develop/masks/masks.c b/src/develop/masks/masks.c
index 7e7db388a0fd..cce4186dfbd1 100644
--- a/src/develop/masks/masks.c
+++ b/src/develop/masks/masks.c
@@ -854,6 +854,8 @@ dt_masks_form_t *dt_masks_create(dt_masks_type_t type)
     form->functions = &dt_masks_functions_gradient;
   else if(type & DT_MASKS_GROUP)
     form->functions = &dt_masks_functions_group;
+  else if(type & DT_MASKS_POINT)
+    form->functions = &dt_masks_functions_point;
 
   if(form->functions && form->functions->sanitize_config)
     form->functions->sanitize_config(type);
@@ -1263,7 +1265,8 @@ void dt_masks_events_post_expose(dt_iop_module_t *module,
   // add preview when creating a circle, ellipse and gradient
   if(!(((form->type & DT_MASKS_CIRCLE)
         || (form->type & DT_MASKS_ELLIPSE)
-        || (form->type & DT_MASKS_GRADIENT))
+        || (form->type & DT_MASKS_GRADIENT)
+        || (form->type & DT_MASKS_POINT))
        && gui->creation))
     dt_masks_gui_form_test_create(form, gui, module);
 
@@ -1711,6 +1714,9 @@ void dt_masks_iop_value_changed_callback(GtkWidget *widget,
       // add a brush shape
       _menu_add_shape(module, DT_MASKS_BRUSH);
     }
+    else if(val == -2000128) {
+      _menu_add_shape(module, DT_MASKS_POINT);
+    }
     else if(val < 0)
     {
       // use same shapes as another iop
@@ -2561,6 +2567,12 @@ void dt_masks_calculate_source_pos_value(dt_masks_form_gui_t *gui,
         x += xpos;
         y += ypos;
       }
+      else if(mask_type & DT_MASKS_POINT)
+      {
+        dt_masks_functions_point.initial_source_pos(iwidth, iheight, &x, &y);
+        x += xpos;
+        y += ypos;
+      }
 #endif
       else
         dt_print(DT_DEBUG_ALWAYS, "[dt_masks_calculate_source_pos_value]"
diff --git a/src/develop/masks/point.c b/src/develop/masks/point.c
new file mode 100644
index 000000000000..bcb1b39d9a8b
--- /dev/null
+++ b/src/develop/masks/point.c
@@ -0,0 +1,1534 @@
+/*
+    This file is part of darktable,
+    Copyright (C) 2013-2024 darktable developers.
+
+    darktable is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    darktable is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with darktable.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "bauhaus/bauhaus.h"
+#include "common/debug.h"
+#include "common/undo.h"
+#include "control/conf.h"
+#include "control/control.h"
+#include "develop/blend.h"
+#include "develop/imageop.h"
+#include "develop/masks.h"
+#include "develop/openmp_maths.h"
+
+#define MIN_POINT_RADIUS 0.0005f
+#define MIN_POINT_BORDER 0.0005f
+
+static inline int _nb_ctrl_point(void)
+{
+  return 2; // fixed count (NOTE(review): presumably the center plus the resize handle - confirm)
+}
+
+static void _point_get_distance(const float x,
+                                 const float y,
+                                 const float as,
+                                 dt_masks_form_gui_t *gui,
+                                 const int index,
+                                 const int num_points,
+                                 gboolean *inside,
+                                 gboolean *inside_border,
+                                 int *near,
+                                 gboolean *inside_source,
+                                 float *dist)
+{
+  (void)num_points; // unused arg, keep compiler from complaining
+  // initialise returned values
+  *inside_source = FALSE;
+  *inside = FALSE;
+  *inside_border = FALSE;
+  *near = -1;
+  *dist = FLT_MAX;
+  // without a gui, or without gui points stored at `index`, the defaults above are returned
+  if(!gui) return;
+
+  dt_masks_form_gui_points_t *gpt = g_list_nth_data(gui->points, index);
+  if(!gpt) return;
+
+  // we first check if we are inside the source form
+  if(dt_masks_point_in_form_exact(x, y, gpt->source, 1, gpt->source_count))
+  {
+    *inside_source = TRUE;
+    *inside = TRUE;
+
+    // distance from source center (sum of the sqf() terms, no square root is taken)
+    const float cx = x - gpt->source[0];
+    const float cy = y - gpt->source[1];
+    *dist = sqf(cx) + sqf(cy);
+
+    return;
+  }
+
+  // distance from center
+  // from here on *dist becomes the minimum over center, resize point and feather point
+  const float cx = x - gpt->points[0];
+  const float cy = y - gpt->points[1];
+  *dist = sqf(cx) + sqf(cy);
+
+  // compute distances from resize point
+
+  const float dx = x - gpt->points[2];
+  const float dy = y - gpt->points[3];
+  const float dd = sqf(dx) + sqf(dy);
+  *dist = fminf(*dist, dd);
+
+  // compute distances from feather point
+
+  const float bx = x - gpt->border[2];
+  const float by = y - gpt->border[3];
+  const float bd = sqf(bx) + sqf(by);
+  *dist = fminf(*dist, bd);
+
+  // we check if it's inside borders
+  if(!dt_masks_point_in_form_near(x, y, gpt->border, 1, gpt->border_count, as, near))
+  {
+    if(*near != -1)
+      *inside_border = TRUE;
+    else
+      return;
+  }
+  else
+    *inside_border= TRUE;
+  // only reached with *inside_border set: such a position also counts as inside the shape
+  *inside = TRUE;
+}
+
+// scroll handler of the point shape. While a shape is being created it changes the default
+// feather (shift) or size (no modifier) kept in the configuration; on a selected shape it
+// changes opacity (ctrl), feather (shift) or radius. Returns 1 if the event was used, else 0.
+static int _point_events_mouse_scrolled(dt_iop_module_t *module,
+                                        const float pzx,
+                                        const float pzy,
+                                        const int up,
+                                        const uint32_t state,
+                                        dt_masks_form_t *form,
+                                        const dt_mask_id_t parentid,
+                                        dt_masks_form_gui_t *gui,
+                                        const int index)
+{
+  // upper bounds for feather and size: 0.5 for clone / non-clone shapes, 1.0 otherwise
+  const float max_mask_border =
+    form->type & (DT_MASKS_CLONE | DT_MASKS_NON_CLONE) ? 0.5f : 1.0f;
+  const float max_mask_size =
+    form->type & (DT_MASKS_CLONE | DT_MASKS_NON_CLONE) ? 0.5f : 1.0f;
+
+  // add a preview when creating a point
+  if(gui->creation)
+  {
+    if(dt_modifier_is(state, GDK_SHIFT_MASK))
+    {
+      const float masks_border
+        = dt_masks_change_size(up, dt_conf_get_float(DT_MASKS_CONF(form->type, point, border)),
+                               MIN_POINT_BORDER, max_mask_border);
+
+      dt_conf_set_float(DT_MASKS_CONF(form->type, point, border), masks_border);
+      dt_toast_log(_("feather size: %3.2f%%"), masks_border*100.0f);
+    }
+    else if(dt_modifier_is(state, 0))
+    {
+      const float masks_size
+        = dt_masks_change_size(up, dt_conf_get_float(DT_MASKS_CONF(form->type, point, size)),
+                               MIN_POINT_RADIUS, max_mask_size);
+
+      dt_conf_set_float(DT_MASKS_CONF(form->type, point, size), masks_size);
+      dt_toast_log(_("size: %3.2f%%"), masks_size*100.0f);
+    }
+    dt_dev_masks_list_change(darktable.develop);
+    return 1;
+  }
+
+  if(gui->form_selected)
+  {
+    // we register the current position
+    if(gui->scrollx == 0.0f && gui->scrolly == 0.0f)
+    {
+      gui->scrollx = pzx;
+      gui->scrolly = pzy;
+    }
+    if(dt_modifier_is(state, GDK_CONTROL_MASK))
+    {
+      // we try to change the opacity
+      dt_masks_form_change_opacity(form, parentid, up ? 0.05f : -0.05f);
+    }
+    else
+    {
+      // NOTE(review): the FIXME presumably marks the reuse of the circle point type - confirm
+      dt_masks_point_circle_t *point = form->points->data; // FIXME
+      // resize don't care where the mouse is inside a shape
+      if(dt_modifier_is(state, GDK_SHIFT_MASK))
+      {
+        point->border = dt_masks_change_size(up, point->border,
+                                             MIN_POINT_BORDER, max_mask_border);
+
+        dt_dev_add_masks_history_item(darktable.develop, module, TRUE);
+        dt_masks_gui_form_create(form, gui, index, module);
+        // the new value is also written to the configuration key read in the creation branch
+        dt_conf_set_float(DT_MASKS_CONF(form->type, point, border), point->border);
+        dt_toast_log(_("feather size: %3.2f%%"), point->border*100.0f);
+      }
+      else if(gui->edit_mode == DT_MASKS_EDIT_FULL)
+      {
+        // the radius is clamped with the size limits, not with the feather (border) ones
+        point->radius = dt_masks_change_size(up, point->radius,
+                                             MIN_POINT_RADIUS, max_mask_size);
+
+        dt_dev_add_masks_history_item(darktable.develop, module, TRUE);
+        dt_masks_gui_form_create(form, gui, index, module);
+        dt_conf_set_float(DT_MASKS_CONF(form->type, point, size), point->radius);
+        dt_toast_log(_("size: %3.2f%%"), point->radius*100.0f);
+      }
+      else
+      {
+        return 0;
+      }
+    }
+    return 1;
+  }
+  return 0;
+}
+
+// button-press handler of the point shape: starts dragging an existing shape (full edit mode),
+// stops continuous creation on button 3 or creates a new point; returns 1 if the event was used
+static int _point_events_button_pressed(dt_iop_module_t *module,
+                                        float pzx, float pzy, const double pressure,
+                                        const int which, const int type,
+                                        const uint32_t state,
+                                        dt_masks_form_t *form,
+                                        const dt_mask_id_t parentid,
+                                        dt_masks_form_gui_t *gui,
+                                        const int index)
+{
+  if(!gui) return 0;
+
+  float wd, ht, iwidth, iheight;
+  dt_masks_get_image_size(&wd, &ht, &iwidth, &iheight);
+
+  if(!gui->creation)
+  {
+    dt_masks_form_gui_points_t *gpt = g_list_nth_data(gui->points, index);
+    if(!gpt) return 0;
+
+    if(gui->edit_mode == DT_MASKS_EDIT_FULL)
+    {
+      if(gui->source_selected)
+      {
+        // we start the form dragging
+        gui->source_dragging = TRUE;
+        gui->dx = gpt->source[0] - gui->posx;
+        gui->dy = gpt->source[1] - gui->posy;
+        return 1;
+      }
+
+      gui->dx = gpt->points[0] - gui->posx;
+      gui->dy = gpt->points[1] - gui->posy;
+
+      if(gui->point_selected >= 1)
+      {
+        gui->point_dragging = gui->point_selected;
+        return 1;
+      }
+      else if(gui->point_border_selected >= 1)
+      {
+        gui->point_border_dragging = gui->point_border_selected;
+        return 1;
+      }
+      else if(gui->form_selected)
+      {
+        gui->form_dragging = TRUE;
+        return 1;
+      }
+    }
+  }
+  else if(which == 3)
+  {
+    gui->creation_continuous = FALSE;
+    gui->creation_continuous_module = NULL;
+    dt_masks_set_edit_mode(module, DT_MASKS_EDIT_FULL);
+    dt_masks_iop_update(module);
+    dt_control_queue_redraw_center();
+    return 1;
+  }
+  else if(which == 1
+          && ((dt_modifier_is(state, GDK_CONTROL_MASK | GDK_SHIFT_MASK))
+              || dt_modifier_is(state, GDK_SHIFT_MASK)))
+  {
+    // set some absolute or relative position for the source of the clone mask
+    if(form->type & DT_MASKS_CLONE)
+      dt_masks_set_source_pos_initial_state(gui, state, pzx, pzy);
+
+    return 1;
+  }
+  else
+  {
+    // we create the point
+    dt_masks_point_circle_t *point = malloc(sizeof *point); // FIXME
+    if(!point) return 0; // out of memory: nothing was created, the event is left unhandled
+    // we change the center value
+    float pts[2] = { pzx * wd, pzy * ht };
+    dt_dev_distort_backtransform(darktable.develop, pts, 1);
+    point->center[0] = pts[0] / iwidth;
+    point->center[1] = pts[1] / iheight;
+
+    // calculate the source position
+    if(form->type & DT_MASKS_CLONE)
+    {
+      dt_masks_set_source_pos_initial_value(gui, DT_MASKS_POINT, form, pzx, pzy);
+    }
+    else
+    {
+      // not used by regular masks
+      form->source[0] = form->source[1] = 0.0f;
+    }
+    point->radius = dt_conf_get_float(DT_MASKS_CONF(form->type, point, size));
+    point->border = dt_conf_get_float(DT_MASKS_CONF(form->type, point, border));
+    form->points = g_list_append(form->points, point);
+
+    dt_iop_module_t *crea_module = gui->creation_module;
+    dt_masks_gui_form_save_creation(darktable.develop, crea_module, form, gui);
+
+    if(crea_module)
+    {
+      // we save the move
+      dt_dev_add_history_item(darktable.develop, crea_module, TRUE);
+      // and we switch in edit mode to show all the forms
+      // spots and retouch have their own handling of creation_continuous
+      if(gui->creation_continuous
+         && (dt_iop_module_is(crea_module->so, "spots")
+             || dt_iop_module_is(crea_module->so, "retouch")))
+        dt_masks_set_edit_mode_single_form(crea_module, form->formid, DT_MASKS_EDIT_FULL);
+      else if(!gui->creation_continuous)
+        dt_masks_set_edit_mode(crea_module, DT_MASKS_EDIT_FULL);
+      dt_masks_iop_update(crea_module);
+    }
+
+    dt_dev_masks_selection_change(darktable.develop, crea_module, form->formid);
+    gui->creation_module = NULL;
+
+    // if we draw a clone point, we start now the source dragging
+    if(form->type & (DT_MASKS_CLONE|DT_MASKS_NON_CLONE))
+    {
+      dt_masks_form_t *grp = darktable.develop->form_visible;
+      if(!grp || !(grp->type & DT_MASKS_GROUP)) return 1;
+      int pos3 = 0, pos2 = -1;
+      for(GList *fs = grp->points; fs; fs = g_list_next(fs))
+      {
+        dt_masks_point_group_t *pt = fs->data;
+        if(pt->formid == form->formid)
+        {
+          pos2 = pos3;
+          break;
+        }
+        pos3++;
+      }
+      if(pos2 < 0) return 1;
+      dt_masks_form_gui_t *gui2 = darktable.develop->form_gui;
+      if(!gui2) return 1;
+      if(form->type & DT_MASKS_CLONE)
+        gui2->source_dragging = TRUE;
+      else
+        gui2->form_dragging = TRUE;
+      gui2->group_edited = gui2->group_selected = pos2;
+      gui2->posx = pzx * wd;
+      gui2->posy = pzy * ht;
+      gui2->dx = 0.0;
+      gui2->dy = 0.0;
+      gui2->scrollx = pzx;
+      gui2->scrolly = pzy;
+      gui2->form_selected = TRUE; // we also want to be selected after button released
+
+      dt_masks_select_form(module, dt_masks_get_from_id(darktable.develop, form->formid));
+    }
+    //spot and retouch manage creation_continuous in their own way
+    if(gui->creation_continuous
+       && (!crea_module
+           || (!dt_iop_module_is(crea_module->so, "spots")
+               && !dt_iop_module_is(crea_module->so, "retouch"))))
+    {
+      if(crea_module)
+      {
+        dt_iop_gui_blend_data_t *bd = crea_module->blend_data;
+        for(int n = 0; n < DEVELOP_MASKS_NB_SHAPES; n++)
+          if(bd->masks_type[n] == form->type)
+            gtk_toggle_button_set_active(GTK_TOGGLE_BUTTON(bd->masks_shapes[n]), TRUE);
+
+        gtk_toggle_button_set_active(GTK_TOGGLE_BUTTON(bd->masks_edit), FALSE);
+        dt_masks_form_t *newform = dt_masks_create(form->type);
+        dt_masks_change_form_gui(newform);
+        darktable.develop->form_gui->creation_module = crea_module;
+        darktable.develop->form_gui->creation_continuous = TRUE;
+        darktable.develop->form_gui->creation_continuous_module = crea_module;
+      }
+      else
+      {
+        dt_masks_form_t *form_new = dt_masks_create(form->type);
+        dt_masks_change_form_gui(form_new);
+        darktable.develop->form_gui->creation_module = gui->creation_continuous_module;
+      }
+    }
+
+    return 1;
+  }
+  return 0;
+}
+
+static int _point_events_button_released(dt_iop_module_t *module,
+                                          const float pzx,
+                                          const float pzy,
+                                          const int which,
+                                          const uint32_t state, // not used in this handler
+                                          dt_masks_form_t *form,
+                                          const dt_mask_id_t parentid,
+                                          dt_masks_form_gui_t *gui,
+                                          const int index)
+{ // button release on a point form: returns 1 after a removal or a finished form/source drag, else 0
+  float wd, ht, iwidth, iheight;
+  dt_masks_get_image_size(&wd, &ht, &iwidth, &iheight);
+
+  if(which == 3 // presumably the right mouse button - TODO confirm against the caller
+     && dt_is_valid_maskid(parentid)
+     && gui->edit_mode == DT_MASKS_EDIT_FULL)
+  {
+    // first hide the form: the whole visible form, or only this shape's entry of a visible group
+    if(!(darktable.develop->form_visible->type & DT_MASKS_GROUP))
+      dt_masks_change_form_gui(NULL);
+    else if(g_list_shorter_than(darktable.develop->form_visible->points, 2))
+      dt_masks_change_form_gui(NULL);
+    else
+    {
+      dt_masks_clear_form_gui(darktable.develop);
+      for(GList *forms = darktable.develop->form_visible->points;
+          forms;
+          forms = g_list_next(forms))
+      {
+        dt_masks_point_group_t *gpt = forms->data;
+        if(gpt->formid == form->formid)
+        {
+          darktable.develop->form_visible->points
+              = g_list_remove(darktable.develop->form_visible->points, gpt);
+          free(gpt);
+          break; // the list was modified, so stop iterating right away
+        }
+      }
+      gui->edit_mode = DT_MASKS_EDIT_FULL;
+    }
+
+    // then we remove the shape, passing the form looked up from parentid
+    dt_masks_form_remove(module, dt_masks_get_from_id(darktable.develop, parentid), form);
+    return 1;
+  }
+  if(gui->form_dragging)
+  {
+    // we get the point
+    dt_masks_point_circle_t *point = form->points->data;
+
+    // we end the form dragging
+    gui->form_dragging = FALSE;
+
+    // new center: cursor position plus drag offset, back-transformed, then divided by iwidth/iheight
+    float pts[2] = { pzx * wd + gui->dx, pzy * ht + gui->dy };
+    dt_dev_distort_backtransform(darktable.develop, pts, 1);
+    point->center[0] = pts[0] / iwidth;
+    point->center[1] = pts[1] / iheight;
+    dt_dev_add_masks_history_item(darktable.develop, module, TRUE);
+
+    // we recreate the form points
+    dt_masks_gui_form_create(form, gui, index, module);
+
+    if(gui->creation_continuous) // continuous creation: switch the gui to a new form of the same type
+    {
+      dt_masks_form_t *form_new = dt_masks_create(form->type);
+      dt_masks_change_form_gui(form_new);
+      darktable.develop->form_gui->creation_module = gui->creation_continuous_module;
+    }
+    return 1;
+  }
+  else if(gui->source_dragging)
+  {
+    // we end the source dragging
+    gui->source_dragging = FALSE;
+
+    if(gui->scrollx != 0.0 || gui->scrolly != 0.0)
+    {
+      // nothing to do here: if there's no dragging the source is calculated in
+      // _point_events_button_pressed()
+    }
+    else
+    {
+      // we change the source position (same computation as for the center above)
+      float pts[2] = { pzx * wd + gui->dx, pzy * ht + gui->dy };
+
+      dt_dev_distort_backtransform(darktable.develop, pts, 1);
+
+      form->source[0] = pts[0] / iwidth;
+      form->source[1] = pts[1] / iheight;
+    }
+    dt_dev_add_masks_history_item(darktable.develop, module, TRUE);
+
+    // we recreate the form points
+    dt_masks_gui_form_create(form, gui, index, module);
+
+    if(gui->creation_continuous) // continuous creation: switch the gui to a new form of the same type
+    {
+      dt_masks_form_t *form_new = dt_masks_create(form->type);
+      dt_masks_change_form_gui(form_new);
+      darktable.develop->form_gui->creation_module = gui->creation_continuous_module;
+    }
+
+    // and select the source as default, if the mouse is not moved we are inside the
+    // source and so want to move the source.
+    gui->form_selected = TRUE;
+    gui->source_selected = TRUE;
+    gui->border_selected = FALSE;
+
+    return 1;
+  }
+  else if(gui->point_dragging >= 1 || gui->point_border_dragging >= 1)
+  {
+    // we end the point dragging (radius or border anchor) and record it in the history
+    gui->point_dragging = gui->point_border_dragging = -1;
+
+    dt_dev_add_masks_history_item(darktable.develop, module, TRUE);
+  }
+
+  return 0; // also reached after ending an anchor drag just above
+}
+
+static int _point_events_mouse_moved(dt_iop_module_t *module,
+                                      const float pzx,
+                                      const float pzy,
+                                      const double pressure, // not used in this handler
+                                      const int which, // not used in this handler
+                                      const float zoom_scale,
+                                      dt_masks_form_t *form,
+                                      const dt_mask_id_t parentid, // not used in this handler
+                                      dt_masks_form_gui_t *gui,
+                                      const int index)
+{ // mouse motion over a point form: live-updates drags, otherwise refreshes the selection flags
+  float wd, ht, iwidth, iheight;
+  dt_masks_get_image_size(&wd, &ht, &iwidth, &iheight);
+
+  if(gui->form_dragging || gui->source_dragging)
+  {
+    // cursor position plus drag offset, back-transformed and divided by iwidth/iheight below
+    float pts[2] = { pzx * wd + gui->dx, pzy * ht + gui->dy };
+    dt_dev_distort_backtransform(darktable.develop, pts, 1);
+
+    if(gui->form_dragging)
+    {
+      dt_masks_point_circle_t *point = form->points->data;
+      point->center[0] = pts[0] / iwidth;
+      point->center[1] = pts[1] / iheight;
+    }
+    else
+    {
+      form->source[0] = pts[0] / iwidth;
+      form->source[1] = pts[1] / iheight;
+    }
+
+    // we recreate the form points
+    dt_masks_gui_form_create(form, gui, index, module);
+    dt_control_queue_redraw_center();
+    return 1;
+  }
+  else if(gui->point_dragging >= 1) // dragging rescales the radius
+  {
+    const float max_mask_size = // upper radius limit: 0.5 for clone/non-clone forms, 1.0 otherwise
+      form->type & (DT_MASKS_CLONE | DT_MASKS_NON_CLONE) ? 0.5f : 1.0f;
+
+    dt_masks_point_circle_t *point = form->points->data;
+
+    const float s = dt_masks_drag_factor(gui, index, gui->point_dragging, FALSE);
+
+    point->radius = CLAMP(point->radius * s, MIN_POINT_RADIUS, max_mask_size);
+
+    // we recreate the form points
+    dt_masks_gui_form_create(form, gui, index, module);
+    dt_control_queue_redraw_center();
+    return 1;
+  }
+  else if(gui->point_border_dragging >= 1) // dragging rescales the border (feather) width
+  {
+    const float max_mask_border = // upper border limit: 0.5 for clone/non-clone forms, 1.0 otherwise
+      form->type & (DT_MASKS_CLONE | DT_MASKS_NON_CLONE) ? 0.5f : 1.0f;
+
+    dt_masks_point_circle_t *point = form->points->data;
+
+    const float s = dt_masks_drag_factor(gui, index, gui->point_border_dragging, TRUE);
+
+    // the factor scales the outer radius (radius + border); the border is what remains
+    point->border = CLAMP((point->radius + point->border) * s - point->radius,
+                           0.001f, max_mask_border);
+
+    dt_masks_gui_form_create(form, gui, index, module);
+    dt_control_queue_redraw_center();
+    return 1;
+  }
+  else if(!gui->creation) // no drag in progress and not creating: update the selection state
+  {
+    const float as = dt_masks_sensitive_dist(zoom_scale);
+    const float x = pzx * wd;
+    const float y = pzy * ht;
+    gboolean in, inb, ins; // as used below: inside shape / inside border / inside source
+    int near;
+    float dist;
+    _point_get_distance(x, y, as, gui, index, 0, &in, &inb, &near, &ins, &dist);
+    if(ins) // source takes precedence over border, border over shape
+    {
+      gui->form_selected = TRUE;
+      gui->source_selected = TRUE;
+      gui->border_selected = FALSE;
+    }
+    else if(inb)
+    {
+      gui->form_selected = TRUE;
+      gui->border_selected = TRUE;
+      gui->source_selected = FALSE;
+    }
+    else if(in)
+    {
+      gui->form_selected = TRUE;
+      gui->border_selected = FALSE;
+      gui->source_selected = FALSE;
+    }
+    else
+    {
+      gui->form_selected = FALSE;
+      gui->border_selected = FALSE;
+      gui->source_selected = FALSE;
+    }
+
+    // see if we are close to the anchor points
+    gui->point_selected = -1;
+    gui->point_border_selected = -1;
+
+    if(gui->form_selected)
+    {
+      dt_masks_form_gui_points_t *gpt = g_list_nth_data(gui->points, index);
+
+      // squared distances from the cursor to point #1 of the border and of the shape outline
+      const float as2 = sqf(as);
+      const float dist_b = sqf(x - gpt->border[2]) + sqf(y - gpt->border[3]);
+      const float dist_p = sqf(x - gpt->points[2]) + sqf(y - gpt->points[3]);
+
+      // prefer border point over shape itself in case of near overlap
+      // for ease of pickup
+      if(dist_b < as2)
+      {
+        gui->point_border_selected = 1;
+      }
+      else if(dist_p < as2)
+      {
+        gui->point_selected = 1;
+      }
+    }
+
+    dt_control_queue_redraw_center();
+    if(!gui->form_selected && !gui->border_selected) return 0;
+    if(gui->edit_mode != DT_MASKS_EDIT_FULL) return 0;
+    return 1;
+  }
+  // creation mode: only request a redraw so the preview follows the cursor
+  else if(gui->creation)
+  {
+    dt_control_queue_redraw_center();
+    return 1;
+  }
+
+  return 0; // not reached: the !gui->creation / gui->creation branches above are exhaustive
+}
+
+static void _point_draw_lines(const gboolean borders,
+                               const gboolean source,
+                               cairo_t *cr,
+                               const gboolean selected,
+                               const float zoom_scale,
+                               float *points, // interleaved x/y pairs
+                               const int points_count)
+{ // builds a closed cairo path through the outline points and strokes it
+  if(points_count <= 6) return; // too few points; also covers a NULL buffer passed with count 0
+
+  cairo_move_to(cr, points[2], points[3]); // the path starts at point #1
+  for(int i = _nb_ctrl_point(); i < points_count; i++) // indices below _nb_ctrl_point() are skipped
+  {
+    cairo_line_to(cr, points[i * 2], points[i * 2 + 1]);
+  }
+  cairo_line_to(cr, points[2], points[3]); // back to point #1 to close the outline
+
+  dt_masks_line_stroke(cr, borders, source, selected, zoom_scale); // flags are only forwarded here
+}
+
+static float *_points_to_transform(const float x,
+                                   const float y,
+                                   const float radius,
+                                   const float wd,
+                                   const float ht,
+                                   int *points_count)
+{ // returns a new buffer of x/y pairs (center first, then the circumference) or NULL on failure
+  // how many points do we need? roughly one per unit of circumference, but at least 10
+  const float r = radius * MIN(wd, ht); // radius is relative to the smaller of wd/ht
+  const size_t l = MAX(10, (size_t)(2.0f * M_PI * r));
+  // allocate buffer: l circumference points plus the center, two floats each
+  float *const restrict points = dt_alloc_align_float((l + 1) * 2);
+  if(!points)
+  {
+    *points_count = 0;
+    return NULL;
+  }
+  *points_count = l + 1;
+
+  // now we set the points, first the center, then the circumference
+  const float center_x = x * wd; // x and y are scaled by wd and ht respectively
+  const float center_y = y * ht;
+  points[0] = center_x;
+  points[1] = center_y;
+  DT_OMP_FOR_SIMD(if(l > 100) aligned(points:64))
+  for(int i = 1; i < l + 1; i++)
+  {
+    const float alpha = (i - 1) * 2.0f * M_PI / (float)l; // l equally spaced angles from 0
+    points[i * 2] = center_x + r * cosf(alpha);
+    points[i * 2 + 1] = center_y + r * sinf(alpha);
+  }
+  return points; // callers in this file release it with dt_free_align()
+}
+
+static int _point_get_points_source(dt_develop_t *dev,
+                                     const float x,
+                                     const float y,
+                                     const float xs,
+                                     const float ys,
+                                     const float radius,
+                                     const float radius2,
+                                     const float rotation,
+                                     float **points,
+                                     int *points_count,
+                                     const dt_iop_module_t *module)
+{ // outline of the source: returns 1 with *points/*points_count set, 0 with them reset to NULL/0
+  (void)radius2; // keep compiler from complaining about unused arg
+  (void)rotation;
+
+  float wd, ht;
+  dt_masks_get_image_size(NULL, NULL, &wd, &ht);
+
+  // compute the points of the target (center and circumference of point)
+  // we get the point in RAW image reference
+  *points = _points_to_transform(x, y, radius, wd, ht, points_count);
+  if(!*points) return 0; // allocation failed; the helper already set *points_count to 0
+
+  // we transform with all distortion that happen *before* the module
+  // so we have now the TARGET points in module input reference
+  if(dt_dev_distort_transform_plus(dev, dev->preview_pipe, module->iop_order,
+                                   DT_DEV_TRANSFORM_DIR_BACK_EXCL,
+                                   *points, *points_count))
+  {
+    // the source center goes through the same transform; its offset to the
+    // transformed target center (point #0) is the shift applied to all points
+    float pts[2] = { xs * wd, ys * ht };
+    if(dt_dev_distort_transform_plus(dev, dev->preview_pipe, module->iop_order,
+                                     DT_DEV_TRANSFORM_DIR_BACK_EXCL,
+                                     pts, 1))
+    {
+      const float dx = pts[0] - (*points)[0];
+      const float dy = pts[1] - (*points)[1];
+      float *const ptsbuf = DT_IS_ALIGNED(*points); // presumably an alignment hint - TODO confirm
+      DT_OMP_FOR(if(*points_count > 100))
+      for(int i = 0; i < *points_count; i++)
+      {
+        ptsbuf[i * 2] += dx;
+        ptsbuf[i * 2 + 1] += dy;
+      }
+
+      // we apply the rest of the distortions (those after the module)
+      // so we have now the SOURCE points in final image reference
+      if(dt_dev_distort_transform_plus(dev, dev->preview_pipe, module->iop_order,
+                                       DT_DEV_TRANSFORM_DIR_FORW_INCL,
+                                       *points, *points_count))
+        return 1;
+    }
+  }
+
+  // if any of the three transforms failed, then free all and return
+  dt_free_align(*points);
+  *points = NULL;
+  *points_count = 0;
+  return 0;
+}
+
+static int _point_get_points(dt_develop_t *dev,
+                              const float x,
+                              const float y,
+                              const float radius,
+                              const float radius2,
+                              const float rotation,
+                              float **points,
+                              int *points_count)
+{ // outline of the shape: returns 1 with *points/*points_count set, 0 with them reset to NULL/0
+  (void)radius2; // keep compiler from complaining about unused arg
+  (void)rotation;
+  float wd, ht;
+  dt_masks_get_image_size(NULL, NULL, &wd, &ht);
+
+  // compute the points we need to transform (center and circumference of point)
+  *points = _points_to_transform(x, y, radius, wd, ht, points_count);
+  if(!*points) return 0; // allocation failed; the helper already set *points_count to 0
+
+  // and transform them with all distorted modules
+  if(dt_dev_distort_transform(dev, *points, *points_count)) return 1;
+
+  // if the transform failed, then free all and return
+  dt_free_align(*points);
+  *points = NULL;
+  *points_count = 0;
+  return 0;
+}
+
+static void _point_events_post_expose(cairo_t *cr,
+                                       const float zoom_scale,
+                                       dt_masks_form_gui_t *gui,
+                                       const int index,
+                                       const int num_points)
+{ // draws either the creation preview or the existing shape, its border, anchors and source
+  (void)num_points; // unused arg, keep compiler from complaining
+
+  dt_masks_form_gui_points_t *gpt = g_list_nth_data(gui->points, index);
+
+  float wd, ht, iwidth, iheight;
+  dt_masks_get_image_size(&wd, &ht, &iwidth, &iheight);
+
+  // in creation mode we only draw a preview of the point at the cursor;
+  // nothing of an existing shape is drawn, we always return from this branch
+  if(gui->creation)
+  {
+    if(gui->guipoints_count == 0)
+    {
+      dt_masks_form_t *form = darktable.develop->form_visible;
+      if(!form) return;
+
+      // we get the default radius values; radius_b becomes the outer (radius + border) one
+      float radius_a = dt_conf_get_float(DT_MASKS_CONF(form->type, point, size));
+      float radius_b = dt_conf_get_float(DT_MASKS_CONF(form->type, point, border));
+      radius_b += radius_a;
+
+      float pts[2] = { gui->posx, gui->posy };
+      dt_dev_distort_backtransform(darktable.develop, pts, 1);
+      float x = pts[0] / iwidth;
+      float y = pts[1] / iheight;
+
+      // we get all the points, distorted if needed of the sample form
+      float *points = NULL;
+      int points_count = 0;
+      float *border = NULL;
+      int border_count = 0;
+      int draw = _point_get_points(darktable.develop, x, y,
+                                    radius_a, 0.0, 0.0, &points, &points_count);
+      if(draw && radius_a != radius_b)
+      {
+        draw = _point_get_points(darktable.develop, x, y,
+                                  radius_b, 0.0, 0.0, &border, &border_count);
+      }
+
+      // we draw the form and its border; a failed outline has count 0 and is skipped
+      cairo_save(cr);
+      // we draw the main shape
+      _point_draw_lines(FALSE, FALSE, cr,
+                         FALSE, zoom_scale, points, points_count);
+      // we draw the borders
+      _point_draw_lines(TRUE, FALSE, cr,
+                         FALSE, zoom_scale, border, border_count);
+      cairo_restore(cr);
+
+      // draw a cross where the source will be created
+      if(form->type & DT_MASKS_CLONE)
+      {
+        x = 0.0f;
+        y = 0.0f;
+        dt_masks_calculate_source_pos_value(gui, DT_MASKS_POINT,
+                                            gui->posx, gui->posy,
+                                            gui->posx, gui->posy,
+                                            &x, &y, FALSE);
+        dt_masks_draw_clone_source_pos(cr, zoom_scale, x, y);
+      }
+
+      if(points) dt_free_align(points);
+      if(border) dt_free_align(border);
+    }
+
+    return;
+  }
+
+  if(!gpt) return; // no cached gui points for this index: nothing to draw
+  // we draw the main shape
+  const gboolean selected = (gui->group_selected == index)
+    && (gui->form_selected || gui->form_dragging);
+
+  _point_draw_lines(FALSE, FALSE, cr,
+                     selected, zoom_scale, gpt->points, gpt->points_count);
+  // we draw the borders and both anchors (point #1 of the shape and of the border)
+  if(gui->show_all_feathers || gui->group_selected == index)
+  {
+    _point_draw_lines(TRUE, FALSE, cr,
+                       gui->border_selected, zoom_scale, gpt->border,
+                       gpt->border_count);
+    dt_masks_draw_anchor(cr, gui->point_dragging > 0
+                         || gui->point_selected > 0,
+                         zoom_scale, gpt->points[2], gpt->points[3]);
+    dt_masks_draw_anchor(cr, gui->point_border_dragging > 0
+                         || gui->point_border_selected > 0,
+                         zoom_scale, gpt->border[2], gpt->border[3]);
+  }
+
+  // draw the source if any
+  if(gpt->source_count > 6)
+  {
+    // offset between the source center and the dest center
+    // (point #0 of each outline).
+    const float cdx = gpt->source[0] - gpt->points[0];
+    const float cdy = gpt->source[1] - gpt->points[1];
+
+    // we don't draw the arrow if source==point, i.e. only when *both* offsets are zero
+    if(cdx != 0.0 || cdy != 0.0)
+    {
+      cairo_set_line_cap(cr, CAIRO_LINE_CAP_ROUND);
+
+      float to_x = 0.0f;
+      float to_y = 0.0f;
+      float from_x = 0.0f;
+      float from_y = 0.0f;
+
+      dt_masks_closest_point(gpt->points_count,
+                             _nb_ctrl_point(),
+                             gpt->points,
+                             gpt->source[0], gpt->source[1],
+                             &to_x, &to_y);
+
+      dt_masks_closest_point(gpt->source_count,
+                             _nb_ctrl_point(),
+                             gpt->source,
+                             to_x, to_y,
+                             &from_x, &from_y);
+
+      // then draw two lines for the arrow itself
+      dt_masks_draw_arrow(cr,
+                          from_x,from_y,
+                          to_x, to_y,
+                          zoom_scale,
+                          FALSE);
+
+      dt_masks_stroke_arrow(cr, gui, index, zoom_scale);
+    }
+
+    // we only draw the main shape for the source, no borders
+    _point_draw_lines(FALSE, TRUE, cr, selected,
+                       zoom_scale, gpt->source, gpt->source_count);
+  }
+}
+
+static void _bounding_box(const float *const points, // interleaved x/y pairs, pair #0 is the center
+                          const int num_points, // number of pairs, including the center
+                          int *width, // out: xmax - xmin, converted to int
+                          int *height, // out: ymax - ymin, converted to int
+                          int *posx, // out: xmin, converted to int
+                          int *posy) // out: ymin, converted to int
+{ // axis-aligned bounding box of the circumference points points[1 .. num_points-1]
+  // min/max search; maxima must start at -FLT_MAX (FLT_MIN is the smallest *positive* float)
+  float xmin = FLT_MAX, xmax = -FLT_MAX, ymin = FLT_MAX, ymax = -FLT_MAX;
+  for(int i = 1; i < num_points; i++) // skip point[0], which is point's center
+  {
+    xmin = fminf(points[i * 2], xmin);
+    xmax = fmaxf(points[i * 2], xmax);
+    ymin = fminf(points[i * 2 + 1], ymin);
+    ymax = fmaxf(points[i * 2 + 1], ymax);
+  }
+  // set the min/max values we found; the float -> int conversions truncate toward zero
+  *posx = xmin;
+  *posy = ymin;
+  *width = (xmax - xmin);
+  *height = (ymax - ymin);
+}
+
+static int _point_get_points_border(dt_develop_t *dev,
+                                     struct dt_masks_form_t *form,
+                                     float **points,
+                                     int *points_count,
+                                     float **border,
+                                     int *border_count,
+                                     const int source,
+                                     const dt_iop_module_t *module)
+{ // fills *points (and, for the non-source case, optionally *border); returns 1 on success, else 0
+  dt_masks_point_circle_t *point = form->points->data;
+
+  const float x = point->center[0];
+  const float y = point->center[1];
+
+  if(source) // source outline requested: border/border_count are not touched in this branch
+  {
+    const float xs = form->source[0];
+    const float ys = form->source[1];
+    return _point_get_points_source(dev, x, y, xs, ys,
+                                     point->radius, point->radius, 0,
+                                     points, points_count,
+                                     module);
+  }
+  else
+  {
+    if(form->functions->get_points(dev, x, y, // shape outline at the plain radius
+                                   point->radius, point->radius, 0,
+                                   points, points_count))
+    {
+      if(border) // the border outline is optional; it uses radius + border
+      {
+        const float outer_radius = point->radius + point->border;
+        return form->functions->get_points(dev, x, y,
+                                           outer_radius, outer_radius, 0,
+                                           border, border_count);
+      }
+      else
+        return 1;
+    }
+  }
+  return 0; // the shape outline could not be computed
+}
+
+static int _point_get_source_area(dt_iop_module_t *module,
+                                   dt_dev_pixelpipe_iop_t *piece,
+                                   dt_masks_form_t *form,
+                                   int *width,
+                                   int *height,
+                                   int *posx,
+                                   int *posy)
+{ // bounding box of the transformed source outline; returns 1 on success, 0 on failure
+  // we get the point values
+  dt_masks_point_circle_t *point = form->points->data;
+  const float wd = piece->pipe->iwidth;
+  const float ht = piece->pipe->iheight;
+
+  // compute the points we need to transform (source center and circumference incl. border)
+  const float outer_radius = point->radius + point->border;
+  int num_points;
+  float *const restrict points =
+    _points_to_transform(form->source[0], form->source[1],
+                         outer_radius, wd, ht, &num_points);
+  if(points == NULL)
+    return 0;
+
+  // and transform them with all distorted modules (uses darktable.develop, not module->dev)
+  if(!dt_dev_distort_transform_plus(darktable.develop, piece->pipe,
+                                    module->iop_order,
+                                    DT_DEV_TRANSFORM_DIR_BACK_INCL, points, num_points))
+  {
+    dt_free_align(points);
+    return 0;
+  }
+
+  _bounding_box(points, num_points, width, height, posx, posy); // fills the four outputs
+  dt_free_align(points);
+  return 1;
+}
+
+static int _point_get_area(const dt_iop_module_t *const restrict module,
+                            const dt_dev_pixelpipe_iop_t *const restrict piece,
+                            dt_masks_form_t *const restrict form,
+                            int *width,
+                            int *height,
+                            int *posx,
+                            int *posy)
+{ // bounding box of the transformed shape outline; returns 1 on success, 0 on failure
+  // we get the point values
+  dt_masks_point_circle_t *point = form->points->data;
+  float wd = piece->pipe->iwidth, ht = piece->pipe->iheight;
+
+  // compute the points we need to transform (center and circumference incl. border)
+  const float outer_radius = point->radius + point->border;
+  int num_points;
+  float *const restrict points =
+    _points_to_transform(point->center[0], point->center[1],
+                         outer_radius, wd, ht, &num_points);
+  if(points == NULL)
+    return 0;
+
+  // and transform them with all distorted modules
+  if(!dt_dev_distort_transform_plus(module->dev, piece->pipe, module->iop_order,
+                                    DT_DEV_TRANSFORM_DIR_BACK_INCL, points, num_points))
+  {
+    dt_free_align(points);
+    return 0;
+  }
+
+  _bounding_box(points, num_points, width, height, posx, posy); // fills the four outputs
+  dt_free_align(points);
+  return 1;
+}
+
+static int _point_get_mask(const dt_iop_module_t *const restrict module,
+                            const dt_dev_pixelpipe_iop_t *const restrict piece,
+                            dt_masks_form_t *const restrict form,
+                            float **buffer,
+                            int *width,
+                            int *height,
+                            int *posx,
+                            int *posy)
+{ // allocates *buffer (*width x *height floats) and fills it with the mask; returns 1 on success
+  double start2 = dt_get_debug_wtime();
+
+  // we get the area
+  if(!_point_get_area(module, piece, form, width, height, posx, posy)) return 0;
+
+  dt_print(DT_DEBUG_MASKS | DT_DEBUG_PERF,
+           "[masks %s] point area took %0.04f sec",
+           form->name, dt_get_lap_time(&start2));
+
+  // we get the point values
+  dt_masks_point_circle_t *const restrict point = form->points->data;
+
+  // we create a buffer holding the x/y coordinates of every pixel in the area
+  const int w = *width, h = *height;
+  float *const restrict points = dt_alloc_align_float((size_t)w * h * 2);
+  if(points == NULL)
+    return 0;
+
+  const float pos_x = *posx;
+  const float pos_y = *posy;
+  DT_OMP_FOR(if(h*w > 50000) num_threads(MIN(dt_get_num_threads(), (h*w)/20000)))
+  for(int i = 0; i < h; i++)
+  {
+    float *const restrict p = points + 2 * i * w; // start of row i
+    const float y = i + pos_y;
+    DT_OMP_SIMD(aligned(points : 64))
+    for(int j = 0; j < w; j++)
+    {
+      p[2*j] = pos_x + j;
+      p[2*j + 1] = y;
+    }
+  }
+  dt_print(DT_DEBUG_MASKS | DT_DEBUG_PERF,
+           "[masks %s] point draw took %0.04f sec", form->name, dt_get_lap_time(&start2));
+
+  // we back transform all these points
+  if(!dt_dev_distort_backtransform_plus(module->dev, piece->pipe, module->iop_order,
+                                        DT_DEV_TRANSFORM_DIR_BACK_INCL,
+                                        points, (size_t)w * h))
+  {
+    dt_free_align(points);
+    return 0;
+  }
+
+  dt_print(DT_DEBUG_MASKS | DT_DEBUG_PERF,
+           "[masks %s] point transform took %0.04f sec", form->name,
+           dt_get_lap_time(&start2));
+
+  // we allocate the output buffer, one float per pixel of the area
+  *buffer = dt_alloc_align_float((size_t)w * h);
+  if(*buffer == NULL)
+  {
+    dt_free_align(points);
+    return 0;
+  }
+
+  // we populate the buffer; all radii below are squared, in units of the pipe's input size
+  float *const restrict ptbuffer = *buffer;
+  const int wi = piece->pipe->iwidth, hi = piece->pipe->iheight;
+  const int mindim = MIN(wi, hi);
+  const float centerx = point->center[0] * wi;
+  const float centery = point->center[1] * hi;
+  const float radius2 = point->radius * mindim * point->radius * mindim;
+  const float total2 = (point->radius + point->border) * mindim
+    * (point->radius + point->border) * mindim;
+  const float border2 = total2 - radius2;
+  const float *const points_y = points + 1; // points_y[2 * i] is the y coordinate of pixel i
+  DT_OMP_FOR(if(h*w > 50000) num_threads(MIN(dt_get_num_threads(), (h*w)/20000)))
+  for(int i = 0 ; i < h*w; i++)
+  {
+    // find the square of the distance from the center
+    const float l2 = sqf(points[2 * i] - centerx) + sqf(points_y[2 * i] - centery);
+    // quadratic falloff between the point's radius and the radius of
+    // the outside of the feathering
+    const float ratio = (total2 - l2) / border2;
+    // enforce 1.0 inside the point and 0.0 outside the feathering
+    const float f = CLIP(ratio);
+    ptbuffer[i] = sqf(f);
+  }
+
+  dt_free_align(points); // only the coordinate scratch buffer is freed; *buffer stays allocated
+
+  dt_print(DT_DEBUG_MASKS | DT_DEBUG_PERF,
+           "[masks %s] point fill took %0.04f sec",
+           form->name, dt_get_lap_time(&start2));
+
+  return 1;
+}
+
+
+static int _point_get_mask_roi(const dt_iop_module_t *const restrict module,
+                                const dt_dev_pixelpipe_iop_t *const restrict piece,
+                                dt_masks_form_t *const form,
+                                const dt_iop_roi_t *const roi,
+                                float *const restrict buffer)
+{
+  double start1 = dt_get_debug_wtime();
+  double start2 = start1;
+
+  // we get the point parameters
+  dt_masks_point_circle_t *point = form->points->data;
+  const int wi = piece->pipe->iwidth, hi = piece->pipe->iheight;
+  const float centerx = point->center[0] * wi;
+  const float centery = point->center[1] * hi;
+  const int mindim = MIN(wi, hi);
+  const float radius2 = point->radius * mindim * point->radius * mindim;
+  const float total = (point->radius + point->border) * mindim;
+  const float total2 = total * total;
+  const float border2 = total2 - radius2;
+
+  // we create a buffer of grid points for later interpolation: higher
+  // speed and reduced memory footprint; we match size of buffer to
+  // bounding box around the shape
+  const int w = roi->width;
+  const int h = roi->height;
+  const int px = roi->x;
+  const int py = roi->y;
+  const float iscale = 1.0f / roi->scale;
+  // scale dependent resolution
+  const int grid = CLAMP((10.0f * roi->scale + 2.0f) / 3.0f, 1, 4);
+  const int gw = (w + grid - 1) / grid + 1;  // grid dimension of total roi
+  const int gh = (h + grid - 1) / grid + 1;  // grid dimension of total roi
+
+  // initialize output buffer with zero
+  memset(buffer, 0, sizeof(float) * w * h);
+
+  dt_print(DT_DEBUG_MASKS | DT_DEBUG_PERF,
+           "[masks %s] point init took %0.04f sec",
+           form->name, dt_get_lap_time(&start2));
+
+  // we look at the outer point of the shape - no effects outside of
+  // this point; we need many points as we do not know how the point
+  // might get distorted in the pixelpipe
+  const size_t circpts = dt_masks_roundup(MIN(360, 2 * M_PI * total2), 8);
+  float *const restrict circ = dt_alloc_align_float(circpts * 2);
+  if(circ == NULL) return 0;
+
+  DT_OMP_FOR(if(circpts/8 > 1000))
+  for(int n = 0; n < circpts / 8; n++)
+  {
+    const float phi = (2.0f * M_PI * n) / circpts;
+    const float x = total * cosf(phi);
+    const float y = total * sinf(phi);
+    const float cx = centerx;
+    const float cy = centery;
+    const int index_x = 2 * n * 8;
+    const int index_y = 2 * n * 8 + 1;
+    // take advantage of symmetry
+    circ[index_x] = cx + x;
+    circ[index_y] = cy + y;
+    circ[index_x + 2] = cx + x;
+    circ[index_y + 2] = cy - y;
+    circ[index_x + 4] = cx - x;
+    circ[index_y + 4] = cy + y;
+    circ[index_x + 6] = cx - x;
+    circ[index_y + 6] = cy - y;
+    circ[index_x + 8] = cx + y;
+    circ[index_y + 8] = cy + x;
+    circ[index_x + 10] = cx + y;
+    circ[index_y + 10] = cy - x;
+    circ[index_x + 12] = cx - y;
+    circ[index_y + 12] = cy + x;
+    circ[index_x + 14] = cx - y;
+    circ[index_y + 14] = cy - x;
+  }
+
+  // we transform the outer point from input image coordinates to current point in pixelpipe
+  if(!dt_dev_distort_transform_plus(module->dev, piece->pipe, module->iop_order,
+                                    DT_DEV_TRANSFORM_DIR_BACK_INCL, circ,
+                                    circpts))
+  {
+    dt_free_align(circ);
+    return 0;
+  }
+
+  dt_print(DT_DEBUG_MASKS | DT_DEBUG_PERF,
+           "[masks %s] point outline took %0.04f sec",
+           form->name, dt_get_lap_time(&start2));
+
+  // we get the min/max values ...
+  float xmin = FLT_MAX, ymin = FLT_MAX, xmax = FLT_MIN, ymax = FLT_MIN;
+  for(int n = 0; n < circpts; n++)
+  {
+    // just in case that transform throws surprising values
+    if(!(dt_isnormal(circ[2 * n]) && dt_isnormal(circ[2 * n + 1]))) continue;
+
+    xmin = MIN(xmin, circ[2 * n]);
+    xmax = MAX(xmax, circ[2 * n]);
+    ymin = MIN(ymin, circ[2 * n + 1]);
+    ymax = MAX(ymax, circ[2 * n + 1]);
+  }
+
+#if 0
+  printf("xmin %f, xmax %f, ymin %f, ymax %f\n", xmin, xmax, ymin, ymax);
+  printf("wi %d, hi %d, iscale %f\n", wi, hi, iscale);
+  printf("w %d, h %d, px %d, py %d\n", w, h, px, py);
+#endif
+
+  // ... and calculate the bounding box with a bit of reserve
+  const int bbxm = CLAMP((int)floorf(xmin / iscale - px) / grid - 1, 0, gw - 1);
+  const int bbXM = CLAMP((int)ceilf(xmax / iscale - px) / grid + 2, 0, gw - 1);
+  const int bbym = CLAMP((int)floorf(ymin / iscale - py) / grid - 1, 0, gh - 1);
+  const int bbYM = CLAMP((int)ceilf(ymax / iscale - py) / grid + 2, 0, gh - 1);
+  const int bbw = bbXM - bbxm + 1;
+  const int bbh = bbYM - bbym + 1;
+
+#if 0
+  printf("bbxm %d, bbXM %d, bbym %d, bbYM %d\n", bbxm, bbXM, bbym, bbYM);
+  printf("gw %d, gh %d, bbw %d, bbh %d\n", gw, gh, bbw, bbh);
+#endif
+
+  dt_free_align(circ);
+
+  dt_print(DT_DEBUG_MASKS | DT_DEBUG_PERF,
+           "[masks %s] point bounding box took %0.04f sec",
+           form->name, dt_get_lap_time(&start2));
+
+  // check if there is anything to do at all; only if width and height
+  // of bounding box is 2 or greater the shape lies inside of roi and
+  // requires action
+  if(bbw <= 1 || bbh <= 1)
+    return 1;
+
+  float *const restrict points = dt_alloc_align_float((size_t)bbw * bbh * 2);
+  if(points == NULL) return 0;
+
+  // we populate the grid points in module coordinates
+  DT_OMP_FOR(collapse(2) if(bbw*bbh > 50000))
+  for(int j = bbym; j <= bbYM; j++)
+    for(int i = bbxm; i <= bbXM; i++)
+    {
+      const size_t index = (size_t)(j - bbym) * bbw + i - bbxm;
+      points[index * 2] = (grid * i + px) * iscale;
+      points[index * 2 + 1] = (grid * j + py) * iscale;
+    }
+
+  dt_print(DT_DEBUG_MASKS | DT_DEBUG_PERF,
+           "[masks %s] point grid took %0.04f sec", form->name, dt_get_lap_time(&start2));
+
+  // we back transform all these points to the input image coordinates
+  if(!dt_dev_distort_backtransform_plus(module->dev, piece->pipe, module->iop_order,
+                                        DT_DEV_TRANSFORM_DIR_BACK_INCL, points,
+                                        (size_t)bbw * bbh))
+  {
+    dt_free_align(points);
+    return 0;
+  }
+
+  dt_print(DT_DEBUG_MASKS | DT_DEBUG_PERF,
+           "[masks %s] point transform took %0.04f sec", form->name,
+           dt_get_lap_time(&start2));
+
+  // we calculate the mask values at the transformed points;
+  // for results: re-use the points array
+  DT_OMP_FOR(collapse(2) if(bbh*bbw > 50000) num_threads(MIN(dt_get_num_threads(), (h*w)/20000)))
+  for(int j = 0; j < bbh; j++)
+    for(int i = 0; i < bbw; i++)
+    {
+      const size_t index = (size_t)j * bbw + i;
+      // find the square of the distance from the center
+      const float l2 = sqf(points[2 * index] - centerx)
+        + sqf(points[2 * index + 1] - centery);
+      // quadratic falloff between the point's radius and the radius
+      // of the outside of the feathering
+      const float ratio = (total2 - l2) / border2;
+      // enforce 1.0 inside the point and 0.0 outside the feathering
+      const float f = CLAMP(ratio, 0.0f, 1.0f);
+      points[2*index] = f * f;
+    }
+
+  dt_print(DT_DEBUG_MASKS | DT_DEBUG_PERF,
+           "[masks %s] point draw took %0.04f sec", form->name,
+           dt_get_lap_time(&start2));
+
+  // we fill the pre-initialized output buffer by interpolation;
+  // we only need to take the contents of our bounding box into account
+  const int endx = MIN(w, bbXM * grid);
+  const int endy = MIN(h, bbYM * grid);
+  DT_OMP_FOR()
+  for(int j = bbym * grid; j < endy; j++)
+  {
+    const int jj = j % grid;
+    const int mj = j / grid - bbym;
+    for(int i = bbxm * grid; i < endx; i++)
+    {
+      const int ii = i % grid;
+      const int mi = i / grid - bbxm;
+      const size_t mindex = (size_t)mj * bbw + mi;
+      buffer[(size_t)j * w + i]
+          = (points[mindex * 2] * (grid - ii) * (grid - jj)
+             + points[(mindex + 1) * 2] * ii * (grid - jj)
+             + points[(mindex + bbw) * 2] * (grid - ii) * jj
+             + points[(mindex + bbw + 1) * 2] * ii * jj)
+            / (grid * grid);
+    }
+  }
+
+  dt_free_align(points);
+
+  dt_print(DT_DEBUG_MASKS | DT_DEBUG_PERF,
+           "[masks %s] point fill took %0.04f sec",
+           form->name, dt_get_lap_time(&start2));
+  dt_print(DT_DEBUG_MASKS | DT_DEBUG_PERF,
+           "[masks %s] point total render took %0.04f sec", form->name,
+           dt_get_lap_time(&start1));
+
+  return 1;
+}
+
+static GSList *_point_setup_mouse_actions(const struct dt_masks_form_t *const form)
+{
+  GSList *lm = NULL;
+  lm = dt_mouse_action_create_simple(lm, DT_MOUSE_ACTION_SCROLL,
+                                     0, _("[POINT] change size"));
+  lm = dt_mouse_action_create_simple(lm, DT_MOUSE_ACTION_SCROLL,
+                                     GDK_SHIFT_MASK, _("[POINT] change feather size"));
+  lm = dt_mouse_action_create_simple(lm, DT_MOUSE_ACTION_SCROLL,
+                                     GDK_CONTROL_MASK, _("[POINT] change opacity"));
+  return lm;
+}
+
+static void _point_sanitize_config(dt_masks_type_t type)
+{
+  dt_conf_get_and_sanitize_float(DT_MASKS_CONF(type, point, size), MIN_POINT_RADIUS, 0.5f);
+  dt_conf_get_and_sanitize_float(DT_MASKS_CONF(type, point, border), MIN_POINT_BORDER, 0.5f);
+}
+
+static void _point_set_form_name(dt_masks_form_t *const form,
+                                  const size_t nb)
+{
+  snprintf(form->name, sizeof(form->name), _("point #%d"), (int)nb);
+}
+
+static void _point_set_hint_message(const dt_masks_form_gui_t *const gui,
+                                     const dt_masks_form_t *const form,
+                                     const int opacity,
+                                     char *const restrict msgbuf,
+                                     const size_t msgbuf_len)
+{
+  // point has same controls on creation and on edit
+  g_snprintf(msgbuf, msgbuf_len,
+             _("<b>size</b>: scroll, <b>feather size</b>: shift+scroll\n"
+               "<b>opacity</b>: ctrl+scroll (%d%%)"), opacity);
+}
+
+static void _point_duplicate_points(dt_develop_t *dev,
+                                     dt_masks_form_t *const base,
+                                     dt_masks_form_t *const dest)
+{
+  (void)dev; // unused arg, keep compiler from complaining
+  for(GList *pts = base->points; pts; pts = g_list_next(pts))
+  { // deep-copy each point of base and append the copy to dest->points
+    dt_masks_point_circle_t *npt = malloc(sizeof(dt_masks_point_circle_t));
+    if(npt == NULL) return; // out of memory: stop copying rather than memcpy() into NULL
+    memcpy(npt, pts->data, sizeof(dt_masks_point_circle_t));
+    dest->points = g_list_append(dest->points, npt);
+  }
+}
+
+static void _point_modify_property(dt_masks_form_t *const form,
+                                    const dt_masks_property_t prop,
+                                    const float old_val,
+                                    const float new_val,
+                                    float *sum,
+                                    int *count,
+                                    float *min,
+                                    float *max)
+{
+  float ratio = (!old_val || !new_val) ? 1.0f : new_val / old_val;
+  // ratio is the scale factor to apply; a zero old or new value leaves the property unscaled
+  dt_masks_point_circle_t *point = form->points ? form->points->data : NULL;
+  // only the first point is used; a form without points falls back to the configured value
+  float masks_size = point
+    ? point->radius
+    : dt_conf_get_float(DT_MASKS_CONF(form->type, point, size));
+  // size and feather are scaled, clamped and written to both the point (if any) and the config
+  switch(prop)
+  {
+    case DT_MASKS_PROPERTY_SIZE:;
+      const float max_mask_size =
+        form->type & (DT_MASKS_CLONE | DT_MASKS_NON_CLONE) ? 0.5f : 1.0f;
+      masks_size = CLAMP(masks_size * ratio, MIN_POINT_RADIUS, max_mask_size);
+
+      if(point) point->radius = masks_size;
+      dt_conf_set_float(DT_MASKS_CONF(form->type, point, size), masks_size);
+      // report the new value and narrow [*min, *max] to the ratios that stay inside the clamp range
+      *sum += masks_size;
+      *max = fminf(*max, max_mask_size / masks_size);
+      *min = fmaxf(*min, MIN_POINT_RADIUS / masks_size);
+      ++*count;
+      break;
+    case DT_MASKS_PROPERTY_FEATHER:;
+      const float max_mask_border =
+        form->type & (DT_MASKS_CLONE | DT_MASKS_NON_CLONE) ? 0.5f : 1.0f;
+      float masks_border =
+        point
+        ? point->border
+        : dt_conf_get_float(DT_MASKS_CONF(form->type, point, border));
+
+      masks_border = CLAMP(masks_border * ratio, MIN_POINT_BORDER, max_mask_border);
+
+      if(point) point->border = masks_border;
+      dt_conf_set_float(DT_MASKS_CONF(form->type, point, border), masks_border);
+      // same bookkeeping as for the size property, using the border limits
+      *sum += masks_border;
+      *max = fminf(*max, max_mask_border / masks_border);
+      *min = fmaxf(*min, MIN_POINT_BORDER / masks_border);
+      ++*count;
+      break;
+    default:;
+  }
+}
+
+static void _point_initial_source_pos(const float iwd,
+                                       const float iht,
+                                       float *x,
+                                       float *y)
+{
+  const float radius = MIN(0.5f, dt_conf_get_float("plugins/darkroom/spots/point_size"));
+  // NOTE(review): literal conf key, unlike DT_MASKS_CONF(type, point, size) used elsewhere — confirm they match
+  *x = (radius * iwd);   // offset by one (capped) radius, scaled by iwd
+  *y = -(radius * iht);  // same magnitude scaled by iht, with negative sign
+}
+
+// The function table for points.  This must be public, i.e. no "static" keyword.
+const dt_masks_functions_t dt_masks_functions_point = {
+  .point_struct_size = sizeof(struct dt_masks_point_circle_t),
+  .sanitize_config = _point_sanitize_config,
+  .setup_mouse_actions = _point_setup_mouse_actions,
+  .set_form_name = _point_set_form_name,
+  .set_hint_message = _point_set_hint_message,
+  .modify_property = _point_modify_property,
+  .duplicate_points = _point_duplicate_points,
+  .initial_source_pos = _point_initial_source_pos,
+  .get_distance = _point_get_distance,
+  .get_points = _point_get_points,
+  .get_points_border = _point_get_points_border,
+  .get_mask = _point_get_mask,
+  .get_mask_roi = _point_get_mask_roi,
+  .get_area = _point_get_area,
+  .get_source_area = _point_get_source_area,
+  .mouse_moved = _point_events_mouse_moved,
+  .mouse_scrolled = _point_events_mouse_scrolled,
+  .button_pressed = _point_events_button_pressed,
+  .button_released = _point_events_button_released,
+  .post_expose = _point_events_post_expose
+};
+
+
+// clang-format off
+// modelines: These editor modelines have been set for all relevant files by tools/update_modelines.py
+// vim: shiftwidth=2 expandtab tabstop=2 cindent
+// kate: tab-indents: off; indent-width 2; replace-tabs on; indent-mode cstyle; remove-trailing-spaces modified;
+// clang-format on

From 81688d728298635ce9998cdd8e9aee9c155635c8 Mon Sep 17 00:00:00 2001
From: MikoMikarro <lopezcuestam@gmail.com>
Date: Tue, 14 Jan 2025 11:49:59 +0100
Subject: [PATCH 03/14] add onnxruntime

---
 .gitmodules              | 3 +++
 src/external/onnxruntime | 1 +
 2 files changed, 4 insertions(+)
 create mode 160000 src/external/onnxruntime

diff --git a/.gitmodules b/.gitmodules
index 06a31cc63826..0767f4bfe13d 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -20,3 +20,6 @@
 [submodule "src/external/lua-scripts"]
 	path = src/external/lua-scripts
 	url = https://github.com/darktable-org/lua-scripts.git
+[submodule "src/external/onnxruntime"]
+	path = src/external/onnxruntime
+	url = https://github.com/microsoft/onnxruntime
diff --git a/src/external/onnxruntime b/src/external/onnxruntime
new file mode 160000
index 000000000000..4e4fd2bdcf0f
--- /dev/null
+++ b/src/external/onnxruntime
@@ -0,0 +1 @@
+Subproject commit 4e4fd2bdcf0f12e1c897c77e6384cb1e97cd80c3

From c3cfe6b55462ccbb0530545d9a1e86364c71afc4 Mon Sep 17 00:00:00 2001
From: MikoMikarro <lopezcuestam@gmail.com>
Date: Wed, 15 Jan 2025 13:35:18 +0100
Subject: [PATCH 04/14] added cmake to include onnx

---
 src/CMakeLists.txt | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 0455a1e1993d..ac4343ba1d59 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -1,5 +1,7 @@
 add_subdirectory(external)
 
+set(ONNXRUNTIME_ROOTDIR "/home/miko/Downloads/installers/onnxruntime-linux-x64" CACHE PATH "ONNX Runtime install prefix (must contain lib/libonnxruntime.so); override with -DONNXRUNTIME_ROOTDIR=...")
+
 include(CheckCSourceCompiles)
 include(CheckCXXSymbolExists)
 
@@ -159,7 +161,7 @@ FILE(GLOB SOURCE_FILES
   "views/view.c"
   )
 
-FILE(GLOB HEADER_FILES "*.h" "common/*.h" "external/OpenCL/CL/*.h" "control/*.h" "iop/*.h" "libs/*.h" "views/*.h")
+FILE(GLOB HEADER_FILES "*.h" "common/*.h" "external/OpenCL/CL/*.h" "external/onnxruntime/include/onnxruntime/core/session/*.h" "control/*.h" "iop/*.h" "libs/*.h" "views/*.h")
 
 if(APPLE)
   list(APPEND SOURCE_FILES "osx/osx.mm")
@@ -227,6 +229,7 @@ endif()
 include_directories(${CMAKE_CURRENT_SOURCE_DIR})
 include_directories(SYSTEM ${CMAKE_CURRENT_SOURCE_DIR}/external)
 include_directories(SYSTEM ${CMAKE_CURRENT_SOURCE_DIR}/external/OpenCL)
+include_directories(SYSTEM ${CMAKE_CURRENT_SOURCE_DIR}/external/onnxruntime/include/onnxruntime/core/session)
 
 # initial compiler flags
 add_definitions("-DHAVE_CONFIG_H")
@@ -973,6 +976,8 @@ unset(LIB_DEPS)
 add_library(lib_darktable_imageio_rawspeed STATIC imageio/imageio_rawspeed.cc)
 target_link_libraries(lib_darktable_imageio_rawspeed PRIVATE rawspeed)
 target_link_libraries(lib_darktable PRIVATE lib_darktable_imageio_rawspeed)
+link_directories(lib_darktable "${ONNXRUNTIME_ROOTDIR}/lib")
+target_link_libraries(lib_darktable PRIVATE "${ONNXRUNTIME_ROOTDIR}/lib/libonnxruntime.so")
 
 #
 # Install lib_darktable

From 6d8094c901a5d0244685b61a51676c7a883f191b Mon Sep 17 00:00:00 2001
From: MikoMikarro <lopezcuestam@gmail.com>
Date: Wed, 15 Jan 2025 13:43:46 +0100
Subject: [PATCH 05/14] added mask button

---
 src/develop/blend.h       | 2 +-
 src/develop/blend_gui.c   | 8 ++++++++
 src/develop/masks.h       | 9 +++++++++
 src/develop/masks/masks.c | 8 ++++++++
 src/iop/bloom.c           | 4 ++--
 5 files changed, 28 insertions(+), 3 deletions(-)

diff --git a/src/develop/blend.h b/src/develop/blend.h
index 872c97514154..dc9b5a1c9238 100644
--- a/src/develop/blend.h
+++ b/src/develop/blend.h
@@ -284,7 +284,7 @@ extern const dt_introspection_type_enum_tuple_t dt_develop_combine_masks_names[]
 extern const dt_introspection_type_enum_tuple_t dt_develop_feathering_guide_names[];
 extern const dt_introspection_type_enum_tuple_t dt_develop_invert_mask_names[];
 
-#define DEVELOP_MASKS_NB_SHAPES 5
+#define DEVELOP_MASKS_NB_SHAPES 6
 
 /** blend gui data */
 typedef struct dt_iop_gui_blend_data_t
diff --git a/src/develop/blend_gui.c b/src/develop/blend_gui.c
index 578c076ed63e..14df359fdf82 100644
--- a/src/develop/blend_gui.c
+++ b/src/develop/blend_gui.c
@@ -2872,6 +2872,14 @@ void dt_iop_gui_init_masks(GtkWidget *blendw, dt_iop_module_t *module)
                                                   FALSE, 0, 0,
                                                   dtgtk_cairo_paint_masks_brush, abox);
 
+    bd->masks_type[5] = DT_MASKS_POINT;
+    bd->masks_shapes[5] = dt_iop_togglebutton_new(module, "blend`shapes",
+                                                  N_("add point"),
+                                                  N_("add multiple points"),
+                                                  G_CALLBACK(_blendop_masks_add_shape),
+                                                  FALSE, 0, 0,
+                                                  dtgtk_cairo_paint_masks_ai, abox);
+
     bd->masks_type[1] = DT_MASKS_PATH;
     bd->masks_shapes[1] = dt_iop_togglebutton_new(module, "blend`shapes",
                                                   N_("add path"),
diff --git a/src/develop/masks.h b/src/develop/masks.h
index fda1f7fb4ab3..7b832f210d09 100644
--- a/src/develop/masks.h
+++ b/src/develop/masks.h
@@ -327,6 +327,13 @@ typedef struct dt_masks_functions_t
                       const int num_points);
 } dt_masks_functions_t;
 
+typedef struct dt_masks_fast_sam_data_t{
+    bool proxy_data_initialized;
+    uint8_t *proxy_data; // Scaled-down image data
+    int proxy_width;
+    int proxy_height;
+} dt_masks_fast_sam_data_t;
+
 /** structure used to define a form */
 typedef struct dt_masks_form_t
 {
@@ -340,6 +347,8 @@ typedef struct dt_masks_form_t
   char name[128];
   // id used to store the form
   dt_mask_id_t formid;
+  // data for the FastSAM model
+  dt_masks_fast_sam_data_t *fast_sam_data;
   // version of the form
   int version;
 } dt_masks_form_t;
diff --git a/src/develop/masks/masks.c b/src/develop/masks/masks.c
index cce4186dfbd1..4f72b2ceff3b 100644
--- a/src/develop/masks/masks.c
+++ b/src/develop/masks/masks.c
@@ -855,7 +855,15 @@ dt_masks_form_t *dt_masks_create(dt_masks_type_t type)
   else if(type & DT_MASKS_GROUP)
     form->functions = &dt_masks_functions_group;
   else if(type & DT_MASKS_POINT)
+  {
     form->functions = &dt_masks_functions_point;
+    dt_masks_fast_sam_data_t *data = malloc(sizeof(dt_masks_fast_sam_data_t));
+    data->proxy_data_initialized = false;
+    data->proxy_data = NULL;
+    data->proxy_width = 1024;
+    data->proxy_height = 1024;
+    form->fast_sam_data = data;
+  }
 
   if(form->functions && form->functions->sanitize_config)
     form->functions->sanitize_config(type);
diff --git a/src/iop/bloom.c b/src/iop/bloom.c
index ef6bc3a7d747..3016078f4ab0 100644
--- a/src/iop/bloom.c
+++ b/src/iop/bloom.c
@@ -143,7 +143,7 @@ void process(dt_iop_module_t *self,
   const float scale = 1.0f / exp2f(-1.0f * (fmin(100.0f, data->strength + 1.0f) / 100.0f));
 
   const float threshold = data->threshold;
-/* get the thresholded lights into buffer */
+  /* get the thresholded lights into buffer */
   DT_OMP_FOR()
   for(size_t k = 0; k < npixels; k++)
   {
@@ -157,7 +157,7 @@ void process(dt_iop_module_t *self,
 
   dt_box_mean(blurlightness, roi_out->height, roi_out->width, 1, hr, BOX_ITERATIONS);
 
-/* screen blend lightness with original */
+  /* screen blend lightness with original */
   DT_OMP_FOR()
   for(size_t k = 0; k < npixels; k++)
   {

From 91ade3730d0abb59ffe9c6dbf0996d8a62c2e520 Mon Sep 17 00:00:00 2001
From: MikoMikarro <lopezcuestam@gmail.com>
Date: Mon, 20 Jan 2025 01:27:57 +0100
Subject: [PATCH 06/14] added basic mask creation and loading

---
 src/CMakeLists.txt              |  19 ++
 src/develop/image_file.h        |  12 +
 src/develop/image_file_libpng.c |  19 ++
 src/develop/masks/point.c       | 138 ++++----
 src/develop/pixelpipe_hb.c      | 547 ++++++++++++++++++++++++++++++++
 src/develop/pixelpipe_hb.h      |   8 +
 6 files changed, 683 insertions(+), 60 deletions(-)
 create mode 100644 src/develop/image_file.h
 create mode 100644 src/develop/image_file_libpng.c

diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index ac4343ba1d59..5b63390a36c8 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -121,6 +121,7 @@ FILE(GLOB SOURCE_FILES
   "develop/masks/path.c"
   "develop/pixelpipe.c"
   "develop/tiling.c"
+  "develop/image_file_libpng.c"
   "dtgtk/button.c"
   "dtgtk/culling.c"
   "dtgtk/drawingarea.c"
@@ -979,6 +980,24 @@ target_link_libraries(lib_darktable PRIVATE lib_darktable_imageio_rawspeed)
 link_directories(lib_darktable "${ONNXRUNTIME_ROOTDIR}/lib")
 target_link_libraries(lib_darktable PRIVATE "${ONNXRUNTIME_ROOTDIR}/lib/libonnxruntime.so")
 
+
+find_package(JPEG)
+  if(LIBPNG_ROOTDIR)
+    set(PNG_FOUND true)
+    set(PNG_LIBRARIES png16)
+    set(PNG_INCLUDE_DIRS "${LIBPNG_ROOTDIR}/include")
+    set(PNG_LIBDIR "${LIBPNG_ROOTDIR}/lib")
+  else()
+    find_package(PNG)
+  endif()
+
+target_include_directories(lib_darktable PRIVATE ${PNG_INCLUDE_DIRS})
+target_link_libraries(lib_darktable PRIVATE ${PNG_LIBRARIES})
+
+if(PNG_LIBDIR)
+  target_link_directories(lib_darktable PRIVATE ${PNG_LIBDIR})
+endif()
+
 #
 # Install lib_darktable
 #
diff --git a/src/develop/image_file.h b/src/develop/image_file.h
new file mode 100644
index 000000000000..3ef704d97a1d
--- /dev/null
+++ b/src/develop/image_file.h
@@ -0,0 +1,12 @@
+#pragma once
+#include "onnxruntime_c_api.h"
+#ifdef __cplusplus
+extern "C" {
+#endif
+/* Write a height x width 8-bit BGR image to output_file as PNG; returns 0 on success, -1 on failure. */
+int write_image_file(_In_ uint8_t* model_output_bytes, unsigned int height,
+                     unsigned int width, _In_z_ const char* output_file);
+
+#ifdef __cplusplus
+}
+#endif
\ No newline at end of file
diff --git a/src/develop/image_file_libpng.c b/src/develop/image_file_libpng.c
new file mode 100644
index 000000000000..051a6ac367d3
--- /dev/null
+++ b/src/develop/image_file_libpng.c
@@ -0,0 +1,19 @@
+#include "image_file.h"
+#include <png.h>
+
+int write_image_file(uint8_t* model_output_bytes, unsigned int height,
+                     unsigned int width, const char* output_file){
+  // Save an 8-bit BGR buffer (default row stride) as PNG; returns 0 on success, -1 on failure.
+  png_image image;
+  memset(&image, 0, (sizeof image));
+  image.version = PNG_IMAGE_VERSION;
+  image.format = PNG_FORMAT_BGR;
+  image.height = height;
+  image.width = width;
+  if (png_image_write_to_file(&image, output_file, 0 /*convert_to_8bit*/, model_output_bytes, 0 /*row_stride*/,
+			      NULL /*colormap*/) == 0) {
+    fprintf(stderr, "write to '%s' failed:%s\n", output_file, image.message); // diagnostics go to stderr
+    return -1;
+  }
+  return 0;
+}
diff --git a/src/develop/masks/point.c b/src/develop/masks/point.c
index bcb1b39d9a8b..1cb3abe9a8bd 100644
--- a/src/develop/masks/point.c
+++ b/src/develop/masks/point.c
@@ -24,10 +24,11 @@
 #include "develop/blend.h"
 #include "develop/imageop.h"
 #include "develop/masks.h"
-#include "develop/openmp_maths.h"
+#include "develop/openmp_maths.h" 
 
-#define MIN_POINT_RADIUS 0.0005f
-#define MIN_POINT_BORDER 0.0005f
+
+#define MIN_POINT_RADIUS 0.005f
+#define MIN_POINT_BORDER 0.005f
 
 static inline int _nb_ctrl_point(void)
 {
@@ -117,6 +118,7 @@ static int _point_events_mouse_scrolled(dt_iop_module_t *module,
                                          dt_masks_form_gui_t *gui,
                                          const int index)
 {
+  /*
   const float max_mask_border =
     form->type & (DT_MASKS_CLONE | DT_MASKS_NON_CLONE) ? 0.5f : 1.0f;
   const float max_mask_size =
@@ -198,6 +200,7 @@ static int _point_events_mouse_scrolled(dt_iop_module_t *module,
     }
     return 1;
   }
+  */
   return 0;
 }
 
@@ -352,33 +355,6 @@ static int _point_events_button_pressed(dt_iop_module_t *module,
 
       dt_masks_select_form(module, dt_masks_get_from_id(darktable.develop, form->formid));
     }
-    //spot and retouch manage creation_continuous in their own way
-    if(gui->creation_continuous
-       && (!crea_module
-           || (!dt_iop_module_is(crea_module->so, "spots")
-               && !dt_iop_module_is(crea_module->so, "retouch"))))
-    {
-      if(crea_module)
-      {
-        dt_iop_gui_blend_data_t *bd = crea_module->blend_data;
-        for(int n = 0; n < DEVELOP_MASKS_NB_SHAPES; n++)
-          if(bd->masks_type[n] == form->type)
-            gtk_toggle_button_set_active(GTK_TOGGLE_BUTTON(bd->masks_shapes[n]), TRUE);
-
-        gtk_toggle_button_set_active(GTK_TOGGLE_BUTTON(bd->masks_edit), FALSE);
-        dt_masks_form_t *newform = dt_masks_create(form->type);
-        dt_masks_change_form_gui(newform);
-        darktable.develop->form_gui->creation_module = crea_module;
-        darktable.develop->form_gui->creation_continuous = TRUE;
-        darktable.develop->form_gui->creation_continuous_module = crea_module;
-      }
-      else
-      {
-        dt_masks_form_t *form_new = dt_masks_create(form->type);
-        dt_masks_change_form_gui(form_new);
-        darktable.develop->form_gui->creation_module = gui->creation_continuous_module;
-      }
-    }
 
     return 1;
   }
@@ -559,6 +535,7 @@ static int _point_events_mouse_moved(dt_iop_module_t *module,
     dt_control_queue_redraw_center();
     return 1;
   }
+  /*
   else if(gui->point_border_dragging >= 1)
   {
     const float max_mask_border =
@@ -575,6 +552,7 @@ static int _point_events_mouse_moved(dt_iop_module_t *module,
     dt_control_queue_redraw_center();
     return 1;
   }
+  */
   else if(!gui->creation)
   {
     const float as = dt_masks_sensitive_dist(zoom_scale);
@@ -665,7 +643,7 @@ static void _point_draw_lines(const gboolean borders,
   }
   cairo_line_to(cr, points[2], points[3]);
 
-  dt_masks_line_stroke(cr, borders, source, selected, zoom_scale);
+  dt_masks_line_stroke(cr, borders, source, selected, 1.0);
 }
 
 static float *_points_to_transform(const float x,
@@ -1174,43 +1152,82 @@ static int _point_get_mask_roi(const dt_iop_module_t *const restrict module,
                                 const dt_iop_roi_t *const roi,
                                 float *const restrict buffer)
 {
-  double start1 = dt_get_debug_wtime();
-  double start2 = start1;
+  //double start1 = dt_get_debug_wtime();
+  //double start2 = start1;
 
   // we get the point parameters
-  dt_masks_point_circle_t *point = form->points->data;
+
+  //dt_masks_fast_sam_data_t *fast_sam_data = form->fast_sam_data;
+  
+  printf("I'm on get_mask_roi\n");
+  dt_dev_pixelpipe_t* p = piece->pipe;
+  dt_masks_point_circle_t *circle = form->points->data;
   const int wi = piece->pipe->iwidth, hi = piece->pipe->iheight;
-  const float centerx = point->center[0] * wi;
-  const float centery = point->center[1] * hi;
-  const int mindim = MIN(wi, hi);
-  const float radius2 = point->radius * mindim * point->radius * mindim;
-  const float total = (point->radius + point->border) * mindim;
-  const float total2 = total * total;
-  const float border2 = total2 - radius2;
+  
+  //wi /= 
+
+  printf("Circle position:\nx: %f, y: %f", circle->center[0] * wi, circle->center[1] * hi);
+
+  if (p->has_proxy){
+    //size_t n_masks = p->n_masks;
+    size_t stride = p->proxy_width * p->proxy_height;
+
+    for (int i = 0; i < roi->width * roi->height; i++){
+      buffer[i] = 0.0f;
+    }
+
+    for (int mask_i = 0; mask_i < 1; mask_i++){
+      //size_t mask_x = (size_t)(circle->center[0] * wi);
+      //size_t mask_y = (size_t)(circle->center[1] * hi);
+      //if (p->proxy_data[mask_i * stride + mask_y* p->proxy_width + mask_x] == 0)
+      //  continue;
+      for (int y = 0; y < p->proxy_height; y++)
+      {
+        if (y >= roi->height)
+          continue;
+        for (int x = 0; x < p->proxy_width; x ++)
+        {
+          if ( x >= roi->width)
+            continue;
+          buffer[x + y * roi->width] += p->proxy_data[mask_i * stride + y * p->proxy_width + x];
+        }
+      }
+    }
+    for (int i = 0; i < roi->width * roi->height; i++){
+      if (buffer[i] > 0)
+        buffer[i] = 1.0f;
+      else
+        buffer[i] = 0.0f;
+    }
+
+    // return 0;
+  }
+
 
   // we create a buffer of grid points for later interpolation: higher
   // speed and reduced memory footprint; we match size of buffer to
   // bounding box around the shape
-  const int w = roi->width;
-  const int h = roi->height;
-  const int px = roi->x;
-  const int py = roi->y;
-  const float iscale = 1.0f / roi->scale;
-  // scale dependent resolution
-  const int grid = CLAMP((10.0f * roi->scale + 2.0f) / 3.0f, 1, 4);
-  const int gw = (w + grid - 1) / grid + 1;  // grid dimension of total roi
-  const int gh = (h + grid - 1) / grid + 1;  // grid dimension of total roi
-
-  // initialize output buffer with zero
-  memset(buffer, 0, sizeof(float) * w * h);
-
-  dt_print(DT_DEBUG_MASKS | DT_DEBUG_PERF,
-           "[masks %s] point init took %0.04f sec",
-           form->name, dt_get_lap_time(&start2));
+  //const int w = roi->width;
+  //const int h = roi->height;
+  //const int px = roi->x;
+  //const int py = roi->y;
+  //const float iscale = 1.0f / roi->scale;
+  //// scale dependent resolution
+  //const int grid = CLAMP((10.0f * roi->scale + 2.0f) / 3.0f, 1, 4);
+  //const int gw = (w + grid - 1) / grid + 1;  // grid dimension of total roi
+  //const int gh = (h + grid - 1) / grid + 1;  // grid dimension of total roi
+//
+  //// initialize output buffer with zero
+  //memset(buffer, 0, sizeof(float) * w * h);
+//
+  //dt_print(DT_DEBUG_MASKS | DT_DEBUG_PERF,
+  //         "[masks %s] point init took %0.04f sec",
+  //         form->name, dt_get_lap_time(&start2));
 
   // we look at the outer point of the shape - no effects outside of
   // this point; we need many points as we do not know how the point
   // might get distorted in the pixelpipe
+  /*
   const size_t circpts = dt_masks_roundup(MIN(360, 2 * M_PI * total2), 8);
   float *const restrict circ = dt_alloc_align_float(circpts * 2);
   if(circ == NULL) return 0;
@@ -1383,19 +1400,21 @@ static int _point_get_mask_roi(const dt_iop_module_t *const restrict module,
   dt_print(DT_DEBUG_MASKS | DT_DEBUG_PERF,
            "[masks %s] point total render took %0.04f sec", form->name,
            dt_get_lap_time(&start1));
-
+  */
   return 1;
 }
 
 static GSList *_point_setup_mouse_actions(const struct dt_masks_form_t *const form)
 {
   GSList *lm = NULL;
+  /*
   lm = dt_mouse_action_create_simple(lm, DT_MOUSE_ACTION_SCROLL,
                                      0, _("[POINT] change size"));
   lm = dt_mouse_action_create_simple(lm, DT_MOUSE_ACTION_SCROLL,
                                      GDK_SHIFT_MASK, _("[POINT] change feather size"));
   lm = dt_mouse_action_create_simple(lm, DT_MOUSE_ACTION_SCROLL,
                                      GDK_CONTROL_MASK, _("[POINT] change opacity"));
+  */
   return lm;
 }
 
@@ -1419,8 +1438,7 @@ static void _point_set_hint_message(const dt_masks_form_gui_t *const gui,
 {
   // point has same controls on creation and on edit
   g_snprintf(msgbuf, msgbuf_len,
-             _("<b>size</b>: scroll, <b>feather size</b>: shift+scroll\n"
-               "<b>opacity</b>: ctrl+scroll (%d%%)"), opacity);
+             _("click to add"));
 }
 
 static void _point_duplicate_points(dt_develop_t *dev,
diff --git a/src/develop/pixelpipe_hb.c b/src/develop/pixelpipe_hb.c
index 4aab0d1544e5..9a49286e97e5 100644
--- a/src/develop/pixelpipe_hb.c
+++ b/src/develop/pixelpipe_hb.c
@@ -36,6 +36,11 @@
 #include "libs/lib.h"
 #include "gui/color_picker_proxy.h"
 
+#include "onnxruntime_c_api.h"
+#include "common/image_cache.h"
+
+#include "develop/image_file.h"
+
 #include <assert.h>
 #include <stdint.h>
 #include <stdlib.h>
@@ -236,6 +241,8 @@ gboolean dt_dev_pixelpipe_init_cached(dt_dev_pixelpipe_t *pipe,
   pipe->backbuf_zoom_x = 0.0f;
   pipe->backbuf_zoom_y = 0.0f;
   pipe->output_imgid = NO_IMGID;
+  pipe->has_proxy = FALSE;
+  pipe->n_masks = 0;
 
   memset(&pipe->scharr, 0, sizeof(dt_dev_detail_mask_t));
   pipe->want_detail_mask = FALSE;
@@ -1102,6 +1109,445 @@ static void _collect_histogram_on_CPU(dt_dev_pixelpipe_t *pipe,
   }
 }
 
+#define tcscmp strcmp
+// Shared state for the ONNX helpers below; ORT_ABORT_ON_ERROR prints the ORT error message to stderr and abort()s.
+const OrtApi* g_ort = NULL; // ONNX Runtime API table; assigned from OrtGetApiBase()->GetApi() before first use
+float conf = 0.3; // minimum detection score kept by prep_out_data
+float iou_threshold = 0.7; // NMS drops a box whose IoU with a kept box is above this
+#define ORT_ABORT_ON_ERROR(expr)                             \
+  do {                                                       \
+    OrtStatus* onnx_status = (expr);                         \
+    if (onnx_status != NULL) {                               \
+      const char* msg = g_ort->GetErrorMessage(onnx_status); \
+      fprintf(stderr, "%s\n", msg);                          \
+      g_ort->ReleaseStatus(onnx_status);                     \
+      abort();                                               \
+    }                                                        \
+  } while (0); // NOTE: this ';' is part of the expansion, so a call site works with or without its own
+
+
+typedef struct { // one detection candidate: corner box, score and mask coefficients
+  float x1; // box centre x minus half the width
+  float y1; // box centre y minus half the height
+  float x2; // box centre x plus half the width
+  float y2; // box centre y plus half the height
+  float score; // detection score; candidates are sorted and filtered on it
+  float* mask; // malloc'd mask coefficient vector, filled and freed in prep_out_data
+} TensorBoxes;
+
+// max()/min(): the larger/smaller of two floats; b wins on ties and on unordered (NaN) compares.
+float max(float a, float b) {
+    if (a > b) return a; else return b;
+}
+float min(float a, float b) {
+    if (a < b) return a; else return b;
+}
+// Intersection-over-union of two boxes, using the inclusive (+1) convention for widths/heights.
+float IoU(TensorBoxes a, TensorBoxes b) {
+    // extent of the overlap rectangle, clamped to zero when the boxes are disjoint
+    const float overlap_w = max(0, min(a.x2, b.x2) - max(a.x1, b.x1) + 1);
+    const float overlap_h = max(0, min(a.y2, b.y2) - max(a.y1, b.y1) + 1);
+    const float overlap = overlap_w * overlap_h;
+
+    const float area_a = (a.x2 - a.x1 + 1) * (a.y2 - a.y1 + 1);
+    const float area_b = (b.x2 - b.x1 + 1) * (b.y2 - b.y1 + 1);
+    const float joint = area_a + area_b - overlap;
+    return overlap / joint;
+}
+
+// qsort comparator: higher score first; equal scores are ordered by larger box area first.
+static int compare_scores(const void* a, const void* b) {
+    TensorBoxes* lhs = (TensorBoxes*)a;
+    TensorBoxes* rhs = (TensorBoxes*)b;
+    float key_l = lhs->score;
+    float key_r = rhs->score;
+    if (key_l == key_r) {
+      // tie on score: fall back to comparing the (exclusive) box areas
+      key_l = (lhs->x2 - lhs->x1) * (lhs->y2 - lhs->y1);
+      key_r = (rhs->x2 - rhs->x1) * (rhs->y2 - rhs->y1);
+    }
+    // descending order: the larger key sorts first
+    if (key_l < key_r) return 1;
+    if (key_l > key_r) return -1;
+    return 0;
+}
+
+// Sort `count` boxes in place with compare_scores (descending score, ties by descending area).
+static void sort_tensor_boxes_by_score(TensorBoxes* boxes, size_t count) {
+    qsort(boxes, count, sizeof(TensorBoxes), compare_scores);
+}
+
+// Greedy non-maximum suppression.
+// Sorts `boxes` in place (best score first) and copies to `output` every box whose IoU with
+// each previously kept box does not exceed iou_threshold.
+//   boxes  - candidate boxes, reordered by this call
+//   count  - number of candidates
+//   output - caller-provided array with room for `count` entries; box structs are copied
+//            shallowly, so the `mask` pointers still belong to `boxes`
+// Returns the number of boxes written to `output`.
+static size_t NMS(TensorBoxes* boxes, size_t count, TensorBoxes* output) {
+    qsort(boxes, count, sizeof(TensorBoxes), compare_scores);
+
+    // The kept boxes double as the suppression list, so no scratch allocation is needed:
+    // a candidate survives exactly when no already-kept box overlaps it too much.
+    size_t output_count = 0;
+    for (size_t i = 0; i < count; i++) {
+        int suppressed = 0;
+        for (size_t k = 0; k < output_count && !suppressed; k++) {
+            suppressed = IoU(output[k], boxes[i]) > iou_threshold;
+        }
+        if (!suppressed) {
+            output[output_count++] = boxes[i];
+        }
+    }
+    return output_count; // Return the number of boxes kept
+}
+
+// Render one binary mask per detection from the prototype planes.
+//
+// For detection i the low-resolution mask is sum_k masks_in[i][k] * protos[k], i.e. the
+// product masks_in @ protos with protos read as [mask_dim, mask_h * mask_w]. Each mask is then
+// resampled bilinearly to output_h x output_w and binarised: 1.0f where the interpolated value
+// is greater than zero, 0.0f everywhere else.
+//
+// Layouts (all contiguous, row-major):
+//   protos       [mask_dim, mask_h, mask_w]
+//   masks_in     [n, mask_dim]
+//   output_masks [n, output_h, output_w], storage provided by the caller
+//
+// `boxes` is only read for a debug print of the first box; clearing a mask outside its box is
+// still an open FIXME (see the resampling loop).
+//
+// Nothing is written when n or any dimension is not positive. When the scratch buffer cannot
+// be allocated the output is zero-filled and the function returns instead of calling exit(),
+// so a failed allocation no longer takes the whole application down.
+static void process_mask_native(
+    float *protos,       // [mask_dim, mask_h, mask_w]
+    float *masks_in,     // [n, mask_dim]
+    TensorBoxes* boxes,  // [n]
+    int n,               // Number of masks
+    int mask_dim,        // Channels
+    int mask_h,          // Height of protos
+    int mask_w,          // Width of protos
+    int output_h,        // Desired output height
+    int output_w,        // Desired output width
+    float *output_masks  // [n, output_h, output_w], written as 0.0f / 1.0f
+) {
+    // Nothing to do for an empty batch or degenerate sizes
+    if (n <= 0 || mask_h <= 0 || mask_w <= 0 || output_h <= 0 || output_w <= 0) {
+        return;
+    }
+
+    // Element counts are kept in size_t so n * h * w cannot overflow int arithmetic.
+    const size_t proto_px = (size_t)mask_h * (size_t)mask_w;
+    const size_t out_px = (size_t)output_h * (size_t)output_w;
+    const size_t total_out = (size_t)n * out_px;
+
+    // Allocate intermediate storage for masks [n, mask_h, mask_w]
+    float *masks = (float *)calloc((size_t)n * proto_px, sizeof(float));
+    if (!masks) {
+        fprintf(stderr, "Memory allocation failed\n");
+        // Leave the caller with well-defined (empty) masks rather than uninitialised memory.
+        memset(output_masks, 0, total_out * sizeof(float));
+        return;
+    }
+    printf("Allocated masks");
+
+    // Perform masks_in @ protos. protos is already contiguous as [mask_dim, mask_h * mask_w],
+    // so it is indexed directly rather than copied into a separate flattened buffer.
+    for (int i = 0; i < n; ++i) {
+        // coefficients of detection i
+        const float *coeffs = masks_in + (size_t)i * mask_dim;
+        // low-resolution plane that receives the weighted sum
+        float *plane = masks + (size_t)i * proto_px;
+        for (size_t j = 0; j < proto_px; ++j) {
+            float acc = 0.0f;
+            for (int k = 0; k < mask_dim; ++k) {
+                acc += coeffs[k] * protos[(size_t)k * proto_px + j];
+            }
+            plane[j] = acc;
+        }
+    }
+
+    printf("Created masks");
+
+    // Upsample each low-resolution mask to the output size
+    for (int i = 0; i < n; ++i) {
+        // low-resolution source plane and full-resolution destination plane of detection i
+        const float *plane = masks + (size_t)i * proto_px;
+        float *dst = output_masks + (size_t)i * out_px;
+        // debug aid: dump the first box
+        if (i == 0){
+          printf("x1 %f, x2 %f, y1 %f, y2 %f\n", boxes[i].x1, boxes[i].x2, boxes[i].y1, boxes[i].y2);
+        }
+
+        for (int y = 0; y < output_h; ++y) {
+            // Source row, its lower neighbour (clamped to the last row) and the vertical weight
+            const float src_y = (float)y * mask_h / output_h;
+            const int y1 = (int)src_y;
+            const int y2 = (y1 + 1 < mask_h) ? y1 + 1 : y1;
+            const float dy = src_y - y1;
+
+            for (int x = 0; x < output_w; ++x) {
+                // Calculate the corresponding coordinates in the original mask
+                const float src_x = (float)x * mask_w / output_w;
+
+                // Source column, its right neighbour (clamped) and the horizontal weight
+                const int x1 = (int)src_x;
+                const int x2 = (x1 + 1 < mask_w) ? x1 + 1 : x1;
+                const float dx = src_x - x1;
+
+                // Get the pixel values from the original mask
+                const float top_left = plane[(size_t)y1 * mask_w + x1];
+                const float top_right = plane[(size_t)y1 * mask_w + x2];
+                const float bottom_left = plane[(size_t)y2 * mask_w + x1];
+                const float bottom_right = plane[(size_t)y2 * mask_w + x2];
+
+                // Perform bilinear interpolation
+                const float interpolated_value = (1 - dx) * (1 - dy) * top_left +
+                                                 dx * (1 - dy) * top_right +
+                                                 (1 - dx) * dy * bottom_left +
+                                                 dx * dy * bottom_right;
+
+                // FIXME
+                // if ((x < boxes[i].x1) || (x > boxes[i].x2) || (y < boxes[i].y1) || (y > boxes[i].y2))
+                //   interpolated_value = 0.0f;
+
+                // Set the value in the output mask
+                dst[(size_t)y * output_w + x] = interpolated_value;
+            }
+        }
+    }
+
+    printf("Loaded masks");
+
+    // Binarise in place: strictly positive values become 1, everything else 0
+    // (NaN compares false and therefore also maps to 0).
+    for (size_t idx = 0; idx < total_out; ++idx) {
+        output_masks[idx] = (output_masks[idx] > 0.0f) ? 1.0f : 0.0f;
+    }
+
+    printf("Refined masks");
+    // Free allocated memory
+    free(masks);
+}
+
+// Turn the raw detection tensor into one mask per surviving detection.
+//   input_data[0] - detection tensor read as [definition_size][numb_boxes]: rows 0-3 are the box
+//                   centre x/y and width/height, row 4 the score, rows 5.. the mask coefficients
+//   input_data[5] - mask prototypes handed to process_mask_native
+// Boxes scoring below `conf` or with a negative size are skipped, the rest go through NMS and
+// are rendered at output_height x output_width. On success *output receives a malloc'd buffer
+// (caller frees) and *n_masks the number of masks in it; when nothing is detected or an
+// allocation fails both are left untouched and every temporary is released.
+static void prep_out_data(float* input_data[6], int64_t definition_size, int64_t numb_boxes, float** output, size_t output_height, size_t output_width, size_t* n_masks){
+  const float* mask = input_data[0];
+  const size_t coordinates_count = 4;
+  const size_t class_count = 1;
+  // a non-positive or too-small tensor shape would wrap around in the size_t arithmetic below
+  if (numb_boxes <= 0 || definition_size <= (int64_t)(coordinates_count + class_count)) return;
+  const size_t mask_dim = definition_size - coordinates_count - class_count;
+  const size_t b_stride = numb_boxes;
+
+  // everything released at `cleanup` is declared up front so each exit path can free it
+  size_t counter = 0;
+  size_t num_boxes = 0;
+  TensorBoxes* output_boxes = NULL;
+  float* masks_in = NULL;
+  float* output_masks = NULL;
+  TensorBoxes* boxes = malloc(numb_boxes * sizeof(TensorBoxes));
+  if (!boxes) return;
+
+  for (int64_t i = 0; i < numb_boxes; i++) {
+    const float score = mask[i + 4 * b_stride];
+    if (score < conf) continue;
+    const float w = mask[i + (2 * b_stride)];
+    const float h = mask[i + (3 * b_stride)];
+    if (w < 0 || h < 0) continue;
+
+    float* coeffs = (float*)malloc(mask_dim * sizeof(float));
+    if (!coeffs) goto cleanup;
+    for (size_t j = 0; j < mask_dim; j++) {
+      coeffs[j] = mask[i + (5 + j) * b_stride];
+    }
+
+    // centre/size -> corner coordinates
+    boxes[counter].x1 = mask[i + (0 * b_stride)] - (w / 2);
+    boxes[counter].y1 = mask[i + (1 * b_stride)] - (h / 2);
+    boxes[counter].x2 = mask[i + (0 * b_stride)] + (w / 2);
+    boxes[counter].y2 = mask[i + (1 * b_stride)] + (h / 2);
+    boxes[counter].score = score;
+    boxes[counter].mask = coeffs;
+    counter++;
+  }
+
+  printf("counter: %zu\n", counter);
+  if (counter == 0) goto cleanup;
+  sort_tensor_boxes_by_score(boxes, counter);
+
+  // NMS copies box structs, so output_boxes[i].mask aliases a boxes[].mask and is not freed separately
+  output_boxes = (TensorBoxes*)malloc(counter * sizeof(TensorBoxes));
+  if (!output_boxes) goto cleanup;
+  num_boxes = NMS(boxes, counter, output_boxes);
+  printf("num_boxes: %zu\n", num_boxes);
+
+  // NOTE(review): assumes the model emits 256 x 256 prototypes -- confirm against the model
+  const int mask_h = 256, mask_w = 256;
+  masks_in = (float *)malloc(num_boxes * mask_dim * sizeof(float));
+  output_masks = (float *)malloc(output_height * output_width * num_boxes * sizeof(float));
+  if (!masks_in || !output_masks) goto cleanup;
+
+  for (size_t i = 0; i < num_boxes; ++i){
+    for (size_t j = 0; j < mask_dim; ++j){
+      masks_in[i * mask_dim + j] = output_boxes[i].mask[j];
+    }
+  }
+
+  printf("Preparing masks\n");
+  process_mask_native(input_data[5], masks_in, output_boxes, num_boxes, mask_dim, mask_h, mask_w, output_height, output_width, output_masks);
+  printf("Masks generated");
+
+  *output = output_masks;
+  *n_masks = num_boxes;
+  output_masks = NULL; // ownership has moved to the caller
+  printf("Mask loaded");
+
+cleanup:
+  for (size_t i = 0; i < counter; i++) { free(boxes[i].mask); }
+  free(boxes); free(output_boxes); free(masks_in); free(output_masks);
+}
+
+// Nearest-neighbour resample of a planar 3-channel float image; *out is malloc'd (caller frees) or NULL with *output_count == 0 on allocation failure.
+static void resize_image(const float** input, const int input_height, const int input_width, float** out, size_t output_height, size_t output_width, size_t* output_count)
+{
+  const size_t out_stride = output_height * output_width;
+  const size_t in_stride = (size_t)input_height * (size_t)input_width;
+  const float height_ratio = (float)input_height / (float)output_height;
+  const float width_ratio = (float)input_width / (float)output_width;
+  float* output_data = (float*)malloc(3 * out_stride * sizeof(float)); // one height x width plane per channel
+  *out = output_data;
+  *output_count = output_data ? out_stride * 3 : 0;
+  if (!output_data) return;
+  for (size_t c = 0; c < 3; c++){
+    for (size_t i = 0; i < output_height; i++){
+      const size_t input_i = (size_t)((float)i * height_ratio);
+      for (size_t j = 0; j < output_width; j++){
+        const size_t input_j = (size_t)((float)j * width_ratio);
+        output_data[c * out_stride + i * output_width + j] = (*input)[c * in_stride + input_i * input_width + input_j];
+      }
+    }
+  }
+}
+
+// Convert an interleaved 8-bit image with 3 bytes per pixel (h x w x 3) into a newly malloc'd
+// planar float image (3 x h x w) scaled from 0-255 to 0-1; *output_count receives 3 * h * w.
+void hwc_to_chw(const uint8_t* input, const int h, const int w, float** output, size_t* output_count) {
+  const size_t pixels = h * w;
+  const size_t total = pixels * 3;
+  float* planar = (float*)malloc(total * sizeof(float));
+  *output_count = total;
+  assert(planar != NULL);
+  for (size_t k = 0; k < total; ++k) {
+    planar[(k % 3) * pixels + k / 3] = ((float)input[k])/255.0; // k / 3 is the pixel, k % 3 its channel
+  }
+  *output = planar;
+}
+
+// Run the segmentation model on a planar (CHW) float image of size h x w.
+// The image is resampled to the fixed 1024 x 1024 network input, the session is executed and
+// prep_out_data decodes the outputs into h x w masks.
+//   session     - ONNX Runtime session to run; g_ort must already point at the API table
+//   input_image - 3 * h * w floats, one plane per channel
+//   out         - receives the malloc'd mask buffer from prep_out_data (caller frees); it is
+//                 not written when no mask is produced
+//   n_masks     - receives the number of masks stored in *out
+// Returns 0 on success, 1 when the model input could not be allocated or the shape of the first
+// output cannot be used. ONNX Runtime failures abort() through ORT_ABORT_ON_ERROR.
+int run_inference(OrtSession* session, const float* input_image, const int h, const int w, float** out, size_t * n_masks) {
+  enum { MODEL_SIDE = 1024, N_OUTPUTS = 6 };
+  printf("Roi h:%d, w:%d\n", h, w);
+
+  float* model_input = NULL;
+  size_t model_input_ele_count = 0;
+  resize_image(&input_image, h, w, &model_input, MODEL_SIDE, MODEL_SIDE, &model_input_ele_count);
+  if (model_input == NULL) {
+    fprintf(stderr, "run_inference: could not allocate the model input\n");
+    return 1;
+  }
+
+  OrtMemoryInfo* memory_info;
+  ORT_ABORT_ON_ERROR(g_ort->CreateCpuMemoryInfo(OrtArenaAllocator, OrtMemTypeDefault, &memory_info));
+  const int64_t input_shape[] = {1, 3, MODEL_SIDE, MODEL_SIDE};
+  const size_t input_shape_len = sizeof(input_shape) / sizeof(input_shape[0]);
+  const size_t model_input_len = model_input_ele_count * sizeof(float);
+
+  // The tensor is created over model_input itself, so that buffer is only freed after the tensor.
+  OrtValue* input_tensor = NULL;
+  ORT_ABORT_ON_ERROR(g_ort->CreateTensorWithDataAsOrtValue(memory_info, model_input, model_input_len, input_shape,
+                                                           input_shape_len, ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT,
+                                                           &input_tensor));
+  assert(input_tensor != NULL);
+  int is_tensor;
+  ORT_ABORT_ON_ERROR(g_ort->IsTensor(input_tensor, &is_tensor));
+  assert(is_tensor);
+
+  const char* input_names[] = {"images"};
+  const char* output_names[] = {"output0", "output1", "onnx::Shape_1304", "onnx::Shape_1323", "onnx::Concat_1263", "onnx::Shape_1215"};
+  OrtValue* output_tensor[N_OUTPUTS] = {NULL};
+
+  printf("Running inference\n");
+  ORT_ABORT_ON_ERROR(g_ort->Run(session, NULL, input_names, (const OrtValue* const*)&input_tensor, 1, output_names, N_OUTPUTS,
+                                output_tensor));
+  printf("Inference done\n");
+
+  for (int i = 0; i < N_OUTPUTS; i++) {
+    assert(output_tensor[i] != NULL);
+    ORT_ABORT_ON_ERROR(g_ort->IsTensor(output_tensor[i], &is_tensor));
+    assert(is_tensor);
+  }
+
+  printf("Tensors are not null and is tensor\n");
+  OrtTensorTypeAndShapeInfo* tensor_info;
+  ORT_ABORT_ON_ERROR(g_ort->GetTensorTypeAndShape(output_tensor[0], &tensor_info));
+  printf("Gather tensor info");
+  // Get the shape dimensions
+  size_t num_dims;
+  ORT_ABORT_ON_ERROR(g_ort->GetDimensionsCount(tensor_info, &num_dims));
+  printf("Number of dimensions: %zu\n", num_dims);
+
+  // Raw data pointers of all outputs; the memory stays owned by the OrtValues released below
+  float* output_tensor_data_t[N_OUTPUTS];
+  for (int i = 0; i < N_OUTPUTS; i++) {
+    output_tensor_data_t[i] = NULL;
+    ORT_ABORT_ON_ERROR(g_ort->GetTensorMutableData(output_tensor[i], (void**)&output_tensor_data_t[i]));
+  }
+
+  printf("Data gathered\n");
+
+  // prep_out_data is given shape[1] and shape[2], so the rank is checked before reading them
+  int ret = 1;
+  int64_t* shape = (num_dims >= 3) ? (int64_t*)malloc(num_dims * sizeof(int64_t)) : NULL;
+  if (shape != NULL) {
+    ORT_ABORT_ON_ERROR(g_ort->GetDimensions(tensor_info, shape, num_dims));
+    prep_out_data(output_tensor_data_t, shape[1], shape[2], out, h, w, n_masks);
+    ret = 0;
+  } else {
+    fprintf(stderr, "run_inference: cannot read the output shape (rank %zu)\n", num_dims);
+  }
+
+  for (int i = 0; i < N_OUTPUTS; i++) {
+    g_ort->ReleaseValue(output_tensor[i]);
+  }
+
+  g_ort->ReleaseMemoryInfo(memory_info);
+  free(shape);
+  g_ort->ReleaseTensorTypeAndShapeInfo(tensor_info);
+  g_ort->ReleaseValue(input_tensor);
+  free(model_input);
+
+  return ret;
+}
+
 static gboolean _pixelpipe_process_on_CPU(dt_dev_pixelpipe_t *pipe,
                                           dt_develop_t *dev,
                                           float *input,
@@ -1163,6 +1609,107 @@ static gboolean _pixelpipe_process_on_CPU(dt_dev_pixelpipe_t *pipe,
   if(dt_atomic_get_int(&pipe->shutdown))
     return TRUE;
 
+  int w = piece->pipe->backbuf_width;
+  int h = piece->pipe->backbuf_height;
+
+  size_t stride = w * h;
+  if (stride > 0 && (pipe->has_proxy == FALSE)){
+    pipe->has_proxy = TRUE;
+    int colors = piece->colors;
+    int bpc = piece->bpc;
+    
+    //const dt_develop_blend_params_t *const d = piece->blendop_data;
+    //dt_develop_blend_colorspace_t blend_csp = d->blend_cst;
+    //(dt_develop_blend_colorspace_t)blend_csp;
+    dt_image_t* image  = dt_image_cache_get(darktable.image_cache, piece->pipe->image.id, 'r');
+    /*
+    / backbuffer (output)
+    uint8_t *backbuf;
+    size_t backbuf_size;
+    int backbuf_width, backbuf_height;
+    */
+    printf("image w:%d, h:%d\n", image->width, image->height);
+    printf("output w:%d, h:%d\n", w, h);
+
+    uint8_t* local_copy = (uint8_t*)malloc(4 * sizeof(uint8_t) * stride);
+    memcpy(local_copy, piece->pipe->backbuf, 4 * stride * sizeof(uint8_t));
+    uint8_t* new_image = (uint8_t*)malloc(3 * sizeof(uint8_t) * stride);
+    if (new_image == NULL){
+      printf("malloc new image incorrect\n");
+      return 1;
+    }
+    for (int i = 0; i < stride; i++ ){
+      new_image[i*3 + 0] = local_copy[i*4 + 0];
+      new_image[i*3 + 1] = local_copy[i*4 + 1];
+      new_image[i*3 + 2] = local_copy[i*4 + 2];
+    }
+
+    if (write_image_file(new_image, h, w, "/home/miko/Desktop/test1.png") != 0) {
+        printf("Error writing image\n");
+    }
+    
+    float *converted_image = NULL;
+    size_t output_count;
+
+    hwc_to_chw(new_image, h, w, &converted_image, &output_count);
+    
+    g_ort = OrtGetApiBase()->GetApi(ORT_API_VERSION);
+    if (!g_ort) {
+      fprintf(stderr, "Failed to init ONNX Runtime engine.\n");
+      return -1;
+    }
+
+    char model_path[] = "/home/miko/Documents/OpenSourceProjects/darktable_plugins/fast_sam_example/build/fast_sam_1024.onnx";
+
+
+    OrtEnv* env;
+    ORT_ABORT_ON_ERROR(g_ort->CreateEnv(ORT_LOGGING_LEVEL_WARNING, "test", &env));
+    assert(env != NULL);
+
+    OrtSessionOptions* session_options;
+    ORT_ABORT_ON_ERROR(g_ort->CreateSessionOptions(&session_options));
+
+    OrtSession* session;
+    ORT_ABORT_ON_ERROR(g_ort->CreateSession(env, model_path, session_options, &session));
+    
+    float *out = NULL;
+
+    size_t n_masks = 0;
+
+    run_inference(session, converted_image, h, w, &out, &n_masks);
+    
+    //g_ort->ReleaseSessionOptions(session_options);
+    //g_ort->ReleaseSession(session);
+    //g_ort->ReleaseEnv(env);
+
+    //memset(buffer, 0, sizeof(float) * w * h);
+    for (int i = 0; i < stride; i++)
+    {
+      new_image[i * 3 + 0] = (uint8_t)(out[i] * 255.0);
+      new_image[i * 3 + 1] = (uint8_t)(out[i] * 255.0);
+      new_image[i * 3 + 2] = (uint8_t)(out[i] * 255.0);
+    }
+
+    if (write_image_file(new_image, h, w, "/home/miko/Desktop/test2.png") != 0)
+    {
+      printf("Error writing image\n");
+    }
+
+    pipe->proxy_data = (uint8_t*)malloc(sizeof(uint8_t) * stride * n_masks);
+    for (int i = 0; i < stride; i++){
+      pipe->proxy_data[i] = (uint8_t)(out[i] * 255.0);
+    }
+
+    pipe->proxy_width = w;
+    pipe->proxy_height = h;
+
+    
+    free(out);
+    free(new_image);
+    free(converted_image);
+
+    printf("Colors %d, Bits per channel %d\n", colors, bpc);
+  }
   _collect_histogram_on_CPU(pipe, dev, input, roi_in, module, piece, pixelpipe_flow);
 
   if(dt_atomic_get_int(&pipe->shutdown))
diff --git a/src/develop/pixelpipe_hb.h b/src/develop/pixelpipe_hb.h
index d31f65910645..23a0ad939de9 100644
--- a/src/develop/pixelpipe_hb.h
+++ b/src/develop/pixelpipe_hb.h
@@ -72,6 +72,8 @@ typedef struct dt_dev_pixelpipe_iop_t
   dt_iop_buffer_dsc_t dsc_out;
 
   GHashTable *raster_masks;
+
+  gboolean has_proxy;
 } dt_dev_pixelpipe_iop_t;
 
 typedef enum dt_dev_pixelpipe_change_t
@@ -195,6 +197,12 @@ typedef struct dt_dev_pixelpipe_t
   GList *forms;
   // the masks generated in the pipe for later reusal are inside dt_dev_pixelpipe_iop_t
   gboolean store_all_raster_masks;
+
+  gboolean has_proxy;
+  size_t n_masks;
+  uint8_t* proxy_data;
+  size_t proxy_width, proxy_height;
+
 } dt_dev_pixelpipe_t;
 
 struct dt_develop_t;

From ff417f3915ed24e8dc538f1ee99567df422b660c Mon Sep 17 00:00:00 2001
From: MikoMikarro <lopezcuestam@gmail.com>
Date: Fri, 31 Jan 2025 17:23:33 +0100
Subject: [PATCH 07/14] base commit

---
 README.md                | 4 ++++
 src/external/onnxruntime | 1 +
 2 files changed, 5 insertions(+)
 create mode 160000 src/external/onnxruntime

diff --git a/README.md b/README.md
index 30e092d78b5d..4fc44210bde7 100644
--- a/README.md
+++ b/README.md
@@ -1,3 +1,7 @@
+# Unstable FORK
+THIS FORK IS JUST FOR DEVELOPMENT AND TEST PURPOSES
+DON'T USE FOR SENSITIVE WORK
+
 [![icon](/data/pixmaps/idbutton.png?raw=true)](https://www.darktable.org/) darktable [![GitHub Workflow Status (branch)](https://img.shields.io/github/actions/workflow/status/darktable-org/darktable/ci.yml?branch=master)](https://github.com/darktable-org/darktable/actions/workflows/ci.yml?query=branch%3Amaster+is%3Acompleted+event%3Apush) [![CII Best Practices](https://bestpractices.coreinfrastructure.org/projects/470/badge)](https://bestpractices.coreinfrastructure.org/projects/470)
 =========
 
diff --git a/src/external/onnxruntime b/src/external/onnxruntime
new file mode 160000
index 000000000000..4e4fd2bdcf0f
--- /dev/null
+++ b/src/external/onnxruntime
@@ -0,0 +1 @@
+Subproject commit 4e4fd2bdcf0f12e1c897c77e6384cb1e97cd80c3

From 44427fb56f89a4235f1ec8469a6486d31fdeab32 Mon Sep 17 00:00:00 2001
From: MikoMikarro <lopezcuestam@gmail.com>
Date: Fri, 31 Jan 2025 17:32:07 +0100
Subject: [PATCH 08/14] reconstruction of commit "8532d72"

---
 src/develop/masks/point.c  | 41 +++++++++++++++++++------
 src/develop/pixelpipe_hb.c | 63 +++++++++++++++++++-------------------
 2 files changed, 64 insertions(+), 40 deletions(-)

diff --git a/src/develop/masks/point.c b/src/develop/masks/point.c
index 1cb3abe9a8bd..8df520ef1ea5 100644
--- a/src/develop/masks/point.c
+++ b/src/develop/masks/point.c
@@ -1162,25 +1162,47 @@ static int _point_get_mask_roi(const dt_iop_module_t *const restrict module,
   printf("I'm on get_mask_roi\n");
   dt_dev_pixelpipe_t* p = piece->pipe;
   dt_masks_point_circle_t *circle = form->points->data;
-  const int wi = piece->pipe->iwidth, hi = piece->pipe->iheight;
+  int wi = piece->pipe->iwidth, hi = piece->pipe->iheight;
   
-  //wi /= 
+  wi /= 4.65;
+  hi /= 4.65;
 
-  printf("Circle position:\nx: %f, y: %f", circle->center[0] * wi, circle->center[1] * hi);
+  printf("Circle position:\nx: %f, y: %f\n", circle->center[0] * wi, circle->center[1] * hi);
 
   if (p->has_proxy){
-    //size_t n_masks = p->n_masks;
+    size_t n_masks = p->n_masks;
     size_t stride = p->proxy_width * p->proxy_height;
-
+    printf("N masks: %ld\n", n_masks);
     for (int i = 0; i < roi->width * roi->height; i++){
       buffer[i] = 0.0f;
     }
 
+    int mask_x = (int)(circle->center[0] * wi);
+    if (mask_x >= roi->width){
+      mask_x = roi->width - 1;
+    }
+
+    int mask_y = (int)(circle->center[1] * hi);
+    if (mask_y >= roi->height){
+      mask_y = roi->height - 1;
+    }
+
+    // for (int  x = 0; x < 10; x ++){
+    //   int x_pos = mask_x - 5 + x;
+    //   if (x_pos < 0) continue;
+    //   if (x_pos >= roi->width) continue;
+    //     
+    //   for (int  y = 0; y < 10; y ++){
+    //     int y_pos = mask_y - 5 + y;
+    //     if (y_pos < 0) continue;
+    //     if (y_pos >= roi->height) continue;
+    //     buffer[x_pos + y_pos * roi->width] = 1.0;
+    //   } 
+    // }
+    
     for (int mask_i = 0; mask_i < 1; mask_i++){
-      //size_t mask_x = (size_t)(circle->center[0] * wi);
-      //size_t mask_y = (size_t)(circle->center[1] * hi);
-      //if (p->proxy_data[mask_i * stride + mask_y* p->proxy_width + mask_x] == 0)
-      //  continue;
+      // if (p->proxy_data[mask_i * stride + mask_y * p->proxy_width + mask_x] == 0) continue;
+
       for (int y = 0; y < p->proxy_height; y++)
       {
         if (y >= roi->height)
@@ -1192,6 +1214,7 @@ static int _point_get_mask_roi(const dt_iop_module_t *const restrict module,
           buffer[x + y * roi->width] += p->proxy_data[mask_i * stride + y * p->proxy_width + x];
         }
       }
+      
     }
     for (int i = 0; i < roi->width * roi->height; i++){
       if (buffer[i] > 0)
diff --git a/src/develop/pixelpipe_hb.c b/src/develop/pixelpipe_hb.c
index 9a49286e97e5..c9756ee6692d 100644
--- a/src/develop/pixelpipe_hb.c
+++ b/src/develop/pixelpipe_hb.c
@@ -1608,7 +1608,7 @@ static gboolean _pixelpipe_process_on_CPU(dt_dev_pixelpipe_t *pipe,
 
   if(dt_atomic_get_int(&pipe->shutdown))
     return TRUE;
-
+  
   int w = piece->pipe->backbuf_width;
   int h = piece->pipe->backbuf_height;
 
@@ -1621,14 +1621,9 @@ static gboolean _pixelpipe_process_on_CPU(dt_dev_pixelpipe_t *pipe,
     //const dt_develop_blend_params_t *const d = piece->blendop_data;
     //dt_develop_blend_colorspace_t blend_csp = d->blend_cst;
     //(dt_develop_blend_colorspace_t)blend_csp;
-    dt_image_t* image  = dt_image_cache_get(darktable.image_cache, piece->pipe->image.id, 'r');
-    /*
-    / backbuffer (output)
-    uint8_t *backbuf;
-    size_t backbuf_size;
-    int backbuf_width, backbuf_height;
-    */
-    printf("image w:%d, h:%d\n", image->width, image->height);
+    // dt_image_t* image  = dt_image_cache_get(darktable.image_cache, piece->pipe->image.id, 'r');
+    
+    // printf("image w:%d, h:%d\n", image->width, image->height);
     printf("output w:%d, h:%d\n", w, h);
 
     uint8_t* local_copy = (uint8_t*)malloc(4 * sizeof(uint8_t) * stride);
@@ -1678,38 +1673,44 @@ static gboolean _pixelpipe_process_on_CPU(dt_dev_pixelpipe_t *pipe,
 
     run_inference(session, converted_image, h, w, &out, &n_masks);
     
-    //g_ort->ReleaseSessionOptions(session_options);
-    //g_ort->ReleaseSession(session);
-    //g_ort->ReleaseEnv(env);
+    g_ort->ReleaseSessionOptions(session_options);
+    g_ort->ReleaseSession(session);
+    g_ort->ReleaseEnv(env);
 
     //memset(buffer, 0, sizeof(float) * w * h);
-    for (int i = 0; i < stride; i++)
-    {
-      new_image[i * 3 + 0] = (uint8_t)(out[i] * 255.0);
-      new_image[i * 3 + 1] = (uint8_t)(out[i] * 255.0);
-      new_image[i * 3 + 2] = (uint8_t)(out[i] * 255.0);
-    }
 
-    if (write_image_file(new_image, h, w, "/home/miko/Desktop/test2.png") != 0)
+    if (out)
     {
-      printf("Error writing image\n");
-    }
+      for (int i = 0; i < stride; i++)
+      {
+        new_image[i * 3 + 0] = (uint8_t)(out[i] * 255.0);
+        new_image[i * 3 + 1] = (uint8_t)(out[i] * 255.0);
+        new_image[i * 3 + 2] = (uint8_t)(out[i] * 255.0);
+      }
 
-    pipe->proxy_data = (uint8_t*)malloc(sizeof(uint8_t) * stride * n_masks);
-    for (int i = 0; i < stride; i++){
-      pipe->proxy_data[i] = (uint8_t)(out[i] * 255.0);
-    }
+      if (write_image_file(new_image, h, w, "/home/miko/Desktop/test2.png") != 0)
+      {
+        printf("Error writing image\n");
+      }
 
-    pipe->proxy_width = w;
-    pipe->proxy_height = h;
+      pipe->proxy_data = (uint8_t*)malloc(sizeof(uint8_t) * stride * n_masks);
+      for (int i = 0; i < stride; i++){
+        pipe->proxy_data[i] = (uint8_t)(out[i] * 255.0);
+      }
 
-    
-    free(out);
-    free(new_image);
-    free(converted_image);
+      pipe->proxy_width = w;
+      pipe->proxy_height = h;
+      pipe->n_masks = n_masks;
+
+      
+      free(out);
+      free(new_image);
+      free(converted_image);
+    }
 
     printf("Colors %d, Bits per channel %d\n", colors, bpc);
   }
+  
   _collect_histogram_on_CPU(pipe, dev, input, roi_in, module, piece, pixelpipe_flow);
 
   if(dt_atomic_get_int(&pipe->shutdown))

From 73255a453794c0abe75f372392bbb93d0cc5a8c7 Mon Sep 17 00:00:00 2001
From: MikoMikarro <lopezcuestam@gmail.com>
Date: Fri, 31 Jan 2025 17:34:14 +0100
Subject: [PATCH 09/14] reconstruction of commit ee3db7f "corrected mask
 loading and scaling of the marker to get the correct position of the mask"

---
 src/develop/masks/point.c  |  69 +++++---
 src/develop/pixelpipe_hb.c | 350 ++++++++++++++++++++++++++-----------
 2 files changed, 288 insertions(+), 131 deletions(-)

diff --git a/src/develop/masks/point.c b/src/develop/masks/point.c
index 8df520ef1ea5..549d0fe4d8ea 100644
--- a/src/develop/masks/point.c
+++ b/src/develop/masks/point.c
@@ -1163,9 +1163,6 @@ static int _point_get_mask_roi(const dt_iop_module_t *const restrict module,
   dt_dev_pixelpipe_t* p = piece->pipe;
   dt_masks_point_circle_t *circle = form->points->data;
   int wi = piece->pipe->iwidth, hi = piece->pipe->iheight;
-  
-  wi /= 4.65;
-  hi /= 4.65;
 
   printf("Circle position:\nx: %f, y: %f\n", circle->center[0] * wi, circle->center[1] * hi);
 
@@ -1177,31 +1174,52 @@ static int _point_get_mask_roi(const dt_iop_module_t *const restrict module,
       buffer[i] = 0.0f;
     }
 
-    int mask_x = (int)(circle->center[0] * wi);
-    if (mask_x >= roi->width){
-      mask_x = roi->width - 1;
-    }
+    float *const restrict circ = dt_alloc_align_float(1 * 2);
+    circ[0] = circle->center[0] * wi;
+    circ[1] = circle->center[1] * hi;
 
-    int mask_y = (int)(circle->center[1] * hi);
-    if (mask_y >= roi->height){
-      mask_y = roi->height - 1;
+    dt_dev_distort_transform_plus(module->dev, piece->pipe, module->iop_order,
+                                    DT_DEV_TRANSFORM_DIR_BACK_INCL, circ,
+                                    1);
+    int mask_x = (int)(circ[0]);
+
+    int mask_y = (int)(circ[1]);
+
+    const int px = roi->x;
+    const int py = roi->y;
+    const float iscale = 1.0f / roi->scale;
+
+    mask_x = mask_x / iscale - px;
+    mask_y = mask_y / iscale - py;
+
+    printf("Mask position:\nx: %d, y: %d\n", mask_x, mask_y);
+
+    for (int  x = 0; x < 10; x ++){
+      int x_pos = mask_x - 5 + x;
+      if (x_pos < 0) continue;
+      if (x_pos >= roi->width) continue;
+        
+      for (int  y = 0; y < 10; y ++){
+        int y_pos = mask_y - 5 + y;
+        if (y_pos < 0) continue;
+        if (y_pos >= roi->height) continue;
+        buffer[x_pos + y_pos * roi->width] = 1.0;
+      } 
     }
-
-    // for (int  x = 0; x < 10; x ++){
-    //   int x_pos = mask_x - 5 + x;
-    //   if (x_pos < 0) continue;
-    //   if (x_pos >= roi->width) continue;
-    //     
-    //   for (int  y = 0; y < 10; y ++){
-    //     int y_pos = mask_y - 5 + y;
-    //     if (y_pos < 0) continue;
-    //     if (y_pos >= roi->height) continue;
-    //     buffer[x_pos + y_pos * roi->width] = 1.0;
-    //   } 
-    // }
     
-    for (int mask_i = 0; mask_i < 1; mask_i++){
-      // if (p->proxy_data[mask_i * stride + mask_y * p->proxy_width + mask_x] == 0) continue;
+    for (int mask_i = 0; mask_i < n_masks; mask_i++){
+      // printf("Checking on mask: %d", mask_i);
+      if (mask_y >= roi->height) continue;
+      if (mask_y < 0) continue;
+      if (mask_x >= roi->width) continue;
+      if (mask_x < 0) continue;
+
+      if (p->proxy_data[mask_i * stride + mask_y * p->proxy_width + mask_x] == 0)
+      {
+        // printf("... discarded\n");
+        continue;
+      }
+      // printf("... loaded\n");
 
       for (int y = 0; y < p->proxy_height; y++)
       {
@@ -1216,6 +1234,7 @@ static int _point_get_mask_roi(const dt_iop_module_t *const restrict module,
       }
       
     }
+
     for (int i = 0; i < roi->width * roi->height; i++){
       if (buffer[i] > 0)
         buffer[i] = 1.0f;
diff --git a/src/develop/pixelpipe_hb.c b/src/develop/pixelpipe_hb.c
index c9756ee6692d..ccbb0a467e02 100644
--- a/src/develop/pixelpipe_hb.c
+++ b/src/develop/pixelpipe_hb.c
@@ -1,6 +1,6 @@
 /*
     This file is part of darktable,
-    Copyright (C) 2009-2024 darktable developers.
+    Copyright (C) 2009-2025 darktable developers.
 
     darktable is free software: you can redistribute it and/or modify
     it under the terms of the GNU General Public License as published by
@@ -255,9 +255,9 @@ gboolean dt_dev_pixelpipe_init_cached(dt_dev_pixelpipe_t *pipe,
   pipe->bypass_blendif = FALSE;
   pipe->input_timestamp = 0;
   pipe->levels = IMAGEIO_RGB | IMAGEIO_INT8;
-  dt_pthread_mutex_init(&(pipe->mutex), NULL);
-  dt_pthread_mutex_init(&(pipe->backbuf_mutex), NULL);
-  dt_pthread_mutex_init(&(pipe->busy_mutex), NULL);
+  dt_pthread_mutex_init(&pipe->mutex, NULL);
+  dt_pthread_mutex_init(&pipe->backbuf_mutex, NULL);
+  dt_pthread_mutex_init(&pipe->busy_mutex, NULL);
   pipe->icc_type = DT_COLORSPACE_NONE;
   pipe->icc_filename = NULL;
   pipe->icc_intent = DT_INTENT_LAST;
@@ -269,7 +269,8 @@ gboolean dt_dev_pixelpipe_init_cached(dt_dev_pixelpipe_t *pipe,
   pipe->input_profile_info = NULL;
   pipe->output_profile_info = NULL;
   pipe->runs = 0;
-
+  pipe->bcache_data = NULL;
+  pipe->bcache_hash = 0;
   return dt_dev_pixelpipe_cache_init(pipe, entries, size, memlimit);
 }
 
@@ -330,6 +331,7 @@ void dt_dev_pixelpipe_cleanup(dt_dev_pixelpipe_t *pipe)
   dt_dev_pixelpipe_cleanup_nodes(pipe);
   // so now it's safe to clean up cache:
   dt_dev_pixelpipe_cache_cleanup(pipe);
+  dt_free_align(pipe->bcache_data);
 
   pipe->icc_type = DT_COLORSPACE_NONE;
   g_free(pipe->icc_filename);
@@ -341,7 +343,7 @@ void dt_dev_pixelpipe_cleanup(dt_dev_pixelpipe_t *pipe)
   pipe->backbuf_height = 0;
 
   dt_pthread_mutex_unlock(&pipe->backbuf_mutex);
-  dt_pthread_mutex_destroy(&(pipe->backbuf_mutex));
+  dt_pthread_mutex_destroy(&pipe->backbuf_mutex);
 
   pipe->output_imgid = NO_IMGID;
 
@@ -350,8 +352,8 @@ void dt_dev_pixelpipe_cleanup(dt_dev_pixelpipe_t *pipe)
     g_list_free_full(pipe->forms, (void (*)(void *))dt_masks_free_form);
     pipe->forms = NULL;
   }
-  dt_pthread_mutex_destroy(&(pipe->busy_mutex));
-  dt_pthread_mutex_destroy(&(pipe->mutex));
+  dt_pthread_mutex_destroy(&pipe->busy_mutex);
+  dt_pthread_mutex_destroy(&pipe->mutex);
 }
 
 void dt_dev_pixelpipe_cleanup_nodes(dt_dev_pixelpipe_t *pipe)
@@ -528,7 +530,7 @@ static void _dev_pixelpipe_synch(dt_dev_pixelpipe_t *pipe,
       if(active && hist->iop_order == INT_MAX)
       {
         piece->enabled = FALSE;
-        dt_print_pipe(DT_DEBUG_PARAMS | DT_DEBUG_PIPE, "dt_dev_pixelpipe_synch",
+        dt_print_pipe(DT_DEBUG_PARAMS | DT_DEBUG_PIPE | DT_DEBUG_IOPORDER, "dt_dev_pixelpipe_synch",
           pipe, piece->module, DT_DEVICE_NONE, NULL, NULL,
           "enabled module with iop_order of INT_MAX is disabled");
       }
@@ -640,34 +642,42 @@ void dt_dev_pixelpipe_synch_top(dt_dev_pixelpipe_t *pipe, dt_develop_t *dev)
 void dt_dev_pixelpipe_change(dt_dev_pixelpipe_t *pipe, dt_develop_t *dev)
 {
   dt_pthread_mutex_lock(&dev->history_mutex);
-
-  dt_print_pipe(DT_DEBUG_PIPE, "pipe state changing",
-      pipe, NULL, DT_DEVICE_NONE, NULL, NULL, "%s%s%s%s",
+  dt_print_pipe(DT_DEBUG_PIPE, "dev_pixelpipe_change",
+      pipe, NULL, DT_DEVICE_NONE, NULL, NULL, "%s%s%s%s%s",
       pipe->changed & DT_DEV_PIPE_ZOOMED      ? "zoomed, " : "",
       pipe->changed & DT_DEV_PIPE_TOP_CHANGED ? "top changed, " : "",
       pipe->changed & DT_DEV_PIPE_SYNCH       ? "synch all, " : "",
-      pipe->changed & DT_DEV_PIPE_REMOVE      ? "pipe remove" : "");
-  // case DT_DEV_PIPE_UNCHANGED: case DT_DEV_PIPE_ZOOMED:
-  if(pipe->changed & DT_DEV_PIPE_TOP_CHANGED)
-  {
-    // only top history item changed.
-    dt_dev_pixelpipe_synch_top(pipe, dev);
-  }
-  if(pipe->changed & DT_DEV_PIPE_SYNCH)
-  {
-    // pipeline topology remains intact, only change all params.
-    dt_dev_pixelpipe_synch_all(pipe, dev);
-  }
-  if(pipe->changed & DT_DEV_PIPE_REMOVE)
+      pipe->changed & DT_DEV_PIPE_REMOVE      ? "pipe remove" : "",
+      pipe->changed == DT_DEV_PIPE_UNCHANGED  ? "dimension" : "");
+
+  if(pipe->changed & (DT_DEV_PIPE_TOP_CHANGED | DT_DEV_PIPE_SYNCH | DT_DEV_PIPE_REMOVE))
   {
-    // modules have been added in between or removed. need to rebuild
-    // the whole pipeline.
-    dt_dev_pixelpipe_cleanup_nodes(pipe);
-    dt_dev_pixelpipe_create_nodes(pipe, dev);
-    dt_dev_pixelpipe_synch_all(pipe, dev);
+    const gboolean sync_all = pipe->changed & (DT_DEV_PIPE_SYNCH | DT_DEV_PIPE_REMOVE);
+    const gboolean sync_remove = pipe->changed & DT_DEV_PIPE_REMOVE;
+
+    if((pipe->changed & DT_DEV_PIPE_TOP_CHANGED) && !sync_all)
+    {
+      // only top history item changed. Not required if we synch_all
+      dt_dev_pixelpipe_synch_top(pipe, dev);
+    }
+
+    if((pipe->changed & DT_DEV_PIPE_SYNCH) && !sync_remove)
+    {
+      // pipeline topology remains intact but change all params. Not required if we rebuild all nodes
+      dt_dev_pixelpipe_synch_all(pipe, dev);
+    }
+
+    if(pipe->changed & DT_DEV_PIPE_REMOVE)
+    {
+      // modules have been added in between or removed. need to rebuild the whole pipeline.
+      dt_dev_pixelpipe_cleanup_nodes(pipe);
+      dt_dev_pixelpipe_create_nodes(pipe, dev);
+      dt_dev_pixelpipe_synch_all(pipe, dev);
+    }
   }
   pipe->changed = DT_DEV_PIPE_UNCHANGED;
   dt_pthread_mutex_unlock(&dev->history_mutex);
+
   dt_dev_pixelpipe_get_dimensions(pipe, dev,
                                   pipe->iwidth, pipe->iheight,
                                   &pipe->processed_width,
@@ -1078,6 +1088,13 @@ static gboolean _request_color_pick(dt_dev_pixelpipe_t *pipe,
     && module->request_color_pick != DT_REQUEST_COLORPICK_OFF;
 }
 
+// Tiling is allowed for a tiling-ready piece unless the pipe wants a detail mask and the module's flags include IOP_FLAGS_WRITE_DETAILS. Is it worth using a dt_iop_flags_t for this?
+static inline gboolean _piece_may_tile(const dt_dev_pixelpipe_iop_t *piece)
+{
+  return piece->process_tiling_ready
+        && !(piece->pipe->want_detail_mask && piece->module->flags() & IOP_FLAGS_WRITE_DETAILS); // '&' binds tighter than '&&', so the flag test is grouped first
+}
+
 static void _collect_histogram_on_CPU(dt_dev_pixelpipe_t *pipe,
                                       dt_develop_t *dev,
                                       float *input,
@@ -1090,7 +1107,7 @@ static void _collect_histogram_on_CPU(dt_dev_pixelpipe_t *pipe,
   if((dev->gui_attached || !(piece->request_histogram & DT_REQUEST_ONLY_IN_GUI))
      && (piece->request_histogram & DT_REQUEST_ON))
   {
-    _histogram_collect(piece, input, roi_in, &(piece->histogram), piece->histogram_max);
+    _histogram_collect(piece, input, roi_in, &piece->histogram, piece->histogram_max);
     *pixelpipe_flow |= (PIXELPIPE_FLOW_HISTOGRAM_ON_CPU);
     *pixelpipe_flow &= ~(PIXELPIPE_FLOW_HISTOGRAM_NONE | PIXELPIPE_FLOW_HISTOGRAM_ON_GPU);
 
@@ -1112,8 +1129,8 @@ static void _collect_histogram_on_CPU(dt_dev_pixelpipe_t *pipe,
 #define tcscmp strcmp
 
 const OrtApi* g_ort = NULL;
-float conf = 0.3;
-float iou_threshold = 0.7;
+float conf = 0.25;
+float iou_threshold = 0.65;
 #define ORT_ABORT_ON_ERROR(expr)                             \
   do {                                                       \
     OrtStatus* onnx_status = (expr);                         \
@@ -1300,8 +1317,8 @@ static void process_mask_native(
                 int idx_out = i * output_h * output_w + y * output_w + x;
 
                 // FIXME
-                // if ((x < current_box.x1) || (x > current_box.x2) || (y < current_box.y1) || (y > current_box.y2))
-                //   interpolated_value = 0.0f;
+                if (((src_x*4) < current_box.x1) || ((src_x*4) > current_box.x2) || ((src_y*4) < current_box.y1) || ((src_y*4) > current_box.y2))
+                  interpolated_value = 0.0f;
 
                 output_masks[idx_out] = interpolated_value;
                 if (interpolated_value > max_value[i]) max_value[i] = output_masks[idx_out];
@@ -1547,6 +1564,50 @@ int run_inference(OrtSession* session, const float* input_image, const int h, co
   
   return ret;
 }
+/* About the module-in-focus blending cache
+  Processing a piece in the pixelpipe is basically
+    a) call the module->process
+    b) possibly blend output of (a) to pipe output
+  As (a) can be very demanding and its output does not change if we only change any
+  blending parameter we check for this situation if
+    - there is a module in focus and
+    - we process any pipe writing to screen.
+
+  The validating hash is almost dt_dev_pixelpipe_cache_hash() except it
+  does *not* include the blending parameters for the module in focus.
+*/
+static inline dt_hash_t _piece_process_hash(const dt_dev_pixelpipe_iop_t *piece,
+                                            const dt_iop_roi_t *roi,
+                                            const dt_iop_module_t *module)
+{ // Builds the hash used to validate pipe->bcache_data for this piece/roi/module combination.
+  dt_hash_t phash = dt_dev_pixelpipe_cache_hash(piece->pipe->image.id, roi, piece->pipe, module->iop_order -1); // seed from the pipe cache hash, queried with iop_order - 1 (presumably the state before this module - confirm)
+  phash = dt_hash(phash, roi, sizeof(dt_iop_roi_t)); // mix in the raw bytes of the roi struct
+  phash = dt_hash(phash, &module->so->op, strlen(module->so->op)); // mix in the operation name (without the terminating NUL)
+  phash = dt_hash(phash, &module->instance, sizeof(module->instance)); // mix in the module instance
+  phash = dt_hash(phash, module->params, module->params_size); // mix in the module parameters; no blend parameters are added here
+  return phash;
+}
+
+static inline gboolean _piece_fast_blend(const dt_dev_pixelpipe_iop_t *piece,
+                                         const dt_iop_module_t *module)
+{ // Decides whether the blending cache applies to this piece; every condition below must hold.
+  return (piece->pipe->type & DT_DEV_PIXELPIPE_SCREEN) // pipe type has the SCREEN bit set
+      && module->dev // module is attached to a develop instance
+      && module->dev->gui_attached // ... and that instance has a GUI
+      && module == module->dev->gui_module // this module is the develop's gui_module (the module in focus)
+      && dt_dev_modulegroups_test_activated(darktable.develop) // NOTE(review): queries the global darktable.develop, not module->dev - confirm this is intended
+      && _transform_for_blend(module, piece); // helper defined elsewhere in the file; presumably true when the module will blend - confirm
+}
+
+static inline float *_get_fast_blendcache(const size_t nfloats,
+                                          const dt_hash_t phash,
+                                          dt_dev_pixelpipe_t *pipe)
+{ // Replaces the pipe's blend cache buffer with a fresh one of nfloats floats and tags it with phash.
+  dt_free_align(pipe->bcache_data); // drop whatever buffer was cached before
+  pipe->bcache_data = dt_alloc_align_float(nfloats); // callers test the returned pointer before copying into it
+  pipe->bcache_hash = phash; // stored unconditionally; readers check bcache_data before comparing the hash
+  return pipe->bcache_data; // the pipe keeps ownership of the buffer
+}
 
 static gboolean _pixelpipe_process_on_CPU(dt_dev_pixelpipe_t *pipe,
                                           dt_develop_t *dev,
@@ -1569,7 +1630,7 @@ static gboolean _pixelpipe_process_on_CPU(dt_dev_pixelpipe_t *pipe,
   {
     dt_print_pipe(DT_DEBUG_ALWAYS,
         "fatal process alignment",
-        piece->pipe, module, DT_DEVICE_NONE, roi_in, roi_out,
+        pipe, module, DT_DEVICE_NONE, roi_in, roi_out,
         "non-aligned buffers IN=%p OUT=%p",
         input, *output);
 
@@ -1596,14 +1657,14 @@ static gboolean _pixelpipe_process_on_CPU(dt_dev_pixelpipe_t *pipe,
   if(cst_from != cst_to)
     dt_print_pipe(DT_DEBUG_PIPE,
            "transform colorspace",
-           piece->pipe, module, DT_DEVICE_CPU, roi_in, NULL, " %s -> %s `%s'",
+           pipe, module, DT_DEVICE_CPU, roi_in, NULL, " %s -> %s `%s'",
            dt_iop_colorspace_to_name(cst_from),
            dt_iop_colorspace_to_name(cst_to),
            work_profile ? dt_colorspaces_get_name(work_profile->type, work_profile->filename) : "no work profile");
 
   // transform to module input colorspace
-  dt_ioppr_transform_image_colorspace
-    (module, input, input, roi_in->width, roi_in->height, cst_from,
+  dt_ioppr_transform_image_colorspace(
+     module, input, input, roi_in->width, roi_in->height, cst_from,
      cst_to, &input_format->cst, work_profile);
 
   if(dt_atomic_get_int(&pipe->shutdown))
@@ -1639,7 +1700,7 @@ static gboolean _pixelpipe_process_on_CPU(dt_dev_pixelpipe_t *pipe,
       new_image[i*3 + 2] = local_copy[i*4 + 2];
     }
 
-    if (write_image_file(new_image, h, w, "/home/miko/Desktop/test1.png") != 0) {
+    if (write_image_file(new_image, h, w, "/home/miko/Desktop/test_base.png") != 0) {
         printf("Error writing image\n");
     }
     
@@ -1681,20 +1742,26 @@ static gboolean _pixelpipe_process_on_CPU(dt_dev_pixelpipe_t *pipe,
 
     if (out)
     {
-      for (int i = 0; i < stride; i++)
+      /*
+      for (int mask_id = 0; mask_id < n_masks; mask_id++)
       {
-        new_image[i * 3 + 0] = (uint8_t)(out[i] * 255.0);
-        new_image[i * 3 + 1] = (uint8_t)(out[i] * 255.0);
-        new_image[i * 3 + 2] = (uint8_t)(out[i] * 255.0);
-      }
-
-      if (write_image_file(new_image, h, w, "/home/miko/Desktop/test2.png") != 0)
-      {
-        printf("Error writing image\n");
+        for (int i = 0; i < stride; i++)
+        {
+          new_image[i * 3 + 0] = (uint8_t)(out[i + stride * mask_id] * 255.0);
+          new_image[i * 3 + 1] = (uint8_t)(out[i + stride * mask_id] * 255.0);
+          new_image[i * 3 + 2] = (uint8_t)(out[i + stride * mask_id] * 255.0);
+        }
+        char path[128];
+        sprintf(path, "/home/miko/Desktop/test%d.png", mask_id);
+        if (write_image_file(new_image, h, w, path) != 0)
+        {
+          printf("Error writing image\n");
+        }
       }
+      */
 
       pipe->proxy_data = (uint8_t*)malloc(sizeof(uint8_t) * stride * n_masks);
-      for (int i = 0; i < stride; i++){
+      for (int i = 0; i < stride * n_masks; i++){
         pipe->proxy_data[i] = (uint8_t)(out[i] * 255.0);
       }
 
@@ -1726,23 +1793,40 @@ static gboolean _pixelpipe_process_on_CPU(dt_dev_pixelpipe_t *pipe,
   /* process module on cpu. use tiling if needed and possible. */
 
   const gboolean pfm_dump = darktable.dump_pfm_pipe
-    && (piece->pipe->type & (DT_DEV_PIXELPIPE_FULL | DT_DEV_PIXELPIPE_EXPORT));
+    && (pipe->type & (DT_DEV_PIXELPIPE_FULL | DT_DEV_PIXELPIPE_EXPORT));
 
   if(pfm_dump)
     dt_dump_pipe_pfm(module->op, input,
                      roi_in->width, roi_in->height, in_bpp,
-                     TRUE, dt_dev_pixelpipe_type_to_str(piece->pipe->type));
+                     TRUE, dt_dev_pixelpipe_type_to_str(pipe->type));
 
-  if(!fitting && piece->process_tiling_ready)
+  const gboolean relevant = _piece_fast_blend(piece, module);
+  const dt_hash_t phash = relevant ? _piece_process_hash(piece, roi_out, module) : 0;
+  const size_t nfloats = bpp * roi_out->width * roi_out->height / sizeof(float);
+  const gboolean bcaching = relevant ? pipe->bcache_data && phash == pipe->bcache_hash : FALSE;
+
+  if(!fitting && _piece_may_tile(piece))
   {
     dt_print_pipe(DT_DEBUG_PIPE,
-                        "process tiles",
-                        piece->pipe, module, DT_DEVICE_CPU, roi_in, roi_out, "%s%s%s",
+                        bcaching ? "from focus cache" : "process tiles",
+                        pipe, module, DT_DEVICE_CPU, roi_in, roi_out, "%s%s%s",
                         dt_iop_colorspace_to_name(cst_to),
                         cst_to != cst_out ? " -> " : "",
                         cst_to != cst_out ? dt_iop_colorspace_to_name(cst_out) : "");
-    module->process_tiling(module, piece, input, *output, roi_in, roi_out, in_bpp);
 
+    if(bcaching)
+    {
+      dt_iop_image_copy(*output, pipe->bcache_data, nfloats);
+    }
+    else
+    {
+      module->process_tiling(module, piece, input, *output, roi_in, roi_out, in_bpp);
+      if(relevant)
+      {
+        float *cache = _get_fast_blendcache(nfloats, phash, pipe);
+        if(cache) dt_iop_image_copy(cache, *output, nfloats);
+      }
+    }
     *pixelpipe_flow |= (PIXELPIPE_FLOW_PROCESSED_ON_CPU
                         | PIXELPIPE_FLOW_PROCESSED_WITH_TILING);
     *pixelpipe_flow &= ~(PIXELPIPE_FLOW_PROCESSED_ON_GPU);
@@ -1750,8 +1834,8 @@ static gboolean _pixelpipe_process_on_CPU(dt_dev_pixelpipe_t *pipe,
   else
   {
     dt_print_pipe(DT_DEBUG_PIPE,
-       "process",
-       piece->pipe, module, DT_DEVICE_CPU, roi_in, roi_out, "%s%s%s%s %.fMB",
+       bcaching ? "from focus cache" : "process",
+       pipe, module, DT_DEVICE_CPU, roi_in, roi_out, "%s%s%s%s %.fMB",
        dt_iop_colorspace_to_name(cst_to),
        cst_to != cst_out ? " -> " : "",
        cst_to != cst_out ? dt_iop_colorspace_to_name(cst_out) : "",
@@ -1761,8 +1845,9 @@ static gboolean _pixelpipe_process_on_CPU(dt_dev_pixelpipe_t *pipe,
        1e-6 * (tiling->factor * (m_width * m_height * m_bpp) + tiling->overhead));
 
     // this code section is for simplistic benchmarking via --bench-module
-    if((piece->pipe->type & (DT_DEV_PIXELPIPE_FULL | DT_DEV_PIXELPIPE_EXPORT))
-       && darktable.bench_module)
+    if((pipe->type & (DT_DEV_PIXELPIPE_FULL | DT_DEV_PIXELPIPE_EXPORT))
+       && darktable.bench_module
+       && fitting)
     {
       if(dt_str_commasubstring(darktable.bench_module, module->op))
       {
@@ -1770,8 +1855,8 @@ static gboolean _pixelpipe_process_on_CPU(dt_dev_pixelpipe_t *pipe,
         dt_times_t end;
         const int old_muted = darktable.unmuted;
         darktable.unmuted = 0;
-        const gboolean full = piece->pipe->type & DT_DEV_PIXELPIPE_FULL;
-        const int counter = (piece->pipe->type & DT_DEV_PIXELPIPE_FULL) ? 100 : 50;
+        const gboolean full = pipe->type & DT_DEV_PIXELPIPE_FULL;
+        const int counter = full ? 100 : 50;
         const float mpix = (roi_out->width * roi_out->height) / 1.0e6;
 
         if(module->process_plain)
@@ -1788,7 +1873,20 @@ static gboolean _pixelpipe_process_on_CPU(dt_dev_pixelpipe_t *pipe,
         darktable.unmuted = old_muted;
       }
     }
-    module->process(module, piece, input, *output, roi_in, roi_out);
+
+    if(bcaching)
+    {
+      dt_iop_image_copy(*output, pipe->bcache_data, nfloats);
+    }
+    else
+    {
+      module->process(module, piece, input, *output, roi_in, roi_out);
+      if(relevant)
+      {
+        float *cache = _get_fast_blendcache(nfloats, phash, pipe);
+        if(cache) dt_iop_image_copy(cache, *output, nfloats);
+      }
+    }
 
     *pixelpipe_flow |= (PIXELPIPE_FLOW_PROCESSED_ON_CPU);
     *pixelpipe_flow &= ~(PIXELPIPE_FLOW_PROCESSED_ON_GPU
@@ -1799,9 +1897,9 @@ static gboolean _pixelpipe_process_on_CPU(dt_dev_pixelpipe_t *pipe,
   {
     dt_dump_pipe_pfm(module->op, *output,
                      roi_out->width, roi_out->height, bpp,
-                     FALSE, dt_dev_pixelpipe_type_to_str(piece->pipe->type));
+                     FALSE, dt_dev_pixelpipe_type_to_str(pipe->type));
     _dump_pipe_pfm_diff(module->op, input, roi_in, in_bpp, *output, roi_out, bpp,
-                        dt_dev_pixelpipe_type_to_str(piece->pipe->type));
+                        dt_dev_pixelpipe_type_to_str(pipe->type));
   }
 
   // and save the output colorspace
@@ -2093,7 +2191,7 @@ static gboolean _dev_pixelpipe_process_rec(dt_dev_pixelpipe_t *pipe,
   module->modify_roi_in(module, piece, roi_out, &roi_in);
   if((darktable.unmuted & DT_DEBUG_PIPE) && memcmp(roi_out, &roi_in, sizeof(dt_iop_roi_t)))
     dt_print_pipe(DT_DEBUG_PIPE,
-                  "modify roi IN", piece->pipe, module, DT_DEVICE_NONE, roi_out, &roi_in, "ID=%i",
+                  "modify roi IN", pipe, module, DT_DEVICE_NONE, roi_out, &roi_in, "ID=%i",
                   pipe->image.id);
   // recurse to get actual data of input buffer
 
@@ -2260,7 +2358,7 @@ static gboolean _dev_pixelpipe_process_rec(dt_dev_pixelpipe_t *pipe,
 
     if(possible_cl && !fits_on_device)
     {
-      if(!piece->process_tiling_ready)
+      if(!_piece_may_tile(piece))
         possible_cl = FALSE;
 
       const float advantage = darktable.opencl->dev[pipe->devid].advantage;
@@ -2300,7 +2398,7 @@ static gboolean _dev_pixelpipe_process_rec(dt_dev_pixelpipe_t *pipe,
           {
             dt_print_pipe(DT_DEBUG_OPENCL | DT_DEBUG_PIPE,
               "no input cl_mem",
-              piece->pipe, module, pipe->devid, &roi_in, roi_out);
+              pipe, module, pipe->devid, &roi_in, roi_out);
             success_opencl = FALSE;
           }
 
@@ -2332,25 +2430,25 @@ static gboolean _dev_pixelpipe_process_rec(dt_dev_pixelpipe_t *pipe,
           {
             dt_print_pipe(DT_DEBUG_OPENCL | DT_DEBUG_PIPE,
               "no output cl_mem",
-              piece->pipe, module, pipe->devid, &roi_in, roi_out);
+              pipe, module, pipe->devid, &roi_in, roi_out);
             success_opencl = FALSE;
           }
         }
 
         // indirectly give gpu some air to breathe (and to do display related stuff)
-        dt_iop_nap(dt_opencl_micro_nap(pipe->devid));
+        dt_opencl_micro_nap(pipe->devid);
 
         // transform to input colorspace
         if(success_opencl)
         {
           if(cst_from != cst_to)
             dt_print_pipe(DT_DEBUG_PIPE,
-               "transform colorspace", piece->pipe, module, pipe->devid, &roi_in, NULL, " %s -> %s `%s'",
+               "transform colorspace", pipe, module, pipe->devid, &roi_in, NULL, " %s -> %s `%s'",
                dt_iop_colorspace_to_name(cst_from),
                dt_iop_colorspace_to_name(cst_to),
                work_profile ? dt_colorspaces_get_name(work_profile->type, work_profile->filename) : "no work profile");
           success_opencl = dt_ioppr_transform_image_colorspace_cl(
-             module, piece->pipe->devid,
+             module, pipe->devid,
              cl_mem_input, cl_mem_input,
              roi_in.width, roi_in.height,
              input_cst_cl,
@@ -2370,7 +2468,7 @@ static gboolean _dev_pixelpipe_process_rec(dt_dev_pixelpipe_t *pipe,
           const size_t outbufsize = bpp * roi_out->width * roi_out->height;
 
           _histogram_collect_cl(pipe->devid, piece, cl_mem_input,
-                                &roi_in, &(piece->histogram),
+                                &roi_in, &piece->histogram,
                                 piece->histogram_max, *output, outbufsize);
           pixelpipe_flow |= (PIXELPIPE_FLOW_HISTOGRAM_ON_GPU);
           pixelpipe_flow &= ~(PIXELPIPE_FLOW_HISTOGRAM_NONE
@@ -2399,16 +2497,20 @@ static gboolean _dev_pixelpipe_process_rec(dt_dev_pixelpipe_t *pipe,
            meaningful messages in case of error */
         if(success_opencl)
         {
+          const gboolean relevant = _piece_fast_blend(piece, module);
+          const dt_hash_t phash = relevant ? _piece_process_hash(piece, roi_out, module) : 0;
+          const gboolean bcaching = relevant ? pipe->bcache_data && phash == pipe->bcache_hash : FALSE;
+
           dt_print_pipe(DT_DEBUG_PIPE,
-                        "process",
-                        piece->pipe, module, pipe->devid, &roi_in, roi_out, "%s%s%s %.1fMB",
+                        bcaching ? "from focus cache" : "process",
+                        pipe, module, pipe->devid, &roi_in, roi_out, "%s%s%s %.1fMB",
                         dt_iop_colorspace_to_name(cst_to),
                         cst_to != cst_out ? " -> " : "",
                         cst_to != cst_out ? dt_iop_colorspace_to_name(cst_out) : "",
                         1e-6 * (tiling.factor_cl * (m_width * m_height * m_bpp) + tiling.overhead));
 
           // this code section is for simplistic benchmarking via --bench-module
-          if((piece->pipe->type & (DT_DEV_PIXELPIPE_FULL | DT_DEV_PIXELPIPE_EXPORT))
+          if((pipe->type & (DT_DEV_PIXELPIPE_FULL | DT_DEV_PIXELPIPE_EXPORT))
              && darktable.bench_module)
           {
             if(dt_str_commasubstring(darktable.bench_module, module->op))
@@ -2417,9 +2519,9 @@ static gboolean _dev_pixelpipe_process_rec(dt_dev_pixelpipe_t *pipe,
               dt_times_t end;
               const int old_muted = darktable.unmuted;
               darktable.unmuted = 0;
-              const gboolean full = piece->pipe->type & DT_DEV_PIXELPIPE_FULL;
+              const gboolean full = pipe->type & DT_DEV_PIXELPIPE_FULL;
               const float mpix = (roi_out->width * roi_out->height) / 1.0e6;
-              const int counter = (piece->pipe->type & DT_DEV_PIXELPIPE_FULL) ? 100 : 50;
+              const int counter = (pipe->type & DT_DEV_PIXELPIPE_FULL) ? 100 : 50;
               gboolean success = TRUE;
               dt_get_times(&bench);
               for(int i = 0; i < counter; i++)
@@ -2446,20 +2548,33 @@ static gboolean _dev_pixelpipe_process_rec(dt_dev_pixelpipe_t *pipe,
             }
           }
           const gboolean pfm_dump = darktable.dump_pfm_pipe
-            && (piece->pipe->type & (DT_DEV_PIXELPIPE_FULL | DT_DEV_PIXELPIPE_EXPORT))
+            && (pipe->type & (DT_DEV_PIXELPIPE_FULL | DT_DEV_PIXELPIPE_EXPORT))
             && dt_str_commasubstring(darktable.dump_pfm_pipe, module->op);
 
           if(pfm_dump)
             dt_opencl_dump_pipe_pfm(module->op, pipe->devid, cl_mem_input,
-                                    TRUE, dt_dev_pixelpipe_type_to_str(piece->pipe->type));
+                                    TRUE, dt_dev_pixelpipe_type_to_str(pipe->type));
 
-          const cl_int err = module->process_cl(module, piece, cl_mem_input, *cl_mem_output,
-                                              &roi_in, roi_out);
+          cl_int err = CL_SUCCESS;
+
+          if(bcaching)
+          {
+            err = dt_opencl_write_host_to_device(pipe->devid, pipe->bcache_data, *cl_mem_output, roi_out->width, roi_out->height, out_bpp);
+          }
+          else
+          {
+            err = module->process_cl(module, piece, cl_mem_input, *cl_mem_output, &roi_in, roi_out);
+            if(relevant && (err == CL_SUCCESS))
+            {
+              float *cache = _get_fast_blendcache(out_bpp * roi_out->width * roi_out->height / sizeof(float), phash, pipe);
+              if(cache) err = dt_opencl_read_host_from_device(pipe->devid, cache, *cl_mem_output, roi_out->width, roi_out->height, out_bpp);
+            }
+          }
           success_opencl = (err == CL_SUCCESS);
 
           if(!success_opencl)
             dt_print_pipe(DT_DEBUG_OPENCL,
-              "Error: process", piece->pipe, module, pipe->devid, &roi_in, roi_out,
+              "Error: process", pipe, module, pipe->devid, &roi_in, roi_out,
               "device=%i (%s), %s",
               pipe->devid, darktable.opencl->dev[pipe->devid].cname, cl_errstr(err));
 
@@ -2467,9 +2582,9 @@ static gboolean _dev_pixelpipe_process_rec(dt_dev_pixelpipe_t *pipe,
           {
             if(pfm_dump)
               dt_opencl_dump_pipe_pfm(module->op, pipe->devid, *cl_mem_output,
-                                    FALSE, dt_dev_pixelpipe_type_to_str(piece->pipe->type));
+                                    FALSE, dt_dev_pixelpipe_type_to_str(pipe->type));
 
-            if((piece->pipe->type & (DT_DEV_PIXELPIPE_FULL | DT_DEV_PIXELPIPE_EXPORT))
+            if((pipe->type & (DT_DEV_PIXELPIPE_FULL | DT_DEV_PIXELPIPE_EXPORT))
                 && darktable.dump_diff_pipe)
             {
               const int ch = dt_opencl_get_image_element_size(cl_mem_input) / sizeof(float);
@@ -2494,7 +2609,7 @@ static gboolean _dev_pixelpipe_process_rec(dt_dev_pixelpipe_t *pipe,
                     if(terr == CL_SUCCESS)
                     {
                       module->process(module, piece, clindata, cpudata, &roi_in, roi_out);
-                      dt_dump_pipe_diff_pfm(module->op, cloutdata, cpudata, ow, oh, cho, dt_dev_pixelpipe_type_to_str(piece->pipe->type));                  }
+                      dt_dump_pipe_diff_pfm(module->op, cloutdata, cpudata, ow, oh, cho, dt_dev_pixelpipe_type_to_str(pipe->type));                  }
                   }
                 }
                 dt_free_align(cpudata);
@@ -2553,12 +2668,12 @@ static gboolean _dev_pixelpipe_process_rec(dt_dev_pixelpipe_t *pipe,
         {
 
           success_opencl = dt_ioppr_transform_image_colorspace_cl(
-             module, piece->pipe->devid, cl_mem_input, cl_mem_input,
+             module, pipe->devid, cl_mem_input, cl_mem_input,
              roi_in.width, roi_in.height,
              input_cst_cl, blend_cst, &input_cst_cl, work_profile);
 
           success_opencl &= dt_ioppr_transform_image_colorspace_cl(
-             module, piece->pipe->devid, *cl_mem_output, *cl_mem_output,
+             module, pipe->devid, *cl_mem_output, *cl_mem_output,
              roi_out->width, roi_out->height,
              pipe->dsc.cst, blend_cst, &pipe->dsc.cst, work_profile);
 
@@ -2584,9 +2699,7 @@ static gboolean _dev_pixelpipe_process_rec(dt_dev_pixelpipe_t *pipe,
         /* process blending */
         if(success_opencl)
         {
-          success_opencl = dt_develop_blend_process_cl(
-             module, piece, cl_mem_input, *cl_mem_output,
-             &roi_in, roi_out);
+          success_opencl = dt_develop_blend_process_cl(module, piece, cl_mem_input, *cl_mem_output, &roi_in, roi_out);
           pixelpipe_flow |= (PIXELPIPE_FLOW_BLENDED_ON_GPU);
           pixelpipe_flow &= ~(PIXELPIPE_FLOW_BLENDED_ON_CPU);
         }
@@ -2632,14 +2745,14 @@ static gboolean _dev_pixelpipe_process_rec(dt_dev_pixelpipe_t *pipe,
            return TRUE;
 
         // indirectly give gpu some air to breathe (and to do display related stuff)
-        dt_iop_nap(dt_opencl_micro_nap(pipe->devid));
+        dt_opencl_micro_nap(pipe->devid);
 
         // transform to module input colorspace
         if(success_opencl)
         {
           if(cst_from != cst_to)
             dt_print_pipe(DT_DEBUG_PIPE,
-               "transform colorspace", piece->pipe, module, pipe->devid, &roi_in, NULL, " %s -> %s",
+               "transform colorspace", pipe, module, pipe->devid, &roi_in, NULL, " %s -> %s",
                dt_iop_colorspace_to_name(cst_from),
                dt_iop_colorspace_to_name(cst_to));
           dt_ioppr_transform_image_colorspace(
@@ -2664,18 +2777,37 @@ static gboolean _dev_pixelpipe_process_rec(dt_dev_pixelpipe_t *pipe,
            meaningful messages in case of error */
         if(success_opencl)
         {
+          const gboolean relevant = _piece_fast_blend(piece, module);
+          const dt_hash_t phash = relevant ? _piece_process_hash(piece, roi_out, module) : 0;
+          const gboolean bcaching = relevant ? pipe->bcache_data && phash == pipe->bcache_hash : FALSE;
           dt_print_pipe(DT_DEBUG_PIPE,
-                        "process tiled",
-                        piece->pipe, module, pipe->devid, &roi_in, roi_out, "%s%s%s",
+                        bcaching ? "from focus cache" : "process tiled",
+                        pipe, module, pipe->devid, &roi_in, roi_out, "%s%s%s",
                         dt_iop_colorspace_to_name(cst_to),
                         cst_to != cst_out ? " -> " : "",
                         cst_to != cst_out ? dt_iop_colorspace_to_name(cst_out) : "");
-          const cl_int err = module->process_tiling_cl(module, piece, input, *output, &roi_in, roi_out, in_bpp);
+
+          cl_int err = CL_SUCCESS;
+
+          if(bcaching)
+          {
+            err = dt_opencl_write_host_to_device(pipe->devid, pipe->bcache_data, *cl_mem_output, roi_out->width, roi_out->height, out_bpp);
+          }
+          else
+          {
+            err = module->process_tiling_cl(module, piece, input, *output, &roi_in, roi_out, in_bpp);
+            if(relevant && (err == CL_SUCCESS))
+            {
+              float *cache = _get_fast_blendcache(out_bpp * roi_out->width * roi_out->height / sizeof(float), phash, pipe);
+              if(cache)
+                err = dt_opencl_read_host_from_device(pipe->devid, cache, *cl_mem_output, roi_out->width, roi_out->height, out_bpp);
+            }
+          }
           success_opencl = (err == CL_SUCCESS);
 
           if(!success_opencl)
             dt_print_pipe(DT_DEBUG_OPENCL,
-              "Error: process_tiling", piece->pipe, module, pipe->devid, &roi_in, roi_out,
+              "Error: process_tiling", pipe, module, pipe->devid, &roi_in, roi_out,
               "device=%i (%s), %s",
               pipe->devid, darktable.opencl->dev[pipe->devid].cname, cl_errstr(err));
 
@@ -2993,16 +3125,19 @@ static gboolean _dev_pixelpipe_process_rec(dt_dev_pixelpipe_t *pipe,
     // as the user is likely to change that one soon (again), so keep it in cache.
     // Also do this if the clbuffer has been actively written
     const gboolean has_focus = module == dt_dev_gui_module();
+    const gboolean last_history = darktable.develop->history_last_module == module;
     if((pipe->type & DT_DEV_PIXELPIPE_BASIC)
         && (pipe->mask_display == DT_DEV_PIXELPIPE_DISPLAY_NONE)
-        && (has_focus || darktable.develop->history_last_module == module || important_cl))
+        && (has_focus || last_history || important_cl))
     {
       dt_print_pipe(DT_DEBUG_PIPE,
         "importance hints", pipe, module, pipe->devid, &roi_in, NULL, " %s%s%s",
-        darktable.develop->history_last_module == module ? "input_hint " : "",
+        last_history ? "input_hint " : "",
         has_focus ? "focus " : "",
         important_cl ? "cldata" : "");
       dt_dev_pixelpipe_important_cacheline(pipe, input, roi_in.width * roi_in.height * in_bpp);
+      if((pipe->type & DT_DEV_PIXELPIPE_FULL) && last_history)
+        darktable.develop->history_last_module = NULL;
     }
 
     if(module->expanded
@@ -3203,7 +3338,6 @@ static gboolean _dev_pixelpipe_process_rec_and_backcopy(dt_dev_pixelpipe_t *pipe
                                                         const int pos)
 {
   dt_pthread_mutex_lock(&pipe->busy_mutex);
-  darktable.dtresources.group = 4 * darktable.dtresources.level;
 #ifdef HAVE_OPENCL
   dt_opencl_check_tuning(pipe->devid);
 #endif
@@ -3303,9 +3437,12 @@ gboolean dt_dev_pixelpipe_process(dt_dev_pixelpipe_t *pipe,
 
 #ifdef HAVE_OPENCL
   if(pipe->devid > DT_DEVICE_CPU)
-    dt_print_pipe(DT_DEBUG_PIPE, "pipe starting", pipe, NULL, pipe->devid, &roi, &roi, "ID=%i, %s",
+    dt_print_pipe(DT_DEBUG_PIPE, "pipe starting", pipe, NULL, pipe->devid, &roi, &roi, "ID=%i, %s %luMB%s%s",
       pipe->image.id,
-      darktable.opencl->dev[pipe->devid].cname);
+      darktable.opencl->dev[pipe->devid].cname,
+      darktable.opencl->dev[pipe->devid].used_available / 1024lu / 1024lu,
+      darktable.opencl->dev[pipe->devid].tunehead ? ", tuned" : "",
+      darktable.opencl->dev[pipe->devid].pinned_memory ? ", pinned": "");
   else
     dt_print_pipe(DT_DEBUG_PIPE, "pipe starting", pipe, NULL, pipe->devid, &roi, &roi, "ID=%i",
       pipe->image.id);
@@ -3443,6 +3580,8 @@ void dt_dev_pixelpipe_get_dimensions(dt_dev_pixelpipe_t *pipe,
 {
   dt_pthread_mutex_lock(&pipe->busy_mutex);
   dt_iop_roi_t roi_in = (dt_iop_roi_t){ 0, 0, width_in, height_in, 1.0 };
+  dt_print_pipe(DT_DEBUG_PIPE,
+                "get dimensions", pipe, NULL, DT_DEVICE_NONE, &roi_in, NULL, "ID=%i", pipe->image.id);
   dt_iop_roi_t roi_out;
   GList *modules = pipe->iop;
   GList *pieces = pipe->nodes;
@@ -3459,8 +3598,7 @@ void dt_dev_pixelpipe_get_dimensions(dt_dev_pixelpipe_t *pipe,
       module->modify_roi_out(module, piece, &roi_out, &roi_in);
       if((darktable.unmuted & DT_DEBUG_PIPE) && memcmp(&roi_out, &roi_in, sizeof(dt_iop_roi_t)))
       dt_print_pipe(DT_DEBUG_PIPE,
-                  "modify roi OUT", piece->pipe, module, DT_DEVICE_NONE, &roi_in, &roi_out, "ID=%i",
-                  pipe->image.id);
+                  "modify roi OUT", pipe, module, DT_DEVICE_NONE, &roi_in, &roi_out);
     }
     else
     {
@@ -3740,7 +3878,7 @@ int dt_dev_write_scharr_mask_cl(dt_dev_pixelpipe_iop_t *piece,
   dt_dev_pixelpipe_t *p = piece->pipe;
   dt_dev_clear_scharr_mask(p);
 
-  if(piece->pipe->tiling)
+  if(p->tiling)
     return DT_OPENCL_PROCESS_CL;
 
   const int width = roi->width;
@@ -3784,7 +3922,7 @@ int dt_dev_write_scharr_mask_cl(dt_dev_pixelpipe_iop_t *piece,
   dt_print_pipe(DT_DEBUG_PIPE, "write scharr mask CL", p, NULL, devid, NULL, NULL, "%p (%ix%i)",
     mask, width, height);
 
-  if(darktable.dump_pfm_module && (piece->pipe->type & DT_DEV_PIXELPIPE_EXPORT))
+  if(darktable.dump_pfm_module && (p->type & DT_DEV_PIXELPIPE_EXPORT))
     dt_dump_pfm("scharr_cl", mask, width, height, sizeof(float), "detail");
 
   error:

From 2941b47e64f59333d747996b787144d2bc855680 Mon Sep 17 00:00:00 2001
From: MikoMikarro <lopezcuestam@gmail.com>
Date: Sat, 1 Feb 2025 12:36:28 +0100
Subject: [PATCH 10/14] update onnxruntime version to last release

---
 src/external/onnxruntime | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/external/onnxruntime b/src/external/onnxruntime
index 4e4fd2bdcf0f..5c1b7ccbff7e 160000
--- a/src/external/onnxruntime
+++ b/src/external/onnxruntime
@@ -1 +1 @@
-Subproject commit 4e4fd2bdcf0f12e1c897c77e6384cb1e97cd80c3
+Subproject commit 5c1b7ccbff7e5141c1da7a9d963d660e5741c319

From 7c8819ab184e3f10cbe689585c63a56043e82919 Mon Sep 17 00:00:00 2001
From: MikoMikarro <lopezcuestam@gmail.com>
Date: Sat, 1 Feb 2025 12:37:05 +0100
Subject: [PATCH 11/14] delete section for AI masks as they will be just a
 brush

---
 src/develop/blend.h       |   9 +-
 src/develop/blend_gui.c   | 172 --------------------------------------
 src/develop/masks.h       |   8 --
 src/develop/masks/masks.c |   8 --
 4 files changed, 1 insertion(+), 196 deletions(-)

diff --git a/src/develop/blend.h b/src/develop/blend.h
index dc9b5a1c9238..cc8a690903e2 100644
--- a/src/develop/blend.h
+++ b/src/develop/blend.h
@@ -97,7 +97,6 @@ typedef enum dt_develop_mask_mode_t
   DEVELOP_MASK_MASK = 1 << 1,                                                        // drawn mask
   DEVELOP_MASK_CONDITIONAL = 1 << 2,                                                 // parametric mask
   DEVELOP_MASK_RASTER = 1 << 3,                                                      // raster mask
-  DEVELOP_MASK_AI = 1 << 4,
   DEVELOP_MASK_MASK_CONDITIONAL = (DEVELOP_MASK_MASK | DEVELOP_MASK_CONDITIONAL)     // drawn & parametric
 } dt_develop_mask_mode_t;
 
@@ -284,7 +283,7 @@ extern const dt_introspection_type_enum_tuple_t dt_develop_combine_masks_names[]
 extern const dt_introspection_type_enum_tuple_t dt_develop_feathering_guide_names[];
 extern const dt_introspection_type_enum_tuple_t dt_develop_invert_mask_names[];
 
-#define DEVELOP_MASKS_NB_SHAPES 6
+#define DEVELOP_MASKS_NB_SHAPES 5
 
 /** blend gui data */
 typedef struct dt_iop_gui_blend_data_t
@@ -295,7 +294,6 @@ typedef struct dt_iop_gui_blend_data_t
   gboolean masks_support;
   gboolean masks_inited;
   gboolean raster_inited;
-  gboolean ai_masks_inited;
 
   dt_develop_blend_colorspace_t csp;
   dt_iop_module_t *module;
@@ -310,7 +308,6 @@ typedef struct dt_iop_gui_blend_data_t
   GtkBox *blendif_box;
   GtkBox *masks_box;
   GtkBox *raster_box;
-  GtkBox *ai_box;
 
   GtkWidget *selected_mask_mode;
   GtkWidget *colorpicker;
@@ -354,10 +351,6 @@ typedef struct dt_iop_gui_blend_data_t
   GtkWidget *raster_combo;
   GtkWidget *raster_polarity;
 
-  GtkWidget *ai_threshold;
-  GtkWidget *execute_ai;
-  GtkWidget *ai_cursor_add;
-
   int control_button_pressed;
   dt_pthread_mutex_t lock;
 } dt_iop_gui_blend_data_t;
diff --git a/src/develop/blend_gui.c b/src/develop/blend_gui.c
index 14df359fdf82..b5cf3202b031 100644
--- a/src/develop/blend_gui.c
+++ b/src/develop/blend_gui.c
@@ -160,8 +160,6 @@ const dt_introspection_type_enum_tuple_t dt_develop_mask_mode_names[]
           DEVELOP_MASK_CONDITIONAL | DEVELOP_MASK_ENABLED },
         { N_("raster mask"),
           DEVELOP_MASK_RASTER | DEVELOP_MASK_ENABLED },
-        { N_("AI mask"),
-          DEVELOP_MASK_AI | DEVELOP_MASK_ENABLED },
         { N_("drawn & parametric mask"),
           DEVELOP_MASK_MASK_CONDITIONAL | DEVELOP_MASK_ENABLED },
         { } };
@@ -692,9 +690,6 @@ static void _blendop_masks_mode_callback(const dt_develop_mask_mode_t mask_mode,
   _box_set_visible(data->raster_box,
                    data->raster_inited && (mask_mode & DEVELOP_MASK_RASTER));
 
-  _box_set_visible(data->ai_box,
-                   data->ai_masks_inited && (mask_mode & DEVELOP_MASK_AI));
-
   if(data->blendif_inited && (mask_mode & DEVELOP_MASK_CONDITIONAL))
   {
     _box_set_visible(data->blendif_box, TRUE);
@@ -1539,14 +1534,6 @@ static gboolean _blendop_masks_modes_raster_toggled(GtkToggleButton *button,
                                      DEVELOP_MASK_ENABLED | DEVELOP_MASK_RASTER);
 }
 
-static gboolean _blendop_masks_modes_ai_toggled(GtkToggleButton *button,
-                                                    GdkEventButton *event,
-                                                    dt_iop_module_t *module)
-{
-  return _blendop_masks_modes_toggle(button, module,
-                                     DEVELOP_MASK_ENABLED | DEVELOP_MASK_AI);
-}
-
 static gboolean _blendop_blendif_suppress_toggled(GtkToggleButton *togglebutton,
                                                   GdkEventButton *event,
                                                   dt_iop_module_t *module)
@@ -1669,34 +1656,6 @@ static gboolean _blendop_masks_add_shape(GtkWidget *widget,
   return TRUE;
 }
 
-
-static gboolean _blendop_masks_add_cursor(GtkWidget *widget,
-                                         GdkEventButton *event,
-                                         dt_iop_module_t *self)
-{
-  if(darktable.gui->reset
-     || event->button != GDK_BUTTON_PRIMARY)
-    return TRUE;
-
-  dt_iop_gui_blend_data_t *bd = self->blend_data;
-
-
-  // _blendop_masks_modes_toggle(NULL, self, DEVELOP_MASK_MASK);
-
-  // we want to be sure that the iop has focus
-  dt_iop_request_focus(self);
-  dt_iop_color_picker_reset(self, FALSE);
-  bd->masks_shown = DT_MASKS_EDIT_FULL;
-  // we create the new form
-  dt_masks_form_t *form = dt_masks_create(DT_MASKS_POINT);
-  dt_masks_change_form_gui(form);
-  darktable.develop->form_gui->creation_module = self;
-
-  dt_control_queue_redraw_center();
-
-  return TRUE;
-}
-
 static gboolean _blendop_masks_show_and_edit(GtkWidget *widget,
                                              GdkEventButton *event,
                                              dt_iop_module_t *self)
@@ -2872,14 +2831,6 @@ void dt_iop_gui_init_masks(GtkWidget *blendw, dt_iop_module_t *module)
                                                   FALSE, 0, 0,
                                                   dtgtk_cairo_paint_masks_brush, abox);
 
-    bd->masks_type[5] = DT_MASKS_POINT;
-    bd->masks_shapes[5] = dt_iop_togglebutton_new(module, "blend`shapes",
-                                                  N_("add point"),
-                                                  N_("add multiple points"),
-                                                  G_CALLBACK(_blendop_masks_add_shape),
-                                                  FALSE, 0, 0,
-                                                  dtgtk_cairo_paint_masks_ai, abox);
-
     bd->masks_type[1] = DT_MASKS_PATH;
     bd->masks_shapes[1] = dt_iop_togglebutton_new(module, "blend`shapes",
                                                   N_("add path"),
@@ -2959,63 +2910,6 @@ static void _raster_combo_populate(GtkWidget *w,
   }
 }
 
-static void _masks_ai_execute(GtkButton *button,
-                                      GdkEventButton *event,
-                                      dt_iop_module_t *module)
-{
-  if(event->button != 1
-     && event->button != 2)
-    return;
-
-  printf("Executing AI\n");
- 
-  dtgtk_button_set_active(DTGTK_BUTTON(button), FALSE);
-
-}
-
-static void _masks_ai_threshold_update(GtkWidget *slider,
-                                                   dt_iop_gui_blend_data_t *data)
-{
-  if(darktable.gui->reset
-     || !data
-     || !data->ai_masks_inited)
-    return;
-
-  //dt_develop_blend_params_t *bp = data->module->blend_params;
-  //const int tab = data->tab;
-
-  const float value = dt_bauhaus_slider_get(slider);
-  printf("%f\n", value);
-  /*
-  for(int in_out = 1; in_out >= 0; in_out--)
-  {
-    const int ch = data->channel[tab].param_channels[in_out];
-    float off = 0.0f;
-    if(data->csp == DEVELOP_BLEND_CS_LAB
-       && (ch == DEVELOP_BLENDIF_A_in || ch == DEVELOP_BLENDIF_A_out
-        || ch == DEVELOP_BLENDIF_B_in || ch == DEVELOP_BLENDIF_B_out))
-    {
-      off = 0.5f;
-    }
-    const float new_value = value + data->channel[tab].boost_factor_offset;
-    const float old_value = bp->blendif_boost_factors[ch];
-    const float factor = exp2f(old_value) / exp2f(new_value);
-    float *parameters = &(bp->blendif_parameters[4 * ch]);
-    if(parameters[0] > 0.0f) parameters[0] = CLIP((parameters[0] - off) * factor + off);
-    if(parameters[1] > 0.0f) parameters[1] = CLIP((parameters[1] - off) * factor + off);
-    if(parameters[2] < 1.0f) parameters[2] = CLIP((parameters[2] - off) * factor + off);
-    if(parameters[3] < 1.0f) parameters[3] = CLIP((parameters[3] - off) * factor + off);
-    if(parameters[1] == 0.0f && parameters[2] == 1.0f)
-      bp->blendif &= ~(1 << ch);
-    bp->blendif_boost_factors[ch] = new_value;
-  }
-  _blendop_blendif_update_tab(data->module, tab);
-
-  dt_dev_add_history_item(darktable.develop, data->module, TRUE);
-  */
-}
-
-
 static void _raster_value_changed_callback(GtkWidget *widget,
                                            dt_iop_module_t *module)
 {
@@ -3126,56 +3020,6 @@ void dt_iop_gui_init_raster(GtkWidget *blendw, dt_iop_module_t *module)
   }
 }
 
-void dt_iop_gui_init_ai_mask(GtkWidget *blendw, dt_iop_module_t *module)
-{
-  dt_iop_gui_blend_data_t *bd = module->blend_data;
-
-  bd->ai_box = GTK_BOX(gtk_box_new(GTK_ORIENTATION_VERTICAL, 0));
-  _add_wrapped_box(blendw, bd->ai_box, "mask_ai");
-
-  /* create and add raster support if module supports it (it's coupled
-   * to masks at the moment) */
-  if(bd->masks_support)
-  {
-    GtkWidget *hbox2 = gtk_box_new(GTK_ORIENTATION_HORIZONTAL, 0);
-    gtk_box_pack_start(GTK_BOX(hbox2), dt_ui_label_new(_("AI Mask")), TRUE, TRUE, 0);
-    dt_gui_add_class(hbox2, "dt_section_label");
-
-    GtkWidget *box = gtk_box_new(GTK_ORIENTATION_VERTICAL, 0);
-
-    bd->execute_ai = dt_iop_button_new(module, N_("Generate mask"),
-                                      G_CALLBACK(_masks_ai_execute), FALSE, 0, 0,
-                                      NULL, 0, box);
-
-    bd->ai_threshold = dt_bauhaus_slider_new_with_range(module, 0.0f, 100.0f, 0, 0.0f, 1);
-    dt_bauhaus_slider_set_format(bd->ai_threshold, _(" %"));
-    dt_bauhaus_widget_set_label(bd->ai_threshold,
-                                N_("blend"), N_("threshold"));
-    // dt_bauhaus_slider_set_soft_range(bd->ai_threshold, 0.0, 3.0);
-    gtk_widget_set_tooltip_text(bd->ai_threshold,
-                                _("adjust threshold of the mask"));
-    gtk_widget_set_sensitive(bd->ai_threshold, TRUE);
-
-    g_signal_connect(G_OBJECT(bd->ai_threshold), "value-changed",
-                     G_CALLBACK(_masks_ai_threshold_update), bd);
-
-    gtk_box_pack_start(GTK_BOX(box), GTK_WIDGET(bd->ai_threshold), TRUE, FALSE, 0);
-
-
-
-    bd->ai_cursor_add = dt_iop_togglebutton_new(module, "blend`shapes",
-                                                  N_("add cursor"),
-                                                  NULL,
-                                                  G_CALLBACK(_blendop_masks_add_cursor),// G_CALLBACK(_blendop_masks_add_shape),
-                                                  FALSE, 0, 0,
-                                                  dtgtk_cairo_paint_masks_circle, box);
-
-    gtk_box_pack_start(GTK_BOX(bd->ai_box), GTK_WIDGET(hbox2), TRUE, TRUE, 0);
-    gtk_box_pack_start(GTK_BOX(bd->ai_box), GTK_WIDGET(box), TRUE, TRUE, 0);
-
-    bd->ai_masks_inited = TRUE;
-  }
-}
 void dt_iop_gui_cleanup_blending(dt_iop_module_t *module)
 {
   if(!module->blend_data) return;
@@ -3498,7 +3342,6 @@ void dt_iop_gui_update_blending(dt_iop_module_t *module)
   }
 
   _box_set_visible(bd->raster_box, bd->raster_inited && (mask_mode & DEVELOP_MASK_RASTER));
-  _box_set_visible(bd->ai_box, bd->ai_masks_inited && (mask_mode & DEVELOP_MASK_AI));
 
   if(bd->blendif_inited && (mask_mode & DEVELOP_MASK_CONDITIONAL))
   {
@@ -3685,19 +3528,6 @@ void dt_iop_gui_init_blending(GtkWidget *iopw,
       bd->masks_modes_toggles = g_list_append(bd->masks_modes_toggles, GTK_WIDGET(but));
     }
 
-    if (bd->masks_support){
-      but = dt_iop_togglebutton_new(module, "blend`masks",
-                                    N_("AI mask"), NULL,
-                                    G_CALLBACK(_blendop_masks_modes_ai_toggled),
-                                    FALSE, 0, 0,
-                                    dtgtk_cairo_paint_masks_ai, NULL);
-      bd->masks_modes
-          = g_list_append(bd->masks_modes,
-                          GUINT_TO_POINTER(DEVELOP_MASK_ENABLED | DEVELOP_MASK_AI));
-      bd->masks_modes_toggles = g_list_append(bd->masks_modes_toggles, GTK_WIDGET(but));
-
-    }
-
     GtkWidget *presets_button = dtgtk_button_new(dtgtk_cairo_paint_presets, 0, NULL);
     gtk_widget_set_tooltip_text(presets_button, _("blending options"));
     if(bd->blendif_support)
@@ -3902,7 +3732,6 @@ void dt_iop_gui_init_blending(GtkWidget *iopw,
     dt_iop_gui_init_masks(iopw, module);
     dt_iop_gui_init_raster(iopw, module);
     dt_iop_gui_init_blendif(iopw, module);
-    dt_iop_gui_init_ai_mask(iopw, module);
 
     bd->bottom_box = GTK_BOX(gtk_box_new(GTK_ORIENTATION_VERTICAL, 0));
     gtk_box_pack_start(GTK_BOX(bd->bottom_box),
@@ -3926,7 +3755,6 @@ void dt_iop_gui_init_blending(GtkWidget *iopw,
     gtk_widget_set_name(GTK_WIDGET(bd->top_box), "blending-box");
     gtk_widget_set_name(GTK_WIDGET(bd->masks_box), "blending-box");
     gtk_widget_set_name(GTK_WIDGET(bd->raster_box), "blending-box");
-    gtk_widget_set_name(GTK_WIDGET(bd->ai_box), "blending-box");
     gtk_widget_set_name(GTK_WIDGET(bd->blendif_box), "blending-box");
     gtk_widget_set_name(GTK_WIDGET(bd->bottom_box), "blending-box");
     gtk_widget_set_name(GTK_WIDGET(iopw), "blending-wrapper");
diff --git a/src/develop/masks.h b/src/develop/masks.h
index 7b832f210d09..9951b3602da7 100644
--- a/src/develop/masks.h
+++ b/src/develop/masks.h
@@ -327,12 +327,6 @@ typedef struct dt_masks_functions_t
                       const int num_points);
 } dt_masks_functions_t;
 
-typedef struct dt_masks_fast_sam_data_t{
-    bool proxy_data_initialized;
-    uint8_t *proxy_data; // Scaled-down image data
-    int proxy_width;
-    int proxy_height;
-} dt_masks_fast_sam_data_t;
 
 /** structure used to define a form */
 typedef struct dt_masks_form_t
@@ -347,8 +341,6 @@ typedef struct dt_masks_form_t
   char name[128];
   // id used to store the form
   dt_mask_id_t formid;
-  // data for the FastSAM model
-  dt_masks_fast_sam_data_t *fast_sam_data;
   // version of the form
   int version;
 } dt_masks_form_t;
diff --git a/src/develop/masks/masks.c b/src/develop/masks/masks.c
index 4f72b2ceff3b..cce4186dfbd1 100644
--- a/src/develop/masks/masks.c
+++ b/src/develop/masks/masks.c
@@ -855,15 +855,7 @@ dt_masks_form_t *dt_masks_create(dt_masks_type_t type)
   else if(type & DT_MASKS_GROUP)
     form->functions = &dt_masks_functions_group;
   else if(type & DT_MASKS_POINT)
-  {
     form->functions = &dt_masks_functions_point;
-    dt_masks_fast_sam_data_t *data = malloc(sizeof(dt_masks_fast_sam_data_t));
-    data->proxy_data_initialized = false;
-    data->proxy_data = NULL;
-    data->proxy_width = 1024;
-    data->proxy_height = 1024;
-    form->fast_sam_data = data;
-  }
 
   if(form->functions && form->functions->sanitize_config)
     form->functions->sanitize_config(type);

From 03ee389d4f8bc781fb595d8144a9b129809584df Mon Sep 17 00:00:00 2001
From: MikoMikarro <lopezcuestam@gmail.com>
Date: Sat, 1 Feb 2025 12:37:37 +0100
Subject: [PATCH 12/14] Cleaned code into separate files

---
 src/CMakeLists.txt             |   2 +
 src/develop/object_detection.c | 331 ++++++++++++++++++++++++
 src/develop/object_detection.h | 121 +++++++++
 src/develop/pixelpipe_hb.c     | 451 +--------------------------------
 src/develop/tensor_boxes.c     |  66 +++++
 src/develop/tensor_boxes.h     |  76 ++++++
 6 files changed, 599 insertions(+), 448 deletions(-)
 create mode 100644 src/develop/object_detection.c
 create mode 100644 src/develop/object_detection.h
 create mode 100644 src/develop/tensor_boxes.c
 create mode 100644 src/develop/tensor_boxes.h

diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 5b63390a36c8..55ea39ef4fc2 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -119,7 +119,9 @@ FILE(GLOB SOURCE_FILES
   "develop/masks/group.c"
   "develop/masks/masks.c"
   "develop/masks/path.c"
+  "develop/object_detection.c"
   "develop/pixelpipe.c"
+  "develop/tensor_boxes.c"
   "develop/tiling.c"
   "develop/image_file_libpng.c"
   "dtgtk/button.c"
diff --git a/src/develop/object_detection.c b/src/develop/object_detection.c
new file mode 100644
index 000000000000..5f71e8cbb698
--- /dev/null
+++ b/src/develop/object_detection.c
@@ -0,0 +1,331 @@
+#include "develop/object_detection.h"
+
+const OrtApi* g_ort = NULL;
+
+/* Computes mask[p] = sum_k coeffs[k] * protos[k * mask_px + p], i.e. one row of
+ * coeffs @ protos with protos read as a [mask_dim, mask_px] matrix. */
+static void combine_prototypes(const float *protos, const float *coeffs,
+                               int mask_dim, size_t mask_px, float *mask)
+{
+  for (size_t p = 0; p < mask_px; ++p) {
+    float acc = 0.0f;
+    for (int k = 0; k < mask_dim; ++k)
+      acc += coeffs[k] * protos[(size_t)k * mask_px + p];
+    mask[p] = acc;
+  }
+}
+
+/* Bilinearly interpolates a mask_h x mask_w mask at (src_x, src_y); neighbours
+ * that would fall past the last row or column are clamped to it. */
+static float sample_bilinear(const float *mask, int mask_h, int mask_w,
+                             float src_x, float src_y)
+{
+  // Top-left sample and its right / lower neighbours
+  const int x1 = (int)src_x;
+  const int y1 = (int)src_y;
+  const int x2 = (x1 + 1 < mask_w) ? x1 + 1 : x1;
+  const int y2 = (y1 + 1 < mask_h) ? y1 + 1 : y1;
+
+  // Fractional distances to the top-left sample are the interpolation weights
+  const float dx = src_x - x1;
+  const float dy = src_y - y1;
+
+  const float top_left = mask[(size_t)y1 * mask_w + x1];
+  const float top_right = mask[(size_t)y1 * mask_w + x2];
+  const float bottom_left = mask[(size_t)y2 * mask_w + x1];
+  const float bottom_right = mask[(size_t)y2 * mask_w + x2];
+
+  // Weighted sum of the four samples
+  return (1 - dx) * (1 - dy) * top_left +
+         dx * (1 - dy) * top_right +
+         (1 - dx) * dy * bottom_left +
+         dx * dy * bottom_right;
+}
+
+/*
+ * Builds one binary mask per detection from the prototype masks.
+ *
+ * For each detection i the low-resolution mask is the linear combination
+ *   mask_i[p] = sum_k masks_in[i * mask_dim + k] * protos[k * mask_h * mask_w + p]
+ * which is bilinearly resampled to output_h x output_w, zeroed outside the
+ * detection box and finally thresholded at zero.
+ *
+ * protos       prototype masks, laid out as [mask_dim][mask_h][mask_w]
+ * masks_in     mask coefficients, laid out as [n][mask_dim]
+ * boxes        n boxes; only x1, y1, x2 and y2 are read
+ * n            number of detections
+ * mask_dim     number of prototype masks, i.e. coefficients per detection
+ * mask_h       height of the prototype masks
+ * mask_w       width of the prototype masks
+ * output_h     height of every output mask
+ * output_w     width of every output mask
+ * output_masks caller-allocated buffer of n * output_h * output_w floats, laid
+ *              out as [n][output_h][output_w]; each element is set to 0.0f or 1.0f
+ *
+ * Nothing is written when n or any height/width is not positive. If the scratch
+ * buffer cannot be allocated, the whole output is zeroed (no mask) rather than
+ * terminating the process.
+ */
+void process_mask_native( float *protos, float *masks_in, TensorBoxes* boxes,
+                          int n, int mask_dim, int mask_h, int mask_w, int output_h,
+                          int output_w, float *output_masks)
+{
+  if (n <= 0 || mask_h <= 0 || mask_w <= 0 || output_h <= 0 || output_w <= 0)
+    return;
+
+  // Offsets are computed in size_t: n * output_h * output_w exceeds INT_MAX
+  // for a handful of masks at full image resolution.
+  const size_t mask_px = (size_t)mask_h * (size_t)mask_w;
+  const size_t out_px = (size_t)output_h * (size_t)output_w;
+
+  // Scratch buffer for the low-resolution mask of the detection being processed;
+  // reused for every detection instead of holding all n masks at once.
+  float *mask = malloc(mask_px * sizeof *mask);
+  if (!mask) {
+    // Leave a well-defined (empty) result for the caller
+    fprintf(stderr, "Memory allocation failed\n");
+    for (size_t p = 0; p < (size_t)n * out_px; ++p)
+      output_masks[p] = 0.0f;
+    return;
+  }
+
+  for (int i = 0; i < n; ++i) {
+    // Detection i fills the i-th [output_h][output_w] plane of the output
+    const TensorBoxes *box = &boxes[i];
+    float *out = output_masks + (size_t)i * out_px;
+
+    combine_prototypes(protos, masks_in + (size_t)i * (size_t)mask_dim,
+                       mask_dim, mask_px, mask);
+
+    for (int y = 0; y < output_h; ++y) {
+      // Position of this output row in mask space
+      const float src_y = (float)y * mask_h / output_h;
+
+      for (int x = 0; x < output_w; ++x) {
+        const float src_x = (float)x * mask_w / output_w;
+        const float value = sample_bilinear(mask, mask_h, mask_w, src_x, src_y);
+
+        // FIXME: the factor 4 converts mask coordinates to box coordinates and is
+        // only right when the boxes live in a space 4x the mask size.
+        const int outside = ((src_x * 4) < box->x1)
+                            || ((src_x * 4) > box->x2)
+                            || ((src_y * 4) < box->y1)
+                            || ((src_y * 4) > box->y2);
+
+        // Binary mask: 1 where the interpolated response is positive inside the box
+        out[(size_t)y * output_w + x] = (!outside && value > 0.0f) ? 1.0f : 0.0f;
+      }
+    }
+  }
+
+  free(mask);
+}
+
+/*
+ * Turns the raw model output into binary masks of output_height x output_width.
+ * input_data[0] holds the candidates as [definition_size][numb_boxes] (centre x, centre y,
+ * width, height, score, then the mask coefficients); input_data[5] holds the prototype masks.
+ * On return *output is a malloc'd [*n_masks][output_height][output_width] buffer owned by the
+ * caller, or NULL with *n_masks == 0 when nothing was kept or an allocation failed.
+ */
+void prep_out_data( float* input_data[6], int64_t definition_size, int64_t numb_boxes,
+                    float** output, size_t output_height, size_t output_width, size_t* n_masks)
+{
+  const size_t coordinates_count = 4;
+  const size_t class_count = 1;
+  const int mask_h = 256, mask_w = 256;
+  const float* mask = input_data[0];
+  float *protos = input_data[5];
+  TensorBoxes *boxes = NULL, *output_boxes = NULL;
+  float *masks_in = NULL, *output_masks = NULL;
+  size_t counter = 0, num_boxes = 0;
+
+  // Define both results before any early exit so they are never left uninitialised.
+  *output = NULL;
+  *n_masks = 0;
+
+  // Each candidate needs 4 coordinates, a score and at least one mask coefficient.
+  if (numb_boxes <= 0 || definition_size <= (int64_t)(coordinates_count + class_count))
+    return;
+  const size_t mask_dim = (size_t)definition_size - coordinates_count - class_count;
+  const size_t b_stride = (size_t)numb_boxes;
+
+  boxes = malloc(b_stride * sizeof *boxes);
+  if (!boxes) goto cleanup;
+  for (size_t i = 0; i < b_stride; i++) {
+    const float score = mask[i + 4 * b_stride];
+    const float w = mask[i + 2 * b_stride];
+    const float h = mask[i + 3 * b_stride];
+    if (score < CONF || w < 0 || h < 0)
+      continue;
+    TensorBoxes *box = &boxes[counter];
+    box->mask = malloc(mask_dim * sizeof *box->mask);
+    if (!box->mask) goto cleanup;
+    box->x1 = mask[i + 0 * b_stride] - (w / 2);
+    box->y1 = mask[i + 1 * b_stride] - (h / 2);
+    box->x2 = mask[i + 0 * b_stride] + (w / 2);
+    box->y2 = mask[i + 1 * b_stride] + (h / 2);
+    box->score = score;
+    for (size_t j = 0; j < mask_dim; j++)
+      box->mask[j] = mask[i + (5 + j) * b_stride];
+    counter++;
+  }
+  printf("counter: %zu\n", counter);
+  if (counter == 0) goto cleanup;
+
+  sort_tensor_boxes_by_score(boxes, counter);
+  output_boxes = malloc(counter * sizeof *output_boxes);
+  if (!output_boxes) goto cleanup;
+  num_boxes = NMS(boxes, counter, output_boxes);
+  printf("num_boxes: %zu\n", num_boxes);
+  if (num_boxes == 0) goto cleanup;
+
+  masks_in = malloc(num_boxes * mask_dim * sizeof *masks_in);
+  output_masks = malloc(output_height * output_width * num_boxes * sizeof *output_masks);
+  if (!masks_in || !output_masks) { free(output_masks); goto cleanup; }
+
+  for (size_t i = 0; i < num_boxes; ++i)
+    for (size_t j = 0; j < mask_dim; ++j)
+      masks_in[i * mask_dim + j] = output_boxes[i].mask[j];
+
+  process_mask_native(protos, masks_in, output_boxes, num_boxes, mask_dim, mask_h, mask_w, output_height, output_width, output_masks);
+  *output = output_masks;
+  *n_masks = num_boxes;
+
+cleanup:
+  // NOTE(review): assumes NMS() copies structs shallowly, so mask buffers stay owned by boxes[] - confirm.
+  for (size_t i = 0; i < counter; i++)
+    free(boxes[i].mask);
+  free(boxes);
+  free(output_boxes);
+  free(masks_in);
+}
+
+// Nearest-neighbour resample of a planar (CHW) 3-channel image into a malloc'd buffer
+// owned by the caller; on allocation failure *out is NULL and *output_count is 0.
+void resize_image(const float** input, const int input_height, const int input_width,
+                  float** out, size_t output_height, size_t output_width,
+                  size_t* output_count)
+{
+  const size_t out_stride = output_height * output_width;
+  const size_t in_stride = (size_t)input_height * (size_t)input_width;
+  const float height_ratio = (float)input_height / (float)output_height;
+  const float width_ratio = (float)input_width / (float)output_width;
+  float* output_data = malloc(3 * out_stride * sizeof *output_data); // height * width, not width * width
+  for (size_t c = 0; output_data && c < 3; c++){
+    for (size_t i = 0; i < output_height; i++){
+      const size_t input_i = (size_t)((float)i * height_ratio);
+      for (size_t j = 0; j < output_width; j++){
+        const size_t input_j = (size_t)((float)j * width_ratio);
+        output_data[c * out_stride + i * output_width + j] = (*input)[c * in_stride + input_i * input_width + input_j];
+      }
+    }
+  }
+  *out = output_data;
+  *output_count = output_data ? out_stride * 3 : 0;
+}
+
+// Converts an interleaved 3-channel 8-bit image (HWC) to planar floats (CHW) scaled by 1/255.
+// *output is malloc'd and owned by the caller; on allocation failure it is NULL and *output_count is 0.
+void hwc_to_chw(const uint8_t* input, const int h, const int w,
+                float** output, size_t* output_count) {
+  const size_t stride = (size_t)h * (size_t)w; // widen first: h * w can overflow int
+  float* output_data = malloc(3 * stride * sizeof *output_data);
+
+  for (size_t i = 0; output_data && i != stride; ++i) {
+    for (size_t c = 0; c != 3; ++c) {
+      output_data[c * stride + i] = ((float)input[i * 3 + c])/255.0; // 3 bytes per pixel are read, so no alpha channel is expected
+    }
+  }
+  *output = output_data;
+  *output_count = output_data ? stride * 3 : 0;
+}
+
+
+/*
+ * Runs the model on a planar 3-channel float image of h x w pixels, resampled to the
+ * MODEL_SIZE x MODEL_SIZE network input. On success *out receives the masks built by
+ * prep_out_data() at h x w (caller frees) and *n_masks their count; *out is NULL and
+ * *n_masks is 0 when no mask was produced. Returns 0 on success and 1 on invalid input,
+ * allocation failure or unexpected model output; ONNX Runtime errors abort the process
+ * through ORT_ABORT_ON_ERROR.
+ */
+int run_inference(OrtSession* session, const float* input_image, const int h, const int w,
+                  float** out, size_t * n_masks) {
+  enum { MODEL_SIZE = 1024, OUTPUT_COUNT = 6 };
+  const char* input_names[] = {"images"};
+  const char* output_names[OUTPUT_COUNT] = {"output0", "output1", "onnx::Shape_1304", "onnx::Shape_1323",
+                                            "onnx::Concat_1263", "onnx::Shape_1215"};
+  const int64_t input_shape[] = {1, 3, MODEL_SIZE, MODEL_SIZE};
+  const size_t input_shape_len = sizeof(input_shape) / sizeof(input_shape[0]);
+  ONNXTensorElementDataType data_type = ONNX_TENSOR_ELEMENT_DATA_TYPE_UNDEFINED;
+  OrtTensorTypeAndShapeInfo* tensor_info = NULL;
+  OrtMemoryInfo* memory_info = NULL;
+  OrtValue* input_tensor = NULL;
+  OrtValue* output_tensor[OUTPUT_COUNT] = { NULL };
+  float* output_tensor_data_t[OUTPUT_COUNT] = { NULL };
+  float* model_input = NULL;
+  int64_t* shape = NULL;
+  size_t model_input_ele_count = 0;
+  size_t num_dims = 0;
+  int is_tensor = 0;
+  int ret = 1;
+
+  // Define both results up front so no exit path leaves them uninitialised.
+  *out = NULL;
+  *n_masks = 0;
+
+  printf("Roi h:%d, w:%d\n", h, w);
+  if (h <= 0 || w <= 0)
+    return ret;
+
+  resize_image(&input_image, h, w, &model_input, MODEL_SIZE, MODEL_SIZE, &model_input_ele_count);
+  if (!model_input)
+    return ret;
+
+  // The tensor wraps model_input without copying, so that buffer is freed last.
+  ORT_ABORT_ON_ERROR(g_ort->CreateCpuMemoryInfo(OrtArenaAllocator, OrtMemTypeDefault, &memory_info));
+  ORT_ABORT_ON_ERROR(g_ort->CreateTensorWithDataAsOrtValue(memory_info, model_input,
+                                                           model_input_ele_count * sizeof(float), input_shape,
+                                                           input_shape_len, ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT,
+                                                           &input_tensor));
+
+  printf("Running inference\n");
+  ORT_ABORT_ON_ERROR(g_ort->Run(session, NULL, input_names, (const OrtValue* const*)&input_tensor, 1, output_names,
+                                OUTPUT_COUNT, output_tensor));
+  printf("Inference done\n");
+
+  for (int i = 0; i < OUTPUT_COUNT; i++) {
+    ORT_ABORT_ON_ERROR(g_ort->IsTensor(output_tensor[i], &is_tensor));
+    if (!is_tensor)
+      goto cleanup;
+    ORT_ABORT_ON_ERROR(g_ort->GetTensorMutableData(output_tensor[i], (void**)&output_tensor_data_t[i]));
+  }
+
+  // shape[1] and shape[2] of output0 are handed to prep_out_data() as the per-box
+  // definition size and the box count, so the tensor needs at least three dimensions.
+  ORT_ABORT_ON_ERROR(g_ort->GetTensorTypeAndShape(output_tensor[0], &tensor_info));
+  ORT_ABORT_ON_ERROR(g_ort->GetTensorElementType(tensor_info, &data_type));
+  ORT_ABORT_ON_ERROR(g_ort->GetDimensionsCount(tensor_info, &num_dims));
+  if (data_type != ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT || num_dims < 3)
+    goto cleanup;
+
+  shape = malloc(num_dims * sizeof *shape);
+  if (!shape)
+    goto cleanup;
+  ORT_ABORT_ON_ERROR(g_ort->GetDimensions(tensor_info, shape, num_dims));
+
+  prep_out_data(output_tensor_data_t, shape[1], shape[2], out, h, w, n_masks);
+  ret = 0;
+
+cleanup:
+  for (int i = 0; i < OUTPUT_COUNT; i++) {
+    if (output_tensor[i]) g_ort->ReleaseValue(output_tensor[i]);
+  }
+  if (tensor_info) g_ort->ReleaseTensorTypeAndShapeInfo(tensor_info);
+  if (input_tensor) g_ort->ReleaseValue(input_tensor);
+  if (memory_info) g_ort->ReleaseMemoryInfo(memory_info);
+  free(shape);
+  free(model_input);
+  return ret;
+}
\ No newline at end of file
diff --git a/src/develop/object_detection.h b/src/develop/object_detection.h
new file mode 100644
index 000000000000..54fee246f9f7
--- /dev/null
+++ b/src/develop/object_detection.h
@@ -0,0 +1,121 @@
+#ifndef OBJECT_DETECTION_H
+#define OBJECT_DETECTION_H
+
+#include "onnxruntime_c_api.h"
+#include "develop/tensor_boxes.h"
+
+#include <stdio.h>
+
+#define tcscmp strcmp
+
+#define ORT_ABORT_ON_ERROR(expr)                             \
+  do {                                                       \
+    OrtStatus* onnx_status = (expr);                         \
+    if (onnx_status != NULL) {                               \
+      const char* msg = g_ort->GetErrorMessage(onnx_status); \
+      fprintf(stderr, "%s\n", msg);                          \
+      g_ort->ReleaseStatus(onnx_status);                     \
+      abort();                                               \
+    }                                                        \
+  } while (0);
+
+
+extern const OrtApi* g_ort;
+
+/**
+ * Processes input masks by applying mask prototypes and resizing them to the desired output dimensions.
+ * Allocates memory for intermediate and output masks and performs matrix multiplication followed by bilinear interpolation.
+ *
+ * @param protos       Pointer to the mask prototypes with dimensions [mask_dim, mask_h, mask_w].
+ * @param masks_in     Pointer to input masks with dimensions [n, mask_dim].
+ * @param boxes        Pointer to an array of TensorBoxes, each associated with a mask.
+ * @param n            Number of input masks.
+ * @param mask_dim     Number of channels in the masks (dimension of protos).
+ * @param mask_h       Height of the prototype masks.
+ * @param mask_w       Width of the prototype masks.
+ * @param output_h     Height of the output masks.
+ * @param output_w     Width of the output masks.
+ * @param output_masks Pointer to the output masks with dimensions [output_h, output_w, n], where each mask is boolean.
+ *
+ * The function performs matrix multiplication of `masks_in` with flattened `protos` to generate intermediate masks,
+ * followed by bilinear interpolation to resize the masks to the specified output dimensions. The output masks are
+ * thresholded to boolean values based on the interpolation results.
+ */
+void process_mask_native( float *protos, float *masks_in, TensorBoxes* boxes,
+                          int n, int mask_dim, int mask_h, int mask_w, int output_h,
+                          int output_w, float *output_masks); 
+
+
+/**
+ * @brief Prepares the output data for the object detection model
+ * 
+ * This function takes the output of the object detection model and prepares 
+ * it for the rest of the program. It first selects the bounding boxes with 
+ * a score higher than a certain threshold, then applies non-maximum suppression
+ * and finally generates the corresponding masks.
+ * 
+ * @param input_data The output of the object detection model
+ * @param definition_size The size of the definition of the model
+ * @param numb_boxes The number of bounding boxes in the output
+ * @param output A pointer to a float array that will contain the masks
+ * @param output_height The height of the masks
+ * @param output_width The width of the masks
+ * @param n_masks A pointer to a size_t that will contain the number of masks
+ */
+void prep_out_data( float* input_data[6], int64_t definition_size, int64_t numb_boxes,
+                    float** output, size_t output_height, size_t output_width, size_t* n_masks);
+ 
+/**
+ * @brief Resize an image
+ *
+ * Resizes an image from (input_height, input_width) to (output_height, output_width).
+ * The resized image is stored in a float array of size (output_height, output_width, 3).
+ *
+ * @param[in] input The input image as a float array of size (input_height, input_width, 3).
+ * @param[in] input_height The height of the input image.
+ * @param[in] input_width The width of the input image.
+ * @param[out] out The resized image as a float array of size (output_height, output_width, 3).
+ * @param[in] output_height The height of the output image.
+ * @param[in] output_width The width of the output image.
+ * @param[out] output_count The size of the output array.
+ */
+void resize_image(const float** input, const int input_height, const int input_width,
+                  float** out, size_t output_height, size_t output_width,
+                  size_t* output_count);
+
+/**
+ * @brief Convert an image in HWC format to CHW format.
+ *
+ * This function takes an image in HWC (Height, Width, Channels) format and
+ * converts it to CHW (Channels, Height, Width) format. The input image is
+ * expected to be in RGBA format, and the output image is in RGB format.
+ *
+ * @param input The input image in HWC format.
+ * @param h The height of the input image.
+ * @param w The width of the input image.
+ * @param output The output image in CHW format.
+ * @param output_count The number of elements in the output image.
+ */
+void hwc_to_chw(const uint8_t* input, const int h, const int w,
+                float** output, size_t* output_count);
+
+/**
+ * @brief Run object detection inference on a region of interest (ROI).
+ *
+ * @param session OrtSession* to run the inference on
+ * @param input_image const float* input image data
+ * @param h const int height of the ROI
+ * @param w const int width of the ROI
+ * @param out float** output data
+ * @param n_masks size_t* number of masks
+ *
+ * @return int 0 on completion; ONNX Runtime errors abort the process via ORT_ABORT_ON_ERROR
+ *
+ * The model input is resized to 1024x1024 and then run through the object detection model.
+ * The output is a set of masks, where each mask is a 2D array of size h x w.
+ * The number of masks is stored in the n_masks parameter.
+ */
+ int run_inference(OrtSession* session, const float* input_image, const int h, const int w,
+                   float** out, size_t * n_masks);
+
+#endif
\ No newline at end of file
diff --git a/src/develop/pixelpipe_hb.c b/src/develop/pixelpipe_hb.c
index ccbb0a467e02..9da7cbef306f 100644
--- a/src/develop/pixelpipe_hb.c
+++ b/src/develop/pixelpipe_hb.c
@@ -36,11 +36,12 @@
 #include "libs/lib.h"
 #include "gui/color_picker_proxy.h"
 
-#include "onnxruntime_c_api.h"
 #include "common/image_cache.h"
 
 #include "develop/image_file.h"
 
+#include "develop/object_detection.h"
+
 #include <assert.h>
 #include <stdint.h>
 #include <stdlib.h>
@@ -1126,444 +1127,6 @@ static void _collect_histogram_on_CPU(dt_dev_pixelpipe_t *pipe,
   }
 }
 
-#define tcscmp strcmp
-
-const OrtApi* g_ort = NULL;
-float conf = 0.25;
-float iou_threshold = 0.65;
-#define ORT_ABORT_ON_ERROR(expr)                             \
-  do {                                                       \
-    OrtStatus* onnx_status = (expr);                         \
-    if (onnx_status != NULL) {                               \
-      const char* msg = g_ort->GetErrorMessage(onnx_status); \
-      fprintf(stderr, "%s\n", msg);                          \
-      g_ort->ReleaseStatus(onnx_status);                     \
-      abort();                                               \
-    }                                                        \
-  } while (0);
-
-
-typedef struct {
-  float x1;
-  float y1;
-  float x2;
-  float y2;
-  float score;
-  float* mask;
-} TensorBoxes;
-
-float max(float a, float b) {
-    return (a > b) ? a : b;
-}
-
-float min(float a, float b) {
-    return (a < b) ? a : b;
-}
-float IoU(TensorBoxes a, TensorBoxes b) {
-    float x1 = max(a.x1, b.x1);
-    float y1 = max(a.y1, b.y1);
-    float x2 = min(a.x2, b.x2);
-    float y2 = min(a.y2, b.y2);
-
-    float intersection = max(0, x2 - x1 + 1) * max(0, y2 - y1 + 1);
-    float areaA = (a.x2 - a.x1 + 1) * (a.y2 - a.y1 + 1);
-    float areaB = (b.x2 - b.x1 + 1) * (b.y2 - b.y1 + 1);
-
-    return intersection / (areaA + areaB - intersection);
-}
-
-static int compare_scores(const void* a, const void* b) {
-    TensorBoxes* boxA = (TensorBoxes*)a;
-    TensorBoxes* boxB = (TensorBoxes*)b;
-    if (boxA->score == boxB->score){
-      // Sort in descending order of area
-      float A_area = (boxA->x2 - boxA->x1) * (boxA->y2 - boxA->y1);
-      float B_area = (boxB->x2 - boxB->x1) * (boxB->y2 - boxB->y1);
-      if (A_area < B_area) return 1;
-      if (A_area > B_area) return -1;
-      return 0;
-    }
-    // Sort in descending order of score
-    if (boxA->score < boxB->score) return 1;
-    if (boxA->score > boxB->score) return -1;
-    return 0;
-}
-
-// Function to sort an array of TensorBoxes
-static void sort_tensor_boxes_by_score(TensorBoxes* boxes, size_t count) {
-    qsort(boxes, count, sizeof(TensorBoxes), compare_scores);
-}
-
-static size_t NMS(TensorBoxes* boxes, size_t count, TensorBoxes* output) {
-
-  qsort(boxes, count, sizeof(TensorBoxes), compare_scores);
-
-    char* suppressed = (char*)calloc(count, sizeof(char)); // 0 = not suppressed, 1 = suppressed
-    size_t output_count = 0;
-
-    for (size_t i = 0; i < count; i++) {
-        if (suppressed[i]) continue; // Skip if the box is suppressed
-
-        output[output_count++] = boxes[i]; // Add the current box to output
-
-        for (size_t j = i + 1; j < count; j++) {
-            if (suppressed[j]) continue; // Skip if already suppressed
-
-            float iou = IoU(boxes[i], boxes[j]);
-            if (iou > iou_threshold) {
-                suppressed[j] = 1; // Suppress the box
-            }
-        }
-    }
-
-    free(suppressed);
-    return output_count; // Return the number of boxes kept
-}
-
-static void process_mask_native(
-    float *protos,       // [mask_dim, mask_h, mask_w]
-    float *masks_in,     // [n, mask_dim]
-    TensorBoxes* boxes,  // [n]
-    int n,               // Number of masks
-    int mask_dim,        // Channels
-    int mask_h,          // Height of protos
-    int mask_w,          // Width of protos
-    int output_h,        // Desired output height
-    int output_w,        // Desired output width
-    float *output_masks   // [output_h, output_w, n], boolean output
-    
-) {
-    // Allocate intermediate storage for masks [n, mask_h, mask_w]
-    float *masks = (float *)malloc(n * mask_h * mask_w * sizeof(float));
-    printf("Allocated masks");
-    if (!masks) {
-        fprintf(stderr, "Memory allocation failed\n");
-        exit(EXIT_FAILURE);
-    }
-
-    // Flattened version of `protos` reshaped to [mask_dim, mask_h * mask_w]
-    float *protos_flat = (float *)malloc(mask_dim * mask_h * mask_w * sizeof(float));
-    printf("Allocated protos");
-    if (!protos_flat) {
-        fprintf(stderr, "Memory allocation failed\n");
-        free(masks);
-        exit(EXIT_FAILURE);
-    }
-
-    printf("Allocated everything");
-
-    // Flatten protos
-    for (int c = 0; c < mask_dim; ++c) {
-        for (int i = 0; i < mask_h * mask_w; ++i) {
-            protos_flat[c * (mask_h * mask_w) + i] = protos[c * mask_h * mask_w + i];
-        }
-    }
-
-    printf("Flattened protos");
-
-    // Perform masks_in @ protos
-    for (int i = 0; i < n; ++i) {
-        for (int j = 0; j < mask_h * mask_w; ++j) {
-            masks[i * mask_h * mask_w + j] = 0.0f;
-            for (int k = 0; k < mask_dim; ++k) {
-                masks[i * mask_h * mask_w + j] += masks_in[i * mask_dim + k] * protos_flat[k * (mask_h * mask_w) + j];
-            }
-        }
-    }
-
-    printf("Created masks");
-
-    float *max_value = (float *)malloc(n  * sizeof(float));
-    float *min_value = (float *)malloc(n  * sizeof(float));
-
-    // Threshold and create masks
-    for (int i = 0; i < n; ++i) {
-        max_value[i] = 0;
-        min_value[i] = 0;
-        TensorBoxes current_box = boxes[i];
-        if (i == 0){
-          printf("x1 %f, x2 %f, y1 %f, y2 %f\n", current_box.x1, current_box.x2, current_box.y1, current_box.y2);
-        }
-        for (int y = 0; y < output_h; ++y) {
-            for (int x = 0; x < output_w; ++x) {
-                
-                // Calculate the corresponding coordinates in the original mask
-                float src_x = (float)x * mask_w / output_w;
-                float src_y = (float)y * mask_h / output_h;
-
-                // Find the four nearest neighbors
-                int x1 = (int)src_x;
-                int y1 = (int)src_y;
-                int x2 = (x1 + 1 < mask_w) ? x1 + 1 : x1;
-                int y2 = (y1 + 1 < mask_h) ? y1 + 1 : y1;
-
-                // Calculate the distances (weights) for interpolation
-                float dx = src_x - x1;
-                float dy = src_y - y1;
-
-                // Get the pixel values from the original mask
-                float top_left = masks[i * mask_h * mask_w + y1 * mask_w + x1];
-                float top_right = masks[i * mask_h * mask_w + y1 * mask_w + x2];
-                float bottom_left = masks[i * mask_h * mask_w + y2 * mask_w + x1];
-                float bottom_right = masks[i * mask_h * mask_w + y2 * mask_w + x2];
-
-                // Perform bilinear interpolation
-                float interpolated_value = (1 - dx) * (1 - dy) * top_left +
-                                                   dx * (1 - dy) * top_right +
-                                                   (1 - dx) * dy * bottom_left +
-                                                   dx * dy * bottom_right;
-
-                // Set the value in the output mask
-                int idx_out = i * output_h * output_w + y * output_w + x;
-
-                // FIXME
-                if (((src_x*4) < current_box.x1) || ((src_x*4) > current_box.x2) || ((src_y*4) < current_box.y1) || ((src_y*4) > current_box.y2))
-                  interpolated_value = 0.0f;
-
-                output_masks[idx_out] = interpolated_value;
-                if (interpolated_value > max_value[i]) max_value[i] = output_masks[idx_out];
-                if (interpolated_value < min_value[i]) min_value[i] = output_masks[idx_out];
-            }
-        }
-    }
-
-    printf("Loaded masks");
-
-    for (int i = 0; i < n; ++i) {
-        for (int y = 0; y < output_h; ++y) {
-            for (int x = 0; x < output_w; ++x) {
-                int idx_out = i * output_h * output_w + y * output_w + x;
-                if (output_masks[idx_out] > 0.0f)
-                  output_masks[idx_out] = 1.0; // output_masks[idx_out] / max_value[i];
-                else
-                  output_masks[idx_out] = 0.0f;
-            }
-        }
-    }
-
-    printf("Refined masks");
-    // Free allocated memory
-    free(masks);
-    free(protos_flat);
-    free(min_value);
-    free(max_value);
-}
-
-static void prep_out_data(float* input_data[6], int64_t definition_size, int64_t numb_boxes, float** output, size_t output_height, size_t output_width, size_t* n_masks){
-  
-  float* mask = input_data[0];
-  
-  TensorBoxes* boxes = malloc(numb_boxes * sizeof(TensorBoxes));
-
-  size_t coordinates_count = 4;
-  size_t class_count = 1;
-  size_t mask_dim = definition_size - coordinates_count - class_count;
-  size_t b_stride = numb_boxes;
-  size_t counter = 0;
-  for (int64_t i = 0; i < numb_boxes; i++) {
-
-    float score = mask[i + 4 * b_stride];
-    if (score < conf) {
-      continue;
-    }
-    float w = mask[i + (2 * b_stride)];
-    float h = mask[i + (3 * b_stride)];
-
-    if (w < 0 || h < 0) {
-      continue;
-    }
-    
-
-    boxes[counter].x1 = mask[i + (0 * b_stride) ] - (w / 2);
-    boxes[counter].y1 = mask[i + (1 * b_stride) ] - (h / 2);
-    boxes[counter].x2 = mask[i + (0 * b_stride) ] + (w / 2);
-    boxes[counter].y2 = mask[i + (1 * b_stride) ] + (h / 2);
-    
-    boxes[counter].score = score;
-
-    boxes[counter].mask = (float*)malloc(mask_dim * sizeof(float));
-    for (size_t j = 0; j < mask_dim; j++) {
-      boxes[counter].mask[j] = mask[i  + (5 + j) * b_stride ];
-    }
-    counter++;
-  }
-
-  printf("counter: %ld\n", counter);
-  // assert(counter > 0);
-  if (counter == 0){
-    return;
-  }
-  boxes = realloc(boxes, counter * sizeof(TensorBoxes));
-
-  sort_tensor_boxes_by_score(boxes, counter);
-  
-  TensorBoxes* output_boxes = (TensorBoxes*)malloc(counter * sizeof(TensorBoxes));
-  size_t num_boxes = NMS(boxes, counter, output_boxes);
-
-  printf("num_boxes: %ld\n", num_boxes);
-
-  output_boxes = realloc(output_boxes, num_boxes * sizeof(TensorBoxes));
-
-  int mask_h = 256, mask_w = 256;
-
-  // Allocate and initialize inputs
-  float *protos = input_data[5];
-  float *masks_in = (float *)malloc(num_boxes * mask_dim * sizeof(float));
-  float *output_masks = (float *)malloc(output_height * output_width * num_boxes * sizeof(float));
-
-  for (size_t i = 0; i < num_boxes; ++i){
-    for (size_t j = 0; j < mask_dim; ++j){
-      masks_in[i * mask_dim + j] = output_boxes[i].mask[j];
-    }
-  }
-
-  // Call the function
-  printf("Preparing masks\n");
-  
-  process_mask_native(protos, masks_in, output_boxes, num_boxes, mask_dim, mask_h, mask_w, output_height, output_width, output_masks);
-  printf("Masks generated");
-
-  *output = output_masks;
-  *n_masks = num_boxes;
-  printf("Mask loaded");
-
-  for (size_t i = 0; i < counter; i++) {
-    free(boxes[i].mask);
-  }
-  free(boxes);
-  
-}
-
-static void resize_image(const float** input, const int input_height, const int input_width, float** out, size_t output_height, size_t output_width, size_t* output_count)
-{
-  float* output_data = (float*)malloc(3 * output_width * output_width * sizeof(float));
-  size_t out_stride = output_height * output_width;
-  size_t in_stride = input_height * input_width;
-  float height_ratio = (float)input_height / (float)output_height;
-  float width_ratio = (float)input_width / (float)output_width;
-
-  for (size_t c = 0; c < 3; c++){
-    for (size_t i = 0; i < output_height; i++){
-      for (size_t j = 0; j < output_width; j++){
-        size_t input_j = (size_t)((float)j * width_ratio);
-        size_t input_i = (size_t)((float)i * height_ratio);
-        float input_d = (*input)[c * in_stride + input_i * input_width + input_j];
-        output_data[c * out_stride + i * output_width + j] = input_d;
-      }
-    }
-  }
-  *out = output_data;
-  *output_count = out_stride * 3;
-};
-
-void hwc_to_chw(const uint8_t* input, const int h, const int w, float** output, size_t* output_count) {
-  size_t stride = h * w;
-  *output_count = stride * 3;
-  float* output_data = (float*)malloc(3* stride * sizeof(float));
-  assert(output_data != NULL);
-  for (size_t i = 0; i != stride; ++i) {
-    for (size_t c = 0; c != 3; ++c) {
-      
-      output_data[c * stride + i] = ((float)input[i * 3 + c])/255.0; // I'm also converting from 0-255 to 0-1 and RGBA to RGB
-    }
-  }
-  *output = output_data;
-}
-
-int run_inference(OrtSession* session, const float* input_image, const int h, const int w, float** out, size_t * n_masks) {
-  const int input_height = h;
-  const int input_width = w;
-  printf("Roi h:%d, w:%d\n", h, w);
-  float* model_input;
-  size_t model_input_ele_count = 1024 * 1024;
-
-  resize_image(&input_image, input_height, input_width, &model_input, 1024, 1024, &model_input_ele_count);
-
-  OrtMemoryInfo* memory_info;
-  ORT_ABORT_ON_ERROR(g_ort->CreateCpuMemoryInfo(OrtArenaAllocator, OrtMemTypeDefault, &memory_info));
-  const int64_t input_shape[] = {1, 3, 1024, 1024};
-  const size_t input_shape_len = sizeof(input_shape) / sizeof(input_shape[0]);
-  const size_t model_input_len = model_input_ele_count * sizeof(float);
-
-  OrtValue* input_tensor = NULL;
-  ORT_ABORT_ON_ERROR(g_ort->CreateTensorWithDataAsOrtValue(memory_info, model_input, model_input_len, input_shape,
-                                                           input_shape_len, ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT,
-                                                           &input_tensor));
-  assert(input_tensor != NULL);
-  int is_tensor;
-  ORT_ABORT_ON_ERROR(g_ort->IsTensor(input_tensor, &is_tensor));
-  assert(is_tensor);
- 
-  OrtAllocator* allocator;
-  ORT_ABORT_ON_ERROR(g_ort->GetAllocatorWithDefaultOptions(&allocator))
-
-  const char* input_names[] = {"images"};
-  const char* output_names[] = {"output0", "output1", "onnx::Shape_1304", "onnx::Shape_1323", "onnx::Concat_1263", "onnx::Shape_1215"};
-  OrtValue* output_tensor[6];
-
-  for (int i = 0; i < 6; i++) {
-    output_tensor[i] = NULL;
-  }
-
-  
-  printf("Running inference\n");
-  ORT_ABORT_ON_ERROR(g_ort->Run(session, NULL, input_names, (const OrtValue* const*)&input_tensor, 1, output_names, 6,
-                                output_tensor));
-  printf("Inference done\n");
-
-  for (int i = 0; i < 6; i++) {
-    assert(output_tensor[i] != NULL);
-    ORT_ABORT_ON_ERROR(g_ort->IsTensor(output_tensor[i], &is_tensor));
-    assert(is_tensor);
-  }
-
-  printf("Tensors are not null and is tensor\n");
-
-  OrtTensorTypeAndShapeInfo* tensor_info;
-  ORT_ABORT_ON_ERROR(g_ort->GetTensorTypeAndShape(output_tensor[0], &tensor_info));
-
-  printf("Gather tensor info");
-  // Get the shape dimensions
-  size_t num_dims;
-  ORT_ABORT_ON_ERROR(g_ort->GetDimensionsCount(tensor_info, &num_dims));
-  printf("Number of dimensions: %zu\n", num_dims);
-
-  int64_t* shape = (int64_t*)malloc(num_dims * sizeof(int64_t));
-  ORT_ABORT_ON_ERROR(g_ort->GetDimensions(tensor_info, shape, num_dims));
-
-  // Get tensor element type
-  ONNXTensorElementDataType data_type;
-  ORT_ABORT_ON_ERROR(g_ort->GetTensorElementType(tensor_info, &data_type));
-
-  // Get the output tensor information
-  int ret = 0;
-  float* output_tensor_data = NULL;
-  ORT_ABORT_ON_ERROR(g_ort->GetTensorMutableData(output_tensor[0], (void**)&output_tensor_data));
-  
-  printf("Base Data gathered\n");
-
-  float* output_tensor_data_t[6];
-  for (int i = 0; i < 6; i++) {
-    output_tensor_data_t[i] = NULL;
-    ORT_ABORT_ON_ERROR(g_ort->GetTensorMutableData(output_tensor[i], (void**)&output_tensor_data_t[i]));
-  }
-
-  printf("Data gathered\n");
-
-  prep_out_data(output_tensor_data_t, shape[1], shape[2], out, input_height, input_width, n_masks);
-
-  for (int i = 0; i < 6; i++) {
-    g_ort->ReleaseValue(output_tensor[i]);
-  }
-
-  g_ort->ReleaseMemoryInfo(memory_info);
-  free(shape);
-  g_ort->ReleaseTensorTypeAndShapeInfo(tensor_info);
-  g_ort->ReleaseValue(input_tensor);
-  free(model_input);
-  
-  return ret;
-}
 /* About the module-in-focus blending cache
   Processing a piece in the pixelpipe is basically
     a) call the module->process
@@ -1679,16 +1242,12 @@ static gboolean _pixelpipe_process_on_CPU(dt_dev_pixelpipe_t *pipe,
     int colors = piece->colors;
     int bpc = piece->bpc;
     
-    //const dt_develop_blend_params_t *const d = piece->blendop_data;
-    //dt_develop_blend_colorspace_t blend_csp = d->blend_cst;
-    //(dt_develop_blend_colorspace_t)blend_csp;
-    // dt_image_t* image  = dt_image_cache_get(darktable.image_cache, piece->pipe->image.id, 'r');
     
-    // printf("image w:%d, h:%d\n", image->width, image->height);
     printf("output w:%d, h:%d\n", w, h);
 
     uint8_t* local_copy = (uint8_t*)malloc(4 * sizeof(uint8_t) * stride);
     memcpy(local_copy, piece->pipe->backbuf, 4 * stride * sizeof(uint8_t));
+    
     uint8_t* new_image = (uint8_t*)malloc(3 * sizeof(uint8_t) * stride);
     if (new_image == NULL){
       printf("malloc new image incorrect\n");
@@ -1699,10 +1258,6 @@ static gboolean _pixelpipe_process_on_CPU(dt_dev_pixelpipe_t *pipe,
       new_image[i*3 + 1] = local_copy[i*4 + 1];
       new_image[i*3 + 2] = local_copy[i*4 + 2];
     }
-
-    if (write_image_file(new_image, h, w, "/home/miko/Desktop/test_base.png") != 0) {
-        printf("Error writing image\n");
-    }
     
     float *converted_image = NULL;
     size_t output_count;
diff --git a/src/develop/tensor_boxes.c b/src/develop/tensor_boxes.c
new file mode 100644
index 000000000000..3a7a23e16b8a
--- /dev/null
+++ b/src/develop/tensor_boxes.c
@@ -0,0 +1,66 @@
+#include "develop/tensor_boxes.h"
+
+float IoU(TensorBoxes a, TensorBoxes b)
+{
+    float x1 = MAX(a.x1, b.x1);
+    float y1 = MAX(a.y1, b.y1);
+    float x2 = MIN(a.x2, b.x2);
+    float y2 = MIN(a.y2, b.y2);
+
+    float intersection = MAX(0, x2 - x1 + 1) * MAX(0, y2 - y1 + 1);
+    float areaA = (a.x2 - a.x1 + 1) * (a.y2 - a.y1 + 1);
+    float areaB = (b.x2 - b.x1 + 1) * (b.y2 - b.y1 + 1);
+
+    return intersection / (areaA + areaB - intersection);
+}
+
+
+int compare_scores(const void* a, const void* b)
+{
+    TensorBoxes* boxA = (TensorBoxes*)a;
+    TensorBoxes* boxB = (TensorBoxes*)b;
+    if (boxA->score == boxB->score){
+      // Sort in descending order of area
+      float A_area = (boxA->x2 - boxA->x1) * (boxA->y2 - boxA->y1);
+      float B_area = (boxB->x2 - boxB->x1) * (boxB->y2 - boxB->y1);
+      if (A_area < B_area) return 1;
+      if (A_area > B_area) return -1;
+      return 0;
+    }
+    // Sort in descending order of score
+    if (boxA->score < boxB->score) return 1;
+    if (boxA->score > boxB->score) return -1;
+    return 0;
+}
+
+
+void sort_tensor_boxes_by_score(TensorBoxes* boxes, size_t count)
+{
+    qsort(boxes, count, sizeof(TensorBoxes), compare_scores);
+}
+
+size_t NMS(TensorBoxes* boxes, size_t count, TensorBoxes* output)
+{
+    sort_tensor_boxes_by_score(boxes, count);
+
+    char* suppressed = (char*)calloc(count, sizeof(char)); // 0 = not suppressed, 1 = suppressed
+    size_t output_count = 0;
+
+    for (size_t i = 0; i < count; i++) {
+        if (suppressed[i]) continue; // Skip if the box is suppressed
+
+        output[output_count++] = boxes[i]; // Add the current box to output
+
+        for (size_t j = i + 1; j < count; j++) {
+            if (suppressed[j]) continue; // Skip if already suppressed
+
+            float iou = IoU(boxes[i], boxes[j]);
+            if (iou > IOU_THRESHOLD) {
+                suppressed[j] = 1; // Suppress the box
+            }
+        }
+    }
+
+    free(suppressed);
+    return output_count; // Return the number of boxes kept
+}
\ No newline at end of file
diff --git a/src/develop/tensor_boxes.h b/src/develop/tensor_boxes.h
new file mode 100644
index 000000000000..d94bcebac845
--- /dev/null
+++ b/src/develop/tensor_boxes.h
@@ -0,0 +1,76 @@
+#ifndef TENSOR_BOXES_H
+#define TENSOR_BOXES_H
+
+#ifndef MAX
+#define MAX(a, b) ((a) < (b) ? (b) : (a))
+#endif
+#ifndef MIN
+#define MIN(a, b) ((a) > (b) ? (b) : (a))
+#endif
+
+#define IOU_THRESHOLD 0.7
+#define CONF 0.3
+
+#include <stdlib.h>
+#include <stddef.h>
+
+typedef struct
+{
+  float x1;
+  float y1;
+  float x2;
+  float y2;
+  float score;
+  float* mask;
+} TensorBoxes;
+
+
+/**
+ * Calculate the intersection over union between two bounding boxes.
+ *
+ * @param a first bounding box
+ * @param b second bounding box
+ * @return IoU value between 0 and 1
+ */
+float IoU(TensorBoxes a, TensorBoxes b);
+
+/**
+ * Compares two TensorBoxes in terms of their scores and areas.
+ * 
+ * This function is used as a comparison function for sorting an array of TensorBoxes.
+ * The boxes are sorted in descending order of their scores, and then in descending order of their areas.
+ * 
+ * @param a A pointer to the first TensorBoxes to compare.
+ * @param b A pointer to the second TensorBoxes to compare.
+ * @return A negative value if a sorts before b (higher score, or equal score and larger area),
+ *  a positive value if a sorts after b, and zero otherwise.
+ */
+int compare_scores(const void* a, const void* b);
+
+/**
+ * Sort the given array of TensorBoxes in descending order of their score.
+ *
+ * This function sorts the given array in-place; no copy of it is made.
+ * The sorting is done by the qsort function from the standard library
+ *
+ * This function uses the compare_scores function as the comparison function to
+ * compare two TensorBoxes.
+ *
+ * @param boxes The array of TensorBoxes to be sorted.
+ * @param count The number of TensorBoxes in the array.
+ * @see compare_scores
+ */
+void sort_tensor_boxes_by_score(TensorBoxes* boxes, size_t count);
+
+/**
+ * Perform non-maximum suppression on the given boxes.
+ *
+ * @param boxes An array of boxes to perform NMS on; it is sorted in-place by score.
+ * @param count The number of boxes in the given array.
+ * @param output An array with room for up to count boxes that receives the kept boxes.
+ *
+ * @return The number of boxes kept after NMS.
+ */
+size_t NMS(TensorBoxes* boxes, size_t count, TensorBoxes* output);
+
+#endif
\ No newline at end of file

From 79a459a7378368b3e3e29bc55781943b3649b025 Mon Sep 17 00:00:00 2001
From: MikoMikarro <lopezcuestam@gmail.com>
Date: Sat, 1 Feb 2025 12:56:24 +0100
Subject: [PATCH 13/14] updated pixepiple cause half merge not well done

---
 src/develop/pixelpipe_hb.c | 97 +++++++-------------------------------
 1 file changed, 16 insertions(+), 81 deletions(-)

diff --git a/src/develop/pixelpipe_hb.c b/src/develop/pixelpipe_hb.c
index 9da7cbef306f..f458a4d8318f 100644
--- a/src/develop/pixelpipe_hb.c
+++ b/src/develop/pixelpipe_hb.c
@@ -270,8 +270,6 @@ gboolean dt_dev_pixelpipe_init_cached(dt_dev_pixelpipe_t *pipe,
   pipe->input_profile_info = NULL;
   pipe->output_profile_info = NULL;
   pipe->runs = 0;
-  pipe->bcache_data = NULL;
-  pipe->bcache_hash = 0;
   return dt_dev_pixelpipe_cache_init(pipe, entries, size, memlimit);
 }
 
@@ -332,7 +330,6 @@ void dt_dev_pixelpipe_cleanup(dt_dev_pixelpipe_t *pipe)
   dt_dev_pixelpipe_cleanup_nodes(pipe);
   // so now it's safe to clean up cache:
   dt_dev_pixelpipe_cache_cleanup(pipe);
-  dt_free_align(pipe->bcache_data);
 
   pipe->icc_type = DT_COLORSPACE_NONE;
   g_free(pipe->icc_filename);
@@ -1093,7 +1090,7 @@ static gboolean _request_color_pick(dt_dev_pixelpipe_t *pipe,
 static inline gboolean _piece_may_tile(const dt_dev_pixelpipe_iop_t *piece)
 {
   return piece->process_tiling_ready
-        && !(piece->pipe->want_detail_mask && piece->module->flags() & IOP_FLAGS_WRITE_DETAILS);
+        && !(piece->pipe->want_detail_mask && piece->module->flags());
 }
 
 static void _collect_histogram_on_CPU(dt_dev_pixelpipe_t *pipe,
@@ -1162,16 +1159,6 @@ static inline gboolean _piece_fast_blend(const dt_dev_pixelpipe_iop_t *piece,
       && _transform_for_blend(module, piece);
 }
 
-static inline float *_get_fast_blendcache(const size_t nfloats,
-                                          const dt_hash_t phash,
-                                          dt_dev_pixelpipe_t *pipe)
-{
-  dt_free_align(pipe->bcache_data);
-  pipe->bcache_data = dt_alloc_align_float(nfloats);
-  pipe->bcache_hash = phash;
-  return pipe->bcache_data;
-}
-
 static gboolean _pixelpipe_process_on_CPU(dt_dev_pixelpipe_t *pipe,
                                           dt_develop_t *dev,
                                           float *input,
@@ -1355,33 +1342,19 @@ static gboolean _pixelpipe_process_on_CPU(dt_dev_pixelpipe_t *pipe,
                      roi_in->width, roi_in->height, in_bpp,
                      TRUE, dt_dev_pixelpipe_type_to_str(pipe->type));
 
-  const gboolean relevant = _piece_fast_blend(piece, module);
-  const dt_hash_t phash = relevant ? _piece_process_hash(piece, roi_out, module) : 0;
-  const size_t nfloats = bpp * roi_out->width * roi_out->height / sizeof(float);
-  const gboolean bcaching = relevant ? pipe->bcache_data && phash == pipe->bcache_hash : FALSE;
+  _piece_fast_blend(piece, module);
 
   if(!fitting && _piece_may_tile(piece))
   {
     dt_print_pipe(DT_DEBUG_PIPE,
-                        bcaching ? "from focus cache" : "process tiles",
+                        "process tiles",
                         pipe, module, DT_DEVICE_CPU, roi_in, roi_out, "%s%s%s",
                         dt_iop_colorspace_to_name(cst_to),
                         cst_to != cst_out ? " -> " : "",
                         cst_to != cst_out ? dt_iop_colorspace_to_name(cst_out) : "");
 
-    if(bcaching)
-    {
-      dt_iop_image_copy(*output, pipe->bcache_data, nfloats);
-    }
-    else
-    {
-      module->process_tiling(module, piece, input, *output, roi_in, roi_out, in_bpp);
-      if(relevant)
-      {
-        float *cache = _get_fast_blendcache(nfloats, phash, pipe);
-        if(cache) dt_iop_image_copy(cache, *output, nfloats);
-      }
-    }
+    module->process_tiling(module, piece, input, *output, roi_in, roi_out, in_bpp);
+    
     *pixelpipe_flow |= (PIXELPIPE_FLOW_PROCESSED_ON_CPU
                         | PIXELPIPE_FLOW_PROCESSED_WITH_TILING);
     *pixelpipe_flow &= ~(PIXELPIPE_FLOW_PROCESSED_ON_GPU);
@@ -1389,7 +1362,7 @@ static gboolean _pixelpipe_process_on_CPU(dt_dev_pixelpipe_t *pipe,
   else
   {
     dt_print_pipe(DT_DEBUG_PIPE,
-       bcaching ? "from focus cache" : "process",
+       "process",
        pipe, module, DT_DEVICE_CPU, roi_in, roi_out, "%s%s%s%s %.fMB",
        dt_iop_colorspace_to_name(cst_to),
        cst_to != cst_out ? " -> " : "",
@@ -1429,19 +1402,7 @@ static gboolean _pixelpipe_process_on_CPU(dt_dev_pixelpipe_t *pipe,
       }
     }
 
-    if(bcaching)
-    {
-      dt_iop_image_copy(*output, pipe->bcache_data, nfloats);
-    }
-    else
-    {
-      module->process(module, piece, input, *output, roi_in, roi_out);
-      if(relevant)
-      {
-        float *cache = _get_fast_blendcache(nfloats, phash, pipe);
-        if(cache) dt_iop_image_copy(cache, *output, nfloats);
-      }
-    }
+    module->process(module, piece, input, *output, roi_in, roi_out);
 
     *pixelpipe_flow |= (PIXELPIPE_FLOW_PROCESSED_ON_CPU);
     *pixelpipe_flow &= ~(PIXELPIPE_FLOW_PROCESSED_ON_GPU
@@ -2052,12 +2013,10 @@ static gboolean _dev_pixelpipe_process_rec(dt_dev_pixelpipe_t *pipe,
            meaningful messages in case of error */
         if(success_opencl)
         {
-          const gboolean relevant = _piece_fast_blend(piece, module);
-          const dt_hash_t phash = relevant ? _piece_process_hash(piece, roi_out, module) : 0;
-          const gboolean bcaching = relevant ? pipe->bcache_data && phash == pipe->bcache_hash : FALSE;
+          _piece_fast_blend(piece, module); // NOTE(review): return value is now discarded; confirm this call is still needed
 
           dt_print_pipe(DT_DEBUG_PIPE,
-                        bcaching ? "from focus cache" : "process",
+                        "process",
                         pipe, module, pipe->devid, &roi_in, roi_out, "%s%s%s %.1fMB",
                         dt_iop_colorspace_to_name(cst_to),
                         cst_to != cst_out ? " -> " : "",
@@ -2112,19 +2071,8 @@ static gboolean _dev_pixelpipe_process_rec(dt_dev_pixelpipe_t *pipe,
 
           cl_int err = CL_SUCCESS;
 
-          if(bcaching)
-          {
-            err = dt_opencl_write_host_to_device(pipe->devid, pipe->bcache_data, *cl_mem_output, roi_out->width, roi_out->height, out_bpp);
-          }
-          else
-          {
-            err = module->process_cl(module, piece, cl_mem_input, *cl_mem_output, &roi_in, roi_out);
-            if(relevant && (err == CL_SUCCESS))
-            {
-              float *cache = _get_fast_blendcache(out_bpp * roi_out->width * roi_out->height / sizeof(float), phash, pipe);
-              if(cache) err = dt_opencl_read_host_from_device(pipe->devid, cache, *cl_mem_output, roi_out->width, roi_out->height, out_bpp);
-            }
-          }
+          err = module->process_cl(module, piece, cl_mem_input, *cl_mem_output, &roi_in, roi_out);
+
           success_opencl = (err == CL_SUCCESS);
 
           if(!success_opencl)
@@ -2332,11 +2280,10 @@ static gboolean _dev_pixelpipe_process_rec(dt_dev_pixelpipe_t *pipe,
            meaningful messages in case of error */
         if(success_opencl)
         {
-          const gboolean relevant = _piece_fast_blend(piece, module);
-          const dt_hash_t phash = relevant ? _piece_process_hash(piece, roi_out, module) : 0;
-          const gboolean bcaching = relevant ? pipe->bcache_data && phash == pipe->bcache_hash : FALSE;
+          _piece_fast_blend(piece, module); // NOTE(review): return value is now discarded; confirm this call is still needed
+
           dt_print_pipe(DT_DEBUG_PIPE,
-                        bcaching ? "from focus cache" : "process tiled",
+                        "process tiled",
                         pipe, module, pipe->devid, &roi_in, roi_out, "%s%s%s",
                         dt_iop_colorspace_to_name(cst_to),
                         cst_to != cst_out ? " -> " : "",
@@ -2344,20 +2291,8 @@ static gboolean _dev_pixelpipe_process_rec(dt_dev_pixelpipe_t *pipe,
 
           cl_int err = CL_SUCCESS;
 
-          if(bcaching)
-          {
-            err = dt_opencl_write_host_to_device(pipe->devid, pipe->bcache_data, *cl_mem_output, roi_out->width, roi_out->height, out_bpp);
-          }
-          else
-          {
-            err = module->process_tiling_cl(module, piece, input, *output, &roi_in, roi_out, in_bpp);
-            if(relevant && (err == CL_SUCCESS))
-            {
-              float *cache = _get_fast_blendcache(out_bpp * roi_out->width * roi_out->height / sizeof(float), phash, pipe);
-              if(cache)
-                err = dt_opencl_read_host_from_device(pipe->devid, cache, *cl_mem_output, roi_out->width, roi_out->height, out_bpp);
-            }
-          }
+          err = module->process_tiling_cl(module, piece, input, *output, &roi_in, roi_out, in_bpp);
+
           success_opencl = (err == CL_SUCCESS);
 
           if(!success_opencl)

From 2c2b21e9d7658710c6c9dcfb3651bb507f2edcba Mon Sep 17 00:00:00 2001
From: MikoMikarro <lopezcuestam@gmail.com>
Date: Sat, 1 Feb 2025 21:04:42 +0100
Subject: [PATCH 14/14] recover AI button from previous commit

---
 src/develop/blend.h     | 2 +-
 src/develop/blend_gui.c | 8 ++++++++
 2 files changed, 9 insertions(+), 1 deletion(-)

diff --git a/src/develop/blend.h b/src/develop/blend.h
index cc8a690903e2..cda07d5f5454 100644
--- a/src/develop/blend.h
+++ b/src/develop/blend.h
@@ -283,7 +283,7 @@ extern const dt_introspection_type_enum_tuple_t dt_develop_combine_masks_names[]
 extern const dt_introspection_type_enum_tuple_t dt_develop_feathering_guide_names[];
 extern const dt_introspection_type_enum_tuple_t dt_develop_invert_mask_names[];
 
-#define DEVELOP_MASKS_NB_SHAPES 5
+#define DEVELOP_MASKS_NB_SHAPES 6
 
 /** blend gui data */
 typedef struct dt_iop_gui_blend_data_t
diff --git a/src/develop/blend_gui.c b/src/develop/blend_gui.c
index b5cf3202b031..cad9e15fdc45 100644
--- a/src/develop/blend_gui.c
+++ b/src/develop/blend_gui.c
@@ -2831,6 +2831,14 @@ void dt_iop_gui_init_masks(GtkWidget *blendw, dt_iop_module_t *module)
                                                   FALSE, 0, 0,
                                                   dtgtk_cairo_paint_masks_brush, abox);
 
+    bd->masks_type[5] = DT_MASKS_POINT;
+    bd->masks_shapes[5] = dt_iop_togglebutton_new(module, "blend`shapes",
+                                                  N_("add point"),
+                                                  N_("add multiple points"),
+                                                  G_CALLBACK(_blendop_masks_add_shape),
+                                                  FALSE, 0, 0,
+                                                  dtgtk_cairo_paint_masks_ai, abox);
+
     bd->masks_type[1] = DT_MASKS_PATH;
     bd->masks_shapes[1] = dt_iop_togglebutton_new(module, "blend`shapes",
                                                   N_("add path"),