Merge 0ce08941ad into b78390c2ab

2025-12-11 19:00:00 +01:00 · 2025-03-01 15:35:52 +00:00
parent b78390c2ab 0ce08941ad
commit e66e949ea5
5 changed files with 333 additions and 0 deletions
--- a/src/Modules.js
+++ b/src/Modules.js
@@ -24,6 +24,7 @@ module.exports = {
  'dynamic': require('./modules/Dynamic'),
  'edge-detect': require('./modules/EdgeDetect'),
  'exposure': require('./modules/Exposure'),
+  'face-detection': require('./modules/FaceDetection'),
  'flip-image': require('./modules/FlipImage'),
  'fisheye-gl': require('./modules/FisheyeGl'),
  'histogram': require('./modules/Histogram'),
--- a/src/modules/FaceDetection/FaceDetection.js
+++ b/src/modules/FaceDetection/FaceDetection.js
@@ -0,0 +1,278 @@
+/* This library is released under the MIT license, see https://github.com/tehnokv/picojs */
+const pico = {}; // namespace object for the pico.js detector functions; declared so it is not an implicit global
+
+pico.unpack_cascade = function(bytes)
+{
+  // Decodes a binary pico cascade held in `bytes` (indexable byte values offering slice()) into a classifier function.
+  const dview = new DataView(new ArrayBuffer(4));
+  /*
+    we skip the first 8 bytes of the cascade file
+    (cascade version number and some data used during the learning process)
+  */
+  let p = 8;
+  /*
+    read the depth (size) of each tree first: a little-endian 32-bit signed integer
+  */
+  dview.setUint8(0, bytes[p + 0]), dview.setUint8(1, bytes[p + 1]), dview.setUint8(2, bytes[p + 2]), dview.setUint8(3, bytes[p + 3]);
+  const tdepth = dview.getInt32(0, true);
+  p = p + 4;
+  /*
+    next, read the number of trees in the cascade: another little-endian 32-bit signed integer
+  */
+  dview.setUint8(0, bytes[p + 0]), dview.setUint8(1, bytes[p + 1]), dview.setUint8(2, bytes[p + 2]), dview.setUint8(3, bytes[p + 3]);
+  const ntrees = dview.getInt32(0, true);
+  p = p + 4;
+  /*
+    read the actual trees and cascade thresholds
+  */
+  const tcodes_ls = [];
+  const tpreds_ls = [];
+  const thresh_ls = [];
+  for(let t = 0; t < ntrees; ++t)
+  {
+    // read the binary tests placed in internal tree nodes (4 bytes each); a zeroed entry pads the unused index 0
+    Array.prototype.push.apply(tcodes_ls, [0, 0, 0, 0]);
+    Array.prototype.push.apply(tcodes_ls, bytes.slice(p, p + 4 * Math.pow(2, tdepth) - 4));
+    p = p + 4 * Math.pow(2, tdepth) - 4;
+    // read the prediction in the leaf nodes of the tree: 2^tdepth little-endian 32-bit floats
+    for(let i = 0; i < Math.pow(2, tdepth); ++i)
+    {
+      dview.setUint8(0, bytes[p + 0]), dview.setUint8(1, bytes[p + 1]), dview.setUint8(2, bytes[p + 2]), dview.setUint8(3, bytes[p + 3]);
+      tpreds_ls.push(dview.getFloat32(0, true));
+      p = p + 4;
+    }
+    // read the threshold of this tree: one little-endian 32-bit float
+    dview.setUint8(0, bytes[p + 0]), dview.setUint8(1, bytes[p + 1]), dview.setUint8(2, bytes[p + 2]), dview.setUint8(3, bytes[p + 3]);
+    thresh_ls.push(dview.getFloat32(0, true));
+    p = p + 4;
+  }
+  const tcodes = new Int8Array(tcodes_ls);
+  const tpreds = new Float32Array(tpreds_ls);
+  const thresh = new Float32Array(thresh_ls);
+  /*
+    construct the classification function from the read data; it evaluates the region at row `r`, column `c` with size `s`
+  */
+  function classify_region(r, c, s, pixels, ldim)
+  {
+    r = 256 * r; // scaled by 256 so that the '>> 8' below yields integer pixel coordinates
+    c = 256 * c;
+    let root = 0;
+    let o = 0.0;
+    const pow2tdepth = Math.pow(2, tdepth) >> 0; // '>>0' transforms this number to int
+    // run the trees in order: each adds its leaf prediction to `o`; return -1 once `o` is at or below that tree's threshold
+    for(let i = 0; i < ntrees; ++i)
+    {
+      let idx = 1; // kept local: an undeclared assignment would create a global (and throws in strict mode)
+      // we use '>> 8' here to perform an integer division: this seems important for performance
+      for(let j = 0; j < tdepth; ++j) {
+        idx = 2 * idx + (pixels[((r + tcodes[root + 4 * idx + 0] * s) >> 8) * ldim + ((c + tcodes[root + 4 * idx + 1] * s) >> 8)] <= pixels[((r + tcodes[root + 4 * idx + 2] * s) >> 8) * ldim + ((c + tcodes[root + 4 * idx + 3] * s) >> 8)]);
+      }
+      o = o + tpreds[pow2tdepth * i + idx - pow2tdepth];
+
+      if(o <= thresh[i])
+        return -1;
+
+      root += 4 * pow2tdepth;
+    }
+    return o - thresh[ntrees - 1];
+  }
+  /*
+    we're done: hand the classifier closure back to the caller
+  */
+  return classify_region;
+};
+
+pico.run_cascade = function(image, classify_region, params)
+{
+  // Scans `image` with a square window at geometrically growing sizes and returns every
+  // window position the classifier scores above zero as a [row, col, size, score] array.
+  // Reads pixels, nrows, ncols, ldim from `image` and shiftfactor, minsize, maxsize, scalefactor from `params`.
+  const { pixels, nrows, ncols, ldim } = image;
+  const { shiftfactor, minsize, maxsize, scalefactor } = params;
+  const found = [];
+
+  for (let size = minsize; size <= maxsize; size = size * scalefactor)
+  {
+    // stride between window positions: a fraction of the window size, never below 1 ('>> 0' truncates to int)
+    const stride = Math.max(shiftfactor * size, 1) >> 0;
+    // distance kept between window positions and the image border
+    const margin = (size / 2 + 1) >> 0;
+    const lastRow = nrows - margin;
+    const lastCol = ncols - margin;
+
+    for (let row = margin; row <= lastRow; row += stride)
+    {
+      for (let col = margin; col <= lastCol; col += stride)
+      {
+        const score = classify_region(row, col, size, pixels, ldim);
+        if (score > 0.0)
+        {
+          found.push([row, col, size, score]);
+        }
+      }
+    }
+  }
+
+  return found;
+};
+
+pico.cluster_detections = function(dets, iouthreshold)
+{
+  /*
+    sort a copy of the detections ([row, col, size, score]) by descending score, leaving the caller's array untouched
+  */
+  dets = dets.slice().sort(function(a, b) {
+    return b[3] - a[3];
+  });
+  /*
+    this helper function calculates the intersection over union for two detections
+  */
+  function calculate_iou(det1, det2)
+  {
+    // unpack the position and size of each detection
+    const r1 = det1[0], c1 = det1[1], s1 = det1[2];
+    const r2 = det2[0], c2 = det2[1], s2 = det2[2];
+    // calculate detection overlap in each dimension (each detection spans position +/- size / 2)
+    const overr = Math.max(0, Math.min(r1 + s1 / 2, r2 + s2 / 2) - Math.max(r1 - s1 / 2, r2 - s2 / 2));
+    const overc = Math.max(0, Math.min(c1 + s1 / 2, c2 + s2 / 2) - Math.max(c1 - s1 / 2, c2 - s2 / 2));
+    // calculate and return IoU
+    return overr * overc / (s1 * s1 + s2 * s2 - overr * overc);
+  }
+  /*
+    do clustering through non-maximum suppression
+  */
+  const assignments = new Array(dets.length).fill(0);
+  const clusters = [];
+  for(let i = 0; i < dets.length; ++i)
+  {
+    // is this detection assigned to a cluster?
+    if(assignments[i] === 0)
+    {
+      // it is not:
+      // now we make a cluster out of it and see whether some other detections belong to it
+      let r = 0.0, c = 0.0, s = 0.0, q = 0.0, n = 0;
+      for(let j = i; j < dets.length; ++j)
+        if(calculate_iou(dets[i], dets[j]) > iouthreshold)
+        {
+          assignments[j] = 1;
+          r = r + dets[j][0];
+          c = c + dets[j][1];
+          s = s + dets[j][2];
+          q = q + dets[j][3];
+          n = n + 1;
+        }
+      // make a cluster representative: mean row, column and size of its members, with their scores summed
+      clusters.push([r / n, c / n, s / n, q]);
+    }
+  }
+  // one [row, col, size, score] entry per cluster
+  return clusters;
+};
+
+pico.instantiate_detection_memory = function(size)
+{
+  /*
+    initialize a circular buffer of `size` elements, each starting as an empty detection list
+  */
+  let n = 0;
+  const memory = [];
+  for(let i = 0; i < size; ++i)
+    memory.push([]);
+  /*
+    build a function that:
+    (1) inserts the current frame's detections into the buffer, overwriting the oldest slot;
+    (2) merges all detections from the last `size` frames and returns them as a new array
+  */
+  function update_memory(dets)
+  {
+    memory[n] = dets;
+    n = (n + 1) % memory.length;
+    dets = [];
+    for(let i = 0; i < memory.length; ++i) // `let` keeps the index local instead of assigning an undeclared global
+      dets = dets.concat(memory[i]);
+    // the merged list is in buffer-slot order, not in frame order
+    return dets;
+  }
+  /*
+    we're done: the returned closure owns the buffer
+  */
+  return update_memory;
+};
+
+function rgba_to_grayscale(rgba, nrows, ncols) {
+  // One output byte per pixel of the interleaved RGBA input: gray = 0.2*red + 0.7*green + 0.1*blue (alpha is ignored)
+  const luma = new Uint8Array(nrows * ncols);
+  for (let row = 0, px = 0; row < nrows; ++row)
+    for (let col = 0; col < ncols; ++col, ++px)
+      luma[px] = (2 * rgba[4 * px] + 7 * rgba[4 * px + 1] + 1 * rgba[4 * px + 2]) / 10; // fraction is dropped on store
+  return luma;
+}
+
+
+/////////////////////////////////////////////////////////////////////////////////////////
+
+
+module.exports = exports = function(pixels, cb){
+  // Detects faces in `pixels`, outlines every accepted detection in place and then calls `cb` (if given)
+  // exactly once -- also when the cascade download or the detection itself fails.
+  const pixelSetter = require('../../util/pixelSetter.js');
+  const cascadeurl = 'https://raw.githubusercontent.com/nenadmarkus/pico/c2e81f9d23cc11d1a612fd21e4f9de0921a5d0d9/rnt/cascades/facefinder';
+  const qthresh = 5.0; // this constant is empirical: other cascades might require a different one
+  // Sets every pixel with x in [startX, endX + 1] and y in [startY, endY + 1] to the outline color.
+  const drawSide = function(startX, startY, endX, endY){
+    for (let n = startX; n <= endX + 1; n++){
+      for (let k = startY; k <= endY + 1; k++){
+        pixelSetter(n, k, [25, 25, 25, 255], pixels);
+      }
+    }
+  };
+
+  fetch(cascadeurl)
+    .then(function(response) {
+      // a failed download must not be parsed as if it were a cascade
+      if (!response.ok) throw new Error('could not load cascade: HTTP ' + response.status);
+      return response.arrayBuffer();
+    })
+    .then(function(buffer) {
+      const bytes = new Int8Array(buffer);
+      const facefinder_classify_region = pico.unpack_cascade(bytes);
+      console.log('* cascade loaded');
+      const image = {
+        'pixels': rgba_to_grayscale(pixels.data, pixels.shape[1], pixels.shape[0]),
+        'nrows': pixels.shape[1], // shape[1] is used as the row count
+        'ncols': pixels.shape[0], // shape[0] is used as the column count and as the row stride
+        'ldim': pixels.shape[0]
+      };
+      const params = {
+        'shiftfactor': 0.1, // move the detection window by 10% of its size
+        'minsize': 20,      // minimum size of a face (not suitable for real-time detection, set it to 100 in that case)
+        'maxsize': 1000,    // maximum size of a face
+        'scalefactor': 1.1  // for multiscale processing: resize the detection window by 10% when moving to the higher scale
+      };
+      let dets = pico.run_cascade(image, facefinder_classify_region, params);
+      dets = pico.cluster_detections(dets, 0.2); // set IoU threshold to 0.2
+      for (let i = 0; i < dets.length; ++i){
+        // check the detection score
+        // if it's above the threshold, draw it
+        if (dets[i][3] > qthresh)
+        {
+          const ox = Math.trunc(dets[i][1] - dets[i][2] / 2);
+          const oy = Math.trunc(dets[i][0] - dets[i][2] / 2);
+          const ex = Math.trunc(dets[i][1] + dets[i][2] / 2) - 1;
+          const ey = Math.trunc(dets[i][0] + dets[i][2] / 2) - 1;
+
+          drawSide(ox, oy, ox, ey); // Left
+          drawSide(ex, oy, ex, ey); // Right
+          drawSide(ox, oy, ex, oy); // Top
+          drawSide(ox, ey, ex, ey); // Bottom
+        }
+      }
+    })
+    .catch(function(err) {
+      // report the failure instead of leaving the caller waiting forever for `cb`
+      console.error('face detection failed:', err);
+    })
+    .then(function() {
+      if (cb) cb();
+    });
+};
--- a/src/modules/FaceDetection/Module.js
+++ b/src/modules/FaceDetection/Module.js
@@ -0,0 +1,44 @@
+
+module.exports = function FaceDetection(options, UI){
+
+  // Never assigned in this scope: the returned step starts with `output: undefined`,
+  // and draw() later sets `this.output` once a result is reported.
+  var output;
+
+  // Runs the detector on `input` by delegating to the shared PixelManipulation helper.
+  function draw(input, callback, progressObj) {
+    var step = this;
+    // NOTE(review): presumably switches off the default progress indicator -- confirm against progressObj's owner
+    progressObj.stop(true);
+    progressObj.overrideFlag = true;
+
+    // Hook handed to PixelManipulation: the render state is set to false while the detector
+    // works on the pixels; its callback sets it back to true and then generates the output.
+    var detectFaces = function(pixels, setRenderState, generateOutput){
+      var onDetected = function(){
+        setRenderState(true);
+        generateOutput();
+      };
+      setRenderState(false);
+      require('./FaceDetection')(pixels, onDetected);
+    };
+
+    // Output handler handed to PixelManipulation: stores the reported result on the step.
+    var storeOutput = function(image, datauri, mimetype, wasmSuccess) {
+      step.output = { src: datauri, format: mimetype, wasmSuccess: wasmSuccess, useWasm: options.useWasm };
+    };
+
+    var pixelManipulation = require('../_nomodule/PixelManipulation.js');
+    return pixelManipulation(input, {
+      output: storeOutput,
+      extraManipulation: detectFaces,
+      format: input.format,
+      image: options.image,
+      inBrowser: options.inBrowser,
+      callback: callback
+    });
+  }
+
+  return { options: options, draw: draw, output: output, UI: UI };
+};
+  
--- a/src/modules/FaceDetection/index.js
+++ b/src/modules/FaceDetection/index.js
@@ -0,0 +1,4 @@
+// Entry point of the module folder: exports the pair [step constructor, info metadata].
+const step = require('./Module');
+const info = require('./info.json');
+module.exports = [step, info];
--- a/src/modules/FaceDetection/info.json
+++ b/src/modules/FaceDetection/info.json
@@ -0,0 +1,6 @@
+{
+    "name": "Face Detection",
+    "description": "Detects faces in the given image and outlines each one",
+    "inputs": {},
+    "docs-link":""
+}