diff --git a/src/Modules.js b/src/Modules.js
index 6dc0bdc5..c1163c7a 100644
--- a/src/Modules.js
+++ b/src/Modules.js
@@ -24,6 +24,7 @@ module.exports = {
   'dynamic': require('./modules/Dynamic'),
   'edge-detect': require('./modules/EdgeDetect'),
   'exposure': require('./modules/Exposure'),
+  'face-detection': require('./modules/FaceDetection'),
   'flip-image': require('./modules/FlipImage'),
   'fisheye-gl': require('./modules/FisheyeGl'),
   'histogram': require('./modules/Histogram'),
diff --git a/src/modules/FaceDetection/FaceDetection.js b/src/modules/FaceDetection/FaceDetection.js
new file mode 100644
index 00000000..67debcec
--- /dev/null
+++ b/src/modules/FaceDetection/FaceDetection.js
@@ -0,0 +1,278 @@
+/* This library is released under the MIT license, see https://github.com/tehnokv/picojs */
+// Port of the pico.js face-detection runtime: cascade unpacking, multiscale
+// sliding-window detection and non-maximum-suppression clustering.
+// NOTE(review): `pico` was an implicit global in the original; `const` keeps it module-local.
+const pico = {};
+
+pico.unpack_cascade = function(bytes)
+{
+  const dview = new DataView(new ArrayBuffer(4));
+  /*
+    we skip the first 8 bytes of the cascade file
+    (cascade version number and some data used during the learning process)
+  */
+  let p = 8;
+  /*
+    read the depth (size) of each tree first: a 32-bit signed integer
+  */
+  dview.setUint8(0, bytes[p + 0]), dview.setUint8(1, bytes[p + 1]), dview.setUint8(2, bytes[p + 2]), dview.setUint8(3, bytes[p + 3]);
+  const tdepth = dview.getInt32(0, true);
+  p = p + 4;
+  /*
+    next, read the number of trees in the cascade: another 32-bit signed integer
+  */
+  dview.setUint8(0, bytes[p + 0]), dview.setUint8(1, bytes[p + 1]), dview.setUint8(2, bytes[p + 2]), dview.setUint8(3, bytes[p + 3]);
+  const ntrees = dview.getInt32(0, true);
+  p = p + 4;
+  /*
+    read the actual trees and cascade thresholds
+  */
+  const tcodes_ls = [];
+  const tpreds_ls = [];
+  const thresh_ls = [];
+  for(let t = 0; t < ntrees; ++t)
+  {
+    // read the binary tests placed in internal tree nodes
+    Array.prototype.push.apply(tcodes_ls, [0, 0, 0, 0]);
+    Array.prototype.push.apply(tcodes_ls, bytes.slice(p, p + 4 * Math.pow(2, tdepth) - 4));
+    p = p + 4 * Math.pow(2, tdepth) - 4;
+    // read the prediction in the leaf nodes of the tree
+    for(let i = 0; i < Math.pow(2, tdepth); ++i)
+    {
+      dview.setUint8(0, bytes[p + 0]), dview.setUint8(1, bytes[p + 1]), dview.setUint8(2, bytes[p + 2]), dview.setUint8(3, bytes[p + 3]);
+      tpreds_ls.push(dview.getFloat32(0, true));
+      p = p + 4;
+    }
+    // read the threshold
+    dview.setUint8(0, bytes[p + 0]), dview.setUint8(1, bytes[p + 1]), dview.setUint8(2, bytes[p + 2]), dview.setUint8(3, bytes[p + 3]);
+    thresh_ls.push(dview.getFloat32(0, true));
+    p = p + 4;
+  }
+  const tcodes = new Int8Array(tcodes_ls);
+  const tpreds = new Float32Array(tpreds_ls);
+  const thresh = new Float32Array(thresh_ls);
+  /*
+    construct the classification function from the read data
+  */
+  function classify_region(r, c, s, pixels, ldim)
+  {
+    r = 256 * r;
+    c = 256 * c;
+    let root = 0;
+    let o = 0.0;
+    const pow2tdepth = Math.pow(2, tdepth) >> 0; // '>>0' transforms this number to int
+
+    for(let i = 0; i < ntrees; ++i)
+    {
+      let idx = 1; // NOTE(review): `idx` leaked as an implicit global in the original
+      for(let j = 0; j < tdepth; ++j)
+        // we use '>> 8' here to perform an integer division: this seems important for performance
+        idx = 2 * idx + (pixels[((r + tcodes[root + 4 * idx + 0] * s) >> 8) * ldim + ((c + tcodes[root + 4 * idx + 1] * s) >> 8)] <= pixels[((r + tcodes[root + 4 * idx + 2] * s) >> 8) * ldim + ((c + tcodes[root + 4 * idx + 3] * s) >> 8)]);
+
+      o = o + tpreds[pow2tdepth * i + idx - pow2tdepth];
+
+      if(o <= thresh[i])
+        return -1;
+
+      root += 4 * pow2tdepth;
+    }
+    return o - thresh[ntrees - 1];
+  }
+  /*
+    we're done
+  */
+  return classify_region;
+};
+
+pico.run_cascade = function(image, classify_region, params)
+{
+  const pixels = image.pixels;
+  const nrows = image.nrows;
+  const ncols = image.ncols;
+  const ldim = image.ldim;
+
+  const shiftfactor = params.shiftfactor;
+  const minsize = params.minsize;
+  const maxsize = params.maxsize;
+  const scalefactor = params.scalefactor;
+
+  let scale = minsize;
+  const detections = [];
+
+  while(scale <= maxsize)
+  {
+    const step = Math.max(shiftfactor * scale, 1) >> 0; // '>>0' transforms this number to int
+    const offset = (scale / 2 + 1) >> 0;
+
+    for(let r = offset; r <= nrows - offset; r += step)
+      for(let c = offset; c <= ncols - offset; c += step)
+      {
+        const q = classify_region(r, c, scale, pixels, ldim);
+        if (q > 0.0)
+          detections.push([r, c, scale, q]);
+      }
+
+    scale = scale * scalefactor;
+  }
+
+  return detections;
+};
+
+pico.cluster_detections = function(dets, iouthreshold)
+{
+  /*
+    sort detections by their score
+  */
+  dets = dets.sort(function(a, b) {
+    return b[3] - a[3];
+  });
+  /*
+    this helper function calculates the intersection over union for two detections
+  */
+  function calculate_iou(det1, det2)
+  {
+    // unpack the position and size of each detection
+    const r1 = det1[0], c1 = det1[1], s1 = det1[2];
+    const r2 = det2[0], c2 = det2[1], s2 = det2[2];
+    // calculate detection overlap in each dimension
+    const overr = Math.max(0, Math.min(r1 + s1 / 2, r2 + s2 / 2) - Math.max(r1 - s1 / 2, r2 - s2 / 2));
+    const overc = Math.max(0, Math.min(c1 + s1 / 2, c2 + s2 / 2) - Math.max(c1 - s1 / 2, c2 - s2 / 2));
+    // calculate and return IoU
+    return overr * overc / (s1 * s1 + s2 * s2 - overr * overc);
+  }
+  /*
+    do clustering through non-maximum suppression
+  */
+  const assignments = new Array(dets.length).fill(0);
+  const clusters = [];
+  for(let i = 0; i < dets.length; ++i)
+  {
+    // is this detection assigned to a cluster?
+    if(assignments[i] == 0)
+    {
+      // it is not:
+      // now we make a cluster out of it and see whether some other detections belong to it
+      let r = 0.0, c = 0.0, s = 0.0, q = 0.0, n = 0;
+      for(let j = i; j < dets.length; ++j)
+        if(calculate_iou(dets[i], dets[j]) > iouthreshold)
+        {
+          assignments[j] = 1;
+          r = r + dets[j][0];
+          c = c + dets[j][1];
+          s = s + dets[j][2];
+          q = q + dets[j][3];
+          n = n + 1;
+        }
+      // make a cluster representative
+      clusters.push([r / n, c / n, s / n, q]);
+    }
+  }
+
+  return clusters;
+};
+
+pico.instantiate_detection_memory = function(size)
+{
+  /*
+    initialize a circular buffer of `size` elements
+  */
+  let n = 0;
+  const memory = [];
+  for(let i = 0; i < size; ++i)
+    memory.push([]);
+  /*
+    build a function that:
+    (1) inserts the current frame's detections into the buffer;
+    (2) merges all detections from the last `size` frames and returns them
+  */
+  function update_memory(dets)
+  {
+    memory[n] = dets;
+    n = (n + 1) % memory.length;
+    dets = [];
+    for(let i = 0; i < memory.length; ++i) // NOTE(review): `let` added — `i` leaked as a global
+      dets = dets.concat(memory[i]);
+    return dets;
+  }
+  /*
+    we're done
+  */
+  return update_memory;
+};
+
+// Convert an RGBA byte buffer (4 bytes per pixel, row-major) into a
+// single-channel grayscale Uint8Array of nrows*ncols entries.
+function rgba_to_grayscale(rgba, nrows, ncols) {
+  var gray = new Uint8Array(nrows * ncols);
+  for(var r = 0; r < nrows; ++r)
+    for(var c = 0; c < ncols; ++c)
+      // gray = 0.2*red + 0.7*green + 0.1*blue
+      gray[r * ncols + c] = (2 * rgba[r * 4 * ncols + 4 * c + 0] + 7 * rgba[r * 4 * ncols + 4 * c + 1] + 1 * rgba[r * 4 * ncols + 4 * c + 2]) / 10;
+  return gray;
+}
+
+
+/////////////////////////////////////////////////////////////////////////////////////////
+
+
+// Fetch the pre-trained facefinder cascade, run it over `pixels` (an
+// ndarray-style {data, shape} object), and draw a dark rectangle around every
+// confident detection. `cb` is invoked once drawing finishes.
+// NOTE(review): if the fetch fails, `cb` is never called — no error path exists here.
+module.exports = exports = function(pixels, cb){
+  const pixelSetter = require('../../util/pixelSetter.js');
+  var facefinder_classify_region = function(r, c, s, pixels, ldim) {return -1.0;};
+  // Draw one side of a detection rectangle (inclusive of both endpoints).
+  var drawSide = function(startX, startY, endX, endY){
+    for (var n = startX; n <= endX + 1; n++){
+      for (var k = startY; k <= endY + 1; k++){
+        pixelSetter(n, k, [25, 25, 25, 255], pixels); // To remove 4th channel - pixels.set(n, k, 3, color[3]);
+      }
+    }
+  };
+  var cascadeurl = 'https://raw.githubusercontent.com/nenadmarkus/pico/c2e81f9d23cc11d1a612fd21e4f9de0921a5d0d9/rnt/cascades/facefinder';
+  var color = 'rgba(20,20,20,1)';
+  color = color.substring(color.indexOf('(') + 1, color.length - 1); // Extract only the values from rgba(_,_,_,_)
+  color = color.split(',');
+  fetch(cascadeurl).then(function(response) {
+    response.arrayBuffer().then(function(buffer) {
+      var bytes = new Int8Array(buffer);
+      facefinder_classify_region = pico.unpack_cascade(bytes);
+      console.log('* cascade loaded');
+      var image = {
+        'pixels': rgba_to_grayscale(pixels.data, pixels.shape[1], pixels.shape[0]),
+        'nrows': pixels.shape[1],
+        'ncols': pixels.shape[0],
+        'ldim': pixels.shape[0]
+      };
+      var params = {
+        'shiftfactor': 0.1, // move the detection window by 10% of its size
+        'minsize': 20, // minimum size of a face (not suitable for real-time detection, set it to 100 in that case)
+        'maxsize': 1000, // maximum size of a face
+        'scalefactor': 1.1 // for multiscale processing: resize the detection window by 10% when moving to the higher scale
+      };
+      var dets = pico.run_cascade(image, facefinder_classify_region, params);
+      dets = pico.cluster_detections(dets, 0.2); // set IoU threshold to 0.2
+      var qthresh = 5.0; // this constant is empirical: other cascades might require a different one
+
+      for(var i = 0; i < dets.length; ++i){
+        // check the detection score
+        // if it's above the threshold, draw it
+        if(dets[i][3] > qthresh)
+        {
+          var ox = parseInt(dets[i][1] - dets[i][2] / 2);
+          var oy = parseInt(dets[i][0] - dets[i][2] / 2);
+          var ex = parseInt(dets[i][1] + dets[i][2] / 2) - 1;
+          var ey = parseInt(dets[i][0] + dets[i][2] / 2) - 1;
+
+          drawSide(ox, oy, ox, ey); // Left
+          drawSide(ex, oy, ex, ey); // Right
+          drawSide(ox, oy, ex, oy); // Top
+          drawSide(ox, ey, ex, ey); // Bottom
+        }
+      }
+
+      if (cb) cb();
+    });
+  });
+};
diff --git a/src/modules/FaceDetection/Module.js b/src/modules/FaceDetection/Module.js
new file mode 100644
index 00000000..a16b6d96
--- /dev/null
+++ b/src/modules/FaceDetection/Module.js
@@ -0,0 +1,44 @@
+
+// Image-sequencer module wrapper around the pico.js-based face detector.
+module.exports = function FaceDetection(options, UI){
+
+  var output;
+
+  function draw(input, callback, progressObj) {
+
+    progressObj.stop(true);
+    progressObj.overrideFlag = true;
+
+    var step = this;
+
+    // Pause rendering while the async detector runs, then resume and emit.
+    function extraManipulation(pixels, setRenderState, generateOutput){
+      setRenderState(false);
+      require('./FaceDetection')(pixels, () => {
+        setRenderState(true);
+        generateOutput();
+      });
+    }
+
+    function output(image, datauri, mimetype, wasmSuccess) {
+      step.output = { src: datauri, format: mimetype, wasmSuccess, useWasm: options.useWasm };
+    }
+
+    return require('../_nomodule/PixelManipulation.js')(input, {
+      output: output,
+      extraManipulation: extraManipulation,
+      format: input.format,
+      image: options.image,
+      inBrowser: options.inBrowser,
+      callback: callback
+    });
+
+  }
+
+  return {
+    options: options,
+    draw: draw,
+    output: output,
+    UI: UI
+  };
+};
diff --git a/src/modules/FaceDetection/index.js b/src/modules/FaceDetection/index.js
new file mode 100644
index 00000000..71549002
--- /dev/null
+++ b/src/modules/FaceDetection/index.js
@@ -0,0 +1,4 @@
+module.exports = [
+  require('./Module'),
+  require('./info.json')
+];
diff --git a/src/modules/FaceDetection/info.json b/src/modules/FaceDetection/info.json
new file mode 100644
index 00000000..b23e2b67
--- /dev/null
+++ b/src/modules/FaceDetection/info.json
@@ -0,0 +1,6 @@
+{
+  "name": "Face Detection",
+  "description": "Detect faces in given image",
+  "inputs": {},
+  "docs-link":""
+}