看见网上有一个 MTCNN 的 Java 实现,闲来无事,把 Java 实现用 Kotlin 重写了一下,主要是针对使用 MTCNN 来识别人脸。
关于MTCNN 的算法说明有很多,百度一下就出来了,没有必要再花大把时间去描述这个算法。MTCNN 在人脸识别算法中,算是对Android 比较友好的。dlib 需要大量的 C++代码,opencv更是如此。
当然,也有大神用 Python 实现了一个版本。
首先要感谢Google 能够提供一个在android 上使用的 机器学习框架,这样才能将后端的机器学习所产生的模型,应用在Android层面。
要使用MTCNN 首先要知道的一点,MTCNN 训练出来的模型,可以转存成PB文件供tensorflow加载使用。
pb文件大约20多M,文件在我的Github demo项目中有,大家可以clone 之后 获取到项目中使用的MTCNN pb 模型文件。
Android 依赖配置
既然要使用Android 中的tensorflow ,那肯定是要配置依赖的,不然怎么用?
implementation 'org.tensorflow:tensorflow-android:1.13.1'
MTCNN kotlin
直接上代码,不啰嗦。
/**
 * MTCNN face detector backed by a frozen TensorFlow graph loaded from the app assets.
 *
 * [detectFaces] runs three stages in sequence: the proposal network over an image pyramid,
 * the refine network over 24x24 crops of the surviving candidates, and the output network
 * over 48x48 crops, which also produces five landmark points per box.
 *
 * @param assetManager asset manager used to load the model file.
 */
class MTCNN(assetManager: AssetManager) {
    // Ratio between consecutive face sizes of the image pyramid (see proposalNet).
    private val factor = 0.709f
    private val pNetThreshold = 0.6f
    private val rNetThreshold = 0.7f
    private val outputNetThreshold = 0.7f
    private val modelFile = "mtcnn_freezed_model.pb"

    // Tensor names inside the frozen graph.
    private val pNetInName = "pnet/input:0"
    private val pNetOutName = arrayOf("pnet/prob1:0", "pnet/conv4-2/BiasAdd:0")
    private val rNetInName = "rnet/input:0"
    private val rNetOutName = arrayOf("rnet/prob1:0", "rnet/conv5-2/conv5-2:0")
    private val outputNetInName = "onet/input:0"
    private val outputNetOutName = arrayOf("onet/prob1:0", "onet/conv6-2/conv6-2:0", "onet/conv6-3/conv6-3:0")

    /** Time in milliseconds taken by the most recent [detectFaces] call. */
    var lastProcessTime: Long = 0

    private val inferenceInterface: TensorFlowInferenceInterface =
        TensorFlowInferenceInterface(assetManager, modelFile)
    private val TAG = "MTCNN"

    // Pixel normalisation applied before every network: (value - 127.5) / 128.
    private val imageMean = 127.5f
    private val imageStd = 128f

    /**
     * Writes three normalised floats per pixel into [out], taken from bits 16-23, 8-15 and 0-7
     * of each packed colour int (the red, green and blue bytes of an Android colour int).
     */
    private fun writeNormalizedRgb(pixels: IntArray, out: FloatArray) {
        for (i in pixels.indices) {
            val packed = pixels[i]
            out[i * 3 + 0] = ((packed shr 16 and 0xFF) - imageMean) / imageStd
            out[i * 3 + 1] = ((packed shr 8 and 0xFF) - imageMean) / imageStd
            out[i * 3 + 2] = ((packed and 0xFF) - imageMean) / imageStd
        }
    }

    /**
     * Reads every pixel of [bitmap], normalises it and returns the result as a flat array of
     * `width * height * 3` floats.
     */
    private fun normalizeImage(bitmap: Bitmap): FloatArray {
        val w = bitmap.width
        val h = bitmap.height
        val pixels = IntArray(w * h)
        bitmap.getPixels(pixels, 0, w, 0, 0, w, h)
        val normalized = FloatArray(w * h * 3)
        writeNormalizedRgb(pixels, normalized)
        return normalized
    }

    /** Returns a copy of [bm] scaled by [scale] on both axes (with filtering enabled). */
    private fun bitmapResize(bm: Bitmap, scale: Float): Bitmap {
        val matrix = Matrix()
        matrix.postScale(scale, scale)
        return Bitmap.createBitmap(bm, 0, 0, bm.width, bm.height, matrix, true)
    }

    /**
     * Runs the proposal network on [bitmap] and unpacks its two outputs.
     *
     * The input is flipped along the diagonal and fed as a `1 x w x h x 3` tensor; both outputs
     * are flipped back before being expanded into [pNetOutProb] (`[y][x]`) and [pNetOutBias]
     * (`[y][x][4]`).
     *
     * @return always 0.
     */
    private fun proposalNetForward(
        bitmap: Bitmap,
        pNetOutProb: Array<FloatArray>,
        pNetOutBias: Array<Array<FloatArray>>
    ): Int {
        val w = bitmap.width
        val h = bitmap.height
        val pNetIn = normalizeImage(bitmap)
        PicUtils.flipDiag(pNetIn, h, w, 3) // flip along the diagonal
        inferenceInterface.feed(pNetInName, pNetIn, 1, w.toLong(), h.toLong(), 3)
        inferenceInterface.run(pNetOutName, false)
        val pNetOutSizeW = Math.ceil(w * 0.5 - 5).toInt()
        val pNetOutSizeH = Math.ceil(h * 0.5 - 5).toInt()
        val pNetOutP = FloatArray(pNetOutSizeW * pNetOutSizeH * 2)
        val pNetOutB = FloatArray(pNetOutSizeW * pNetOutSizeH * 4)
        inferenceInterface.fetch(pNetOutName[0], pNetOutP)
        inferenceInterface.fetch(pNetOutName[1], pNetOutB)
        // Flip first, then expand to 2-D / 3-D arrays. These calls fully populate both outputs,
        // so the leftover Java loop headers that used to follow them have been dropped.
        PicUtils.flipDiag(pNetOutP, pNetOutSizeW, pNetOutSizeH, 2)
        PicUtils.flipDiag(pNetOutB, pNetOutSizeW, pNetOutSizeH, 4)
        PicUtils.expand(pNetOutB, pNetOutBias)
        PicUtils.expandProb(pNetOutP, pNetOutProb)
        return 0
    }

    /**
     * Non-maximum suppression: compares every pair of live boxes and marks the lower-scoring
     * one as deleted when their overlap ratio reaches [threshold].
     *
     * @param method "Union" divides the overlap by the union area, "Min" by the smaller box
     *   area; with any other value the ratio stays 0.
     */
    private fun nms(boxes: Vector<Box>, threshold: Float, method: String) {
        for (i in 0 until boxes.size) {
            val box = boxes[i]
            if (box.deleted) continue
            for (j in i + 1 until boxes.size) {
                val box2 = boxes[j]
                if (box2.deleted) continue
                val x1 = max(box.box[0], box2.box[0])
                val y1 = max(box.box[1], box2.box[1])
                val x2 = min(box.box[2], box2.box[2])
                val y2 = min(box.box[3], box2.box[3])
                if (x2 < x1 || y2 < y1) continue // no overlap
                val areaIoU = (x2 - x1 + 1) * (y2 - y1 + 1)
                var iou = 0f
                if (method == "Union") {
                    iou = 1.0f * areaIoU / (box.area() + box2.area() - areaIoU)
                } else if (method == "Min") {
                    iou = 1.0f * areaIoU / min(box.area(), box2.area())
                    Log.i(TAG, "[*]iou=$iou")
                }
                if (iou >= threshold) {
                    // Drop only the box with the smaller score.
                    if (box.score > box2.score) {
                        box2.deleted = true
                    } else {
                        box.deleted = true
                    }
                }
            }
        }
    }

    /**
     * Turns the proposal network output into candidate boxes in original-image coordinates.
     *
     * Every cell of [prob] scoring above [threshold] becomes a box whose corners are the cell
     * position times 2 (plus 11 for the far corner) divided by [scale]; the matching [bias]
     * entry is stored as the box's regression offsets. Boxes are appended to [boxes].
     *
     * @return always 0.
     */
    private fun generateBoxes(
        prob: Array<FloatArray>,
        bias: Array<Array<FloatArray>>,
        scale: Float,
        threshold: Float,
        boxes: Vector<Box>
    ): Int {
        val h = prob.size
        val w = if (h > 0) prob[0].size else 0
        for (y in 0 until h) {
            for (x in 0 until w) {
                val score = prob[y][x]
                if (score <= threshold) continue
                val box = Box()
                box.score = score
                box.box[0] = Math.round(x * 2 / scale)
                box.box[1] = Math.round(y * 2 / scale)
                box.box[2] = Math.round((x * 2 + 11) / scale)
                box.box[3] = Math.round((y * 2 + 11) / scale)
                for (i in 0..3) {
                    box.bbr[i] = bias[y][x][i]
                }
                boxes.addElement(box)
            }
        }
        return 0
    }

    /** Applies the stored regression offsets of every box via [Box.calibrate]. */
    private fun boundingBoxRegression(boxes: Vector<Box>) {
        for (i in 0 until boxes.size) {
            boxes[i].calibrate()
        }
    }

    /**
     * Proposal stage: P-Net + non-maximum suppression + bounding box regression.
     *
     * Regression runs only after NMS has finished:
     * 1. for each pyramid scale, NMS with threshold 0.5;
     * 2. over all candidates, NMS with threshold 0.7;
     * 3. calibrate the bounding boxes.
     *
     * The pyramid starts at a face size of [minSize] and divides it by [factor] each step
     * until it exceeds the shorter side of [bitmap].
     */
    private fun proposalNet(bitmap: Bitmap, minSize: Int): Vector<Box> {
        val whMin = min(bitmap.width, bitmap.height)
        var currentFaceSize = minSize.toFloat()
        val totalBoxes = Vector<Box>()
        // [1] Image pyramid, each level fed to P-Net.
        while (currentFaceSize <= whMin) {
            val scale = 12.0f / currentFaceSize
            // (1) Resize the image.
            val bm = bitmapResize(bitmap, scale)
            val w = bm.width
            val h = bm.height
            // (2) Run the network.
            val pNetOutSizeW = (Math.ceil(w * 0.5 - 5) + 0.5).toInt()
            val pNetOutSizeH = (Math.ceil(h * 0.5 - 5) + 0.5).toInt()
            val pNetOutProb = Array(pNetOutSizeH) { FloatArray(pNetOutSizeW) }
            val pNetOutBias = Array(pNetOutSizeH) { Array(pNetOutSizeW) { FloatArray(4) } }
            proposalNetForward(bm, pNetOutProb, pNetOutBias)
            // (3) Parse the output into boxes.
            val curBoxes = Vector<Box>()
            generateBoxes(pNetOutProb, pNetOutBias, scale, pNetThreshold, curBoxes)
            // (4) NMS 0.5 within this scale.
            nms(curBoxes, 0.5f, "Union")
            // (5) Keep the survivors.
            for (i in 0 until curBoxes.size) {
                if (!curBoxes[i].deleted) {
                    totalBoxes.addElement(curBoxes[i])
                }
            }
            // Face size grows geometrically.
            currentFaceSize /= factor
        }
        // NMS 0.7 across all scales, then calibrate.
        nms(totalBoxes, 0.7f, "Union")
        boundingBoxRegression(totalBoxes)
        return PicUtils.updateBoxes(totalBoxes)
    }

    var tmp_bm: Bitmap? = null

    /**
     * Crops [box] out of [bitmap], scales the crop by `size / box.width()` on both axes and
     * writes its normalised pixels into [data].
     *
     * The pixel buffer holds `size * size` entries, so the box is expected to be square and
     * [data] must hold at least `size * size * 3` floats.
     */
    private fun cropAndResize(bitmap: Bitmap, box: Box, size: Int, data: FloatArray) {
        val matrix = Matrix()
        val scale = 1.0f * size / box.width()
        matrix.postScale(scale, scale)
        val croped = Bitmap.createBitmap(bitmap, box.left(), box.top(), box.width(), box.height(), matrix, true)
        val pixelsBuf = IntArray(size * size)
        croped.getPixels(pixelsBuf, 0, croped.width, 0, 0, croped.width, croped.height)
        writeNormalizedRgb(pixelsBuf, data)
    }

    /**
     * Crops every box at [size] x [size], flips each crop along the diagonal and packs the
     * crops back to back into one array of `boxes.size * size * size * 3` floats.
     */
    private fun cropBatch(bitmap: Bitmap, boxes: Vector<Box>, size: Int): FloatArray {
        val cropLength = size * size * 3
        val batch = FloatArray(boxes.size * cropLength)
        val curCrop = FloatArray(cropLength)
        for (i in 0 until boxes.size) {
            cropAndResize(bitmap, boxes[i], size, curCrop)
            PicUtils.flipDiag(curCrop, size, size, 3)
            System.arraycopy(curCrop, 0, batch, i * cropLength, cropLength)
        }
        return batch
    }

    /**
     * Runs the refine network on a batch of 24x24 crops and writes the resulting score and
     * regression offsets into the box with the same index.
     */
    private fun refineNetForward(rNetIn: FloatArray, boxes: Vector<Box>) {
        val num = rNetIn.size / 24 / 24 / 3
        // feed & run
        inferenceInterface.feed(rNetInName, rNetIn, num.toLong(), 24, 24, 3)
        inferenceInterface.run(rNetOutName, false)
        // fetch
        val rNetP = FloatArray(num * 2)
        val rNetB = FloatArray(num * 4)
        inferenceInterface.fetch(rNetOutName[0], rNetP)
        inferenceInterface.fetch(rNetOutName[1], rNetB)
        for (i in 0 until num) {
            boxes[i].score = rNetP[i * 2 + 1]
            for (j in 0..3) {
                boxes[i].bbr[j] = rNetB[i * 4 + j]
            }
        }
    }

    /**
     * Refine stage: re-scores every candidate with R-Net, deletes those below the R-Net
     * threshold, runs NMS 0.7 ("Union") and then applies the new regression offsets.
     *
     * @return a new vector holding only the surviving boxes.
     */
    private fun refineNet(bitmap: Bitmap, boxes: Vector<Box>): Vector<Box> {
        val num = boxes.size
        if (num == 0) return Vector<Box>() // nothing to score; skip the network run
        val rNetIn = cropBatch(bitmap, boxes, 24)
        refineNetForward(rNetIn, boxes)
        for (i in 0 until num) {
            if (boxes[i].score < rNetThreshold) {
                boxes[i].deleted = true
            }
        }
        nms(boxes, 0.7f, "Union")
        // NOTE(review): regression after NMS follows the reference MTCNN second stage — confirm.
        boundingBoxRegression(boxes)
        return PicUtils.updateBoxes(boxes)
    }

    /**
     * Runs the output network on a batch of 48x48 crops and writes score, regression offsets
     * and five landmark points into the box with the same index.
     *
     * Landmark outputs are fractions of the box size: entries 0-4 are x offsets, 5-9 are y
     * offsets, both measured from the box's left/top corner.
     */
    private fun outputNetForward(outputNetIn: FloatArray, boxes: Vector<Box>) {
        val num = outputNetIn.size / 48 / 48 / 3
        // feed & run
        inferenceInterface.feed(outputNetInName, outputNetIn, num.toLong(), 48, 48, 3)
        inferenceInterface.run(outputNetOutName, false)
        // fetch
        val outputNetP = FloatArray(num * 2) // prob
        val outputNetB = FloatArray(num * 4) // bias
        val outputNetL = FloatArray(num * 10) // landmark
        inferenceInterface.fetch(outputNetOutName[0], outputNetP)
        inferenceInterface.fetch(outputNetOutName[1], outputNetB)
        inferenceInterface.fetch(outputNetOutName[2], outputNetL)
        for (i in 0 until num) {
            val box = boxes[i]
            box.score = outputNetP[i * 2 + 1]
            for (j in 0..3) {
                box.bbr[j] = outputNetB[i * 4 + j]
            }
            for (j in 0..4) {
                val x = box.left() + (outputNetL[i * 10 + j] * box.width()).toInt()
                val y = box.top() + (outputNetL[i * 10 + j + 5] * box.height()).toInt()
                box.landmark[j] = Point(x, y)
            }
        }
    }

    /**
     * Output stage: re-scores every candidate with O-Net, deletes those below the O-Net
     * threshold, applies the new regression offsets and then runs NMS 0.7 ("Min").
     *
     * @return a new vector holding only the surviving boxes.
     */
    private fun outputNet(bitmap: Bitmap, boxes: Vector<Box>): Vector<Box> {
        val num = boxes.size
        if (num == 0) return Vector<Box>() // nothing to score; skip the network run
        val outputNetIn = cropBatch(bitmap, boxes, 48)
        outputNetForward(outputNetIn, boxes)
        for (i in 0 until num) {
            if (boxes[i].score < outputNetThreshold) {
                boxes[i].deleted = true
            }
        }
        // NOTE(review): regression before NMS follows the reference MTCNN third stage — confirm.
        boundingBoxRegression(boxes)
        nms(boxes, 0.7f, "Min")
        return PicUtils.updateBoxes(boxes)
    }

    /**
     * Makes every box square and then shifts its corners back inside a [w] x [h] image.
     * Square boxes are required by [cropAndResize], which scales both axes by the box width.
     */
    private fun squareLimit(boxes: Vector<Box>, w: Int, h: Int) {
        for (i in 0 until boxes.size) {
            boxes[i].toSquareShape()
            boxes[i].limitSquare(w, h)
        }
    }

    /**
     * Detects faces in [bitmap].
     *
     * @param bitmap image to process.
     * @param minFaceSize smallest face size in pixels to look for; larger values mean fewer
     *   pyramid levels and faster detection. Must be positive.
     * @return the detected face boxes.
     * @throws IllegalArgumentException if [minFaceSize] is not positive.
     */
    fun detectFaces(bitmap: Bitmap, minFaceSize: Int): Vector<Box> {
        // A non-positive size would never let the pyramid loop in proposalNet terminate sanely.
        require(minFaceSize > 0) { "minFaceSize must be positive, was $minFaceSize" }
        val tStart = System.currentTimeMillis()
        // [1] P-Net generates candidate boxes.
        var boxes = proposalNet(bitmap, minFaceSize)
        squareLimit(boxes, bitmap.width, bitmap.height)
        // [2] R-Net
        boxes = refineNet(bitmap, boxes)
        squareLimit(boxes, bitmap.width, bitmap.height)
        // [3] O-Net
        boxes = outputNet(bitmap, boxes)
        lastProcessTime = System.currentTimeMillis() - tStart
        Log.i(TAG, "[*]Mtcnn Detection Time:$lastProcessTime")
        return boxes
    }

    /**
     * Draws every box and its landmarks onto a copy of [bitmap] and returns one 160x160 crop
     * per box, each taken from the box rectangle extended by 20 pixels and clamped to the image.
     *
     * @throws IllegalArgumentException if [bitmap] is null.
     */
    fun cutFace(bitmap: Bitmap?, boxes: Vector<Box>): List<Bitmap> {
        if (bitmap == null) {
            throw IllegalArgumentException("no images!")
        }
        val findFaceBitmap = PicUtils.copyBitmap(bitmap)
        val faces = mutableListOf<Bitmap>()
        for (box in boxes) {
            PicUtils.drawRect(findFaceBitmap, box.transform2Rect())
            PicUtils.drawPoints(findFaceBitmap, box.landmark)
            // Keep the extended rectangle: transform2Rect() returns a fresh Rect on every call,
            // so extending a temporary would have no effect on the crop.
            val faceRect = box.transform2Rect()
            PicUtils.rectExtend(findFaceBitmap, faceRect, 20)
            faces.add(Bitmap.createScaledBitmap(PicUtils.crop(findFaceBitmap, faceRect), 160, 160, true))
        }
        return faces
    }
}
/** Bitmap drawing helpers and array reshaping utilities used by the MTCNN pipeline. */
object PicUtils {
    /** Returns a mutable copy of [bitmap] with the same config. */
    fun copyBitmap(bitmap: Bitmap): Bitmap {
        return bitmap.copy(bitmap.config, true)
    }

    /**
     * Draws the outline of [rect] onto [bitmap] in red, with a stroke width of
     * `1 + bitmap.width / 500`. Any exception is logged and otherwise ignored.
     */
    fun drawRect(bitmap: Bitmap, rect: Rect) {
        try {
            val canvas = Canvas(bitmap)
            val paint = Paint()
            paint.color = Color.rgb(255, 0, 0)
            paint.strokeWidth = (1 + bitmap.width / 500).toFloat()
            paint.style = Paint.Style.STROKE
            canvas.drawRect(rect, paint)
        } catch (e: Exception) {
            Log.i("Utils", "[*] error$e")
        }
    }

    /**
     * Marks every landmark with a small rectangle spanning one pixel either side of the point.
     * A null entry is drawn at (0, 0).
     */
    fun drawPoints(bitmap: Bitmap, landmark: Array<Point?>) {
        for (point in landmark) {
            val x = point?.x ?: 0
            val y = point?.y ?: 0
            drawRect(bitmap, Rect(x - 1, y - 1, x + 1, y + 1))
        }
    }

    /**
     * Flips [data] along the diagonal in place: the element at row `y`, column `x` of an
     * [h] x [w] grid with [stride] values per cell moves to row `x`, column `y` of a
     * [w] x [h] grid.
     */
    fun flipDiag(data: FloatArray, h: Int, w: Int, stride: Int) {
        // Snapshot the source first because the transpose overwrites data in place.
        val snapshot = data.copyOfRange(0, w * h * stride)
        for (y in 0 until h) {
            for (x in 0 until w) {
                for (z in 0 until stride) {
                    data[(x * h + y) * stride + z] = snapshot[(y * w + x) * stride + z]
                }
            }
        }
    }

    /** Copies consecutive values of [src] into the 2-D array [dst], row by row. */
    fun expand(src: FloatArray, dst: Array<FloatArray>) {
        var idx = 0
        for (row in dst) {
            for (x in row.indices) {
                row[x] = src[idx++]
            }
        }
    }

    /** Copies consecutive values of [src] into the 3-D array [dst] in `[y][x][c]` order. */
    fun expand(src: FloatArray, dst: Array<Array<FloatArray>>) {
        var idx = 0
        for (plane in dst) {
            for (cell in plane) {
                for (c in cell.indices) {
                    cell[c] = src[idx++]
                }
            }
        }
    }

    /**
     * Fills the 2-D array [dst] row by row with every second value of [src] (indices
     * 1, 3, 5, ...), i.e. the second entry of each value pair.
     */
    fun expandProb(src: FloatArray, dst: Array<FloatArray>) {
        var idx = 0
        for (row in dst) {
            for (x in row.indices) {
                row[x] = src[idx++ * 2 + 1]
            }
        }
    }

    /** Converts every box that is not marked deleted into a [Rect]. */
    fun boxes2rects(boxes: Vector<Box>): Array<Rect?> {
        val live = boxes.filterNot { it.deleted }
        return Array<Rect?>(live.size) { live[it].transform2Rect() }
    }

    /** Returns a new vector containing only the boxes that are not marked deleted. */
    fun updateBoxes(boxes: Vector<Box>): Vector<Box> {
        val kept = Vector<Box>()
        for (i in 0 until boxes.size) {
            if (!boxes[i].deleted) {
                kept.addElement(boxes[i])
            }
        }
        return kept
    }

    /** Logs the three colour bytes (bits 16-23, 8-15, 0-7) of the packed pixel [v]. */
    fun showPixel(v: Int) {
        Log.i("MainActivity", "[*]Pixel:R${v shr 16 and 0xff}G:${v shr 8 and 0xff} B:${v and 0xff}")
    }

    /**
     * Decodes the asset [fileName] into a bitmap, closing the stream afterwards.
     *
     * @throws IllegalArgumentException if [assets] is null or the asset cannot be decoded.
     */
    fun getBitmapFromAssets(assets: AssetManager?, fileName: String): Bitmap {
        val manager = requireNotNull(assets) { "AssetManager is null, cannot open $fileName" }
        val bitmap: Bitmap? = manager.open(fileName).use { stream ->
            BitmapFactory.decodeStream(stream)
        }
        return requireNotNull(bitmap) { "$fileName could not be decoded as a bitmap" }
    }

    /** Returns the part of [bitmap] covered by [rect]. */
    fun crop(bitmap: Bitmap, rect: Rect): Bitmap {
        return Bitmap.createBitmap(bitmap, rect.left, rect.top, rect.right - rect.left, rect.bottom - rect.top)
    }

    /**
     * Grows [rect] in place by [pixels] on every side, clamped to the bounds of [bitmap]
     * (0 .. width - 1 horizontally, 0 .. height - 1 vertically).
     */
    fun rectExtend(bitmap: Bitmap, rect: Rect, pixels: Int) {
        rect.left = max(0, rect.left - pixels)
        rect.right = min(bitmap.width - 1, rect.right + pixels)
        rect.top = max(0, rect.top - pixels)
        rect.bottom = min(bitmap.height - 1, rect.bottom + pixels)
    }
}
import android.graphics.Point
import android.graphics.Rect
import kotlin.math.max
/**
 * A candidate face box with its score, regression offsets and landmarks.
 *
 * Coordinates are inclusive: [width] and [height] add 1 to the corner difference.
 */
class Box {
    /** Corners: left = box[0], top = box[1], right = box[2], bottom = box[3]. */
    var box: IntArray = intArrayOf(0, 0, 0, 0)

    /** Probability assigned by the most recent network stage. */
    var score: Float = 0.toFloat()

    /** Bounding box regression offsets, consumed and reset by [calibrate]. */
    var bbr: FloatArray = floatArrayOf(0f, 0f, 0f, 0f)

    /** True once the box has been discarded. */
    var deleted: Boolean = false

    /** Facial landmarks; the entries stay null until a stage fills them in. */
    var landmark: Array<Point?> = arrayOfNulls(5)

    fun left(): Int = box[0]

    fun right(): Int = box[2]

    fun top(): Int = box[1]

    fun bottom(): Int = box[3]

    fun width(): Int = box[2] - box[0] + 1

    fun height(): Int = box[3] - box[1] + 1

    /** Returns a new [Rect] with the same four corner values. */
    fun transform2Rect(): Rect {
        // The corners are already ints, so no float rounding is needed.
        return Rect(box[0], box[1], box[2], box[3])
    }

    fun area(): Int = width() * height()

    /**
     * Bounding box regression: moves each corner by the matching [bbr] offset scaled by the
     * current width (x corners) or height (y corners), then resets [bbr] to zero.
     */
    fun calibrate() {
        val w = box[2] - box[0] + 1
        val h = box[3] - box[1] + 1
        box[0] = (box[0] + w * bbr[0]).toInt()
        box[1] = (box[1] + h * bbr[1]).toInt()
        box[2] = (box[2] + w * bbr[2]).toInt()
        box[3] = (box[3] + h * bbr[3]).toInt()
        for (i in 0..3) bbr[i] = 0.0f
    }

    /** Grows the shorter side symmetrically until width and height are equal. */
    fun toSquareShape() {
        val w = width()
        val h = height()
        if (w > h) {
            box[1] -= (w - h) / 2
            box[3] += (w - h + 1) / 2
        } else {
            box[0] -= (h - w) / 2
            box[2] += (h - w + 1) / 2
        }
    }

    /**
     * Pulls the box inside a [w] x [h] image by moving a corner pair by the same amount on
     * both axes, so the width/height difference is unchanged.
     */
    fun limitSquare(w: Int, h: Int) {
        if (box[0] < 0 || box[1] < 0) {
            val len = max(-box[0], -box[1])
            box[0] += len
            box[1] += len
        }
        if (box[2] >= w || box[3] >= h) {
            val len = max(box[2] - w + 1, box[3] - h + 1)
            box[2] -= len
            box[3] -= len
        }
    }

    /**
     * Alternative clamp: first trims the box to at most [w] x [h], then translates it along
     * each axis until it lies inside the image.
     */
    fun limitSquare2(w: Int, h: Int) {
        if (width() > w) box[2] -= width() - w
        if (height() > h) box[3] -= height() - h
        if (box[0] < 0) {
            val sz = -box[0]
            box[0] += sz
            box[2] += sz
        }
        if (box[1] < 0) {
            val sz = -box[1]
            box[1] += sz
            box[3] += sz
        }
        if (box[2] >= w) {
            val sz = box[2] - w + 1
            box[2] -= sz
            box[0] -= sz
        }
        if (box[3] >= h) {
            val sz = box[3] - h + 1
            box[3] -= sz
            box[1] -= sz
        }
    }
}