基本数据增强主要包含如下方式:
1.旋转:将图像绕中心旋转一定角度;为避免旋转后四角出现黑边,可先放大图像,旋转后再裁剪回原尺寸。
2.平移:先放大图像,然后水平或垂直偏移位置剪切
3.缩放:缩放图像
4.随机遮挡:对图像进行小区域遮挡
5.水平翻转:以过图像中心的竖直轴为对称轴,将左、右两边像素交换
6.颜色抖动:随机调整图像的饱和度、亮度、对比度、锐度等
7.噪声扰动:对图像每个像素的RGB值进行随机扰动,常用的噪声模式是椒盐噪声和高斯噪声。
TensorFlow代码实现:
参数可根据需求进行相应调整。
# -*- coding: utf-8 -*-
"""
# 数据增强实现
"""
import tensorflow as tf
import cv2
import numpy as np
from scipy import misc
import random
def random_rotate_image(image):
    """Rotate an image by a random angle drawn from [-10, 10) degrees.

    The interpolation quality is also chosen at random on every call. The
    result keeps the input's shape; areas rotated in from outside the frame
    are filled with black.

    Args:
        image: Array-like of shape (H, W) or (H, W, C). Pixel values are
            expected to lie in 0..255; anything outside is clipped.

    Returns:
        A ``np.uint8`` ndarray with the same shape as ``image``.
    """
    # scipy.misc.imrotate was removed in SciPy 1.2; scipy.ndimage.rotate is
    # available in both old and new releases. Imported locally so the block
    # does not depend on the file-level import list.
    from scipy import ndimage

    # Spline orders matching 'nearest', 'bilinear', 'cubic' and 'bicubic'.
    # Order 3 appears twice so each of the four names keeps a 1-in-4 chance.
    orders = (0, 1, 3, 3)
    angle = np.random.uniform(low=-10.0, high=10.0)
    order = orders[random.randint(0, 3)]

    # Interpolate in floating point: cubic splines can overshoot the 0..255
    # range, which would wrap around if computed directly in uint8.
    rotated = ndimage.rotate(
        np.asarray(image, dtype=np.float64),
        angle,
        reshape=False,  # keep the original height and width
        order=order,
        mode='constant',
        cval=0.0,
    )
    return np.clip(np.rint(rotated), 0, 255).astype(np.uint8)
def _random_block_start(size, block, margin=10):
    """Pick a random start index so a ``block``-long span fits inside ``size``."""
    low, high = margin, size - block - margin
    if high < low:
        # Axis too short to honour the margin: fall back to any valid start.
        low, high = 0, max(size - block, 0)
    return random.randint(low, high)


def random_occlusion(image):
    """Black out one randomly placed rectangle of the image.

    The rectangle spans one tenth of the image height and one tenth of its
    width. Where the image is large enough, it is kept at least 10 pixels
    away from every border.

    Args:
        image: Array-like of shape (H, W) or (H, W, C).

    Returns:
        A new ``np.uint8`` ndarray of the same shape with the rectangle set
        to zero. The input is not modified.
    """
    b_ratio = 1. / 10  # fraction of each side covered by the occluding block

    image = np.asarray(image)
    height, width = image.shape[:2]
    block_h = int(height * b_ratio)
    block_w = int(width * b_ratio)

    # Integer bounds only: random.randint rejects float arguments on
    # Python 3.12+ (and warns from 3.10).
    top = _random_block_start(height, block_h)
    left = _random_block_start(width, block_w)

    occluded = image.astype(np.uint8)  # astype returns a copy
    occluded[top:top + block_h, left:left + block_w] = 0
    return occluded
def data_augumrntation(image):
    """Build the TensorFlow ops that augment and normalise one image.

    Steps, in order: random occlusion, random rotation, random rescale
    followed by crop/pad back to 320x250, random 299x235 crop, random
    horizontal flip, occasional per-image standardisation, random brightness
    and contrast, additive Gaussian noise, and a final mapping of the
    0..255 range to roughly [-1, 1].

    NOTE: the scale factor and the standardisation switch are drawn with
    Python's ``random`` while the graph is being built, so they stay fixed
    for every evaluation of the returned tensor.

    Args:
        image: uint8 image tensor; it is declared below as (320, 250, 3).

    Returns:
        A float32 tensor of shape (299, 235, 3).
    """
    image = tf.py_func(random_occlusion, [image], tf.uint8)  # random occlusion
    image = tf.py_func(random_rotate_image, [image], tf.uint8)  # random rotation

    ratio = [0.9, 1.1]  # lower / upper scale factor
    # random.randint needs integer bounds; float bounds raise TypeError on
    # Python 3.12+ (and warn from 3.10), so round the scaled sizes first.
    new_H = random.randint(int(round(320 * ratio[0])), int(round(320 * ratio[1])))
    new_W = random.randint(int(round(250 * ratio[0])), int(round(250 * ratio[1])))
    print(new_H, new_W)

    # py_func outputs carry no static shape, so declare it before resizing.
    image.set_shape((320, 250, 3))
    image = tf.image.resize_images(image, [new_H, new_W])
    image = tf.cast(image, tf.uint8)
    # Crop or zero-pad back to the nominal size, completing the rescale step.
    image = tf.image.resize_image_with_crop_or_pad(image, 320, 250)

    image = tf.random_crop(image, [299, 235, 3])  # random crop
    image = tf.image.random_flip_left_right(image)  # mirror

    N_key = random.randint(0, 10)
    if N_key == 8:
        # NOTE(review): standardised values are clamped to [0, 255] by the
        # steps below - confirm this is the intended effect.
        image = tf.image.per_image_standardization(image)
    image = tf.cast(image, tf.float32)

    # Each photometric step is clamped back into the valid pixel range.
    image = tf.minimum(255.0, tf.maximum(0.0, tf.image.random_brightness(image, 25.0)))  # brightness
    image = tf.minimum(255.0, tf.maximum(0.0, tf.image.random_contrast(image, 0.8, 1.2)))  # contrast
    noise = tf.random_normal((299, 235, 3), mean=0.0, stddev=1.0, dtype=tf.float32)
    image = tf.minimum(255.0, tf.maximum(0.0, image + noise))  # Gaussian noise

    # (x - 127.5) / 128 maps [0, 255] to approximately [-1, 1].
    image = tf.subtract(image, 127.5)
    image = tf.multiply(image, 0.0078125)
    return image
if __name__ == '__main__':
    # Demo: load one JPEG, run it through the augmentation graph, show it.
    pic = r"bb.jpg"
    file_contents = tf.read_file(pic)
    # Force three channels so a greyscale JPEG still unstacks into R, G, B.
    image = tf.image.decode_jpeg(file_contents, channels=3,
                                 dct_method="INTEGER_ACCURATE")
    R, G, B = tf.unstack(image, num=3, axis=2)
    image = tf.stack([B, G, R], axis=2)  # swap to B, G, R channel order
    image = data_augumrntation(image)

    # The context manager releases the session's resources once the image
    # has been evaluated.
    with tf.Session() as sess:
        img = sess.run(image)

    # img is the normalised float output of data_augumrntation, not uint8.
    cv2.imshow('img', img)
    cv2.waitKey()
原图:
增强后图像(图像做了归一化操作):