基于 Keras 的 CNN 定长验证码识别
使用到的库如下(环境搭建不再细说):
tensorflow>2.0
captcha
PIL
numpy
1、验证码的生成:使用 captcha 库
# Captchas are built only from digits and lower/upper-case ASCII letters.
number = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9']
alphabet = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
            'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z']
ALPHABET = ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
            'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z']

SAVE_PATH = "/data/keras_cnn/"

# Full character set and its total length.
CHAR_SET = number + alphabet + ALPHABET
CHAR_SET_LEN = len(CHAR_SET)

# Size of the generated images.
IMAGE_HEIGHT = 60
IMAGE_WIDTH = 160
def random_captcha_text(char_set=None, captcha_size=4):
    """Draw the characters for one random captcha.

    Args:
        char_set: Sequence of candidate characters. When ``None``, the
            combined ``number + alphabet + ALPHABET`` set is used.
        captcha_size: How many characters to draw.

    Returns:
        A list of ``captcha_size`` characters, each picked independently
        with ``random.choice`` (so repeats are possible).
    """
    if char_set is None:
        char_set = number + alphabet + ALPHABET

    return [random.choice(char_set) for _ in range(captcha_size)]
def gen_captcha_text_and_image(width=160, height=60, char_set=CHAR_SET):
    """Create a random captcha string and render it as an image array.

    Args:
        width: Width passed to ``ImageCaptcha``.
        height: Height passed to ``ImageCaptcha``.
        char_set: Characters the captcha text is drawn from.

    Returns:
        A ``(captcha_text, captcha_image)`` tuple: the text as a string and
        the rendered picture converted with ``np.array``.
    """
    generator = ImageCaptcha(width=width, height=height)

    captcha_text = ''.join(random_captcha_text(char_set))

    # The object returned by ``generate`` is handed straight to PIL.
    rendered = Image.open(generator.generate(captcha_text))
    return captcha_text, np.array(rendered)
2、图片预处理:转为灰度图
def convert2gray(img):
    """Reduce a multi-channel image to a single grayscale plane.

    Args:
        img: Array-like image exposing a ``shape`` attribute.

    Returns:
        The mean over the last axis when ``img`` has more than two
        dimensions; otherwise ``img`` itself, untouched.
    """
    if len(img.shape) > 2:
        # Plain average of the channels.
        return np.mean(img, -1)
    return img
3、标签编码转换:验证码文本与 one-hot 向量互转
def text2vec(text):
    """One-hot encode a captcha string.

    Args:
        text: Captcha text; every character must be in ``CHAR_SET``.

    Returns:
        A ``[MAX_CAPTCHA, CHAR_SET_LEN]`` array of zeros with a ``1.0`` in
        row ``i`` at the ``CHAR_SET`` index of ``text[i]``.

    Raises:
        ValueError: A character is not in ``CHAR_SET`` (from ``list.index``).
        IndexError: ``text`` has more than ``MAX_CAPTCHA`` characters.
    """
    vector = np.zeros([MAX_CAPTCHA, CHAR_SET_LEN])
    for position, char in enumerate(text):
        vector[position, CHAR_SET.index(char)] = 1.0
    return vector
def vec2text(vec):
    """Turn a sequence of ``CHAR_SET`` indices back into a string.

    Args:
        vec: Iterable of integer indices into ``CHAR_SET``, one per
            captcha character.

    Returns:
        The characters at those indices joined into one string.
    """
    return "".join(CHAR_SET[index] for index in vec)
4、模型定义
def crack_captcha_cnn():
    """Build the (uncompiled) CNN used to read the captcha.

    The network is three Conv2D -> PReLU -> 2x2 max-pool stages, followed by
    a dense layer whose output is reshaped to ``[MAX_CAPTCHA, CHAR_SET_LEN]``
    and passed through a ``Softmax`` layer with its default arguments.

    Returns:
        A ``tf.keras.Sequential`` model. No input shape is declared here.
    """
    model = tf.keras.Sequential()

    # (filters, kernel size) for each convolution stage, in order.
    conv_stages = ((32, (3, 3)), (64, (5, 5)), (128, (5, 5)))
    for filters, kernel_size in conv_stages:
        model.add(tf.keras.layers.Conv2D(filters, kernel_size))
        model.add(tf.keras.layers.PReLU())
        model.add(tf.keras.layers.MaxPool2D((2, 2), strides=2))

    model.add(tf.keras.layers.Flatten())
    model.add(tf.keras.layers.Dense(MAX_CAPTCHA * CHAR_SET_LEN))
    # One row of class scores per captcha position.
    model.add(tf.keras.layers.Reshape([MAX_CAPTCHA, CHAR_SET_LEN]))

    model.add(tf.keras.layers.Softmax())

    return model
5、完整代码
# coding:utf-8
import random

import numpy as np
import tensorflow as tf
from captcha.image import ImageCaptcha
from PIL import Image

# Captchas are built only from digits and lower/upper-case ASCII letters.
number = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9']
alphabet = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
            'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z']
ALPHABET = ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
            'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z']

# Prefix of the path the model is saved to and loaded from.
SAVE_PATH = "/data/keras_cnn/"

# Full character set and its total length.
CHAR_SET = number + alphabet + ALPHABET
CHAR_SET_LEN = len(CHAR_SET)

# Size of the generated images.
IMAGE_HEIGHT = 60
IMAGE_WIDTH = 160
def random_captcha_text(char_set=None, captcha_size=4):
    """Draw the characters for one random captcha.

    Args:
        char_set: Sequence of candidate characters. When ``None``, the
            combined ``number + alphabet + ALPHABET`` set is used.
        captcha_size: How many characters to draw.

    Returns:
        A list of ``captcha_size`` characters, each picked independently
        with ``random.choice`` (so repeats are possible).
    """
    if char_set is None:
        char_set = number + alphabet + ALPHABET

    return [random.choice(char_set) for _ in range(captcha_size)]
def gen_captcha_text_and_image(width=160, height=60, char_set=CHAR_SET):
    """Create a random captcha string and render it as an image array.

    Args:
        width: Width passed to ``ImageCaptcha``.
        height: Height passed to ``ImageCaptcha``.
        char_set: Characters the captcha text is drawn from.

    Returns:
        A ``(captcha_text, captcha_image)`` tuple: the text as a string and
        the rendered picture converted with ``np.array``.
    """
    generator = ImageCaptcha(width=width, height=height)

    captcha_text = ''.join(random_captcha_text(char_set))

    # The object returned by ``generate`` is handed straight to PIL.
    rendered = Image.open(generator.generate(captcha_text))
    return captcha_text, np.array(rendered)
# Render one sample captcha when the module is loaded, only to learn how many
# characters a captcha contains; that length fixes the label/output shape.
text, image = gen_captcha_text_and_image(char_set=CHAR_SET)
MAX_CAPTCHA = len(text)
print('CHAR_SET_LEN=', CHAR_SET_LEN, ' MAX_CAPTCHA=', MAX_CAPTCHA)
def convert2gray(img):
    """Reduce a multi-channel image to a single grayscale plane.

    Args:
        img: Array-like image exposing a ``shape`` attribute.

    Returns:
        The mean over the last axis when ``img`` has more than two
        dimensions; otherwise ``img`` itself, untouched.
    """
    if len(img.shape) > 2:
        # Plain average of the channels.
        return np.mean(img, -1)
    return img
def text2vec(text):
    """One-hot encode a captcha string.

    Args:
        text: Captcha text; every character must be in ``CHAR_SET``.

    Returns:
        A ``[MAX_CAPTCHA, CHAR_SET_LEN]`` array of zeros with a ``1.0`` in
        row ``i`` at the ``CHAR_SET`` index of ``text[i]``.

    Raises:
        ValueError: A character is not in ``CHAR_SET`` (from ``list.index``).
        IndexError: ``text`` has more than ``MAX_CAPTCHA`` characters.
    """
    vector = np.zeros([MAX_CAPTCHA, CHAR_SET_LEN])
    for position, char in enumerate(text):
        vector[position, CHAR_SET.index(char)] = 1.0
    return vector
def vec2text(vec):
    """Turn a sequence of ``CHAR_SET`` indices back into a string.

    Args:
        vec: Iterable of integer indices into ``CHAR_SET``, one per
            captcha character.

    Returns:
        The characters at those indices joined into one string.
    """
    return "".join(CHAR_SET[index] for index in vec)
def get_next_batch(batch_size=128):
    """Render a fresh batch of captchas together with their one-hot labels.

    Args:
        batch_size: Number of captcha samples to generate.

    Returns:
        A ``(batch_x, batch_y)`` tuple. ``batch_x`` has shape
        ``[batch_size, IMAGE_HEIGHT, IMAGE_WIDTH, 1]`` and holds the
        grayscale images; ``batch_y`` has shape
        ``[batch_size, MAX_CAPTCHA, CHAR_SET_LEN]`` and holds the labels
        produced by ``text2vec``.
    """
    # Derived from the image-size constants (60 x 160 with 3 channels today)
    # so the shape check and the reshape below cannot drift apart.
    expected_shape = (IMAGE_HEIGHT, IMAGE_WIDTH, 3)

    batch_x = np.zeros([batch_size, IMAGE_HEIGHT, IMAGE_WIDTH, 1])
    batch_y = np.zeros([batch_size, MAX_CAPTCHA, CHAR_SET_LEN])

    def wrap_gen_captcha_text_and_image():
        # Keep generating until the rendered image has the expected shape.
        while True:
            text, image = gen_captcha_text_and_image(
                width=IMAGE_WIDTH, height=IMAGE_HEIGHT, char_set=CHAR_SET)
            if image.shape == expected_shape:
                return text, image

    for i in range(batch_size):
        text, image = wrap_gen_captcha_text_and_image()
        # NumPy reshape is enough here; the result is stored in a NumPy array.
        batch_x[i, :] = np.reshape(convert2gray(image),
                                   (IMAGE_HEIGHT, IMAGE_WIDTH, 1))
        batch_y[i, :] = text2vec(text)

    return batch_x, batch_y
def crack_captcha_cnn():
    """Build the (uncompiled) CNN used to read the captcha.

    The network is three Conv2D -> PReLU -> 2x2 max-pool stages, followed by
    a dense layer whose output is reshaped to ``[MAX_CAPTCHA, CHAR_SET_LEN]``
    and passed through a ``Softmax`` layer with its default arguments.

    Returns:
        A ``tf.keras.Sequential`` model. No input shape is declared here.
    """
    model = tf.keras.Sequential()

    # (filters, kernel size) for each convolution stage, in order.
    conv_stages = ((32, (3, 3)), (64, (5, 5)), (128, (5, 5)))
    for filters, kernel_size in conv_stages:
        model.add(tf.keras.layers.Conv2D(filters, kernel_size))
        model.add(tf.keras.layers.PReLU())
        model.add(tf.keras.layers.MaxPool2D((2, 2), strides=2))

    model.add(tf.keras.layers.Flatten())
    model.add(tf.keras.layers.Dense(MAX_CAPTCHA * CHAR_SET_LEN))
    # One row of class scores per captcha position.
    model.add(tf.keras.layers.Reshape([MAX_CAPTCHA, CHAR_SET_LEN]))

    model.add(tf.keras.layers.Softmax())

    return model
def train():
    """Train the captcha model on endlessly generated batches.

    Tries to resume from the model stored at ``SAVE_PATH + 'model'``; if
    loading fails for any reason, the error is printed and a new network from
    ``crack_captcha_cnn`` is used instead. Each of the 500000 rounds generates
    512 new captchas, fits on them for 4 epochs, prints predicted vs. actual
    character indices, and saves the model every 10th round.
    """
    try:
        model = tf.keras.models.load_model(SAVE_PATH + 'model')
    except Exception as e:
        # Best effort: any load failure falls back to a fresh model.
        print('#######Exception', e)
        model = crack_captcha_cnn()

    # NOTE(review): the source listing lost its indentation, so it is unclear
    # whether compile() belonged inside the except branch. It is placed after
    # the try/except here, which also re-compiles a loaded model - confirm.
    model.compile(optimizer='Adam',
                  metrics=['accuracy'],
                  loss='categorical_crossentropy')

    for times in range(500000):
        batch_x, batch_y = get_next_batch(512)
        print('times=', times, ' batch_x.shape=', batch_x.shape, ' batch_y.shape=', batch_y.shape)
        model.fit(batch_x, batch_y, epochs=4)
        # argmax over the class axis gives one CHAR_SET index per position.
        print("y预测=\n", np.argmax(model.predict(batch_x), axis=2))
        print("y实际=\n", np.argmax(batch_y, axis=2))

        if times % 10 == 0:
            print("save model at times=", times)
            model.save(SAVE_PATH + 'model')
def predict():
    """Measure the saved model on 100 freshly generated captchas.

    Loads the model from ``SAVE_PATH + 'model'``, predicts one captcha at a
    time, prints each predicted/actual pair, and finally prints the success
    rate. Predicted and actual text are compared case-insensitively.
    """
    model = tf.keras.models.load_model(SAVE_PATH + 'model')
    success = 0
    count = 100
    for _ in range(count):
        data_x, data_y = get_next_batch(1)
        prediction_value = model.predict(data_x)
        # Batch size is 1, so take row 0 of the per-position argmax.
        data_y = vec2text(np.argmax(data_y, axis=2)[0])
        prediction_value = vec2text(np.argmax(prediction_value, axis=2)[0])

        if data_y.upper() == prediction_value.upper():
            print("y预测=", prediction_value, "y实际=", data_y, "预测成功。")
            success += 1
        else:
            print("y预测=", prediction_value, "y实际=", data_y, "预测失败。")
    print("预测", count, "次", "成功率=", success / count)
# Script entry point: train first, then evaluate the saved model.
if __name__ == "__main__":
    train()
    predict()