สรุปขั้นตอน
- Prepare data
- Build a model
- Pre-process image
- Train data with model
- Show train result
- Save model
- Save history
- Show time used
- Plot train accuracy and loss
- Prepare test data
- Read test csv
- [x] Load image
- [x] Convert image to array.
- [x] Put image array in list
- [x] Convert list to numpy array
- [y] Transform label in form of numpy array 0 to n-1 (LabelEncoder)
- [y] Convert list to one-hot array (to_categorical)
- Predict test data
- Show test result
- Evaluate model
- Convert one-hot to list
- Save prediction
- Plot confusion matrix
- Print and save classification report
1. Prepare data
ในตัวอย่างนี้ เราจะใช้ Flowers Recognition dataset เป็นรูปดอกไม้ 5 ชนิดแยกตามโฟลเดอร์
จัดโฟลเดอร์โปรเจ็คให้อยู่ในโครงสร้างต่อไปนี้
- train
- daisy
- dandelion
- rose
- sunflower
- tulip
- test
หมายเหตุ ในโฟลเดอร์ที่โหลดมา จะมีไฟล์ชื่อ flickr.py และ flickr.pyc ติดมาด้วย ให้แน่ใจว่าลบไฟล์นั้นออกจากโฟลเดอร์ด้วย เพราะต้องการดึงแค่เฉพาะไฟล์รูปภาพเท่านั้น
เราจะสร้าง test set โดยสุ่มดอกไม้แต่ละชนิดมาจำนวน 100 ดอก มาใส่โฟลเดอร์ test และสร้างไฟล์ test.csv กำกับภาพและชนิด
# ---------- 1. Prepare data ----------
# Move a random sample of images from each class folder under train/ into
# test/, and record (image, category) pairs in a shuffled test.csv.
import os
import random
import pandas as pd

path = 'train'
N_TEST_PER_CLASS = 100  # images sampled per class for the test set

clsList = os.listdir(path)
testList = []

print('Original number of images per class:')
for cls in clsList:
    print(f'- {cls} = {len(os.listdir(os.path.join(path, cls)))}')

# BUG FIX: the original message said "Pick 50" while k=100 images were
# sampled (the printed output below confirms 100); derive the message from
# the same constant so they cannot disagree.
print(f'\nPick {N_TEST_PER_CLASS} images per class for test...')
for cls in clsList:
    curDir = os.listdir(os.path.join(path, cls))
    for imgName in random.sample(curDir, k=N_TEST_PER_CLASS):
        # os.rename moves the file out of train/<cls>/ into test/.
        os.rename(os.path.join(path, cls, imgName), os.path.join('test', imgName))
        testList.append([imgName, cls])

print('\nRemained number of images per class:')
for cls in clsList:
    print(f'- {cls} = {len(os.listdir(os.path.join(path, cls)))}')

testDf = pd.DataFrame(testList, columns=['image', 'category'])
# Shuffle rows so test.csv is not ordered by class.
shuffleDf = testDf.sample(frac=1).reset_index(drop=True)
shuffleDf.to_csv(r'test.csv', index=None, header=True)
ผลลัพธ์
Original number of images per class: - daisy = 769 - dandelion = 1055 - rose = 784 - sunflower = 734 - tulip = 984 Pick 100 images per class for test... Remained number of images per class: - daisy = 669 - dandelion = 955 - rose = 684 - sunflower = 634 - tulip = 884
2. Build a model
# ---------- 2. Build a model ----------
# Small CNN: three conv/pool stages feeding a dense classifier with a
# 5-way softmax output (one unit per flower class).
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D
from tensorflow.keras.layers import Activation, Dropout, Flatten, Dense

model = Sequential([
    Conv2D(32, (3, 3), input_shape=(150, 150, 3)),
    Activation('relu'),
    MaxPooling2D(pool_size=(2, 2)),

    Conv2D(32, (3, 3)),
    Activation('relu'),
    MaxPooling2D(pool_size=(2, 2)),

    Conv2D(64, (3, 3)),
    Activation('relu'),
    MaxPooling2D(pool_size=(2, 2)),

    Flatten(),
    Dense(64),
    Activation('relu'),
    Dropout(0.5),          # regularization before the output layer
    Dense(5),
    Activation('softmax'),
])
model.summary()
3. Pre-process image
# ---------- 3. Pre-process image ----------
# CONSISTENCY FIX: import from tensorflow.keras like the model code above;
# the standalone `keras` package can mismatch the installed TF version.
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Rescale pixel values from [0, 255] to [0, 1].
train_datagen = ImageDataGenerator(rescale=1./255)
# Class labels are inferred from the sub-folder names under train/.
train_generator = train_datagen.flow_from_directory(
    'train/',
    target_size=(150, 150),
    color_mode='rgb',
    batch_size=32,
    class_mode='categorical',
    shuffle=True,
)
4. Train data with model
# ---------- 4. Train data with model ----------
import time

# BUG FIX: the model was never compiled, so fit() would raise.
# Loss matches the 5-way softmax output; 'accuracy' is required because
# history.history['accuracy'] is read in the plotting section below.
# NOTE(review): optimizer choice not specified in the original — confirm.
model.compile(optimizer='rmsprop',
              loss='categorical_crossentropy',
              metrics=['accuracy'])

startTime = time.time()
step_size_train = train_generator.n // train_generator.batch_size
# fit_generator is deprecated; Model.fit accepts generators directly (TF >= 2.1).
history = model.fit(train_generator,
                    steps_per_epoch=step_size_train,
                    epochs=10)
endTime = time.time()
5. Show train result
5.1 Save model
# Persist the trained network: a weights-only file and a full-model file
# (architecture + weights + optimizer state).
# NOTE(review): "wieghts" in the filename is a typo, kept as-is so any
# existing consumers of that exact filename keep working.
model.save_weights('my-model_save-wieghts.h5')
model.save('my-model_save.h5')
5.2 Save history
# Save the per-epoch training metrics (loss/accuracy) to CSV.
import pandas as pd

histDf = pd.DataFrame(history.history)
histCsv = 'my-model_history.csv'
# Pass the path so pandas manages the file handle and line endings:
# open(mode='w') without newline='' produces blank lines on Windows.
histDf.to_csv(histCsv)
5.3 Show time used
# Report wall-clock training time.
print("Start time = " + str(startTime))  # fixed typo: "Strat" -> "Start"
print("End time = " + str(endTime))
print("Use time = " + str(endTime - startTime))
5.4 Plot train accuracy and loss
import numpy as np
import matplotlib.pyplot as plt
# BUG FIX: "%matplotlib inline" is an IPython magic, not valid Python —
# it is a SyntaxError in a plain .py script. plt.show() displays figures.

acc = history.history['accuracy']
loss = history.history['loss']
epochs = range(1, len(acc) + 1)

# --- Training accuracy ---
plt.style.use('default')
plt.grid(True)
plt.xticks(np.arange(0, len(acc) + 1, 1.0))
plt.plot(epochs, acc, 'b', label='Training Accuracy')  # fixed typo "Accurarcy"
plt.title('Training Accuracy (My Model)')
plt.xlabel("Epoch #")
plt.ylabel("Accuracy")
plt.legend()
plt.grid(True)

# --- Training loss (separate figure) ---
plt.figure()
plt.plot(epochs, loss, 'r', label='Training Loss')
plt.grid(True)
plt.xticks(np.arange(0, len(acc) + 1, 1.0))
plt.title('Training Loss (My Model)')
plt.xlabel("Epoch #")
plt.ylabel("Loss")
plt.legend()
plt.show()
6. Prepare test data
6.1 Read test csv
6.2 [x] Load image
6.3 [x] Convert image to array
6.4 [x] Put image array in list
6.5 [x] Convert list to numpy array
import pandas as pd
import numpy as np
# CONSISTENCY FIX: tensorflow.keras (matching the model imports) instead of
# the standalone `keras` package.
from tensorflow.keras.preprocessing.image import load_img, img_to_array

testCsv = pd.read_csv('test.csv')
# Load each test image at the model's input size and stack into one array.
testArrListX = [
    img_to_array(load_img('test/' + imgName, target_size=(150, 150)))
    for imgName in testCsv.image
]
# Normalize to [0, 1] to match the training-time rescale=1./255.
x_test = np.array(testArrListX) / 255.0
6.6 [y] Transform label to numpy array 0 to n-1 (LabelEncoder)
6.7 [y] Convert list to one-hot array
# Encode string labels to integers 0..n-1, then to one-hot vectors.
testY = testCsv['category'].values

from sklearn.preprocessing import LabelEncoder
le = LabelEncoder()
# LabelEncoder assigns indices in sorted (alphabetical) order — presumably
# matching flow_from_directory's alphabetical class indices; verify.
y_test = le.fit_transform(testY)

# CONSISTENCY FIX: tensorflow.keras.utils instead of standalone keras.utils.
from tensorflow.keras.utils import to_categorical
y_test_onehot = to_categorical(y_test)
7. Predict test data
y_pred_onehot = model.predict(x_test)
8. Show test result
8.1 Evaluate model
model.evaluate(x_test, y_test_onehot)
8.2 Convert one-hot to list
# Reduce each probability vector to its arg-max class index.
y_pred = []
for probs in y_pred_onehot:
    y_pred.append(np.argmax(probs))
หรือ
y_pred = [np.argmax(y_pred_onehot[i]) for i in range(len(y_pred_onehot))]
8.3 Save prediction
# Save per-image predictions alongside the ground truth.
# BUG FIX: the original referenced undefined `testY_le`; the integer-encoded
# ground truth produced by the LabelEncoder is `y_test`.
testDict = {'image': testCsv.image, 'y_label': testY, 'y_true': y_test, 'y_pred': y_pred}
dfPred = pd.DataFrame(testDict)
exportPred = dfPred.to_csv(r'prediction.csv', index=None, header=True)
8.4 Plot confusion matrix
# Plot the confusion matrix as an annotated heatmap.
from sklearn.metrics import confusion_matrix
cfArr = confusion_matrix(y_test, y_pred)

import seaborn as sn
# ROBUSTNESS: take the axis labels from the fitted encoder instead of a
# hard-coded list, so they always match the integer encoding order.
classNames = list(le.classes_)  # daisy, dandelion, rose, sunflower, tulip
dfCm = pd.DataFrame(cfArr, index=classNames, columns=classNames)
plt.figure()
sn.heatmap(dfCm, annot=True, cmap="Blues")
plt.title('Confusion Matrix (My Model)', fontsize=15)
plt.show()
8.5 Print and save classification report
# Print per-class precision/recall/F1 and persist the report as CSV.
from sklearn.metrics import classification_report
print(classification_report(y_test, y_pred))

report = classification_report(y_test, y_pred, output_dict=True)
dfRp = pd.DataFrame(report).transpose()
# BUG FIX: index=None discarded the row labels (class names and the avg
# rows), leaving the saved report rows unidentifiable; keep the index.
exportRp = dfRp.to_csv(r'classification-report.csv', header=True)
โค้ดเต็ม
# ---------- 1. Prepare data ----------
# Move a random sample of images from each class folder under train/ into
# test/, and record (image, category) pairs in a shuffled test.csv.
import os
import random
import pandas as pd

path = 'train'
N_TEST_PER_CLASS = 100  # images sampled per class for the test set

clsList = os.listdir(path)
testList = []

print('Original number of images per class:')
for cls in clsList:
    print(f'- {cls} = {len(os.listdir(os.path.join(path, cls)))}')

# BUG FIX: the message said "Pick 50" while k=100 were sampled; derive the
# message from the same constant so they cannot disagree.
print(f'\nPick {N_TEST_PER_CLASS} images per class for test...')
for cls in clsList:
    curDir = os.listdir(os.path.join(path, cls))
    for imgName in random.sample(curDir, k=N_TEST_PER_CLASS):
        # os.rename moves the file out of train/<cls>/ into test/.
        os.rename(os.path.join(path, cls, imgName), os.path.join('test', imgName))
        testList.append([imgName, cls])

print('\nRemained number of images per class:')
for cls in clsList:
    print(f'- {cls} = {len(os.listdir(os.path.join(path, cls)))}')

testDf = pd.DataFrame(testList, columns=['image', 'category'])
# Shuffle rows so test.csv is not ordered by class.
shuffleDf = testDf.sample(frac=1).reset_index(drop=True)
shuffleDf.to_csv(r'test.csv', index=None, header=True)
# ---------- 2. Build a model ----------
# Small CNN: three conv/pool stages feeding a dense classifier with a
# 5-way softmax output (one unit per flower class).
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D
from tensorflow.keras.layers import Activation, Dropout, Flatten, Dense

model = Sequential([
    Conv2D(32, (3, 3), input_shape=(150, 150, 3)),
    Activation('relu'),
    MaxPooling2D(pool_size=(2, 2)),

    Conv2D(32, (3, 3)),
    Activation('relu'),
    MaxPooling2D(pool_size=(2, 2)),

    Conv2D(64, (3, 3)),
    Activation('relu'),
    MaxPooling2D(pool_size=(2, 2)),

    Flatten(),
    Dense(64),
    Activation('relu'),
    Dropout(0.5),          # regularization before the output layer
    Dense(5),
    Activation('softmax'),
])
model.summary()
# ---------- 3. Pre-process image ----------
# CONSISTENCY FIX: import from tensorflow.keras like the model code above.
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Rescale pixel values from [0, 255] to [0, 1].
train_datagen = ImageDataGenerator(rescale=1./255)
# Class labels are inferred from the sub-folder names under train/.
train_generator = train_datagen.flow_from_directory(
    'train/',
    target_size=(150, 150),
    color_mode='rgb',
    batch_size=32,
    class_mode='categorical',
    shuffle=True,
)
# ---------- 4. Train data with model ----------
import time

# BUG FIX: the model was never compiled; fit() would raise without this.
# 'accuracy' is required because history.history['accuracy'] is read below.
# NOTE(review): optimizer choice not specified in the original — confirm.
model.compile(optimizer='rmsprop',
              loss='categorical_crossentropy',
              metrics=['accuracy'])

startTime = time.time()
step_size_train = train_generator.n // train_generator.batch_size
# fit_generator is deprecated; Model.fit accepts generators (TF >= 2.1).
history = model.fit(train_generator,
                    steps_per_epoch=step_size_train,
                    epochs=10)
endTime = time.time()
# ---------- 5. Show train result ----------
# ---- Save model & history, Show time used ----
# Persist weights-only and full-model files ("wieghts" filename typo kept
# so existing consumers of that exact name keep working).
model.save_weights('my-model_save-wieghts.h5')
model.save('my-model_save.h5')

import pandas as pd
histDf = pd.DataFrame(history.history)
histCsv = 'my-model_history.csv'
# Pass the path so pandas manages the handle and line endings:
# open(mode='w') without newline='' produces blank lines on Windows.
histDf.to_csv(histCsv)

print("Start time = " + str(startTime))  # fixed typo: "Strat" -> "Start"
print("End time = " + str(endTime))
print("Use time = " + str(endTime - startTime))
# ---- Plot train accuracy and loss ----
import numpy as np
import matplotlib.pyplot as plt
# BUG FIX: "%matplotlib inline" is an IPython magic, a SyntaxError in a
# plain .py script; plt.show() displays the figures.

acc = history.history['accuracy']
loss = history.history['loss']
epochs = range(1, len(acc) + 1)

# --- Training accuracy ---
plt.style.use('default')
plt.grid(True)
plt.xticks(np.arange(0, len(acc) + 1, 1.0))
plt.plot(epochs, acc, 'b', label='Training Accuracy')  # fixed typo "Accurarcy"
plt.title('Training Accuracy (My Model)')
plt.xlabel("Epoch #")
plt.ylabel("Accuracy")
plt.legend()
plt.grid(True)

# --- Training loss (separate figure) ---
plt.figure()
plt.plot(epochs, loss, 'r', label='Training Loss')
plt.grid(True)
plt.xticks(np.arange(0, len(acc) + 1, 1.0))
plt.title('Training Loss (My Model)')
plt.xlabel("Epoch #")
plt.ylabel("Loss")
plt.legend()
plt.show()
# ---------- 6. Prepare test data ----------
# ---- Load images, convert to arrays, stack into a normalized numpy array ----
import pandas as pd
import numpy as np
# CONSISTENCY FIX: tensorflow.keras instead of the standalone keras package.
from tensorflow.keras.preprocessing.image import load_img, img_to_array

testCsv = pd.read_csv('test.csv')
testArrListX = [
    img_to_array(load_img('test/' + imgName, target_size=(150, 150)))
    for imgName in testCsv.image
]
# Normalize to [0, 1] to match the training-time rescale=1./255.
x_test = np.array(testArrListX) / 255.0
# ---- Transform label to numpy array, convert to one-hot ----
# BUG FIX: this listing pasted the arg-max line here (using y_pred_onehot
# before it was defined) instead of the label-encoding code, and later used
# undefined names y_test_onehot / testY / testY_le. Restored the encoding
# step and corrected the variable names.
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.utils import to_categorical

testY = testCsv['category'].values
le = LabelEncoder()
y_test = le.fit_transform(testY)        # string labels -> 0..n-1
y_test_onehot = to_categorical(y_test)  # integer labels -> one-hot

# ---------- 7. Predict test data ----------
y_pred_onehot = model.predict(x_test)

# ---------- 8. Show test result ----------
# ---- Evaluate model ----
model.evaluate(x_test, y_test_onehot)

# ---- Convert one-hot to list ----
y_pred = [np.argmax(y_pred_onehot[i]) for i in range(len(y_pred_onehot))]

# ---- Save prediction ----
testDict = {'image': testCsv.image, 'y_label': testY, 'y_true': y_test, 'y_pred': y_pred}
dfPred = pd.DataFrame(testDict)
exportPred = dfPred.to_csv(r'prediction.csv', index=None, header=True)
# ---- Plot confusion matrix ----
from sklearn.metrics import confusion_matrix
cfArr = confusion_matrix(y_test, y_pred)

import seaborn as sn
# ROBUSTNESS: take the labels from the fitted encoder instead of a
# hard-coded list, so they always match the integer encoding order.
classNames = list(le.classes_)  # daisy, dandelion, rose, sunflower, tulip
dfCm = pd.DataFrame(cfArr, index=classNames, columns=classNames)
plt.figure()
sn.heatmap(dfCm, annot=True, cmap="Blues")
plt.title('Confusion Matrix (My Model)', fontsize=15)
plt.show()
# ---- Print and save classification report ----
from sklearn.metrics import classification_report
print(classification_report(y_test, y_pred))

report = classification_report(y_test, y_pred, output_dict=True)
dfRp = pd.DataFrame(report).transpose()
# BUG FIX: index=None discarded the row labels (class names and avg rows),
# leaving the saved report rows unidentifiable; keep the index.
exportRp = dfRp.to_csv(r'classification-report.csv', header=True)
โค้ดย่อ
# ---------- Condensed end-to-end pipeline ----------
import pandas as pd
import numpy as np
import seaborn as sn
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D
from tensorflow.keras.layers import Activation, Dropout, Flatten, Dense
# CONSISTENCY FIX: tensorflow.keras throughout (the original mixed the
# standalone `keras` package with tensorflow.keras).
from tensorflow.keras.preprocessing.image import load_img, img_to_array, ImageDataGenerator
from tensorflow.keras.utils import to_categorical
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report

# Build the CNN: three conv/pool stages + dense 5-way softmax classifier.
model = Sequential([
    Conv2D(32, (3, 3), input_shape=(150, 150, 3)),
    Activation('relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Conv2D(32, (3, 3)),
    Activation('relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Conv2D(64, (3, 3)),
    Activation('relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Flatten(),
    Dense(64),
    Activation('relu'),
    Dropout(0.5),
    Dense(5),
    Activation('softmax'),
])
model.summary()

# BUG FIX: the model was never compiled; fit() would raise without this.
model.compile(optimizer='rmsprop',
              loss='categorical_crossentropy',
              metrics=['accuracy'])

# Stream training images from the class sub-folders, rescaled to [0, 1].
train_datagen = ImageDataGenerator(rescale=1./255)
train_generator = train_datagen.flow_from_directory(
    'train/',
    target_size=(150, 150),
    color_mode='rgb',
    batch_size=32,
    class_mode='categorical',
    shuffle=True,
)

step_size_train = train_generator.n // train_generator.batch_size
# fit_generator is deprecated; Model.fit accepts generators (TF >= 2.1).
history = model.fit(train_generator,
                    steps_per_epoch=step_size_train,
                    epochs=10)

# Load and normalize test images listed in test.csv.
testCsv = pd.read_csv('test.csv')
testArrListX = [
    img_to_array(load_img('test/' + imgName, target_size=(150, 150)))
    for imgName in testCsv.image
]
x_test = np.array(testArrListX) / 255.0

# Encode labels to integers, then one-hot.
testY = testCsv['category'].values
le = LabelEncoder()
y_test = le.fit_transform(testY)
y_test_onehot = to_categorical(y_test)

# Evaluate, predict, and report.
model.evaluate(x_test, y_test_onehot)
y_pred_onehot = model.predict(x_test)
y_pred = [np.argmax(y_pred_onehot[i]) for i in range(len(y_pred_onehot))]
confusion_matrix(y_test, y_pred)
print(classification_report(y_test, y_pred))
Sign up here with your email
ConversionConversion EmoticonEmoticon