This is the ipython notebook for the shiyanlou svm_cat

Reference: https://www.shiyanlou.com/courses/794

python-opencv

training

In [1]:
import numpy as np
import cv2
from os.path import dirname, join, basename
import os
import sys
from glob import glob
In [2]:
bin_n = 16 * 16
In [3]:
def hog(img):
    x_pixel, y_pixel = 194, 259
    gx = cv2.Sobel(img, cv2.CV_32F, 1, 0)
    gy = cv2.Sobel(img, cv2.CV_32F, 0, 1)
    mag, ang = cv2.cartToPolar(gx, gy)
    bins = np.int32(bin_n*ang/(2*np.pi))
    bin_cells = bins[:x_pixel/2, :y_pixel/2], bins[x_pixel/2:, :y_pixel/2], bins[:x_pixel/2, y_pixel/2:], bins[x_pixel/2:, y_pixel/2:]
    mag_cells = mag[:x_pixel/2,:y_pixel/2], mag[x_pixel/2:,:y_pixel/2], mag[:x_pixel/2,y_pixel/2:], mag[x_pixel/2:,y_pixel/2:]
    hists = [np.bincount(b.ravel(), m.ravel(), bin_n) for b, m in zip(bin_cells, mag_cells)]
    hist = np.hstack(hists)
    return hist
In [4]:
img = []
for name in os.listdir(os.getcwd() + '/cat/'):
    pic = cv2.imread(os.getcwd() + '/cat/' + name, 0)
    img.append(pic)

print len(img), ' num'
40  num
In [5]:
positive = len(img)
for name in os.listdir(os.getcwd() + '/other/'):
    pic = cv2.imread(os.getcwd() + '/other/' + name, 0)
    img.append(pic)
    
print len(img), ' num'
print positive, ' positive'
89  num
40  positive
In [6]:
trainpic = []
for item in img:
    trainpic.append(item)

svm_params = dict(kernel_type = cv2.SVM_LINEAR, svm_type = cv2.SVM_C_SVC, C = 2.67, gamma = 5.383)
In [7]:
# test
tmp = hog(img[0])
print tmp.shape
(1024,)
In [8]:
# data preprocessing
hogdata = map(hog, trainpic)
print np.float32(hogdata).shape, ' hogdata'
trainData = np.float32(hogdata).reshape(-1, bin_n*4)
print trainData.shape, ' trainData'
responses = np.float32(np.repeat(1.0, trainData.shape[0])[:, np.newaxis])
responses[positive:trainData.shape[0]] = -1.0
print responses.shape, ' responses'
print len(trainData)
print len(responses)
print type(trainData)
(89, 1024)  hogdata
(89, 1024)  trainData
(89, 1)  responses
89
89
<type 'numpy.ndarray'>
In [9]:
svm = cv2.SVM()
svm.train(trainData, responses, params=svm_params)
svm.save('svm_cat_data.dat')

predicting

In [10]:
svm = cv2.SVM()
svm.load('svm_cat_data.dat')
In [11]:
# test
img = cv2.imread('./predict/01.jpg', 0)
print img.shape, ' img_test0'
hogdata = hog(img)
testData = np.float32(hogdata).reshape(-1, bin_n*4)
print testData.shape, ' testData'
result = svm.predict(testData)
print result
if result > 0:
    print ' this pic is a cat!'
(259, 194)  img_test0
(1, 1024)  testData
-1.0
In [12]:
# preprocess for prediction
test_tmp = []
for name in os.listdir(os.getcwd() + '/predict/'):
    img = cv2.imread(os.getcwd() + '/predict/' + name, 0)
    test_tmp.append(img)
print len(test_tmp), ' len(test_tmp)'
19  len(test_tmp)
In [13]:
# predict
hogdata = map(hog, test_tmp)
testData = np.float32(hogdata).reshape(-1, bin_n*4)
print testData.shape, ' testData'
result = [svm.predict(item) for item in testData]
print result
(19, 1024)  testData
[-1.0, -1.0, -1.0, -1.0, 1.0, -1.0, -1.0, -1.0, -1.0, 1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0]

sklearn

use sklearn to build the svm models instead of opencv

In [15]:
import os
import cv2
import numpy as np
from skimage import feature as ft
from sklearn.svm import SVC
In [16]:
img = []
# positive
for name in os.listdir(os.getcwd() + '/cat/'):
    pic = cv2.imread(os.getcwd() + '/cat/' + name, 0)
    img.append(pic)
positive = len(img)
print "positive samples: {}".format(positive)

# negative
for name in os.listdir(os.getcwd() + '/other/'):
    pic = cv2.imread(os.getcwd() + '/other/' + name, 0)
    img.append(pic)

print "negative samples: {}".format(len(img) - positive)

# predict
predict_img = []
for name in os.listdir(os.getcwd() + '/predict/'):
    pic = cv2.imread(os.getcwd() + '/predict/' + name, 0)
    predict_img.append(pic)

print "prediction samples: {}".format(len(predict_img))
positive samples: 40
negative samples: 49
prediction samples: 19
In [17]:
def myHog(img):
    feature = ft.hog(img, orientations=8, pixels_per_cell=(16,16), cells_per_block=(1,1))
    return feature
In [18]:
HOG_feature = map(myHog, img)
predict_feature = map(myHog, predict_img)
predict_vector = np.asarray(predict_feature)
/usr/local/lib/python2.7/dist-packages/skimage/feature/_hog.py:119: skimage_deprecation: Default value of `block_norm`==`L1` is deprecated and will be changed to `L2-Hys` in v0.15
  'be changed to `L2-Hys` in v0.15', skimage_deprecation)
In [19]:
vector = np.asarray(HOG_feature)
label = np.array(np.repeat(1, vector.shape[0]))
label[positive:] = 0
In [20]:
clf = SVC(kernel='linear', C=2.67, gamma=5.383)
clf.fit(vector, label)
Out[20]:
SVC(C=2.67, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma=5.383, kernel='linear',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False)
In [21]:
predict_label = clf.predict(predict_vector)
In [22]:
predict_label
Out[22]:
array([0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0])

Summary

The process of these two solutions are almost the same, and the only difference is that skimage provides the hog function.