Thanks fleabay!
I added in the following statement:
flat_file = np.transpose(flat_file, (2, 0, 1))
Now I'm getting a different error:
RuntimeError: mat1 and mat2 shapes cannot be multiplied (198x10816 and 198x128)
The full code is:
import numpy as np
import math
import cv2
import random
import torch
from torch import flatten
from torch.autograd import Variable
import torch.nn as nn
import os.path
from os import path
img_width = 64
num_channels = 3
num_input_components = img_width*img_width*num_channels
num_output_components = 1
num_epochs = 100
learning_rate = 0.00001
import torch.nn as nn
import torch.nn.functional as F
class Net(nn.Module):
def __init__(self):
super(Net, self).__init__()
self.conv_layer1 = nn.Conv2d(in_channels=num_channels, out_channels=32, kernel_size=3)
self.conv_layer2 = nn.Conv2d(in_channels=32, out_channels=32, kernel_size=3)
self.max_pool1 = nn.MaxPool2d(kernel_size = 2, stride = 2)
self.conv_layer3 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3)
self.conv_layer4 = nn.Conv2d(in_channels=64, out_channels=64, kernel_size=3)
self.max_pool2 = nn.MaxPool2d(kernel_size = 2, stride = 2)
self.fc1 = nn.Linear(1600, 128)
self.relu1 = nn.ReLU()
self.fc2 = nn.Linear(128, num_output_components)
# Progresses data across layers
def forward(self, x):
out = self.conv_layer1(x)
out = self.conv_layer2(out)
out = self.max_pool1(out)
out = self.conv_layer3(out)
out = self.conv_layer4(out)
out = self.max_pool2(out)
out = out.reshape(out.size(0), -1)
out = self.fc1(out)
out = self.relu1(out)
out = self.fc2(out)
return out
"""
def __init__(self):
# call the parent constructor
super(Net, self).__init__()
# initialize first set of CONV => RELU => POOL layers
self.conv1 = nn.Conv2d(in_channels=num_channels, out_channels=20, kernel_size=(5, 5))
self.relu1 = nn.ReLU()
self.maxpool1 = nn.MaxPool2d(kernel_size=(2, 2), stride=(2, 2))
# initialize second set of CONV => RELU => POOL layers
self.conv2 = nn.Conv2d(in_channels=20, out_channels=50, kernel_size=(5, 5))
self.relu2 = nn.ReLU()
self.maxpool2 = nn.MaxPool2d(kernel_size=(2, 2), stride=(2, 2))
# initialize first (and only) set of FC => RELU layers
self.fc1 = nn.Linear(in_features=800, out_features=500)
self.relu3 = nn.ReLU()
# initialize our softmax classifier
self.fc2 = nn.Linear(in_features=500, out_features=num_output_components)
self.logSoftmax = nn.LogSoftmax(dim=1)
def forward(self, x):
# pass the input through our first set of CONV => RELU =>
# POOL layers
x = self.conv1(x)
x = self.relu1(x)
x = self.maxpool1(x)
# pass the output from the previous layer through the second
# set of CONV => RELU => POOL layers
x = self.conv2(x)
x = self.relu2(x)
x = self.maxpool2(x)
# flatten the output from the previous layer and pass it
# through our only set of FC => RELU layers
x = flatten(x, 1)
x = self.fc1(x)
x = self.relu3(x)
# pass the output to our softmax classifier to get our output
# predictions
x = self.fc2(x)
output = self.logSoftmax(x)
# return the output predictions
return output
"""
"""
def __init__(self):
super(Net, self).__init__()
self.hidden1 = torch.nn.Linear(num_input_components, 8192)
self.hidden2 = torch.nn.Linear(8192, 1024)
self.hidden3 = torch.nn.Linear(1024, 128)
self.predict = torch.nn.Linear(128, num_output_components)
def forward(self, x):
x = torch.tanh(self.hidden1(x))
x = torch.tanh(self.hidden2(x))
x = torch.tanh(self.hidden3(x))
x = self.predict(x) # linear output
return x
"""
class float_image:
def __init__(self, img):
self.img = img
class image_type:
def __init__(self, img_type, float_img):
self.img_type = img_type
self.float_img = float_img
net = Net()
if False: #path.exists('weights_' + str(num_input_components) + '_' + str(num_epochs) + '.pth'):
net.load_state_dict(torch.load('weights_' + str(num_input_components) + '_' + str(num_epochs) + '.pth'))
print("loaded file successfully")
else:
print("training...")
all_train_files = []
file_count = 0
path = 'training_set\\cats\\'
filenames = next(os.walk(path))[2]
for f in filenames:
file_count = file_count + 1
if file_count >= 100:
break;
print(path + f)
img = cv2.imread(path + f).astype(np.float32)
#img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
res = cv2.resize(img, dsize=(img_width, img_width), interpolation=cv2.INTER_LINEAR)
flat_file = res / 255.0 #np.asarray(res).flatten() / 255.0
flat_file = np.transpose(flat_file, (2, 0, 1))
all_train_files.append(image_type(0, flat_file))
file_count = 0
path = 'training_set\\dogs\\'
filenames = next(os.walk(path))[2]
for f in filenames:
file_count = file_count + 1
if file_count >= 100:
break;
print(path + f)
img = cv2.imread(path + f).astype(np.float32)
#img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
res = cv2.resize(img, dsize=(img_width, img_width), interpolation=cv2.INTER_LINEAR)
flat_file = res / 255.0 #np.asarray(res).flatten() / 255.0
flat_file = np.transpose(flat_file, (2, 0, 1))
all_train_files.append(image_type(1, flat_file))
optimizer = torch.optim.Adam(net.parameters(), lr = learning_rate)
loss_func = torch.nn.MSELoss()
batch = np.zeros((len(all_train_files), num_channels, img_width, img_width), dtype=np.float32)
ground_truth = np.zeros((len(all_train_files), 1), dtype=np.float32)
random.shuffle(all_train_files)
count = 0
for i in all_train_files:
batch[count] = i.float_img
ground_truth[count] = i.img_type
count = count + 1
for epoch in range(num_epochs):
x = Variable(torch.from_numpy(batch))
y = Variable(torch.from_numpy(ground_truth))
prediction = net(x)
loss = loss_func(prediction, y)
print(epoch, loss)
optimizer.zero_grad() # clear gradients for next train
loss.backward() # backpropagation, compute gradients
optimizer.step() # apply gradients
#torch.save(net.state_dict(), 'weights_' + str(num_input_components) + '_' + str(num_epochs) + '.pth')
path = 'test_set\\cats\\'
filenames = next(os.walk(path))[2]
cat_count = 0
total_count = 0
for f in filenames:
# print(path + f)
img = cv2.imread(path + f).astype(np.float32)
img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
res = cv2.resize(img, dsize=(img_width, img_width), interpolation=cv2.INTER_LINEAR)
flat_file = res / 255.0# np.asarray(res).flatten() / 255.0
batch = torch.from_numpy(flat_file)
prediction = net(Variable(batch))
if prediction < 0.5:
cat_count = cat_count + 1
total_count = total_count + 1
# print(batch)
# print(prediction)
print(cat_count / total_count)
print(total_count)
path = 'test_set\\dogs\\'
filenames = next(os.walk(path))[2]
dog_count = 0
total_count = 0
for f in filenames:
# print(path + f)
img = cv2.imread(path + f).astype(np.float32)
img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
res = cv2.resize(img, dsize=(img_width, img_width), interpolation=cv2.INTER_LINEAR)
flat_file = res / 255.0 # np.asarray(res).flatten() / 255.0
batch = torch.from_numpy(flat_file)
prediction = net(Variable(batch))
if prediction > 0.5:
dog_count = dog_count + 1
total_count = total_count + 1
# print(batch)
# print(prediction)
print(dog_count / total_count)
print(total_count)