import numpy
import random
from matplotlib import pyplot as plt

import torch
from torch import nn
from torch import optim
from torch.utils.data import Dataset, DataLoader

from sklearn.decomposition import KernelPCA
from sklearn.manifold import TSNE
from sklearn.cluster import MiniBatchKMeans


def preprocess(image):
    """Convert a batch of NHWC images into NCHW float32 arrays scaled to [-1, 1].

    Args:
        image: array-like of images laid out as (N, H, W, C) whose pixel
            values lie in 0~255.

    Returns:
        A ``numpy.float32`` array laid out as (N, C, H, W) in which pixel
        value 0 maps to -1 and pixel value 255 maps to 1.
    """
    image = numpy.array(image)
    # (N, H, W, C) -> (N, C, H, W)
    image = numpy.transpose(image, (0, 3, 1, 2))
    # 0~255 -> 0~1 -> 0~2 -> -1~1
    image = (image / 255.0) * 2 - 1
    # numpy.float_ was an alias of float64 and no longer exists in NumPy 2.0;
    # float32 is also the dtype torch layers use for their weights by default,
    # so batches built from this array can be fed to a model without a cast.
    image = image.astype(numpy.float32)
    return image


class ImageDataset(Dataset):
    """Dataset that serves items straight out of an indexable image collection."""

    def __init__(self, image):
        # Keep a reference to the collection; nothing is copied or converted.
        self.image = image

    def __len__(self):
        """Return how many items the wrapped collection holds."""
        size = len(self.image)
        return size

    def __getitem__(self, index):
        """Return the item stored at position ``index``."""
        sample = self.image[index]
        return sample


# Load the training images from disk and report their shape as stored.
train_data = numpy.load('trainX.npy')
print(train_data.shape)
# Replace the raw array with its preprocessed version and report the new
# shape, then wrap it in a Dataset so a DataLoader can batch it.
train_data = preprocess(train_data)
print(train_data.shape)
image_dataset = ImageDataset(image=train_data)


def random_seed(seed):
    """Seed every random number generator this script relies on with ``seed``."""
    seeders = (
        torch.manual_seed,           # PyTorch
        torch.cuda.manual_seed,      # current GPU
        torch.cuda.manual_seed_all,  # all GPUs
        numpy.random.seed,           # NumPy
        random.seed,                 # Python standard library
    )
    for seed_fn in seeders:
        seed_fn(seed)


class AutoEncoder(nn.Module):
    """Convolutional auto-encoder for 3-channel images.

    The encoder stacks three conv -> ReLU -> 2x2 max-pool stages, so height and
    width are halved three times while channels grow 3 -> 64 -> 128 -> 256.
    The decoder stacks three un-padded transposed convolutions (kernels 5, 9
    and 17), each enlarging height and width by ``kernel - 1``, and ends with
    Tanh so the reconstruction lies in [-1, 1]. For example, a 32x32 input
    gives a 256x4x4 code and a 32x32 reconstruction.
    """

    def __init__(self):
        super().__init__()

        # Encoder stages, described as (input channels, output channels).
        encoder_layers = []
        for c_in, c_out in ((3, 64), (64, 128), (128, 256)):
            encoder_layers.extend([
                nn.Conv2d(in_channels=c_in, out_channels=c_out, kernel_size=(3, 3), padding=1),
                nn.ReLU(inplace=True),
                nn.MaxPool2d(kernel_size=(2, 2), stride=2),
            ])
        self.encoder = nn.Sequential(*encoder_layers)

        # Decoder stages, described as (input channels, output channels, kernel).
        decoder_specs = ((256, 128, 5), (128, 64, 9), (64, 3, 17))
        final_stage = len(decoder_specs) - 1
        decoder_layers = []
        for stage, (c_in, c_out, k) in enumerate(decoder_specs):
            decoder_layers.append(
                nn.ConvTranspose2d(in_channels=c_in, out_channels=c_out, kernel_size=(k, k))
            )
            # Hidden stages use ReLU; the output stage squashes to [-1, 1].
            if stage == final_stage:
                decoder_layers.append(nn.Tanh())
            else:
                decoder_layers.append(nn.ReLU(inplace=True))
        self.decoder = nn.Sequential(*decoder_layers)

    def forward(self, input):
        """Return ``(code, reconstruction)`` for a batch of images."""
        code = self.encoder(input)
        reconstruction = self.decoder(code)
        return code, reconstruction


# Train the auto-encoder to reconstruct its own input and save the weights.
random_seed(0)
model = AutoEncoder().cuda()
criterion = nn.MSELoss().cuda()
optimizer = optim.Adam(model.parameters(), lr=1e-5, weight_decay=1e-5)

model.train()
Epoch = 100
batch_size = 100
image_dataloader = DataLoader(dataset=image_dataset, batch_size=batch_size, shuffle=True)
# Number of batches served per epoch. Taken from the loader itself so the
# average below stays correct when the last batch is only partially filled.
train_batch = len(image_dataloader)

for epoch in range(Epoch):
    cost = 0
    for image in image_dataloader:
        # Cast to float32 so the batch dtype matches the model weights even
        # when the underlying array is stored as float64.
        image = image.float().cuda()
        optimizer.zero_grad()
        # Only the reconstruction is needed for the loss; the code is ignored.
        _, output = model(image)
        loss = criterion(output, image)
        cost += loss.item()
        loss.backward()
        optimizer.step()
    # Mean reconstruction loss per batch for this epoch.
    print('Epoch:', epoch+1, '\tLoss:', cost/train_batch)
# Save the model parameters
torch.save(model.state_dict(), 'parameters.pth')


def get_accuracy(label, prediction):
    """Return the accuracy of a two-cluster labelling, ignoring which id is which.

    The fraction of matching positions is computed against the length of
    ``label``'s first axis; because cluster ids are arbitrary, the larger of
    that fraction and its complement is returned.
    """
    matches = (label == prediction)
    ratio = numpy.sum(matches) / label.shape[0]
    flipped = 1 - ratio
    return ratio if ratio >= flipped else flipped


def show_image(feature, label):
    """Scatter-plot the first two columns of ``feature``, coloured by ``label``.

    Args:
        feature: 2-D array; column 0 is used as x and column 1 as y.
        label: per-point values passed to ``plt.scatter`` as the colour ``c``.
    """
    x = feature[:, 0]
    y = feature[:, 1]
    scatter = plt.scatter(x, y, c=label)
    # The scatter itself carries no label, so a bare plt.legend() has nothing
    # to show; build the legend entries from the colour-mapped values instead.
    handles, names = scatter.legend_elements()
    plt.legend(handles, names, loc='best')
    plt.show()


def inference(image, model):
    """Encode ``image`` with ``model`` and return the flattened latent codes.

    Args:
        image: batch of images in the format ``preprocess`` accepts.
        model: module whose forward pass returns ``(code, reconstruction)``.

    Returns:
        2-D numpy array with one row per image holding its flattened code.

    Raises:
        ValueError: if ``image`` contains no samples.

    Batches are sized by the module-level ``batch_size``.
    """
    image = preprocess(image)
    dataset = ImageDataset(image)
    dataloader = DataLoader(dataset, batch_size, shuffle=False)
    # Run on whatever device the model's weights live on.
    device = next(model.parameters()).device
    chunks = []
    # No gradients are needed for encoding; skipping them saves memory.
    with torch.no_grad():
        for img in dataloader:
            img = img.to(device=device, dtype=torch.float32)
            code, _ = model(img)
            chunks.append(code.view(img.shape[0], -1).cpu().numpy())
    if not chunks:
        raise ValueError('inference() received no images to encode')
    # Concatenate once at the end instead of re-copying the result per batch.
    latent = numpy.concatenate(chunks, axis=0)
    print(latent.shape)
    return latent


def predict(latent):
    """Cluster latent codes into two groups.

    The codes are reduced to 200 components with an RBF KernelPCA, embedded
    with t-SNE (default settings), and the embedding is split into two
    clusters with MiniBatchKMeans.

    Returns:
        ``(embedding, cluster_ids)``: the t-SNE embedding and an integer
        numpy array with the cluster id of every sample.
    """
    reducer = KernelPCA(n_components=200, kernel='rbf', n_jobs=-1)
    reduced = reducer.fit_transform(latent)
    print('KernelPCA:', reduced.shape)

    embedding = TSNE().fit_transform(reduced)
    print('TSNE:', embedding.shape)

    kmeans = MiniBatchKMeans(n_clusters=2, random_state=0)
    kmeans.fit(embedding)
    cluster_ids = numpy.array([int(cluster) for cluster in kmeans.labels_])
    return embedding, cluster_ids


def save_prediction(prediction, path):
    """Write ``prediction`` to ``path`` as a two-column ``id,label`` CSV file.

    Row ids are the positions of the items in ``prediction``, starting at 0.
    """
    with open(path, mode='w') as out:
        out.write('id,label\n')
        out.writelines(
            '{},{}\n'.format(row_id, value)
            for row_id, value in enumerate(prediction)
        )
    print('Save Prediction')


# Reload the trained weights into a fresh model and switch to evaluation mode.
model = AutoEncoder().cuda()
model.load_state_dict(torch.load('parameters.pth'))
model.eval()

# Encode the training images, cluster the codes and save the cluster ids.
train_data = numpy.load('trainX.npy')
latent = inference(image=train_data, model=model)
tsne, prediction = predict(latent)
save_prediction(prediction, 'prediction.csv')
# This is an unsupervised two-class problem: only the split matters, not which
# id each cluster received, so the inverted labelling is saved as well.
save_prediction(numpy.abs(1-prediction), 'prediction_invert.csv')

print('聚类结果如图:')
# No ground-truth labels are loaded in this script, so the embedding is
# coloured by the predicted cluster ids.
show_image(tsne, prediction)

plt.figure(figsize=(14,4))
index = [11,22,33,44,55,66,77,88,99]
image = train_data[index, :]
# Top row: original images
for i, img in enumerate(image):
    plt.subplot(2, 9, i+1, xticks=[], yticks=[])
    plt.imshow(img)

# The model expects preprocessed input (NCHW, scaled to -1~1), not raw images.
train_data_process = preprocess(train_data)
re_image = torch.as_tensor(train_data_process[index, :], dtype=torch.float32).cuda()
with torch.no_grad():
    _, output = model(re_image)
# Undo the preprocessing: -1~1 back to 0~1 and NCHW back to NHWC for imshow.
output = ((output+1)/2).cpu().detach().numpy()
output = output.transpose(0, 2, 3, 1)
# Bottom row: reconstructed images
for i, img in enumerate(output):
    plt.subplot(2, 9, 9+i+1, xticks=[], yticks=[])
    plt.imshow(img)

plt.tight_layout()
# Without this the comparison figure is never displayed when run as a script.
plt.show()
