"""Reservoir-computing image pipeline.

Feeds image columns into a gene-expression "reservoir" (a weighted gene
network), carrying a decayed memory of the previous reservoir state between
columns, and pickles the per-image outputs.

Fixes vs. the original script:

* ``parallel_rc_output`` crashed on the 1-D input vectors this script actually
  produces (``len(b[0])`` on a scalar); it now accepts 1-D or 2-D input and
  returns a result of matching rank.
* One process was spawned per matrix row; rows are now chunked across
  ``NUM_PROCESSES`` workers via ``worker_multi`` / ``split``.
* The pipeline is guarded by ``if __name__ == "__main__"`` so the helper
  functions can be imported without triggering the CSV reads.
"""

import multiprocessing
import pickle as pk
import time

import numpy as np
import pandas as pd

# Decay factor applied to the reservoir state carried between image columns.
MemoryValue = 0.55

# Number of worker processes used by parallel_rc_output.
NUM_PROCESSES = 4


def getMaxExpression(TranscriptomicDF, GeneID):
    """Return the maximum expression value recorded for ``GeneID``."""
    return TranscriptomicDF[GeneID].max()


def worker(a, b, result, row):
    """Compute one row of the matrix product ``a @ b`` into ``result[row]``."""
    n = len(b[0])
    for j in range(n):
        result[row][j] = sum(a[row][k] * b[k][j] for k in range(len(b)))


def worker_multi(a, b, result, start_index, end_index):
    """Compute rows ``start_index:end_index`` of ``a @ b`` into ``result``."""
    n = len(b[0])
    for row in range(start_index, end_index):
        for j in range(n):
            result[row][j] = sum(a[row][k] * b[k][j] for k in range(len(b)))


def rc_output(rc_weights, input_array):
    """Serial reservoir update: ReLU(rc_weights @ input_array)."""
    return np.maximum(rc_weights.dot(input_array), 0)


def split(a, n):
    """Split sequence ``a`` into ``n`` contiguous, nearly equal chunks."""
    k, m = divmod(len(a), n)
    return (a[i * k + min(i, m):(i + 1) * k + min(i + 1, m)] for i in range(n))


def parallel_rc_output(rc_weights, input_array):
    """Parallel reservoir update: ReLU(rc_weights @ input_array).

    Accepts a 1-D input vector or a 2-D input matrix and returns a result of
    the same rank.  Rows of the product are distributed over at most
    ``NUM_PROCESSES`` worker processes writing into a shared-memory buffer.

    NOTE(review): the children write into a shared ``multiprocessing.Array``
    view inherited from the parent, which relies on the ``fork`` start method
    (the Linux default).  On ``spawn`` platforms the writes would be lost —
    confirm the deployment target.
    """
    a = np.asarray(rc_weights)
    b = np.asarray(input_array)

    was_vector = b.ndim == 1
    if was_vector:
        # The original crashed here (len() of a scalar); promote the vector
        # to a single-column matrix so the row workers can index b[k][j].
        b = b.reshape(-1, 1)

    rows, cols = len(a), len(b[0])
    shared = multiprocessing.Array('d', rows * cols)
    result_np = np.frombuffer(shared.get_obj()).reshape((rows, cols))

    # Chunk the rows across a fixed pool of workers instead of spawning one
    # process per row.
    processes = []
    for chunk in split(range(rows), NUM_PROCESSES):
        if len(chunk) == 0:
            continue  # more workers than rows
        p = multiprocessing.Process(
            target=worker_multi,
            args=(a, b, result_np, chunk[0], chunk[-1] + 1),
        )
        processes.append(p)
        p.start()
    for p in processes:
        p.join()

    out = np.maximum(result_np, 0)
    return out[:, 0] if was_vector else out


def input_padding(input_geneID, input_value, GeneLegend, initMem):
    """Overwrite the memory vector at the input genes' positions.

    Each input value is scaled by that gene's maximum observed expression so
    pixel intensities land in the gene's expression range.  Reads the
    module-global ``TranscriptomicData`` (set up in ``main``).
    """
    input_matrix = np.array(initMem, dtype=float)
    for ig in range(0, len(input_geneID)):
        input_matrix[GeneLegend.index(input_geneID[ig])] = (
            input_value[ig] * getMaxExpression(TranscriptomicData, input_geneID[ig])
        )
    return input_matrix


def main():
    """Load the data, drive the image(s) through the reservoir, pickle results."""
    global GeneLegend, WeightMatrix, TranscriptomicData, InitialMemory

    start_time = time.time()
    reservoir_weightsDF = pd.read_csv('WeightMatrix.csv', header=0, index_col=0)
    transcriptomicsDF = pd.read_csv('InterpolateData.csv', header=0, index_col=0)
    imageData = pd.read_csv('ImageClassification/ResizedTrainImage.csv',
                            header=None, index_col=0)
    print(time.time() - start_time)
    print(imageData.head())
    print(transcriptomicsDF.head())

    GeneLegend = list(reservoir_weightsDF.columns)
    WeightMatrix = reservoir_weightsDF.values
    TranscriptomicData = transcriptomicsDF[GeneLegend]
    InitialMemory = list(TranscriptomicData.iloc[0])
    print("InitialMemory", InitialMemory)

    # Pick 15 random genes to act as the image-input channels.
    InputGenes = np.random.choice(GeneLegend, 15, replace=False)
    print("InputGene", InputGenes)

    output_matrix = []
    for i in range(0, 1):  # for i in range(0, len(imageData)):
        imgTemp = imageData.iloc[i].values
        # imgTemp = imgTemp / 255  # optional normalization
        imgDims = np.sqrt(len(imgTemp)).astype(int)
        img = imgTemp.reshape(imgDims, imgDims)
        print(img)
        for j in range(0, imgDims):
            # Feed the image one column at a time.  Assumes imgDims >= 15
            # (one pixel per input gene) — TODO confirm against the CSV.
            valueArray = img[:, j]
            print(valueArray)
            if j == 0:
                input_array = input_padding(InputGenes, valueArray,
                                            GeneLegend, InitialMemory)
            else:
                # Decay the previous output and reuse it as the memory state.
                input_array = input_padding(InputGenes, valueArray,
                                            GeneLegend, Output * MemoryValue)
            Output = parallel_rc_output(WeightMatrix, input_array)
        output_matrix.append(Output)

    with open('ImageClassification/output_matrix.pkl', 'wb') as f:
        pk.dump(output_matrix, f)


if __name__ == "__main__":
    main()