"""Reservoir-computing image pipeline.

Feeds image columns into a gene-expression "reservoir" (a weighted gene
network), carrying a decayed memory of the previous reservoir state between
columns, and pickles the per-image outputs.

Fixes vs. the original script:

* ``parallel_rc_output`` crashed on the 1-D input vectors this script actually
  produces (``len(b[0])`` on a scalar); it now accepts 1-D or 2-D input and
  returns a result of matching rank.
* One process was spawned per matrix row; rows are now chunked across
  ``NUM_PROCESSES`` workers via ``worker_multi`` / ``split``.
* The pipeline is guarded by ``if __name__ == "__main__"`` so the helper
  functions can be imported without triggering the CSV reads.
"""

import multiprocessing
import pickle as pk
import time

import numpy as np
import pandas as pd

# Decay factor applied to the reservoir state carried between image columns.
MemoryValue = 0.55

# Number of worker processes used by parallel_rc_output.
NUM_PROCESSES = 4


def getMaxExpression(TranscriptomicDF, GeneID):
    """Return the maximum expression value recorded for ``GeneID``."""
    return TranscriptomicDF[GeneID].max()


def worker(a, b, result, row):
    """Compute one row of the matrix product ``a @ b`` into ``result[row]``."""
    n = len(b[0])
    for j in range(n):
        result[row][j] = sum(a[row][k] * b[k][j] for k in range(len(b)))


def worker_multi(a, b, result, start_index, end_index):
    """Compute rows ``start_index:end_index`` of ``a @ b`` into ``result``."""
    n = len(b[0])
    for row in range(start_index, end_index):
        for j in range(n):
            result[row][j] = sum(a[row][k] * b[k][j] for k in range(len(b)))


def rc_output(rc_weights, input_array):
    """Serial reservoir update: ReLU(rc_weights @ input_array)."""
    return np.maximum(rc_weights.dot(input_array), 0)


def split(a, n):
    """Split sequence ``a`` into ``n`` contiguous, nearly equal chunks."""
    k, m = divmod(len(a), n)
    return (a[i * k + min(i, m):(i + 1) * k + min(i + 1, m)] for i in range(n))


def parallel_rc_output(rc_weights, input_array):
    """Parallel reservoir update: ReLU(rc_weights @ input_array).

    Accepts a 1-D input vector or a 2-D input matrix and returns a result of
    the same rank.  Rows of the product are distributed over at most
    ``NUM_PROCESSES`` worker processes writing into a shared-memory buffer.

    NOTE(review): the children write into a shared ``multiprocessing.Array``
    view inherited from the parent, which relies on the ``fork`` start method
    (the Linux default).  On ``spawn`` platforms the writes would be lost —
    confirm the deployment target.
    """
    a = np.asarray(rc_weights)
    b = np.asarray(input_array)

    was_vector = b.ndim == 1
    if was_vector:
        # The original crashed here (len() of a scalar); promote the vector
        # to a single-column matrix so the row workers can index b[k][j].
        b = b.reshape(-1, 1)

    rows, cols = len(a), len(b[0])
    shared = multiprocessing.Array('d', rows * cols)
    result_np = np.frombuffer(shared.get_obj()).reshape((rows, cols))

    # Chunk the rows across a fixed pool of workers instead of spawning one
    # process per row.
    processes = []
    for chunk in split(range(rows), NUM_PROCESSES):
        if len(chunk) == 0:
            continue  # more workers than rows
        p = multiprocessing.Process(
            target=worker_multi,
            args=(a, b, result_np, chunk[0], chunk[-1] + 1),
        )
        processes.append(p)
        p.start()
    for p in processes:
        p.join()

    out = np.maximum(result_np, 0)
    return out[:, 0] if was_vector else out


def input_padding(input_geneID, input_value, GeneLegend, initMem):
    """Overwrite the memory vector at the input genes' positions.

    Each input value is scaled by that gene's maximum observed expression so
    pixel intensities land in the gene's expression range.  Reads the
    module-global ``TranscriptomicData`` (set up in ``main``).
    """
    input_matrix = np.array(initMem, dtype=float)
    for ig in range(0, len(input_geneID)):
        input_matrix[GeneLegend.index(input_geneID[ig])] = (
            input_value[ig] * getMaxExpression(TranscriptomicData, input_geneID[ig])
        )
    return input_matrix


def main():
    """Load the data, drive the image(s) through the reservoir, pickle results."""
    global GeneLegend, WeightMatrix, TranscriptomicData, InitialMemory

    start_time = time.time()
    reservoir_weightsDF = pd.read_csv('WeightMatrix.csv', header=0, index_col=0)
    transcriptomicsDF = pd.read_csv('InterpolateData.csv', header=0, index_col=0)
    imageData = pd.read_csv('ImageClassification/ResizedTrainImage.csv',
                            header=None, index_col=0)
    print(time.time() - start_time)
    print(imageData.head())
    print(transcriptomicsDF.head())

    GeneLegend = list(reservoir_weightsDF.columns)
    WeightMatrix = reservoir_weightsDF.values
    TranscriptomicData = transcriptomicsDF[GeneLegend]
    InitialMemory = list(TranscriptomicData.iloc[0])
    print("InitialMemory", InitialMemory)

    # Pick 15 random genes to act as the image-input channels.
    InputGenes = np.random.choice(GeneLegend, 15, replace=False)
    print("InputGene", InputGenes)

    output_matrix = []
    for i in range(0, 1):  # for i in range(0, len(imageData)):
        imgTemp = imageData.iloc[i].values
        # imgTemp = imgTemp / 255  # optional normalization
        imgDims = np.sqrt(len(imgTemp)).astype(int)
        img = imgTemp.reshape(imgDims, imgDims)
        print(img)
        for j in range(0, imgDims):
            # Feed the image one column at a time.  Assumes imgDims >= 15
            # (one pixel per input gene) — TODO confirm against the CSV.
            valueArray = img[:, j]
            print(valueArray)
            if j == 0:
                input_array = input_padding(InputGenes, valueArray,
                                            GeneLegend, InitialMemory)
            else:
                # Decay the previous output and reuse it as the memory state.
                input_array = input_padding(InputGenes, valueArray,
                                            GeneLegend, Output * MemoryValue)
            Output = parallel_rc_output(WeightMatrix, input_array)
        output_matrix.append(Output)

    with open('ImageClassification/output_matrix.pkl', 'wb') as f:
        pk.dump(output_matrix, f)


if __name__ == "__main__":
    main()