MNISTGRNN_multi.py
· 3.9 KiB · Python
Raw
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import pickle as pk
import multiprocessing
import time
# Factor applied to the previous step's output before it is reused as the
# starting memory of the next step (see the main loop).
MemoryValue = 0.55
# Time the three CSV loads below; the elapsed seconds are printed afterwards.
start_time = time.time()
# All three files use their first column as the row index. The weight and
# expression files have a header row; the image file does not.
reservoir_weightsDF = pd.read_csv('WeightMatrix.csv', header=0, index_col=0)
transcriptomicsDF = pd.read_csv('InterpolateData.csv', header=0, index_col=0)
imageData = pd.read_csv('ImageClassification/ResizedTrainImage.csv', header=None, index_col=0)
print(time.time() - start_time)
print(imageData.head())
print(transcriptomicsDF.head())
# exit()
# The weight matrix's column labels fix the gene order used by the rest of
# the script.
GeneLegend = list(reservoir_weightsDF.columns)
WeightMatrix = reservoir_weightsDF.values
# print(GeneLegend)
# Select the expression columns in the same order as GeneLegend.
TranscriptomicData = transcriptomicsDF[GeneLegend]
# The first row of the selected expression data is the starting memory.
InitialMemory = list(TranscriptomicData.iloc[0])
print("InitialMemory", InitialMemory)
def getMaxExpression(TranscriptomicDF, GeneID):
    """Return the largest value in column ``GeneID`` of ``TranscriptomicDF``.

    Args:
        TranscriptomicDF: Table indexable by column label whose columns
            expose ``.max()`` (a pandas DataFrame in this script).
        GeneID: Label of the column to inspect.

    Returns:
        The result of ``TranscriptomicDF[GeneID].max()``.
    """
    return TranscriptomicDF[GeneID].max()
def worker(a, b, result, row):
    """Fill ``result[row]`` with row ``row`` of the matrix product ``a @ b``.

    Args:
        a: Left operand, read as ``a[row][k]``.
        b: Right operand, read as ``b[k][j]``; its first row fixes the
            number of output columns.
        result: Pre-allocated 2-D container, written in place as
            ``result[row][j]``.
        row: Index of the row to compute.
    """
    inner = len(b)
    # An empty ``b`` has no first row to measure, so there is nothing to write.
    n = len(b[0]) if inner else 0
    for j in range(n):
        result[row][j] = sum(a[row][k] * b[k][j] for k in range(inner))
def worker_multi(a, b, result, start_index, end_index):
    """Fill rows ``start_index`` to ``end_index - 1`` of ``result`` with ``a @ b``.

    Args:
        a: Left operand, read as ``a[row][k]``.
        b: Right operand, read as ``b[k][j]``; its first row fixes the
            number of output columns.
        result: Pre-allocated 2-D container, written in place as
            ``result[row][j]``.
        start_index: First row to compute (inclusive).
        end_index: Row at which to stop (exclusive).
    """
    inner = len(b)
    # An empty ``b`` has no first row to measure, so there is nothing to write.
    n = len(b[0]) if inner else 0
    for row in range(start_index, end_index):
        for j in range(n):
            result[row][j] = sum(a[row][k] * b[k][j] for k in range(inner))
def rc_output(rc_weights, input_array):
    """Return ``rc_weights.dot(input_array)`` with negative entries set to 0.

    Args:
        rc_weights: Object providing ``.dot`` (a NumPy array in this script).
        input_array: Right-hand operand handed to ``rc_weights.dot``.

    Returns:
        The element-wise maximum of the product and 0.
    """
    return np.maximum(rc_weights.dot(input_array), 0)
# Intended number of worker processes.
NUM_PROCESSES = 4


def split(a, n):
    """Lazily cut ``a`` into ``n`` consecutive slices of near-equal length.

    The first ``len(a) % n`` slices each receive one extra element, so slice
    lengths differ by at most one.

    Args:
        a: Sliceable sequence supporting ``len``.
        n: Number of slices to produce; ``n == 0`` raises
            ``ZeroDivisionError`` from ``divmod``.

    Returns:
        A generator yielding the ``n`` slices in order.
    """
    k, m = divmod(len(a), n)
    return (
        a[i * k + min(i, m):(i + 1) * k + min(i + 1, m)]
        for i in range(n)
    )
def _rc_rows_worker(shared_result, shape, weight_rows, inputs, start_row):
    """Write one contiguous band of ``weights @ inputs`` into shared memory.

    Runs in a child process. The NumPy view is rebuilt here from the shared
    ``multiprocessing.Array`` so the write lands in memory the parent can
    read under every start method; a NumPy array passed as a process
    argument is copied when the child is spawned rather than forked.
    """
    out = np.frombuffer(shared_result.get_obj()).reshape(shape)
    out[start_row:start_row + len(weight_rows)] = weight_rows.dot(inputs)


def parallel_rc_output(rc_weights, input_array, num_processes=None):
    """Compute ``max(rc_weights @ input_array, 0)`` across worker processes.

    The rows of ``rc_weights`` are divided into contiguous bands, one per
    worker process; each worker writes its band of the product into a shared
    buffer, and the parent clamps negatives to 0 once all workers finish.

    Args:
        rc_weights: 2-D weight matrix (anything ``np.asarray`` accepts).
        input_array: Either a 1-D vector or a 2-D matrix whose first
            dimension matches the number of columns of ``rc_weights``.
        num_processes: Upper bound on worker processes. ``None`` uses
            ``multiprocessing.cpu_count()``. Never more than one worker per
            row is started.

    Returns:
        A new NumPy array: 1-D of length ``rows`` when ``input_array`` is
        1-D (same shape as ``rc_weights.dot(input_array)``), otherwise 2-D
        of shape ``(rows, input_array.shape[1])``.

    Raises:
        ValueError: If the operands are not matrix-multiplication compatible.
        RuntimeError: If any worker process exits with a non-zero code.
    """
    weights = np.asarray(rc_weights, dtype=float)
    inputs = np.asarray(input_array, dtype=float)

    # Treat a vector as a single column so one code path serves both cases;
    # the result is flattened again before returning.
    is_vector = inputs.ndim == 1
    if is_vector:
        inputs = inputs.reshape(-1, 1)

    if weights.ndim != 2 or inputs.ndim != 2 or weights.shape[1] != inputs.shape[0]:
        raise ValueError(
            "incompatible shapes for rc_weights @ input_array: "
            f"{weights.shape} and {np.shape(input_array)}"
        )

    rows, cols = weights.shape[0], inputs.shape[1]
    shape = (rows, cols)
    if rows == 0 or cols == 0:
        empty = np.zeros(shape)
        return empty.ravel() if is_vector else empty

    if num_processes is None:
        num_processes = multiprocessing.cpu_count()
    n_workers = max(1, min(int(num_processes), rows))

    shared = multiprocessing.Array('d', rows * cols)

    # Spread the rows as evenly as possible: the first ``extra`` workers
    # take one additional row each.
    band, extra = divmod(rows, n_workers)
    processes = []
    start = 0
    for index in range(n_workers):
        stop = start + band + (1 if index < extra else 0)
        p = multiprocessing.Process(
            target=_rc_rows_worker,
            args=(shared, shape, weights[start:stop], inputs, start),
        )
        processes.append(p)
        p.start()
        start = stop

    for p in processes:
        p.join()

    # A crashed worker would otherwise leave silent zeros in its band.
    failed = [p.exitcode for p in processes if p.exitcode != 0]
    if failed:
        raise RuntimeError(f"worker process(es) failed with exit codes {failed}")

    result_np = np.frombuffer(shared.get_obj()).reshape(shape)
    # np.maximum allocates a fresh array, so the return value does not alias
    # the shared buffer.
    activated = np.maximum(result_np, 0)
    return activated.ravel() if is_vector else activated
def input_padding(input_geneID, input_value, GeneLegend, initMem):
    """Overlay scaled input values onto a copy of the memory vector.

    For each gene in ``input_geneID``, the entry of the vector at that
    gene's position in ``GeneLegend`` is replaced by the matching
    ``input_value`` multiplied by the gene's maximum in the module-level
    ``TranscriptomicData``. All other entries keep their ``initMem`` value.

    Args:
        input_geneID: Gene labels to overwrite; each must occur in
            ``GeneLegend``.
        input_value: Values indexed in step with ``input_geneID``; must be
            at least as long (extra trailing values are ignored).
        GeneLegend: List of gene labels giving the vector layout.
        initMem: Starting values, copied into a new float array.

    Returns:
        A new 1-D float NumPy array; ``initMem`` itself is not modified.
    """
    input_matrix = np.array(initMem, dtype=float)
    for position, gene in enumerate(input_geneID):
        scale = getMaxExpression(TranscriptomicData, gene)
        input_matrix[GeneLegend.index(gene)] = input_value[position] * scale
    return input_matrix
# multiprocessing may re-import this file in every child process (the "spawn"
# and "forkserver" start methods do). Without this guard each child would
# start the whole run again and try to launch processes of its own.
if __name__ == "__main__":
    # pick 15 random genes from GeneLegend
    InputGenes = np.random.choice(GeneLegend, 15, replace=False)
    print("InputGene", InputGenes)
    # exit()

    output_matrix = []
    for i in range(0, 1):
        # for i in range(0, len(imageData)):
        imgTemp = imageData.iloc[i].values
        # normalize the image
        # imgTemp = imgTemp / 255
        # Each image row is reshaped to a square; this assumes its length is
        # a perfect square, otherwise reshape raises.
        imgDims = np.sqrt(len(imgTemp)).astype(int)
        img = imgTemp.reshape(imgDims, imgDims)
        print(img)

        for j in range(0, imgDims):
            # Feed the image one column at a time.
            valueArray = img[:, j]
            print(valueArray)

            # The first column starts from the baseline memory; every later
            # column starts from the previous output scaled by MemoryValue.
            memory = InitialMemory if j == 0 else Output * MemoryValue
            input_array = input_padding(InputGenes, valueArray, GeneLegend, memory)
            Output = parallel_rc_output(WeightMatrix, input_array)
            # NOTE(review): the source this was recovered from had lost its
            # indentation, so the nesting of this append is an assumption
            # (one entry per column step). Move it out one level if only the
            # final state per image should be stored.
            output_matrix.append(Output)

    with open('ImageClassification/output_matrix.pkl', 'wb') as f:
        pk.dump(output_matrix, f)
#to csv
#output_matrixDF = pd.DataFrame(output_matrix)
#add column names
#output_matrixDF.columns = GeneLegend
#print(output_matrixDF.head())
#output_matrixDF.to_csv('MNISTData/test' + '1' + '.csv')
| 1 | import numpy as np |
| 2 | import pandas as pd |
| 3 | import matplotlib.pyplot as plt |
| 4 | import pickle as pk |
| 5 | import multiprocessing |
| 6 | import time |
| 7 | |
| 8 | MemoryValue = 0.55 |
| 9 | |
| 10 | start_time = time.time() |
| 11 | reservoir_weightsDF = pd.read_csv('WeightMatrix.csv', header=0, index_col=0) |
| 12 | transcriptomicsDF = pd.read_csv('InterpolateData.csv', header=0, index_col=0) |
| 13 | imageData = pd.read_csv('ImageClassification/ResizedTrainImage.csv', header=None, index_col=0) |
| 14 | print(time.time() - start_time) |
| 15 | print(imageData.head()) |
| 16 | |
| 17 | print(transcriptomicsDF.head()) |
| 18 | # exit() |
| 19 | GeneLegend = list(reservoir_weightsDF.columns) |
| 20 | |
| 21 | WeightMatrix = reservoir_weightsDF.values |
| 22 | # print(GeneLegend) |
| 23 | |
| 24 | TranscriptomicData = transcriptomicsDF[GeneLegend] |
| 25 | |
| 26 | InitialMemory = list(TranscriptomicData.iloc[0]) |
| 27 | print("InitialMemory", InitialMemory) |
| 28 | |
| 29 | |
| 30 | |
| 31 | def getMaxExpression(TranscriptomicDF, GeneID): |
| 32 | return TranscriptomicDF[GeneID].max() |
| 33 | |
| 34 | def worker(a, b, result, row): |
| 35 | n = len(b[0]) |
| 36 | for j in range(n): |
| 37 | result[row][j] = sum(a[row][k] * b[k][j] for k in range(len(b))) |
| 38 | |
| 39 | def worker_multi(a, b, result, start_index, end_index): |
| 40 | n = len(b[0]) |
| 41 | for row in range(start_index, end_index): |
| 42 | for j in range(n): |
| 43 | result[row][j] = sum(a[row][k] * b[k][j] for k in range(len(b))) |
| 44 | |
| 45 | def rc_output(rc_weights, input_array): |
| 46 | memory = np.maximum(rc_weights.dot(input_array), 0) |
| 47 | return memory |
| 48 | |
| 49 | NUM_PROCESSES=4 |
| 50 | |
| 51 | def split(a, n): |
| 52 | k, m = divmod(len(a), n) |
| 53 | return (a[i*k+min(i, m):(i+1)*k+min(i+1, m)] for i in range(n)) |
| 54 | |
| 55 | def parallel_rc_output(rc_weights, input_array): |
| 56 | a = rc_weights |
| 57 | b = input_array |
| 58 | #reservoir_weights[:len(expanded_inputs), :] |
| 59 | rows = len(a) |
| 60 | print(b[0]) |
| 61 | result = multiprocessing.Array('d', rows * len(b[0])) |
| 62 | result_np = np.frombuffer(result.get_obj()).reshape((rows, len(b[0]))) |
| 63 | processes = [] |
| 64 | |
| 65 | # num_rows_per_proc = ceil(rows / NUM_PROCESSES) |
| 66 | # index = 0 |
| 67 | # for i in range(NUM_PROCESSES): |
| 68 | # start_index = index |
| 69 | # end_index = max(index + num_rows_per_proc, rows) |
| 70 | # index = end_index + 1 |
| 71 | |
| 72 | # p = multiprocessing.Process(target=worker, args=(a, b, result_np, start_index, end_index)) |
| 73 | # processes.append(p) |
| 74 | # p.start() |
| 75 | for i in range(rows): |
| 76 | # only run NUM_PROCESSES, tell worker array of rows=[1,2,3,...] |
| 77 | # merge reults |
| 78 | p = multiprocessing.Process(target=worker, args=(a, b, result_np, i)) |
| 79 | processes.append(p) |
| 80 | p.start() |
| 81 | for p in processes: |
| 82 | p.join() |
| 83 | return np.maximum(result_np, 0) |
| 84 | |
| 85 | def input_padding(input_geneID, input_value, GeneLegend, initMem): |
| 86 | input_matrix = np.array(initMem, dtype=float) |
| 87 | |
| 88 | for ig in range(0, len(input_geneID)): |
| 89 | input_matrix[GeneLegend.index(input_geneID[ig])] = input_value[ig]*getMaxExpression(TranscriptomicData, input_geneID[ig]) |
| 90 | |
| 91 | |
| 92 | return input_matrix |
| 93 | |
| 94 | |
| 95 | #pick 15 random genes from GeneLegend |
| 96 | InputGenes = np.random.choice(GeneLegend, 15, replace=False) |
| 97 | print("InputGene", InputGenes) |
| 98 | # exit() |
| 99 | |
| 100 | output_matrix = [] |
| 101 | for i in range(0, 1): |
| 102 | # for i in range(0, len(imageData)): |
| 103 | |
| 104 | imgTemp = imageData.iloc[i].values |
| 105 | # normalize the image |
| 106 | # imgTemp = imgTemp / 255 |
| 107 | imgDims = np.sqrt(len(imgTemp)).astype(int) |
| 108 | img = imgTemp.reshape(imgDims, imgDims) |
| 109 | print(img) |
| 110 | |
| 111 | for j in range(0, imgDims): |
| 112 | valueArray = img[:,j] |
| 113 | print(valueArray) |
| 114 | |
| 115 | if j == 0: |
| 116 | input_array = input_padding(InputGenes, valueArray, GeneLegend, InitialMemory) |
| 117 | Output = parallel_rc_output(WeightMatrix, input_array) |
| 118 | else: |
| 119 | input_array = input_padding(InputGenes, valueArray, GeneLegend, Output*MemoryValue) |
| 120 | Output = parallel_rc_output(WeightMatrix, input_array) |
| 121 | output_matrix.append(Output) |
| 122 | |
| 123 | with open('ImageClassification/output_matrix.pkl', 'wb') as f: |
| 124 | pk.dump(output_matrix, f) |
| 125 | |
| 126 | #to csv |
| 127 | #output_matrixDF = pd.DataFrame(output_matrix) |
| 128 | |
| 129 | #add column names |
| 130 | #output_matrixDF.columns = GeneLegend |
| 131 | #print(output_matrixDF.head()) |
| 132 | #output_matrixDF.to_csv('MNISTData/test' + '1' + '.csv') |