MNISTGRNN_multi.py
· 3.9 KiB · Python
Raw
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import pickle as pk
import multiprocessing
import time
# Scaling factor applied to the previous reservoir output before it is passed
# back in as the starting state for the next image column (see the driver loop).
MemoryValue = 0.55
# Load the three input tables and print how long the reads took, in seconds.
start_time = time.time()
# First CSV row is the header and first column is the index for both tables.
reservoir_weightsDF = pd.read_csv('WeightMatrix.csv', header=0, index_col=0)
transcriptomicsDF = pd.read_csv('InterpolateData.csv', header=0, index_col=0)
# No header row here; column 0 becomes the index.
# NOTE(review): column 0 is presumably the image label/id - confirm.
imageData = pd.read_csv('ImageClassification/ResizedTrainImage.csv', header=None, index_col=0)
print(time.time() - start_time)
print(imageData.head())
print(transcriptomicsDF.head())
# exit()
# Column labels of the weight matrix; they fix the order of the state vector.
GeneLegend = list(reservoir_weightsDF.columns)
# Raw NumPy array of the reservoir weights.
WeightMatrix = reservoir_weightsDF.values
# print(GeneLegend)
# Restrict/reorder the transcriptomic table to the GeneLegend columns
# (raises KeyError if a label is missing from transcriptomicsDF).
TranscriptomicData = transcriptomicsDF[GeneLegend]
# First row of the selected table, used as the starting reservoir state.
InitialMemory = list(TranscriptomicData.iloc[0])
print("InitialMemory", InitialMemory)
def getMaxExpression(TranscriptomicDF, GeneID):
    """Return the largest value found in column ``GeneID`` of ``TranscriptomicDF``."""
    expression_column = TranscriptomicDF[GeneID]
    return expression_column.max()
def worker(a, b, result, row):
    """Write row ``row`` of the matrix product ``a @ b`` into ``result``.

    ``a``, ``b`` and ``result`` are indexed as ``x[i][j]``; ``b`` must have at
    least one row because its first row determines the column count.
    """
    column_count = len(b[0])
    for col in range(column_count):
        dot = sum(a[row][idx] * b_row[col] for idx, b_row in enumerate(b))
        result[row][col] = dot
def worker_multi(a, b, result, start_index, end_index):
    """Write rows ``start_index`` (inclusive) to ``end_index`` (exclusive) of ``a @ b``.

    Same element-wise computation as a single-row worker, applied to a
    contiguous range of rows; results are stored in ``result[row][col]``.
    """
    column_count = len(b[0])
    inner_indices = range(len(b))
    for current_row in range(start_index, end_index):
        for col in range(column_count):
            terms = (a[current_row][k] * b[k][col] for k in inner_indices)
            result[current_row][col] = sum(terms)
def rc_output(rc_weights, input_array):
    """Return ``rc_weights.dot(input_array)`` with negative entries clamped to zero."""
    pre_activation = rc_weights.dot(input_array)
    return np.maximum(pre_activation, 0)
# Intended number of worker processes for the parallel reservoir computation.
NUM_PROCESSES=4
def split(a, n):
    """Lazily yield ``n`` consecutive slices of ``a`` whose lengths differ by at most one.

    The first ``len(a) % n`` slices receive one extra element each.
    """
    base, extra = divmod(len(a), n)

    def boundary(i):
        # Start offset of chunk ``i`` (equivalently, end offset of chunk ``i - 1``).
        return i * base + min(i, extra)

    return (a[boundary(i):boundary(i + 1)] for i in range(n))
def _rc_rows_task(a, b, shared, shape, start_index, end_index):
    """Child-process entry point: fill rows ``[start_index, end_index)`` of ``a @ b``.

    The NumPy view over the shared buffer is rebuilt here, inside the child.
    A NumPy array passed directly as a ``Process`` argument is pickled (copied)
    under the "spawn"/"forkserver" start methods, so writes to it would never
    reach the parent; the ``multiprocessing.Array`` itself is shared correctly.
    """
    view = np.frombuffer(shared.get_obj()).reshape(shape)
    worker_multi(a, b, view, start_index, end_index)


def parallel_rc_output(rc_weights, input_array):
    """Compute ``max(rc_weights @ input_array, 0)`` using worker processes.

    The rows of the product are divided into contiguous ranges and each range
    is computed by one child process, using at most ``NUM_PROCESSES``
    processes in total.

    Parameters:
        rc_weights: 2-D weight matrix, indexed as ``rc_weights[row][col]``.
        input_array: either a 1-D vector (treated as a single column) or a
            2-D matrix with ``rc_weights``-compatible row count.

    Returns:
        A new NumPy array with negatives clamped to zero. A 1-D input yields a
        1-D result (same shape convention as ``rc_output``); a 2-D input
        yields a 2-D result.

    Raises:
        RuntimeError: if any worker process exits with a non-zero exit code.
    """
    a = rc_weights
    b = np.asarray(input_array, dtype=float)
    # A plain state vector has no second axis; treat it as one column so the
    # row/column indexing in the workers is valid, and flatten again on return.
    is_vector = b.ndim == 1
    if is_vector:
        b = b.reshape(-1, 1)

    rows = len(a)
    shape = (rows, b.shape[1])
    result = multiprocessing.Array('d', rows * b.shape[1])
    result_np = np.frombuffer(result.get_obj()).reshape(shape)

    # Ceil-divide the rows over a bounded number of processes instead of
    # starting one process per row.
    process_count = max(1, min(NUM_PROCESSES, rows))
    chunk = max(1, -(-rows // process_count))

    processes = []
    for start_index in range(0, rows, chunk):
        end_index = min(start_index + chunk, rows)
        p = multiprocessing.Process(
            target=_rc_rows_task,
            args=(a, b, result, shape, start_index, end_index),
        )
        processes.append(p)
        p.start()
    for p in processes:
        p.join()

    # A crashed child leaves its rows at zero; surface that instead of
    # returning silently wrong data.
    failed = [p.name for p in processes if p.exitcode != 0]
    if failed:
        raise RuntimeError("worker process(es) failed: " + ", ".join(failed))

    activated = np.maximum(result_np, 0)
    return activated.ravel() if is_vector else activated
def input_padding(input_geneID, input_value, GeneLegend, initMem):
    """Build a state vector from ``initMem`` with the input genes overwritten.

    A float copy of ``initMem`` is made; then, for each gene in
    ``input_geneID``, the entry at that gene's position in ``GeneLegend`` is
    replaced by the matching ``input_value`` entry multiplied by the gene's
    maximum value in the module-level ``TranscriptomicData`` table.
    """
    padded = np.array(initMem, dtype=float)
    for position, gene in enumerate(input_geneID):
        scaled = input_value[position] * getMaxExpression(TranscriptomicData, gene)
        padded[GeneLegend.index(gene)] = scaled
    return padded
# Everything below starts worker processes, so it must only run when this file
# is executed as a script. Under the "spawn" start method each child re-imports
# this module, and unguarded top-level code would start processes recursively.
if __name__ == "__main__":
    # Pick 15 distinct random genes from GeneLegend to act as reservoir inputs.
    InputGenes = np.random.choice(GeneLegend, 15, replace=False)
    print("InputGene", InputGenes)

    output_matrix = []
    # Only the first image is processed for now; iterate over
    # range(len(imageData)) to run the whole table.
    for i in range(0, 1):
        imgTemp = imageData.iloc[i].values
        # Pixel normalisation (imgTemp / 255) is currently disabled.
        # The flat pixel row is assumed to describe a square image.
        imgDims = np.sqrt(len(imgTemp)).astype(int)
        img = imgTemp.reshape(imgDims, imgDims)
        print(img)

        # Feed the image column by column; each step starts from the previous
        # output scaled by MemoryValue (the first step from InitialMemory).
        for j in range(0, imgDims):
            valueArray = img[:, j]
            print(valueArray)

            memory = InitialMemory if j == 0 else Output * MemoryValue
            input_array = input_padding(InputGenes, valueArray, GeneLegend, memory)
            Output = parallel_rc_output(WeightMatrix, input_array)
        # NOTE(review): the source this was recovered from had lost its
        # indentation; this assumes one final state is stored per image
        # (after the column loop) rather than one per column - confirm.
        output_matrix.append(Output)

    with open('ImageClassification/output_matrix.pkl', 'wb') as f:
        pk.dump(output_matrix, f)
#to csv
#output_matrixDF = pd.DataFrame(output_matrix)
#add column names
#output_matrixDF.columns = GeneLegend
#print(output_matrixDF.head())
#output_matrixDF.to_csv('MNISTData/test' + '1' + '.csv')
1 | import numpy as np |
2 | import pandas as pd |
3 | import matplotlib.pyplot as plt |
4 | import pickle as pk |
5 | import multiprocessing |
6 | import time |
7 | |
8 | MemoryValue = 0.55 |
9 | |
10 | start_time = time.time() |
11 | reservoir_weightsDF = pd.read_csv('WeightMatrix.csv', header=0, index_col=0) |
12 | transcriptomicsDF = pd.read_csv('InterpolateData.csv', header=0, index_col=0) |
13 | imageData = pd.read_csv('ImageClassification/ResizedTrainImage.csv', header=None, index_col=0) |
14 | print(time.time() - start_time) |
15 | print(imageData.head()) |
16 | |
17 | print(transcriptomicsDF.head()) |
18 | # exit() |
19 | GeneLegend = list(reservoir_weightsDF.columns) |
20 | |
21 | WeightMatrix = reservoir_weightsDF.values |
22 | # print(GeneLegend) |
23 | |
24 | TranscriptomicData = transcriptomicsDF[GeneLegend] |
25 | |
26 | InitialMemory = list(TranscriptomicData.iloc[0]) |
27 | print("InitialMemory", InitialMemory) |
28 | |
29 | |
30 | |
31 | def getMaxExpression(TranscriptomicDF, GeneID): |
32 | return TranscriptomicDF[GeneID].max() |
33 | |
34 | def worker(a, b, result, row): |
35 | n = len(b[0]) |
36 | for j in range(n): |
37 | result[row][j] = sum(a[row][k] * b[k][j] for k in range(len(b))) |
38 | |
39 | def worker_multi(a, b, result, start_index, end_index): |
40 | n = len(b[0]) |
41 | for row in range(start_index, end_index): |
42 | for j in range(n): |
43 | result[row][j] = sum(a[row][k] * b[k][j] for k in range(len(b))) |
44 | |
45 | def rc_output(rc_weights, input_array): |
46 | memory = np.maximum(rc_weights.dot(input_array), 0) |
47 | return memory |
48 | |
49 | NUM_PROCESSES=4 |
50 | |
51 | def split(a, n): |
52 | k, m = divmod(len(a), n) |
53 | return (a[i*k+min(i, m):(i+1)*k+min(i+1, m)] for i in range(n)) |
54 | |
55 | def parallel_rc_output(rc_weights, input_array): |
56 | a = rc_weights |
57 | b = input_array |
58 | #reservoir_weights[:len(expanded_inputs), :] |
59 | rows = len(a) |
60 | print(b[0]) |
61 | result = multiprocessing.Array('d', rows * len(b[0])) |
62 | result_np = np.frombuffer(result.get_obj()).reshape((rows, len(b[0]))) |
63 | processes = [] |
64 | |
65 | # num_rows_per_proc = ceil(rows / NUM_PROCESSES) |
66 | # index = 0 |
67 | # for i in range(NUM_PROCESSES): |
68 | # start_index = index |
69 | # end_index = max(index + num_rows_per_proc, rows) |
70 | # index = end_index + 1 |
71 | |
72 | # p = multiprocessing.Process(target=worker, args=(a, b, result_np, start_index, end_index)) |
73 | # processes.append(p) |
74 | # p.start() |
75 | for i in range(rows): |
76 | # only run NUM_PROCESSES, tell worker array of rows=[1,2,3,...] |
77 | # merge results |
78 | p = multiprocessing.Process(target=worker, args=(a, b, result_np, i)) |
79 | processes.append(p) |
80 | p.start() |
81 | for p in processes: |
82 | p.join() |
83 | return np.maximum(result_np, 0) |
84 | |
85 | def input_padding(input_geneID, input_value, GeneLegend, initMem): |
86 | input_matrix = np.array(initMem, dtype=float) |
87 | |
88 | for ig in range(0, len(input_geneID)): |
89 | input_matrix[GeneLegend.index(input_geneID[ig])] = input_value[ig]*getMaxExpression(TranscriptomicData, input_geneID[ig]) |
90 | |
91 | |
92 | return input_matrix |
93 | |
94 | |
95 | #pick 15 random genes from GeneLegend |
96 | InputGenes = np.random.choice(GeneLegend, 15, replace=False) |
97 | print("InputGene", InputGenes) |
98 | # exit() |
99 | |
100 | output_matrix = [] |
101 | for i in range(0, 1): |
102 | # for i in range(0, len(imageData)): |
103 | |
104 | imgTemp = imageData.iloc[i].values |
105 | # normalize the image |
106 | # imgTemp = imgTemp / 255 |
107 | imgDims = np.sqrt(len(imgTemp)).astype(int) |
108 | img = imgTemp.reshape(imgDims, imgDims) |
109 | print(img) |
110 | |
111 | for j in range(0, imgDims): |
112 | valueArray = img[:,j] |
113 | print(valueArray) |
114 | |
115 | if j == 0: |
116 | input_array = input_padding(InputGenes, valueArray, GeneLegend, InitialMemory) |
117 | Output = parallel_rc_output(WeightMatrix, input_array) |
118 | else: |
119 | input_array = input_padding(InputGenes, valueArray, GeneLegend, Output*MemoryValue) |
120 | Output = parallel_rc_output(WeightMatrix, input_array) |
121 | output_matrix.append(Output) |
122 | |
123 | with open('ImageClassification/output_matrix.pkl', 'wb') as f: |
124 | pk.dump(output_matrix, f) |
125 | |
126 | #to csv |
127 | #output_matrixDF = pd.DataFrame(output_matrix) |
128 | |
129 | #add column names |
130 | #output_matrixDF.columns = GeneLegend |
131 | #print(output_matrixDF.head()) |
132 | #output_matrixDF.to_csv('MNISTData/test' + '1' + '.csv') |