Last active 1741880539

MNISTGRNN_multi.py Raw
1import numpy as np
2import pandas as pd
3import matplotlib.pyplot as plt
4import pickle as pk
5import multiprocessing
6import time
7
# Fraction of the previous reservoir output that is fed back as the starting
# memory for the next image column (see the driver loop at the end of the file).
MemoryValue = 0.55

# Load the reservoir weights, the transcriptomic time series and the image
# table, then print how many seconds the three reads took.
start_time = time.time()
reservoir_weightsDF = pd.read_csv('WeightMatrix.csv', index_col=0, header=0)
transcriptomicsDF = pd.read_csv('InterpolateData.csv', index_col=0, header=0)
imageData = pd.read_csv(
    'ImageClassification/ResizedTrainImage.csv',
    index_col=0,
    header=None,
)
print(time.time() - start_time)
print(imageData.head())

print(transcriptomicsDF.head())

# The column labels of the weight matrix define the gene order used everywhere
# else in the script.
GeneLegend = reservoir_weightsDF.columns.tolist()

# Plain ndarray of the reservoir weights.
WeightMatrix = reservoir_weightsDF.to_numpy()

# Transcriptomic columns restricted to, and ordered like, GeneLegend.
TranscriptomicData = transcriptomicsDF[GeneLegend]

# The first transcriptomic row seeds the reservoir memory.
InitialMemory = TranscriptomicData.iloc[0].tolist()
print("InitialMemory", InitialMemory)
28
29
30
def getMaxExpression(TranscriptomicDF, GeneID):
    """Return the largest value found in column ``GeneID``.

    Args:
        TranscriptomicDF: Table indexable by column label whose columns
            provide a ``max()`` method (a pandas DataFrame in this script).
        GeneID: Label of the column to inspect.

    Returns:
        Whatever ``max()`` of the selected column returns.
    """
    expression_levels = TranscriptomicDF[GeneID]
    return expression_levels.max()
33
def worker(a, b, result, row):
    """Compute one row of the matrix product ``a @ b`` in place.

    Args:
        a: Left operand, indexable as ``a[row][k]``.
        b: Right operand, indexable as ``b[k][j]``; it must be two-dimensional
            because its width is taken from ``len(b[0])``.
        result: Output container, written as ``result[row][j]``.
        row: Index of the row of ``a`` (and of ``result``) to process.
    """
    width = len(b[0])
    for col in range(width):
        total = 0
        for k in range(len(b)):
            total = total + a[row][k] * b[k][col]
        result[row][col] = total
38
def worker_multi(a, b, result, start_index, end_index):
    """Compute rows ``start_index`` .. ``end_index - 1`` of ``a @ b`` in place.

    Args:
        a: Left operand, indexable as ``a[row][k]``.
        b: Right operand, indexable as ``b[k][j]``; it must be two-dimensional
            because its width is taken from ``len(b[0])``.
        result: Output container, written as ``result[row][j]``.
        start_index: First row to process (inclusive).
        end_index: Row at which to stop (exclusive).
    """
    width = len(b[0])
    row = start_index
    while row < end_index:
        for col in range(width):
            total = 0
            for k in range(len(b)):
                total = total + a[row][k] * b[k][col]
            result[row][col] = total
        row += 1
44
def rc_output(rc_weights, input_array):
    """Return the rectified product of the reservoir weights and the input.

    Args:
        rc_weights: Object with a ``dot`` method (a NumPy array in this script).
        input_array: Right-hand operand handed to ``rc_weights.dot``.

    Returns:
        ``rc_weights.dot(input_array)`` with every negative entry replaced by 0.
    """
    pre_activation = rc_weights.dot(input_array)
    return np.maximum(pre_activation, 0)
48
# Upper bound on the number of worker processes started per matrix product.
NUM_PROCESSES = 4


def split(a, n):
    """Partition ``a`` into ``n`` contiguous chunks of near-equal length.

    The first ``len(a) % n`` chunks receive one extra element, so chunk
    lengths differ by at most one. Chunks are empty when ``n > len(a)``.

    Args:
        a: Sliceable sequence with a length (list, range, array, ...).
        n: Number of chunks to produce; must be positive.

    Returns:
        A generator yielding the ``n`` slices of ``a`` in order.
    """
    k, m = divmod(len(a), n)
    return (a[i * k + min(i, m):(i + 1) * k + min(i + 1, m)] for i in range(n))


def _fill_rows(a_rows, b, shared, n_cols, start_index):
    """Child-process target: store ``a_rows @ b`` in the shared result buffer.

    The NumPy view is rebuilt here from the shared ``multiprocessing.Array``
    instead of being passed in, because a NumPy array argument is pickled
    (copied) under the spawn/forkserver start methods and writes to that copy
    would never reach the parent.
    """
    out = np.frombuffer(shared.get_obj()).reshape((-1, n_cols))
    # Each process owns a disjoint row range, so no locking is needed.
    out[start_index:start_index + len(a_rows)] = a_rows.dot(b)


def parallel_rc_output(rc_weights, input_array):
    """Compute ``max(rc_weights @ input_array, 0)`` using worker processes.

    The rows of ``rc_weights`` are divided into at most ``NUM_PROCESSES``
    contiguous chunks; each chunk is multiplied in its own process and written
    into a shared float64 buffer.

    Args:
        rc_weights: 2-D array-like of shape (rows, inner).
        input_array: 1-D array-like of length ``inner`` or 2-D array-like of
            shape (inner, cols).

    Returns:
        A new float ndarray holding the rectified product: shape (rows,) for
        1-D input, shape (rows, cols) for 2-D input.

    Raises:
        ValueError: If the operand shapes are not compatible.
        RuntimeError: If any worker process exits with a non-zero code.
    """
    a = np.asarray(rc_weights, dtype=float)
    b = np.asarray(input_array, dtype=float)

    # A plain vector is treated as a single column so one code path suffices.
    is_vector = b.ndim == 1
    if is_vector:
        b = b.reshape(-1, 1)
    if a.ndim != 2 or b.ndim != 2 or a.shape[1] != b.shape[0]:
        raise ValueError(
            "incompatible shapes for matrix product: "
            f"{a.shape} and {np.shape(input_array)}"
        )

    rows, n_cols = a.shape[0], b.shape[1]
    if rows == 0 or n_cols == 0:
        empty = np.zeros((rows, n_cols))
        return empty[:, 0] if is_vector else empty

    shared = multiprocessing.Array('d', rows * n_cols)

    processes = []
    for chunk in split(range(rows), min(NUM_PROCESSES, rows)):
        if not chunk:
            continue
        p = multiprocessing.Process(
            target=_fill_rows,
            args=(a[chunk.start:chunk.stop], b, shared, n_cols, chunk.start),
        )
        processes.append(p)
        p.start()
    for p in processes:
        p.join()

    # A crashed worker would otherwise leave silent zeros in the result.
    failed = [p.exitcode for p in processes if p.exitcode != 0]
    if failed:
        raise RuntimeError(f"worker process(es) failed with exit codes {failed}")

    result_np = np.frombuffer(shared.get_obj()).reshape((rows, n_cols))
    # np.maximum allocates a fresh array, so the return value does not alias
    # the shared buffer.
    activated = np.maximum(result_np, 0)
    return activated[:, 0] if is_vector else activated
84
def input_padding(input_geneID, input_value, GeneLegend, initMem):
    """Overlay scaled input values onto a copy of the reservoir memory.

    For every gene in ``input_geneID`` the entry at that gene's position in
    ``GeneLegend`` is replaced by the matching element of ``input_value``
    multiplied by the gene's maximum expression in the module-level
    ``TranscriptomicData`` table.

    Args:
        input_geneID: Sequence of gene labels that receive input.
        input_value: Sequence indexed in step with ``input_geneID``; it must
            hold at least ``len(input_geneID)`` elements.
        GeneLegend: List of gene labels giving the position of each gene.
        initMem: Starting memory values; copied into a float array.

    Returns:
        A new float ndarray built from ``initMem`` with the input entries
        overwritten.
    """
    padded = np.array(initMem, dtype=float)

    for position, gene in enumerate(input_geneID):
        scaled = input_value[position] * getMaxExpression(TranscriptomicData, gene)
        padded[GeneLegend.index(gene)] = scaled

    return padded
93
94
# The driver is guarded so that child processes started by
# parallel_rc_output, which re-import this module under the spawn and
# forkserver start methods, do not re-run the whole experiment themselves.
if __name__ == "__main__":
    # Pick 15 distinct genes from GeneLegend to act as input nodes.
    # input_padding reads one pixel per input gene, so every image column
    # must contain at least 15 values.
    InputGenes = np.random.choice(GeneLegend, 15, replace=False)
    print("InputGene", InputGenes)

    output_matrix = []
    # Only the first image is processed for now; use range(len(imageData))
    # to run the whole table.
    for i in range(0, 1):
        imgTemp = imageData.iloc[i].values
        # Pixel normalisation (imgTemp / 255) is intentionally left disabled.
        imgDims = np.sqrt(len(imgTemp)).astype(int)
        img = imgTemp.reshape(imgDims, imgDims)
        print(img)

        # Each image starts from the initial memory; afterwards the decayed
        # previous output is fed back in.
        memory = InitialMemory
        for j in range(imgDims):
            valueArray = img[:, j]
            print(valueArray)

            input_array = input_padding(InputGenes, valueArray, GeneLegend, memory)
            Output = parallel_rc_output(WeightMatrix, input_array)
            memory = Output * MemoryValue

        # NOTE(review): the original indentation was lost; this assumes the
        # final reservoir state is stored once per image. Move the append into
        # the column loop if every intermediate state should be kept.
        output_matrix.append(Output)

    with open('ImageClassification/output_matrix.pkl', 'wb') as f:
        pk.dump(output_matrix, f)
125
126#to csv
127#output_matrixDF = pd.DataFrame(output_matrix)
128
129#add column names
130#output_matrixDF.columns = GeneLegend
131#print(output_matrixDF.head())
132#output_matrixDF.to_csv('MNISTData/test' + '1' + '.csv')