Last active 1741880539

jackz's Avatar jackz revised this gist 1741880539. Go to revision

No changes

jackz's Avatar Jackz revised this gist 1732638768. Go to revision

1 file changed, 132 insertions

MNISTGRNN_multi.py(file created)

@@ -0,0 +1,132 @@
1 + import numpy as np
2 + import pandas as pd
3 + import matplotlib.pyplot as plt
4 + import pickle as pk
5 + import multiprocessing
6 + import time
7 +
# Scale applied to the previous reservoir output before it seeds the next step.
MemoryValue = 0.55

# --- Load the input tables and report how long the reads took (seconds) ---
start_time = time.time()
reservoir_weightsDF = pd.read_csv('WeightMatrix.csv', index_col=0, header=0)
transcriptomicsDF = pd.read_csv('InterpolateData.csv', index_col=0, header=0)
imageData = pd.read_csv(
    'ImageClassification/ResizedTrainImage.csv',
    index_col=0,
    header=None,
)
print(time.time() - start_time)
print(imageData.head())

print(transcriptomicsDF.head())

# The column order of the weight table defines the gene order used everywhere.
GeneLegend = reservoir_weightsDF.columns.tolist()

# Plain ndarray view of the reservoir weights.
WeightMatrix = reservoir_weightsDF.values

# Expression table restricted to (and ordered like) the reservoir genes.
TranscriptomicData = transcriptomicsDF.loc[:, GeneLegend]

# The first expression row is used as the reservoir's starting state.
InitialMemory = list(TranscriptomicData.iloc[0])
print("InitialMemory", InitialMemory)
28 +
29 +
30 +
def getMaxExpression(TranscriptomicDF, GeneID):
    """Return the largest value found in column ``GeneID`` of ``TranscriptomicDF``."""
    gene_column = TranscriptomicDF[GeneID]
    return gene_column.max()
33 +
def worker(a, b, result, row):
    """Write row ``row`` of the matrix product ``a x b`` into ``result``.

    ``a``, ``b`` and ``result`` are indexed as ``m[i][j]``; ``result`` is
    modified in place and nothing is returned.
    """

    def cell(col):
        # Dot product of a's row with column `col` of b.
        return sum(a[row][k] * b[k][col] for k in range(len(b)))

    for col in range(len(b[0])):
        result[row][col] = cell(col)
38 +
def worker_multi(a, b, result, start_index, end_index):
    """Write rows ``start_index`` .. ``end_index - 1`` of ``a x b`` into ``result``.

    ``a``, ``b`` and ``result`` are indexed as ``m[i][j]``; ``result`` is
    modified in place and nothing is returned.
    """
    width = len(b[0])

    def cell(r, c):
        # Dot product of row r of a with column c of b.
        return sum(a[r][k] * b[k][c] for k in range(len(b)))

    for r in range(start_index, end_index):
        for c in range(width):
            result[r][c] = cell(r, c)
44 +
def rc_output(rc_weights, input_array):
    """Return ``rc_weights.dot(input_array)`` with negative entries clipped to 0."""
    pre_activation = rc_weights.dot(input_array)
    return np.maximum(pre_activation, 0)
48 +
# Number of worker processes the parallel reservoir update is meant to use.
NUM_PROCESSES = 4
50 +
def split(a, n):
    """Lazily yield ``n`` consecutive slices of ``a`` whose lengths differ by at most 1.

    The first ``len(a) % n`` slices get one extra element. A generator is
    returned; the slice sizes are computed when ``split`` is called.
    """
    base, extra = divmod(len(a), n)
    positions = range(n)

    def pieces():
        begin = 0
        for pos in positions:
            # Earlier slices absorb the remainder, one element each.
            finish = begin + base + (1 if pos < extra else 0)
            yield a[begin:finish]
            begin = finish

    return pieces()
54 +
def _relu_dot_rows(weight_rows, input_array):
    """Return ``weight_rows.dot(input_array)`` with negatives clipped to 0."""
    return np.maximum(weight_rows.dot(input_array), 0)


def parallel_rc_output(rc_weights, input_array, num_processes=None):
    """Compute ``max(rc_weights . input_array, 0)`` using several processes.

    The rows of ``rc_weights`` are cut into at most ``num_processes``
    contiguous chunks; each chunk is multiplied with ``input_array`` in its
    own worker process and the partial results are stitched back together in
    row order.

    Args:
        rc_weights: 2-D weight matrix (anything ``np.asarray`` accepts).
        input_array: 1-D state vector or 2-D matrix whose first dimension
            matches the number of columns of ``rc_weights``.
        num_processes: Maximum number of worker processes. ``None`` (the
            default) uses the module-level ``NUM_PROCESSES``.

    Returns:
        A float ndarray with the same shape ``rc_weights.dot(input_array)``
        would have (1-D for a 1-D input, 2-D for a 2-D input), with negative
        entries replaced by 0.

    Note:
        A pool is created on every call, so process start-up cost is paid
        each time. When called from a script's top level, that code must sit
        under ``if __name__ == "__main__":`` on platforms whose
        multiprocessing start method is not ``fork``.
    """
    weights = np.asarray(rc_weights, dtype=float)
    inputs = np.asarray(input_array, dtype=float)

    if num_processes is None:
        num_processes = NUM_PROCESSES
    # Never start more workers than there are rows, and always at least one.
    n_workers = max(1, min(int(num_processes), weights.shape[0]))

    if n_workers == 1:
        # Nothing to distribute; skip the process start-up cost entirely.
        return _relu_dot_rows(weights, inputs)

    row_chunks = np.array_split(weights, n_workers, axis=0)
    # Results come back by value, so this does not depend on the children
    # sharing memory with the parent (which only holds under ``fork``).
    with multiprocessing.Pool(processes=n_workers) as pool:
        parts = pool.starmap(
            _relu_dot_rows,
            [(chunk, inputs) for chunk in row_chunks],
        )
    return np.concatenate(parts, axis=0)
84 +
def input_padding(input_geneID, input_value, GeneLegend, initMem):
    """Build a reservoir input vector from a prior state plus driven genes.

    A float copy of ``initMem`` is made; then, for each position ``p`` in
    ``input_geneID``, the entry at that gene's index in ``GeneLegend`` is
    overwritten with ``input_value[p]`` multiplied by the gene's maximum
    expression in the module-level ``TranscriptomicData``.

    Returns the new array; ``initMem`` itself is not modified.
    """
    padded = np.array(initMem, dtype=float)

    for position, gene in enumerate(input_geneID):
        scaled = input_value[position] * getMaxExpression(TranscriptomicData, gene)
        padded[GeneLegend.index(gene)] = scaled

    return padded
93 +
94 +
# Driver: feed each image column-by-column through the reservoir and pickle
# the collected reservoir states.
#
# The guard keeps this from re-running when the module is imported, which is
# what child processes do under the "spawn"/"forkserver" multiprocessing
# start methods; without it, starting workers would re-execute the driver.
if __name__ == "__main__":
    # Pick 15 distinct random genes from GeneLegend to act as input nodes.
    # NOTE(review): input_padding reads input_value[k] for each of these 15
    # genes, so images are presumably 15 pixels tall -- confirm.
    InputGenes = np.random.choice(GeneLegend, 15, replace=False)
    print("InputGene", InputGenes)

    output_matrix = []
    # Only the first image for now; use range(len(imageData)) for all images.
    for i in range(0, 1):
        imgTemp = imageData.iloc[i].values
        # Optional normalisation, currently disabled:
        # imgTemp = imgTemp / 255

        # Each row is a flattened square image; recover its side length.
        imgDims = int(np.sqrt(len(imgTemp)))
        img = imgTemp.reshape(imgDims, imgDims)
        print(img)

        for j in range(imgDims):
            valueArray = img[:, j]
            print(valueArray)

            # The first column starts from the initial expression state; each
            # later column starts from the decayed previous reservoir output.
            if j == 0:
                previous_state = InitialMemory
            else:
                previous_state = Output * MemoryValue
            input_array = input_padding(
                InputGenes, valueArray, GeneLegend, previous_state
            )
            Output = parallel_rc_output(WeightMatrix, input_array)

        # NOTE(review): the original indentation was lost; this keeps one
        # final state per image. If every column step should be recorded,
        # move this append inside the `for j` loop.
        output_matrix.append(Output)

    with open('ImageClassification/output_matrix.pkl', 'wb') as f:
        pk.dump(output_matrix, f)

    # Alternative CSV export, currently disabled:
    # output_matrixDF = pd.DataFrame(output_matrix)
    # output_matrixDF.columns = GeneLegend
    # print(output_matrixDF.head())
    # output_matrixDF.to_csv('MNISTData/test' + '1' + '.csv')
Newer Older