Revision of MNISTGRNN_multi.py

jackz revised this gist 1741880539. Go to revision

No changes

Jackz revised this gist 1732638768. Go to revision

1 file changed, 132 insertions

MNISTGRNN_multi.py(file created)

		@@ -0,0 +1,132 @@
import multiprocessing
import pickle as pk
import time

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# Factor applied to the previous reservoir output before it is reused as the
# starting state for the next image column (see the main loop).
MemoryValue = 0.55

# Load the three CSV inputs and print how long the load took, in seconds.
start_time = time.time()
reservoir_weightsDF = pd.read_csv('WeightMatrix.csv', header=0, index_col=0)
transcriptomicsDF = pd.read_csv('InterpolateData.csv', header=0, index_col=0)
imageData = pd.read_csv('ImageClassification/ResizedTrainImage.csv', header=None, index_col=0)
print(time.time() - start_time)
print(imageData.head())

print(transcriptomicsDF.head())

# The column labels of the weight matrix fix the ordering used for every
# state vector below.
GeneLegend = list(reservoir_weightsDF.columns)

# Plain ndarray view of the weights for the matrix products.
WeightMatrix = reservoir_weightsDF.values

# Keep only the weight-matrix columns of the expression table, in that order.
TranscriptomicData = transcriptomicsDF[GeneLegend]

# The first row of the expression table seeds the reservoir state.
InitialMemory = list(TranscriptomicData.iloc[0])
print("InitialMemory", InitialMemory)

def getMaxExpression(TranscriptomicDF, GeneID):
    """Return the largest value found in column ``GeneID``.

    Args:
        TranscriptomicDF: Table indexable by column label (e.g. a pandas
            DataFrame) whose columns expose ``.max()``.
        GeneID: Label of the column to inspect.

    Returns:
        The result of ``TranscriptomicDF[GeneID].max()``.
    """
    return TranscriptomicDF[GeneID].max()

def worker(a, b, result, row):
    """Write row ``row`` of the matrix product ``a @ b`` into ``result``.

    Args:
        a: Left matrix, indexable as ``a[row][k]``.
        b: Right matrix, indexable as ``b[k][j]``; must have at least one row
            because its width is read from ``b[0]``.
        result: Output matrix, indexable as ``result[row][j]``; modified in
            place.
        row: Index of the row to compute.
    """
    n = len(b[0])
    inner = len(b)  # shared dimension; invariant across output columns
    for j in range(n):
        result[row][j] = sum(a[row][k] * b[k][j] for k in range(inner))

def worker_multi(a, b, result, start_index, end_index):
    """Write rows ``[start_index, end_index)`` of ``a @ b`` into ``result``.

    Args:
        a: Left matrix, indexable as ``a[row][k]``.
        b: Right matrix, indexable as ``b[k][j]``; must have at least one row
            because its width is read from ``b[0]``.
        result: Output matrix, indexable as ``result[row][j]``; modified in
            place.
        start_index: First row to compute (inclusive).
        end_index: Row at which to stop (exclusive).
    """
    n = len(b[0])
    inner = len(b)  # shared dimension; invariant across rows and columns
    for row in range(start_index, end_index):
        for j in range(n):
            result[row][j] = sum(a[row][k] * b[k][j] for k in range(inner))

def rc_output(rc_weights, input_array):
    """Return the weighted input with negative entries clamped to zero.

    Args:
        rc_weights: Weight matrix exposing ``.dot`` (e.g. a NumPy array).
        input_array: Vector or matrix to multiply by the weights.

    Returns:
        ``rc_weights.dot(input_array)`` with every negative entry replaced
        by 0.
    """
    memory = np.maximum(rc_weights.dot(input_array), 0)
    return memory

# Intended number of worker processes for the parallel matrix product.
NUM_PROCESSES = 4

def split(a, n):
    """Lazily split sequence ``a`` into ``n`` contiguous, near-equal slices.

    The first ``len(a) % n`` slices hold one extra element, so slice lengths
    differ by at most one.

    Args:
        a: Sliceable sequence with a length.
        n: Number of slices to produce.

    Returns:
        A generator yielding the ``n`` slices in order.

    Raises:
        ZeroDivisionError: If ``n`` is 0 (raised when ``split`` is called).
    """
    k, m = divmod(len(a), n)
    # The multiplication signs were missing from the scraped source
    # ("ik", "(i+1)k"); without them the expression is a syntax error.
    return (a[i * k + min(i, m):(i + 1) * k + min(i + 1, m)] for i in range(n))

def _rc_chunk_worker(a, b, shared, shape, start_index, end_index):
    """Fill rows ``[start_index, end_index)`` of ``a @ b`` in the shared buffer."""
    # Wrap the shared buffer inside the child process. A NumPy view passed as
    # a Process argument only aliases the parent's memory under the "fork"
    # start method; otherwise it is pickled and the child fills a private copy.
    view = np.frombuffer(shared.get_obj()).reshape(shape)
    worker_multi(a, b, view, start_index, end_index)


def parallel_rc_output(rc_weights, input_array):
    """Compute ``max(rc_weights @ input_array, 0)`` across worker processes.

    The rows of the product are divided into at most ``NUM_PROCESSES``
    contiguous chunks; each chunk is computed by ``worker_multi`` in its own
    process and written into a shared double-precision buffer.

    Args:
        rc_weights: Weight matrix, indexable as ``rc_weights[row][k]``.
        input_array: Either a 1-D vector or a 2-D matrix whose first
            dimension matches the width of ``rc_weights``.

    Returns:
        A new NumPy array with negative entries clamped to zero. A 1-D input
        yields a 1-D result (matching ``rc_output``); a 2-D input yields a
        ``(rows, columns)`` result.

    Raises:
        RuntimeError: If any worker process exits with a non-zero code.
    """
    a = rc_weights
    b = np.asarray(input_array, dtype=float)

    # A 1-D state vector is treated as a single column; the original code
    # called len(b[0]) on a scalar in that case and raised TypeError.
    is_vector = b.ndim == 1
    if is_vector:
        b = b.reshape(-1, 1)

    rows = len(a)
    cols = b.shape[1]
    if rows == 0 or cols == 0:
        empty = np.zeros((rows, cols))
        return empty[:, 0] if is_vector else empty

    result = multiprocessing.Array('d', rows * cols)
    result_np = np.frombuffer(result.get_obj()).reshape((rows, cols))

    # One process per chunk of rows rather than one process per row.
    n_procs = max(1, min(NUM_PROCESSES, rows))
    chunk, extra = divmod(rows, n_procs)
    processes = []
    start_index = 0
    for i in range(n_procs):
        # The first `extra` chunks take one additional row each.
        end_index = start_index + chunk + (1 if i < extra else 0)
        p = multiprocessing.Process(
            target=_rc_chunk_worker,
            args=(a, b, result, (rows, cols), start_index, end_index),
        )
        processes.append(p)
        p.start()
        start_index = end_index

    for p in processes:
        p.join()

    # A crashed worker leaves its rows at zero; surface that instead of
    # returning a silently incomplete product.
    failed = [p.exitcode for p in processes if p.exitcode != 0]
    if failed:
        raise RuntimeError(
            "parallel_rc_output: worker process(es) failed with exit codes %s" % failed
        )

    # np.maximum allocates a fresh array, so the return value does not alias
    # the shared buffer.
    activated = np.maximum(result_np, 0)
    return activated[:, 0] if is_vector else activated

def input_padding(input_geneID, input_value, GeneLegend, initMem):
    """Build a state vector from ``initMem`` with the input genes overwritten.

    Each input gene's slot is set to its input value multiplied by that
    gene's maximum in the module-level ``TranscriptomicData`` table.

    Args:
        input_geneID: Sequence of gene labels that receive input.
        input_value: Sequence of input values, indexed in step with
            ``input_geneID``.
        GeneLegend: List of gene labels giving each gene's slot position.
        initMem: Starting state; copied, never modified.

    Returns:
        A new float NumPy array with the input slots replaced.

    Raises:
        ValueError: If an input gene is not present in ``GeneLegend``.
        IndexError: If ``input_value`` is shorter than ``input_geneID``.
    """
    input_matrix = np.array(initMem, dtype=float)

    for ig, gene in enumerate(input_geneID):
        scaled = input_value[ig] * getMaxExpression(TranscriptomicData, gene)
        input_matrix[GeneLegend.index(gene)] = scaled

    return input_matrix

# Guard the driver so that worker processes which re-import this module
# (the "spawn" and "forkserver" start methods) do not re-run it and start
# processes of their own.
if __name__ == "__main__":
    # Pick 15 distinct random genes from GeneLegend to receive the pixel input.
    InputGenes = np.random.choice(GeneLegend, 15, replace=False)
    print("InputGene", InputGenes)

    output_matrix = []
    # Only the first image is processed for now; use
    # range(0, len(imageData)) to run the whole set.
    for i in range(0, 1):
        imgTemp = imageData.iloc[i].values
        # normalize the image
        # imgTemp = imgTemp / 255
        # The flat pixel row is assumed to hold a square image.
        imgDims = np.sqrt(len(imgTemp)).astype(int)
        img = imgTemp.reshape(imgDims, imgDims)
        print(img)

        # Feed the image to the reservoir one column at a time.
        for j in range(0, imgDims):
            valueArray = img[:, j]
            print(valueArray)

            # The first column starts from the initial state; later columns
            # start from the damped previous output.
            memory = InitialMemory if j == 0 else Output * MemoryValue
            input_array = input_padding(InputGenes, valueArray, GeneLegend, memory)
            Output = parallel_rc_output(WeightMatrix, input_array)
            # NOTE(review): the original indentation was lost, so whether this
            # append ran once per column (as here) or once per image cannot be
            # told from the source -- confirm against the intended output.
            output_matrix.append(Output)

    with open('ImageClassification/output_matrix.pkl', 'wb') as f:
        pk.dump(output_matrix, f)

    # Optional CSV export, left disabled as in the original:
    # output_matrixDF = pd.DataFrame(output_matrix)
    # output_matrixDF.columns = GeneLegend
    # print(output_matrixDF.head())
    # output_matrixDF.to_csv('MNISTData/test' + '1' + '.csv')

Newer Older