MNISTGRNN_multi.py

MNISTGRNN_multi.py · 3.9 KiB · Python Raw

import numpy as np import pandas as pd import matplotlib.pyplot as plt import pickle as pk import multiprocessing import time MemoryValue = 0.55 start_time = time.time() reservoir_weightsDF = pd.read_csv('WeightMatrix.csv', header=0, index_col=0) transcriptomicsDF = pd.read_csv('InterpolateData.csv', header=0, index_col=0) imageData = pd.read_csv('ImageClassification/ResizedTrainImage.csv', header=None, index_col=0) print(time.time() - start_time) print(imageData.head()) print(transcriptomicsDF.head()) # exit() GeneLegend = list(reservoir_weightsDF.columns) WeightMatrix = reservoir_weightsDF.values # print(GeneLegend) TranscriptomicData = transcriptomicsDF[GeneLegend] InitialMemory = list(TranscriptomicData.iloc[0]) print("InitialMemory", InitialMemory) def getMaxExpression(TranscriptomicDF, GeneID): return TranscriptomicDF[GeneID].max() def worker(a, b, result, row): n = len(b[0]) for j in range(n): result[row][j] = sum(a[row][k] * b[k][j] for k in range(len(b))) def worker_multi(a, b, result, start_index, end_index): n = len(b[0]) for row in range(start_index, end_index): for j in range(n): result[row][j] = sum(a[row][k] * b[k][j] for k in range(len(b))) def rc_output(rc_weights, input_array): memory = np.maximum(rc_weights.dot(input_array), 0) return memory NUM_PROCESSES=4 def split(a, n): k, m = divmod(len(a), n) return (a[i*k+min(i, m):(i+1)*k+min(i+1, m)] for i in range(n)) def parallel_rc_output(rc_weights, input_array): a = rc_weights b = input_array #reservoir_weights[:len(expanded_inputs), :] rows = len(a) print(b[0]) result = multiprocessing.Array('d', rows * len(b[0])) result_np = np.frombuffer(result.get_obj()).reshape((rows, len(b[0]))) processes = [] # num_rows_per_proc = ceil(rows / NUM_PROCESSES) # index = 0 # for i in range(NUM_PROCESSES): # start_index = index # end_index = max(index + num_rows_per_proc, rows) # index = end_index + 1 # p = multiprocessing.Process(target=worker, args=(a, b, result_np, start_index, end_index)) # processes.append(p) 
# p.start() for i in range(rows): # only run NUM_PROCESSES, tell worker array of rows=[1,2,3,...] # merge reults p = multiprocessing.Process(target=worker, args=(a, b, result_np, i)) processes.append(p) p.start() for p in processes: p.join() return np.maximum(result_np, 0) def input_padding(input_geneID, input_value, GeneLegend, initMem): input_matrix = np.array(initMem, dtype=float) for ig in range(0, len(input_geneID)): input_matrix[GeneLegend.index(input_geneID[ig])] = input_value[ig]*getMaxExpression(TranscriptomicData, input_geneID[ig]) return input_matrix #pick 15 random genes from GeneLegend InputGenes = np.random.choice(GeneLegend, 15, replace=False) print("InputGene", InputGenes) # exit() output_matrix = [] for i in range(0, 1): # for i in range(0, len(imageData)): imgTemp = imageData.iloc[i].values # normalize the image # imgTemp = imgTemp / 255 imgDims = np.sqrt(len(imgTemp)).astype(int) img = imgTemp.reshape(imgDims, imgDims) print(img) for j in range(0, imgDims): valueArray = img[:,j] print(valueArray) if j == 0: input_array = input_padding(InputGenes, valueArray, GeneLegend, InitialMemory) Output = parallel_rc_output(WeightMatrix, input_array) else: input_array = input_padding(InputGenes, valueArray, GeneLegend, Output*MemoryValue) Output = parallel_rc_output(WeightMatrix, input_array) output_matrix.append(Output) with open('ImageClassification/output_matrix.pkl', 'wb') as f: pk.dump(output_matrix, f) #to csv #output_matrixDF = pd.DataFrame(output_matrix) #add column names #output_matrixDF.columns = GeneLegend #print(output_matrixDF.head()) #output_matrixDF.to_csv('MNISTData/test' + '1' + '.csv')

1	import numpy as np
2	import pandas as pd
3	import matplotlib.pyplot as plt
4	import pickle as pk
5	import multiprocessing
6	import time
7
# Factor applied to the previous reservoir output before it is reused as the
# starting memory for the next image column.
MemoryValue = 0.55

# Load the three input tables and report how long the reads took.
start_time = time.time()
reservoir_weightsDF = pd.read_csv(
    'WeightMatrix.csv',
    header=0,
    index_col=0,
)
transcriptomicsDF = pd.read_csv(
    'InterpolateData.csv',
    header=0,
    index_col=0,
)
imageData = pd.read_csv(
    'ImageClassification/ResizedTrainImage.csv',
    header=None,
    index_col=0,
)
print(time.time() - start_time)
print(imageData.head())

print(transcriptomicsDF.head())

# The column labels of the weight table define the gene order used everywhere
# else in this script.
GeneLegend = list(reservoir_weightsDF.columns)
WeightMatrix = reservoir_weightsDF.values

# Select the expression columns in GeneLegend order; the first row of that
# selection is the initial reservoir memory.
TranscriptomicData = transcriptomicsDF[GeneLegend]
InitialMemory = list(TranscriptomicData.iloc[0])
print("InitialMemory", InitialMemory)
28
29
30
def getMaxExpression(TranscriptomicDF, GeneID):
    """Return the maximum of the ``GeneID`` column of ``TranscriptomicDF``.

    Args:
        TranscriptomicDF: Table indexable by column label (a pandas
            DataFrame in this script).
        GeneID: Label of the column to inspect.

    Returns:
        Whatever ``.max()`` yields for the selected column.
    """
    gene_column = TranscriptomicDF[GeneID]
    return gene_column.max()
33
def worker(a, b, result, row):
    """Write row ``row`` of the matrix product ``a @ b`` into ``result``.

    Args:
        a: Left operand, indexable as ``a[row][k]``.
        b: Right operand, indexable as ``b[k][j]``; must be two-dimensional
            because the column count is taken from ``len(b[0])``.
        result: Output container, indexable as ``result[row][j]``; modified
            in place.
        row: Index of the row to compute.
    """
    column_count = len(b[0])
    inner_size = len(b)
    for col in range(column_count):
        # Accumulate left to right starting from 0, exactly as ``sum`` would.
        total = 0
        for k in range(inner_size):
            total += a[row][k] * b[k][col]
        result[row][col] = total
38
def worker_multi(a, b, result, start_index, end_index):
    """Write rows ``start_index`` .. ``end_index - 1`` of ``a @ b`` into ``result``.

    Args:
        a: Left operand, indexable as ``a[row][k]``.
        b: Right operand, indexable as ``b[k][j]``; must be two-dimensional
            because the column count is taken from ``len(b[0])``.
        result: Output container, indexable as ``result[row][j]``; modified
            in place.
        start_index: First row to compute (inclusive).
        end_index: Row at which to stop (exclusive).
    """
    column_count = len(b[0])
    inner = range(len(b))
    for row in range(start_index, end_index):
        lhs_row = a[row]
        out_row = result[row]
        for col in range(column_count):
            out_row[col] = sum(lhs_row[k] * b[k][col] for k in inner)
44
def rc_output(rc_weights, input_array):
    """Compute one reservoir update: ``max(rc_weights . input_array, 0)``.

    Args:
        rc_weights: Weight matrix exposing a ``.dot`` method.
        input_array: Current input / memory values passed to ``.dot``.

    Returns:
        The product with every negative entry replaced by zero.
    """
    pre_activation = rc_weights.dot(input_array)
    return np.maximum(pre_activation, 0)
48
# Upper bound on the number of worker processes started per reservoir update.
NUM_PROCESSES = 4


def split(a, n):
    """Lazily cut ``a`` into ``n`` contiguous slices of near-equal length.

    The first ``len(a) % n`` slices receive one extra element. When ``n``
    exceeds ``len(a)`` the trailing slices are empty.

    Args:
        a: Sliceable sequence with a length.
        n: Number of slices to produce; ``0`` raises ``ZeroDivisionError``.

    Returns:
        A generator yielding the ``n`` slices in order.
    """
    k, m = divmod(len(a), n)
    return (a[i * k + min(i, m):(i + 1) * k + min(i + 1, m)] for i in range(n))


def _fill_rows(weight_rows, inputs, shared, n_cols, first_row):
    """Child-process task: store ``weight_rows . inputs`` in the shared buffer.

    The NumPy view is rebuilt here from the shared ``multiprocessing.Array``
    so the writes land in shared memory under every start method; a view
    created in the parent and passed as an argument would be pickled as a
    private copy when processes are spawned rather than forked.
    """
    out = np.frombuffer(shared.get_obj()).reshape((-1, n_cols))
    out[first_row:first_row + len(weight_rows)] = weight_rows.dot(inputs)


def parallel_rc_output(rc_weights, input_array):
    """Compute ``max(rc_weights . input_array, 0)`` using worker processes.

    The rows of the weight matrix are divided into at most ``NUM_PROCESSES``
    contiguous chunks and each chunk is multiplied in its own process, which
    writes into a shared buffer. No lock is taken because the chunks are
    disjoint.

    Args:
        rc_weights: Two-dimensional weight matrix (array-like of numbers).
        input_array: Either a vector with one entry per weight column or a
            two-dimensional matrix with one row per weight column.

    Returns:
        A new array holding the rectified product. A vector input gives a
        vector with one entry per weight row; a matrix input gives a matrix
        of shape ``(weight rows, input columns)``.

    Raises:
        ValueError: If ``rc_weights`` is not two-dimensional or its column
            count differs from the length of ``input_array``.
        RuntimeError: If any worker process exits with a non-zero code.
    """
    weights = np.asarray(rc_weights, dtype=float)
    inputs = np.asarray(input_array, dtype=float)

    # Treat a plain vector as a single-column matrix and undo that on return.
    is_vector = inputs.ndim == 1
    if is_vector:
        inputs = inputs.reshape(-1, 1)

    if weights.ndim != 2 or inputs.ndim != 2:
        raise ValueError("rc_weights must be 2-D and input_array 1-D or 2-D")
    if weights.shape[1] != inputs.shape[0]:
        raise ValueError(
            f"shape mismatch: weights have {weights.shape[1]} columns "
            f"but input has {inputs.shape[0]} rows"
        )

    rows, n_cols = weights.shape[0], inputs.shape[1]
    if rows == 0 or n_cols == 0:
        empty = np.zeros((rows, n_cols))
        return empty[:, 0] if is_vector else empty

    shared = multiprocessing.Array('d', rows * n_cols)

    processes = []
    for chunk in split(range(rows), min(NUM_PROCESSES, rows)):
        p = multiprocessing.Process(
            target=_fill_rows,
            args=(
                weights[chunk.start:chunk.stop],
                inputs,
                shared,
                n_cols,
                chunk.start,
            ),
        )
        p.start()
        processes.append(p)

    for p in processes:
        p.join()

    # A crashed worker would otherwise leave silent zeros in its rows.
    failed = [p.exitcode for p in processes if p.exitcode != 0]
    if failed:
        raise RuntimeError(f"worker process(es) failed with exit codes {failed}")

    result_np = np.frombuffer(shared.get_obj()).reshape((rows, n_cols))
    # np.maximum allocates a fresh array, so the result outlives the buffer.
    activated = np.maximum(result_np, 0)
    return activated[:, 0] if is_vector else activated
84
def input_padding(input_geneID, input_value, GeneLegend, initMem):
    """Build a reservoir input vector from a memory state and new input values.

    A float copy of ``initMem`` is made, then for every gene in
    ``input_geneID`` the entry at that gene's position in ``GeneLegend`` is
    overwritten with the matching ``input_value`` multiplied by the gene's
    maximum in the module-level ``TranscriptomicData`` table.

    Args:
        input_geneID: Genes that receive external input.
        input_value: Input values, read by position for each entry of
            ``input_geneID``; any extra trailing values are ignored.
        GeneLegend: List of gene names giving each gene's slot.
        initMem: Values copied to form the starting vector.

    Returns:
        The new float array; ``initMem`` itself is not modified.
    """
    padded = np.array(initMem, dtype=float)

    for position, gene in enumerate(input_geneID):
        scaled = input_value[position] * getMaxExpression(TranscriptomicData, gene)
        slot = GeneLegend.index(gene)
        padded[slot] = scaled

    return padded
93
94
# Run the pipeline only when this file is executed as a script. Under the
# spawn/forkserver start methods every worker process re-imports the main
# module, and without this guard each worker would start the pipeline again.
if __name__ == "__main__":
    # Pick 15 random genes from GeneLegend to receive the image values.
    InputGenes = np.random.choice(GeneLegend, 15, replace=False)
    print("InputGene", InputGenes)

    # NOTE(review): the indentation of this section was lost in the copy this
    # was reconstructed from. The nesting below (one appended output per
    # image, a single pickle written after all images) is an assumption;
    # confirm against the original file.
    output_matrix = []
    # Only the first image is processed; use range(len(imageData)) for all.
    for i in range(0, 1):
        imgTemp = imageData.iloc[i].values
        # Pixel values are used as stored; scaling by 1/255 is not applied.
        imgDims = np.sqrt(len(imgTemp)).astype(int)
        img = imgTemp.reshape(imgDims, imgDims)
        print(img)

        # Each image starts from the initial memory; afterwards the previous
        # output, scaled by MemoryValue, is carried into the next column.
        memory = InitialMemory
        for j in range(imgDims):
            valueArray = img[:, j]
            print(valueArray)

            input_array = input_padding(InputGenes, valueArray, GeneLegend, memory)
            Output = parallel_rc_output(WeightMatrix, input_array)
            memory = Output * MemoryValue
        output_matrix.append(Output)

    with open('ImageClassification/output_matrix.pkl', 'wb') as f:
        pk.dump(output_matrix, f)
125
126	#to csv
127	#output_matrixDF = pd.DataFrame(output_matrix)
128
129	#add column names
130	#output_matrixDF.columns = GeneLegend
131	#print(output_matrixDF.head())
132	#output_matrixDF.to_csv('MNISTData/test' + '1' + '.csv')