This is my code. I have tried several approaches and cannot resolve the problem, and I would like to know what a good way to handle this is, ideally the best way.
I have long suspected that I am adding a new node to the graph somewhere, but I cannot find where. I have never run into this problem before, and I also tried clearing memory manually, without success. One more question: while this code runs, my GPU utilization is only about 2%. Can the GPU utilization be improved?
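Before the full code (below), here is a minimal, self-contained sketch of two ways to catch graph growth like this; the toy model in it is only for illustration and is not part of my code:

import tensorflow as tf

# Toy graph, only to demonstrate the two leak checks below.
x = tf.placeholder(tf.float32, shape=(None, 600))
w = tf.Variable(tf.zeros((600, 2)))
y = tf.matmul(x, w)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    # Check 1: freeze the graph; any op created afterwards raises RuntimeError.
    sess.graph.finalize()
    # tf.reduce_mean(y)  # uncommenting this would now fail loudly
    # Check 2: the op count should stay constant across training steps.
    print("ops in graph:", len(tf.get_default_graph().get_operations()))

My full code: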
import tensorflow as tf
import numpy as np
import sklearn.metrics
import copy
import time
import gc
class DataDeal():
    def __init__(self, batch_size):
        self._batch_size = batch_size

    def readTfrecord(self, file, epoch=None, isTrain=True):
        fileQueue = tf.train.string_input_producer(string_tensor=[file], num_epochs=epoch, shuffle=True)
        reader = tf.TFRecordReader()
        _, example_series = reader.read(queue=fileQueue)
        features = tf.parse_single_example(serialized=example_series,
                                           features={"label": tf.FixedLenFeature([], tf.string),
                                                     "data_raw": tf.FixedLenFeature([], tf.string)})
        data = tf.decode_raw(features["data_raw"], out_type=tf.float32)
        label = tf.decode_raw(features["label"], out_type=tf.int32)
        data = tf.reshape(data, shape=(1, 600))
        label = tf.reshape(label, shape=(1, 2))
        if isTrain:
            data, label = tf.train.shuffle_batch([data, label], batch_size=self._batch_size, capacity=2000,
                                                 min_after_dequeue=500, num_threads=3)
        else:
            assert epoch is None, "epoch must be None for the test set"
            data, label = tf.train.batch([data, label], batch_size=500)
        return (data, label)
class DNN(DataDeal):
    def __init__(self, layer_shape, epoch, eta, batch_size, norm=True, L2_loss=True):
        super(DNN, self).__init__(batch_size)
        self._layer_shape = [600] + layer_shape + [2]
        self.batch_size = batch_size
        self._norm = norm
        self.L2_loss = L2_loss
        self.eta = eta
        self.epoch = epoch
        with tf.name_scope("input"):
            self.x = tf.placeholder(dtype=tf.float32, shape=(None, 600), name="input_X")
            self.y = tf.placeholder(dtype=tf.int32, shape=(None, 2), name="label")
            self.keep_pro = tf.placeholder(dtype=tf.float32, name="keep_pro")
        self._y = tf.cast(self.y, dtype=tf.float32)
        with tf.variable_scope("dnn"):
            self._W = [tf.get_variable(name="layerW_%d" % index, shape=(x[0], x[1]), dtype=tf.float32,
                                       initializer=tf.truncated_normal_initializer())
                       for index, x in enumerate(zip(self._layer_shape[:-1], self._layer_shape[1:]))]
            self._B = [tf.get_variable(name="layerB_%d" % index, shape=(1, x1), dtype=tf.float32,
                                       initializer=tf.truncated_normal_initializer())
                       for index, x1 in enumerate(self._layer_shape[1:])]
        self.global_step = tf.get_variable(name="global_step", dtype=tf.int32, initializer=0,
                                           trainable=False)  # global step counter
    def batch_normalization(self, input_):
        # Note: moments over axes [0, 1] yield a single scalar mean/var for the
        # whole batch; per-feature statistics would use axes=[0].
        mean, var = tf.nn.moments(input_, [0, 1], keep_dims=True)
        shift = tf.get_variable(shape=[1, input_.get_shape().as_list()[-1]], dtype=tf.float32,
                                initializer=tf.zeros_initializer(), name="shift_1")
        scale = tf.get_variable(shape=[1, input_.get_shape().as_list()[-1]], dtype=tf.float32,
                                initializer=tf.constant_initializer(1.0), name="scale_1")
        # shift = tf.Variable(initial_value=tf.truncated_normal(shape=[1, input_.get_shape().as_list()[-1]], dtype=tf.float32))
        epsilon = 1e-3
        output = tf.nn.batch_normalization(input_, mean, var, shift, scale, epsilon)
        return output
    def run(self):
        first_output = tf.add(tf.matmul(self.x, self._W[0]), self._B[0])
        if self._norm:
            first_output = tf.nn.relu(self.batch_normalization(input_=first_output))  # BN + ReLU
        else:
            first_output = tf.nn.sigmoid(first_output)
        for i in range(1, len(self._W) - 1):
            if self._norm:
                first_output = tf.add(tf.matmul(first_output, self._W[i]), self._B[i])
                with tf.variable_scope("layer%d" % i):
                    first_output = tf.nn.relu(self.batch_normalization(input_=first_output))
            else:
                first_output = tf.sigmoid(tf.add(tf.matmul(first_output, self._W[i]), self._B[i]))
            first_output = tf.nn.dropout(first_output, keep_prob=self.keep_pro)
        last_output = tf.add(tf.matmul(first_output, self._W[-1]), self._B[-1])
        return last_output
if __name__ == "__main__":
    file_train = "D:/traindata/XIEBO/train.tfrecords"
    file_test = "D:/traindata/XIEBO/test.tfrecords"
    dnn_object = DNN(layer_shape=[512, 128],
                     epoch=5000,
                     eta=0.001,
                     batch_size=128,
                     norm=False,
                     L2_loss=True)
    data, label = dnn_object.readTfrecord(file=file_train, epoch=dnn_object.epoch, isTrain=True)
    data_test, label_test = dnn_object.readTfrecord(file=file_test, epoch=None, isTrain=False)
    # softmax_cross_entropy_with_logits expects raw logits, so keep them separate
    # from the softmaxed predictions instead of applying softmax twice.
    logits = dnn_object.run()
    output = tf.nn.softmax(logits)
    tvars = tf.trainable_variables()  # already returns a fresh list; no copy needed
    # Use the float-cast labels (_y) that the class prepares for this purpose.
    loss = tf.nn.softmax_cross_entropy_with_logits(labels=dnn_object._y, logits=logits)
    if dnn_object.L2_loss:
        loss_l2 = 0.0005 * tf.reduce_sum([tf.nn.l2_loss(x) for x in tvars])
        loss += loss_l2
    train_first_op = tf.train.GradientDescentOptimizer(learning_rate=dnn_object.eta).minimize(
        loss=loss, global_step=dnn_object.global_step)
    variable_averages = tf.train.ExponentialMovingAverage(decay=0.999, num_updates=dnn_object.global_step)
    variable_averages_op = variable_averages.apply(tf.trainable_variables())
    with tf.control_dependencies([train_first_op]):
        train_step = tf.group(variable_averages_op)
    with tf.Session() as sess:
        sess.run(tf.local_variables_initializer())
        sess.run(tf.global_variables_initializer())
        # sess.graph.finalize()
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)
        data_test, label_test = sess.run([data_test, label_test])
        data_test = np.reshape(data_test, newshape=(-1, 600))
        label_test = np.reshape(label_test, newshape=(-1, 2))
        step = 0
        try:
            while not coord.should_stop():
                data_batch, label_batch = sess.run([data, label])
                data_batch = np.reshape(data_batch, newshape=(-1, 600))
                label_batch = np.reshape(label_batch, newshape=(-1, 2))
                # WARNING: tf.reduce_mean(loss) inside sess.run adds a new op to
                # the graph on every iteration -- this is the leak described below.
                _, pred_label, loss_look = sess.run([train_step, output, tf.reduce_mean(loss)], feed_dict={
                    dnn_object.x: data_batch, dnn_object.y: label_batch, dnn_object.keep_pro: 0.8})
                if step != 0 and step % 100 == 0:
                    print("now at step %d, training acc is %s, training loss is %s" % (
                        step, np.mean(np.equal(np.argmax(pred_label, axis=1), np.argmax(label_batch, axis=1))),
                        loss_look))
                    del data_batch, label_batch, pred_label, loss_look
                    gc.collect()
                    pred_label, loss_look = sess.run([output, tf.reduce_mean(loss)], feed_dict={
                        dnn_object.x: data_test, dnn_object.y: label_test, dnn_object.keep_pro: 1.0})
                    print("now at step %d, testing acc is %s, testing loss is %s" % (
                        step, np.mean(np.equal(np.argmax(pred_label, axis=1), np.argmax(label_test, axis=1))),
                        loss_look))
                    del pred_label, loss_look
                    gc.collect()
                    print(step, time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(time.time())))
                else:
                    del data_batch, label_batch, pred_label, loss_look
                    gc.collect()
                step += 1
        except tf.errors.OutOfRangeError as e:
            print(e, time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(time.time())))
        finally:
            coord.request_stop()
            coord.join(threads=threads)
            print("over", time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(time.time())))
I just solved this problem. The cause was this line:

_, pred_label, loss_look = sess.run([train_step, output, tf.reduce_mean(loss)], feed_dict={dnn_object.x: data_batch, dnn_object.y: label_batch, dnn_object.keep_pro: 0.8})

While the model trains, tf.reduce_mean(loss) builds a new node (op) in the graph on every iteration. Now I would like to know how to improve GPU utilization, or whether it simply cannot be improved by hand.
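For the record, the standard remedy is to build every op exactly once, at graph-construction time, and only ever pass the resulting handles to sess.run. A minimal sketch against the code above (names taken from it; the rest of the loop is unchanged):

# Built once, before the Session, alongside train_step and output:
mean_loss = tf.reduce_mean(loss)

# Inside the loop, run the pre-built handle instead of tf.reduce_mean(loss):
_, pred_label, loss_look = sess.run(
    [train_step, output, mean_loss],
    feed_dict={dnn_object.x: data_batch, dnn_object.y: label_batch,
               dnn_object.keep_pro: 0.8})

Re-enabling the sess.graph.finalize() line that is commented out above would have turned this leak into an immediate RuntimeError instead of slow memory growth.

As for GPU utilization: each step currently dequeues a batch into Python (sess.run([data, label])), reshapes it in numpy, and feeds it back in through feed_dict, so the GPU idles while every batch round-trips through host memory. On top of that, a 600-512-128-2 MLP at batch size 128 is very little work per step, so some of the low utilization is inherent to the model size. Wiring the queue tensors straight into the model and raising the batch size should both help. A sketch of the idea only (it assumes a reworked run() that takes its input tensor as an argument, which the class above does not currently have):

data, label = dnn_object.readTfrecord(file=file_train, epoch=dnn_object.epoch, isTrain=True)
x_in = tf.reshape(data, (-1, 600))                      # stays in the TF graph: no numpy copy, no feed_dict
y_in = tf.reshape(tf.cast(label, tf.float32), (-1, 2))
# logits = dnn_object.run(x_in)                         # hypothetical reworked signature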