Tensorflow multi layer Regression

티스토리 뷰

Tensorflow Step By Step

Tensorflow multi layer Regression

꿈이있는 2017. 2. 26. 00:16

이번에는 층을 늘려서 Regression을 해 보겠습니다.

층을 늘리기 위해서 마지막 Wx+b 층 전까지 Activation Function을 사용해서

네트워크들을 연결 해 주었습니다.

Activation 은 Relu를 사용 했습니다.

ReLU를 사용하면서부터 Vanishing gradient라는 현상을 피할 수 있어서

학습 layer를 깊게 쌓을 수 있게 되었다고 합니다.

그림으로 표현해 보면 아래와 같습니다.

그리고 이렇게 층이 많아지면 W,B 도 같이 많아지게되고

이러한 변수들의 초기값을 정하는 것이 중요한 문제가 됩니다.

왜냐하면 위의 그림만 하더라도 W 값이 19*50 + 50*50*18 + 50*1 개가 되고

초기값이 적절하게 정해지지 못하면 학습 시간이 많이 걸리게 되기 때문 입니다.

이런 Weight initialize 방법또한 여러가지가 있고

계속 더 좋은 방법들이 발견 되고 있습니다.

오늘은 xavier initialize를 사용 해보았습니다.

실제로 해 보면 이런 initialize를 사용하고 사용하지 않고에 따라

초기에 시작하는 cost의 값이 많이 차이가 납니다.

사용하지 않는 경우에는 cost가 4 이상에서 시작하지만 (layer가 많을 수록 보통 커집니다.)

사용하는 경우에서는 cost가 2 이하에서 시작되는 것을 볼 수 있었습니다.

Activation 에 대한 내용이나 Weight 초기화에 대해서

저는 김성훈 교수님의 모두를 위한 딥러닝 강좌가 도움이 많이 되었습니다.

코드는 아래와 같습니다.

import tensorflow as tf

import numpy as np

import math

def xavier_init(n_inputs, n_outputs, uniform=True):
    """Build a Xavier-style weight initializer for a fully connected layer.

    Args:
        n_inputs: Number of input units feeding the layer (fan-in).
        n_outputs: Number of output units of the layer (fan-out).
        uniform: If True, return a uniform initializer; otherwise return a
            truncated-normal initializer.

    Returns:
        A TensorFlow initializer suitable for ``tf.get_variable``.
    """
    fan_total = n_inputs + n_outputs

    if not uniform:
        # 3 gives approximately the same limits as the uniform case, since
        # the truncated normal re-picks values that fall more than
        # 2 standard deviations from the mean.
        sigma = math.sqrt(3.0 / fan_total)
        return tf.truncated_normal_initializer(stddev=sigma)

    # 6 was used in the paper.
    limit = math.sqrt(6.0 / fan_total)
    return tf.random_uniform_initializer(-limit, limit)

# Network inputs: 19 features per example, 1 regression target per example.
x = tf.placeholder(tf.float32, [None, 19])
y_ = tf.placeholder(tf.float32, [None, 1])

# Layer widths: 19 inputs -> 19 hidden layers of 50 units -> 1 output.
_layer_dims = [19] + [50] * 19 + [1]
# (fan_in, fan_out) for each of the 20 weight matrices.
_fan_pairs = list(zip(_layer_dims[:-1], _layer_dims[1:]))

# Weight matrices "W1".."W20", each initialized with xavier_init.
# Created before the biases so the variable creation order is unchanged.
_weights = [
    tf.get_variable("W%d" % layer_no,
                    shape=[fan_in, fan_out],
                    initializer=xavier_init(fan_in, fan_out))
    for layer_no, (fan_in, fan_out) in enumerate(_fan_pairs, start=1)
]
(W1, W2, W3, W4, W5, W6, W7, W8, W9, W10,
 W11, W12, W13, W14, W15, W16, W17, W18, W19, W20) = _weights

# Bias vectors, one per layer, drawn from a standard normal distribution.
_biases = [tf.Variable(tf.random_normal([fan_out])) for _, fan_out in _fan_pairs]
(B1, B2, B3, B4, B5, B6, B7, B8, B9, B10,
 B11, B12, B13, B14, B15, B16, B17, B18, B19, B20) = _biases

# Chain the 19 hidden layers: each one is relu(previous @ W + B).
_hidden_params = [
    (W1, B1), (W2, B2), (W3, B3), (W4, B4), (W5, B5),
    (W6, B6), (W7, B7), (W8, B8), (W9, B9), (W10, B10),
    (W11, B11), (W12, B12), (W13, B13), (W14, B14), (W15, B15),
    (W16, B16), (W17, B17), (W18, B18), (W19, B19),
]

_activations = []
_layer_input = x
for _weight, _bias in _hidden_params:
    # Hidden layer with ReLU activation.
    _layer_input = tf.nn.relu(tf.add(tf.matmul(_layer_input, _weight), _bias))
    _activations.append(_layer_input)

# Keep the per-layer names available at module level.
(L1, L2, L3, L4, L5, L6, L7, L8, L9, L10,
 L11, L12, L13, L14, L15, L16, L17, L18, L19) = _activations

# Final layer is a plain Wx + b: no activation (and no softmax) for regression.
hypothesis = tf.add(tf.matmul(L19, W20), B20)

# Loss: mean absolute error between the targets and the network output.
#
# The original computed sqrt(square(d)), which equals |d| but has an
# undefined gradient when d == 0 (the sqrt gradient is inf there and gets
# multiplied by the zero gradient of square, giving NaN). A single exact
# prediction in a batch would therefore poison every weight update.
# tf.abs yields the same forward value with a finite gradient at 0.
squared_deltas1 = tf.square(y_ - hypothesis)  # kept for backward compatibility
squared_deltas = tf.abs(y_ - hypothesis)

cost = tf.reduce_mean(squared_deltas)

# Adam with a small learning rate; `optimizer` is the training op.
optimizer = tf.train.AdamOptimizer(learning_rate=0.00001).minimize(cost)

# Scalar summary so the cost curve shows up in TensorBoard.
cost_sum = tf.summary.scalar("cost", cost)

def read_my_file_format(filename_queue):
    """Read one CSV record from the queue and split it into features/label.

    Each record is expected to have 20 comma-separated numeric columns: the
    first 19 are the input features and the last one is the regression target.
    The first line of every file is skipped as a header.

    Args:
        filename_queue: A queue of CSV filenames, as produced by
            ``tf.train.string_input_producer``.

    Returns:
        A ``(features, label)`` tuple of float32 tensors with shapes
        ``[19]`` and ``[1]`` respectively.
    """
    num_columns = 20

    reader = tf.TextLineReader(skip_header_lines=1)
    _, value = reader.read(filename_queue)

    # One float32 default per CSV column. decode_csv returns exactly as many
    # tensors as there are defaults, so this list must have 20 entries
    # (the original overwrote it with only 3, which broke the unpacking).
    record_defaults = [tf.constant([1], dtype=tf.float32)
                       for _ in range(num_columns)]
    columns = tf.decode_csv(value, record_defaults=record_defaults)

    # tf.stack replaces tf.pack, which was removed in TensorFlow 1.0.
    features = tf.stack(columns[:-1])
    label = tf.stack(columns[-1:])
    return features, label

def input_pipeline(batch_size, num_epochs):
    """Create shuffled mini-batch tensors read from the training CSV file.

    Args:
        batch_size: Number of examples per batch.
        num_epochs: Number of passes over the input file that the filename
            queue is asked to produce.

    Returns:
        A ``(example_batch, label_batch)`` pair of batched tensors.
    """
    filename_queue = tf.train.string_input_producer(
        ["sampledata1999_2008_margin.csv"],
        num_epochs=num_epochs,
        shuffle=True)
    single_example, single_label = read_my_file_format(filename_queue)

    # Shuffle-buffer sizing: keep at least `min_after_dequeue` elements
    # queued, with room for three extra batches on top.
    min_after_dequeue = 10000
    queue_capacity = min_after_dequeue + 3 * batch_size

    features_batch, targets_batch = tf.train.shuffle_batch(
        [single_example, single_label],
        batch_size=batch_size,
        capacity=queue_capacity,
        min_after_dequeue=min_after_dequeue)
    return features_batch, targets_batch

# Training script: build the input pipeline, then run optimizer steps until
# the filename queue has produced the requested number of epochs.
examples, labels = input_pipeline(30000, 20)

# Log the summary and print the cost every N steps (1 = every step).
LOG_EVERY_N_STEPS = 1

i = 0

# Local variables must be initialized too: they hold the epoch counter used
# by string_input_producer(num_epochs=...).
init_op = tf.group(tf.global_variables_initializer(),
                   tf.local_variables_initializer())

sess = tf.Session()

merged = tf.summary.merge_all()
trainwriter = tf.summary.FileWriter("./board/custom", sess.graph)

# Initialize the variables (like the epoch counter).
sess.run(init_op)

print(W1.eval(session=sess))

# Start input enqueue threads.
coord = tf.train.Coordinator()
threads = tf.train.start_queue_runners(sess=sess, coord=coord)

try:
    while not coord.should_stop():
        i = i + 1

        # Pull one batch out of the queue, then train on it.
        example_batch, label_batch = sess.run([examples, labels])
        batch_feed = {x: example_batch, y_: label_batch}
        sess.run(optimizer, feed_dict=batch_feed)

        if i % LOG_EVERY_N_STEPS == 0:
            # Fetch the summary and the cost in a single run so the forward
            # pass is computed once; both reflect the post-update weights.
            summary, cost_value = sess.run([merged, cost], feed_dict=batch_feed)
            trainwriter.add_summary(summary, i)
            print(cost_value)
except tf.errors.OutOfRangeError:
    print('Done training -- epoch limit reached')
finally:
    # When done, ask the threads to stop.
    coord.request_stop()
    # Wait for threads to finish.
    coord.join(threads)
    # Flush pending summaries to disk and release the session.
    trainwriter.close()
    sess.close()

Graph를 살펴보면 기존보다 많은 연결들이 보입니다.

그렇지만 연결이 단순해서 사실 아직 텐서보드가 유용한지는 잘 모르겠습니다.

텐서플로우를 사용하면서 좋다고 느껴지는 점이 바로 자동으로 CPU를 병렬 연산 해 주는 부분 입니다.

병렬 처리를 위한 프로그램을 만들어보면 여러가지 어려운 점도 많고

막상 구현해도 속도가 생각했던 것 만큼 나오지 않는 경우도 많았는데

텐서플로우는 쉽게 병렬처리를 가능하게 해 줍니다.

학습중에 CPU 부하입니다.

학습 결과는

1 layer가 오차가 30% 정도 였고

7 layer가 25% 정도 였습니다. (20분 소요)

지금 20 Layer를 돌리고 있는데 시간이 꽤 걸릴 것 같습니다.

1 layer에서 7 layer로 늘렸을 때 5%가 향상된 것이 별것 아닌 것 같으면서도

복잡한 프로그램을 만든 것이 아니라 단순히 층을 늘린 것만으로도

향상이 있다는 것은 놀라운 일 인것 같습니다.

저작자표시 비영리 변경금지 (새창열림)

'Tensorflow Step By Step' 카테고리의 다른 글

Tensorflow 1 layer Regression (0)	2017.02.25
Tensorflow CSV File Read 2 (0)	2017.02.22
Tensorflow CSV File Read 1 (0)	2017.02.16
Tensorboard 사용하기 2 (0)	2017.02.12
Tensorboard 사용하기 1 (5)	2017.02.12

공유하기 링크

페이스북
카카오스토리
트위터

Total

Today

Yesterday

최근에 올라온 글

최근에 달린 댓글

TAG more

Tensorflow step by step

티스토리 뷰

Tensorflow multi layer Regression

'Tensorflow Step By Step' 카테고리의 다른 글

티스토리툴바