Linear regression

Goal

Build a linear model that predicts the target value y from the input x.

Data

  • x: birth_rate
  • y: life_expectancy
  • M: 190 (number of samples)

Sample rows:

Country  Birth_rate  Life_expectancy
Vietnam  1.822       74.828243902
Vanuatu  3.869       70.819487805
Tonga    3.911       72.150658537

Plan

  • Load the data.
  • Define the input and target.
  • Create the model.
  • Define the forward model.
  • Define the optimizer.
    • Define the loss function.
  • Train the model.
  • Optimize (minimize) the loss function.
  • Measure performance.
  • Predict on the data.
  • Compute the final cost.

Model

  • Linear regression.

$f(x) = w x + b$

$loss(x^{(i)}, y^{(i)}) = (y^{(i)} - \hat y^{(i)})^2$

$cost(\theta) = \sum_{i=1}^{M} loss(x^{(i)}, y^{(i)})$
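
The optimizer in the code below minimizes this loss with gradient descent. For a single sample, differentiating the squared error gives the update rule (with learning rate $\alpha$):

$\frac{\partial loss}{\partial w} = -2x(y - \hat y), \qquad \frac{\partial loss}{\partial b} = -2(y - \hat y)$

$w \leftarrow w - \alpha \frac{\partial loss}{\partial w}, \qquad b \leftarrow b - \alpha \frac{\partial loss}{\partial b}$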

Code

Use placeholders

import pandas as pd
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt

df = pd.read_csv('./examples/data/birth_life_2010.txt', delimiter='\t')
df.describe()  # quick look at summary statistics of the two columns
df.shape[0]    # number of samples, M = 190
input_label = 'Birthrate'
target_label = 'Lifeexpectancy'

# Scalar placeholders: one (x, y) pair is fed in per training step.
X = tf.placeholder(tf.float32, name='X')
y = tf.placeholder(tf.float32, name='y')
W = tf.get_variable('w', initializer=tf.constant(0.0))
b = tf.get_variable('b', initializer=tf.constant(0.0))
init_variables = tf.global_variables_initializer()

# Forward model, squared-error loss, and one gradient-descent step.
y_hat = tf.multiply(W, X) + b
loss = tf.square(y - y_hat)  # matches loss = (y - y_hat)^2 from the model section
optimizer = tf.train.GradientDescentOptimizer(learning_rate=1e-2).minimize(loss)

with tf.Session() as sess:
    sess.run(init_variables)
    writer = tf.summary.FileWriter('./graphs/linear_reg2', sess.graph)
    for epoch in range(100):
        cost = 0
        # One gradient step per sample; cost accumulates the epoch's total loss.
        for i in range(df.shape[0]):
            _, loss_out = sess.run([optimizer, loss], feed_dict={X: df[input_label][i], y: df[target_label][i]})
            cost += loss_out
        W_out, b_out = sess.run([W, b])
        print(W_out, b_out, cost)
    writer.close()


# Plot the data and the fitted regression line.
plt.scatter(df[input_label], df[target_label])
x_min = df[input_label].min()
x_max = df[input_label].max()
plt.plot([x_min, x_max], [W_out * x_min + b_out, W_out * x_max + b_out])
plt.show()
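
As a sanity check, the values that gradient descent converges to can be compared against the closed-form least-squares fit. A minimal sketch, reusing df, input_label, and target_label from the script above (np.polyfit with deg=1 returns the slope and intercept):

# Closed-form least-squares fit for comparison with the trained W_out, b_out.
w_ls, b_ls = np.polyfit(df[input_label].values, df[target_label].values, deg=1)
print('least squares: w = %f, b = %f' % (w_ls, b_ls))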

Use tf.data.Dataset

import pandas as pd
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt

df = pd.read_csv('./examples/data/birth_life_2010.txt', delimiter='\t')
df.describe()  # quick look at summary statistics
# Build a tf.data pipeline that yields one (x, y) pair per step; no placeholders needed.
dataset = tf.data.Dataset.from_tensor_slices((df['Birthrate'], df['Lifeexpectancy']))
iterator = dataset.make_initializable_iterator()
X, y = iterator.get_next()

# Sanity check: initialize the iterator and pull the first (x, y) pair.
# with tf.Session() as sess:
#     sess.run(iterator.initializer)
#     sess.run(iterator.get_next())

# The pandas columns are float64, so the variables must use the same dtype.
W = tf.get_variable('weights', initializer=tf.constant(0.0, dtype=tf.float64))
b = tf.get_variable('bias', initializer=tf.constant(0.0, dtype=tf.float64))
y_hat = W * X + b
loss = tf.square(y - y_hat)
optimizer = tf.train.GradientDescentOptimizer(learning_rate=1e-3).minimize(loss)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for epoch in range(100):
        sess.run(iterator.initializer)  # rewind the dataset at the start of each epoch
        cost = 0
        try:
            while True:  # iterate until the dataset is exhausted
                _, out_loss = sess.run([optimizer, loss])
                cost += out_loss
        except tf.errors.OutOfRangeError:
            pass
        out_W, out_b = sess.run([W, b])
        print('epoch %d out_W %f out_b %f cost %f' % (epoch, out_W, out_b, cost))
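
Both scripts above take one gradient step per sample. A minimal standalone sketch of mini-batch training with the same pipeline, assuming the same file and column names (the batch size of 32 is an arbitrary choice; shuffle and batch are standard Dataset methods):

import pandas as pd
import tensorflow as tf

df = pd.read_csv('./examples/data/birth_life_2010.txt', delimiter='\t')

# Shuffle the rows each epoch and group them into mini-batches of 32.
dataset = tf.data.Dataset.from_tensor_slices((df['Birthrate'], df['Lifeexpectancy']))
dataset = dataset.shuffle(buffer_size=df.shape[0]).batch(32)
iterator = dataset.make_initializable_iterator()
X, y = iterator.get_next()  # X and y are now vectors, one entry per batch element

W = tf.get_variable('weights', initializer=tf.constant(0.0, dtype=tf.float64))
b = tf.get_variable('bias', initializer=tf.constant(0.0, dtype=tf.float64))
y_hat = W * X + b
# Average the per-sample squared errors so the gradient scale
# does not depend on the batch size.
loss = tf.reduce_mean(tf.square(y - y_hat))
optimizer = tf.train.GradientDescentOptimizer(learning_rate=1e-3).minimize(loss)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for epoch in range(100):
        sess.run(iterator.initializer)
        cost = 0
        try:
            while True:
                _, out_loss = sess.run([optimizer, loss])
                cost += out_loss
        except tf.errors.OutOfRangeError:
            pass
        print('epoch %d cost %f' % (epoch, cost))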

Resource