Discuss@GL4L

Linear Regression Model – Hands-on


#1

Estimate likely insurance expenses given the attributes of a customer.

import org.apache.spark.mllib.linalg.Vectors

import org.apache.spark.mllib.regression.LabeledPoint

import org.apache.spark.mllib.regression.LinearRegressionModel

import org.apache.spark.mllib.regression.LinearRegressionWithSGD

// Load and parse the data.
// `sc` is expected to be an existing SparkContext (e.g. the one spark-shell
// provides) -- it is not created in this script.
val data = sc.textFile("data/mllib/ridge-data/lpsa.data")

// Each input line is parsed as "<label>,<f1> <f2> ... <fn>":
//   - the text before the comma is the label,
//   - the text after the comma is a space-separated list of feature values.
// Plain ASCII quotes are required for the char literals; typographic quotes
// are not valid Scala and prevent the script from compiling.
val parsedData = data.map { line =>
  val parts = line.split(',')
  LabeledPoint(parts(0).toDouble, Vectors.dense(parts(1).split(' ').map(_.toDouble)))
}.cache() // cached because the RDD is reused for both training and evaluation

// Train the linear regression model with SGD on the parsed data set.
// The iteration count and step size are passed straight through to `train`.
val numIterations: Int = 100
val stepSize: Double = 1e-8

val model: LinearRegressionModel =
  LinearRegressionWithSGD.train(parsedData, numIterations, stepSize)

// Training error: pair every example's label with the model's prediction for
// its features, then average the squared differences.
val valuesAndPreds = parsedData.map(point => (point.label, model.predict(point.features)))

val MSE = valuesAndPreds.map { case (actual, predicted) =>
  math.pow(actual - predicted, 2)
}.mean()

println("training Mean Squared Error = " + MSE)

// Persist the trained model, then read it back from the same location.
val sameModel = {
  // Single source of truth for the location used by both save and load.
  val modelPath = "target/tmp/scalaLinearRegressionWithSGDModel"
  model.save(sc, modelPath)
  LinearRegressionModel.load(sc, modelPath)
}

Reference: http://spark.apache.org/docs/latest/mllib-linear-methods.html#linear-least-squares-lasso-and-ridge-regression