From 56e1b2440728773ade3b8a384ec887834be6f856 Mon Sep 17 00:00:00 2001 From: dmsuehir Date: Mon, 19 Dec 2016 09:55:55 -0800 Subject: [PATCH] Renaming the PCA model scala parameter from right_singular_vectors to rightSingularVectors --- .../models/dimreduction/pca/PcaModel.scala | 20 +++++++++---------- .../dimreduction/pca/SvdAlgorithm.scala | 6 +++--- .../models/dimreduction/PcaModelTest.scala | 2 +- .../dimensionality_reduction/__init__.py | 17 ---------------- python/daaltk/models/dimreduction/__init__.py | 3 ++- python/daaltk/models/dimreduction/pca.py | 2 +- 6 files changed, 17 insertions(+), 33 deletions(-) delete mode 100644 python/daaltk/models/dimensionality_reduction/__init__.py diff --git a/daaltk-core/src/main/scala/org/trustedanalytics/daaltk/models/dimreduction/pca/PcaModel.scala b/daaltk-core/src/main/scala/org/trustedanalytics/daaltk/models/dimreduction/pca/PcaModel.scala index cd94914..83db01a 100644 --- a/daaltk-core/src/main/scala/org/trustedanalytics/daaltk/models/dimreduction/pca/PcaModel.scala +++ b/daaltk-core/src/main/scala/org/trustedanalytics/daaltk/models/dimreduction/pca/PcaModel.scala @@ -88,7 +88,7 @@ object PcaModel extends TkSaveableObject { m.meanCentered, new MllibDenseVector(m.meanVector), new MllibDenseVector(m.singularValues), - new DenseMatrix(m.vFactorRows, m.vFactorCols, m.right_singular_vectors), + new DenseMatrix(m.vFactorRows, m.vFactorCols, m.rightSingularVectors), m.leftSingularMatrix) // Create PrincipalComponentsModel to return @@ -141,8 +141,8 @@ case class PcaModel(svdData: SvdData) extends Serializable with Model with DaalM /** * Right singular vectors of the specified columns in the input frame */ - def right_singular_vectors: Array[Array[Double]] = { - val lists = svdData.right_singular_vectors.toListOfList() + def rightSingularVectors: Array[Array[Double]] = { + val lists = svdData.rightSingularVectors.toListOfList() lists.map(list => list.toArray).toArray } @@ -192,7 +192,7 @@ case class PcaModel(svdData: SvdData) 
extends Serializable with Model with DaalM predictColumns, meanCentered, columnMeans) - val principalComponents = PrincipalComponentsFunctions.computePrincipalComponents(svdData.right_singular_vectors, predictC, indexedRowMatrix) + val principalComponents = PrincipalComponentsFunctions.computePrincipalComponents(svdData.rightSingularVectors, predictC, indexedRowMatrix) val pcaColumns = for (i <- 1 to predictC) yield Column("p_" + i.toString, DataTypes.float64) val (componentColumns, components) = tSquaredIndex match { @@ -223,9 +223,9 @@ case class PcaModel(svdData: SvdData) extends Serializable with Model with DaalM svdData.meanCentered, svdData.meanVector.toArray, svdData.singularValues.toArray, - svdData.right_singular_vectors.toArray, - svdData.right_singular_vectors.numRows, - svdData.right_singular_vectors.numCols, + svdData.rightSingularVectors.toArray, + svdData.rightSingularVectors.numRows, + svdData.rightSingularVectors.numCols, svdData.leftSingularMatrix) TkSaveLoad.saveTk(sc, path, PcaModel.formatId, PcaModel.currentFormatVersion, tkMetadata) } @@ -243,7 +243,7 @@ case class PcaModel(svdData: SvdData) extends Serializable with Model with DaalM val meanCenteredVector: Array[Double] = (new DenseVector(x) - new DenseVector(columnMeans.toArray)).toArray inputVector = new MllibDenseVector(meanCenteredVector) } - val y = new MllibDenseMatrix(1, inputVector.size, inputVector.toArray).multiply(svdData.right_singular_vectors.asInstanceOf[MllibDenseMatrix]) + val y = new MllibDenseMatrix(1, inputVector.size, inputVector.toArray).multiply(svdData.rightSingularVectors.asInstanceOf[MllibDenseMatrix]) val yArray: Array[Double] = y.values var t_squared_index: Double = 0.0 for (i <- 0 until k) { @@ -288,7 +288,7 @@ case class PcaModel(svdData: SvdData) extends Serializable with Model with DaalM * @param meanCentered Indicator whether the columns were mean centered for training * @param meanVector Means of the columns * @param singularValues Singular values of the 
specified columns in the input frame - * @param right_singular_vectors Right singular vectors of the specified columns in the input frame + * @param rightSingularVectors Right singular vectors of the specified columns in the input frame * @param vFactorRows Number of rows in vFactor matrix * @param vFactorCols Number of columns in vFactor matrix * @param leftSingularMatrix Optional RDD with left singular vectors of the specified columns in the input frame @@ -298,7 +298,7 @@ case class PrincipalComponentsTkMetaData(k: Int, meanCentered: Boolean, meanVector: Array[Double], singularValues: Array[Double], - right_singular_vectors: Array[Double], + rightSingularVectors: Array[Double], vFactorRows: Int, vFactorCols: Int, leftSingularMatrix: Option[RDD[Vector]]) extends Serializable \ No newline at end of file diff --git a/daaltk-core/src/main/scala/org/trustedanalytics/daaltk/models/dimreduction/pca/SvdAlgorithm.scala b/daaltk-core/src/main/scala/org/trustedanalytics/daaltk/models/dimreduction/pca/SvdAlgorithm.scala index 3a01f7d..994ef40 100644 --- a/daaltk-core/src/main/scala/org/trustedanalytics/daaltk/models/dimreduction/pca/SvdAlgorithm.scala +++ b/daaltk-core/src/main/scala/org/trustedanalytics/daaltk/models/dimreduction/pca/SvdAlgorithm.scala @@ -221,7 +221,7 @@ case class PrincipalComponentsData(k: Int, * @param meanCentered Indicator whether the columns were mean centered for training * @param meanVector Means of the columns * @param singularValues Singular values of the specified columns in the input frame - * @param right_singular_vectors Right singular vectors of the specified columns in the input frame + * @param rightSingularVectors Right singular vectors of the specified columns in the input frame * @param leftSingularMatrix Optional RDD with left singular vectors of the specified columns in the input frame */ case class SvdData(k: Int, @@ -229,7 +229,7 @@ case class SvdData(k: Int, meanCentered: Boolean, meanVector: Vector, singularValues: Vector, - 
right_singular_vectors: Matrix, + rightSingularVectors: Matrix, leftSingularMatrix: Option[RDD[Vector]]) { require(observationColumns != null && observationColumns.nonEmpty, "observationColumns must not be null nor empty") require(k >= 1, "number of Eigen values to use must be greater than equal to 1") @@ -245,7 +245,7 @@ case class SvdData(k: Int, meanCentered, meanVector, singularValues, - right_singular_vectors) + rightSingularVectors) } } diff --git a/daaltk-core/src/test/scala/org/trustedanalytics/daaltk/models/dimreduction/PcaModelTest.scala b/daaltk-core/src/test/scala/org/trustedanalytics/daaltk/models/dimreduction/PcaModelTest.scala index f66c4a8..a46cbd2 100644 --- a/daaltk-core/src/test/scala/org/trustedanalytics/daaltk/models/dimreduction/PcaModelTest.scala +++ b/daaltk-core/src/test/scala/org/trustedanalytics/daaltk/models/dimreduction/PcaModelTest.scala @@ -37,7 +37,7 @@ class PcaModelTest extends TestingSparkContextWordSpec with Matchers { Column("5", DataTypes.float32), Column("6", DataTypes.float32))) - "PrincipalComponentsModel" should { + "PcaModel" should { "create a DAAL PrincipalComponentsModel train and predict" in { val rdd = sparkContext.parallelize(frameData) diff --git a/python/daaltk/models/dimensionality_reduction/__init__.py b/python/daaltk/models/dimensionality_reduction/__init__.py deleted file mode 100644 index cb7fdb5..0000000 --- a/python/daaltk/models/dimensionality_reduction/__init__.py +++ /dev/null @@ -1,17 +0,0 @@ -# vim: set encoding=utf-8 - -# Copyright (c) 2016 Intel Corporation  -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -#       http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - diff --git a/python/daaltk/models/dimreduction/__init__.py b/python/daaltk/models/dimreduction/__init__.py index 9b363b1..cb7fdb5 100644 --- a/python/daaltk/models/dimreduction/__init__.py +++ b/python/daaltk/models/dimreduction/__init__.py @@ -13,4 +13,5 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -# \ No newline at end of file +# + diff --git a/python/daaltk/models/dimreduction/pca.py b/python/daaltk/models/dimreduction/pca.py index e5fa6f7..2d7affa 100644 --- a/python/daaltk/models/dimreduction/pca.py +++ b/python/daaltk/models/dimreduction/pca.py @@ -246,7 +246,7 @@ def right_singular_vectors(self): """ Right singular vectors of the specified columns in the input frame """ - return [list(i) for i in list(self._scala.right_singular_vectors())] + return [list(i) for i in list(self._scala.rightSingularVectors())] def predict(self, frame, mean_centered=True, t_squared_index=False, observation_columns=None, c=None): """