Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ object PcaModel extends TkSaveableObject {
m.meanCentered,
new MllibDenseVector(m.meanVector),
new MllibDenseVector(m.singularValues),
new DenseMatrix(m.vFactorRows, m.vFactorCols, m.right_singular_vectors),
new DenseMatrix(m.vFactorRows, m.vFactorCols, m.rightSingularVectors),
m.leftSingularMatrix)

// Create PrincipalComponentsModel to return
Expand Down Expand Up @@ -141,8 +141,8 @@ case class PcaModel(svdData: SvdData) extends Serializable with Model with DaalM
/**
* Right singular vectors of the specified columns in the input frame
*/
def right_singular_vectors: Array[Array[Double]] = {
val lists = svdData.right_singular_vectors.toListOfList()
def rightSingularVectors: Array[Array[Double]] = {
val lists = svdData.rightSingularVectors.toListOfList()

lists.map(list => list.toArray).toArray
}
Expand Down Expand Up @@ -192,7 +192,7 @@ case class PcaModel(svdData: SvdData) extends Serializable with Model with DaalM
predictColumns,
meanCentered,
columnMeans)
val principalComponents = PrincipalComponentsFunctions.computePrincipalComponents(svdData.right_singular_vectors, predictC, indexedRowMatrix)
val principalComponents = PrincipalComponentsFunctions.computePrincipalComponents(svdData.rightSingularVectors, predictC, indexedRowMatrix)

val pcaColumns = for (i <- 1 to predictC) yield Column("p_" + i.toString, DataTypes.float64)
val (componentColumns, components) = tSquaredIndex match {
Expand Down Expand Up @@ -223,9 +223,9 @@ case class PcaModel(svdData: SvdData) extends Serializable with Model with DaalM
svdData.meanCentered,
svdData.meanVector.toArray,
svdData.singularValues.toArray,
svdData.right_singular_vectors.toArray,
svdData.right_singular_vectors.numRows,
svdData.right_singular_vectors.numCols,
svdData.rightSingularVectors.toArray,
svdData.rightSingularVectors.numRows,
svdData.rightSingularVectors.numCols,
svdData.leftSingularMatrix)
TkSaveLoad.saveTk(sc, path, PcaModel.formatId, PcaModel.currentFormatVersion, tkMetadata)
}
Expand All @@ -243,7 +243,7 @@ case class PcaModel(svdData: SvdData) extends Serializable with Model with DaalM
val meanCenteredVector: Array[Double] = (new DenseVector(x) - new DenseVector(columnMeans.toArray)).toArray
inputVector = new MllibDenseVector(meanCenteredVector)
}
val y = new MllibDenseMatrix(1, inputVector.size, inputVector.toArray).multiply(svdData.right_singular_vectors.asInstanceOf[MllibDenseMatrix])
val y = new MllibDenseMatrix(1, inputVector.size, inputVector.toArray).multiply(svdData.rightSingularVectors.asInstanceOf[MllibDenseMatrix])
val yArray: Array[Double] = y.values
var t_squared_index: Double = 0.0
for (i <- 0 until k) {
Expand Down Expand Up @@ -288,7 +288,7 @@ case class PcaModel(svdData: SvdData) extends Serializable with Model with DaalM
* @param meanCentered Indicator whether the columns were mean centered for training
* @param meanVector Means of the columns
* @param singularValues Singular values of the specified columns in the input frame
* @param right_singular_vectors Right singular vectors of the specified columns in the input frame
* @param rightSingularVectors Right singular vectors of the specified columns in the input frame
* @param vFactorRows Number of rows in vFactor matrix
* @param vFactorCols Number of columns in vFactor matrix
* @param leftSingularMatrix Optional RDD with left singular vectors of the specified columns in the input frame
Expand All @@ -298,7 +298,7 @@ case class PrincipalComponentsTkMetaData(k: Int,
meanCentered: Boolean,
meanVector: Array[Double],
singularValues: Array[Double],
right_singular_vectors: Array[Double],
rightSingularVectors: Array[Double],
vFactorRows: Int,
vFactorCols: Int,
leftSingularMatrix: Option[RDD[Vector]]) extends Serializable
Original file line number Diff line number Diff line change
Expand Up @@ -221,15 +221,15 @@ case class PrincipalComponentsData(k: Int,
* @param meanCentered Indicator whether the columns were mean centered for training
* @param meanVector Means of the columns
* @param singularValues Singular values of the specified columns in the input frame
* @param right_singular_vectors Right singular vectors of the specified columns in the input frame
* @param rightSingularVectors Right singular vectors of the specified columns in the input frame
* @param leftSingularMatrix Optional RDD with left singular vectors of the specified columns in the input frame
*/
case class SvdData(k: Int,
observationColumns: Seq[String],
meanCentered: Boolean,
meanVector: Vector,
singularValues: Vector,
right_singular_vectors: Matrix,
rightSingularVectors: Matrix,
leftSingularMatrix: Option[RDD[Vector]]) {
require(observationColumns != null && observationColumns.nonEmpty, "observationColumns must not be null nor empty")
require(k >= 1, "number of Eigen values to use must be greater than equal to 1")
Expand All @@ -245,7 +245,7 @@ case class SvdData(k: Int,
meanCentered,
meanVector,
singularValues,
right_singular_vectors)
rightSingularVectors)
}
}

Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ class PcaModelTest extends TestingSparkContextWordSpec with Matchers {
Column("5", DataTypes.float32),
Column("6", DataTypes.float32)))

"PrincipalComponentsModel" should {
"PcaModel" should {

"create a DAAL PrincipalComponentsModel train and predict" in {
val rdd = sparkContext.parallelize(frameData)
Expand Down
17 changes: 0 additions & 17 deletions python/daaltk/models/dimensionality_reduction/__init__.py

This file was deleted.

3 changes: 2 additions & 1 deletion python/daaltk/models/dimreduction/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,4 +13,5 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
#

2 changes: 1 addition & 1 deletion python/daaltk/models/dimreduction/pca.py
Original file line number Diff line number Diff line change
Expand Up @@ -246,7 +246,7 @@ def right_singular_vectors(self):
"""
Right singular vectors of the specified columns in the input frame
"""
return [list(i) for i in list(self._scala.right_singular_vectors())]
return [list(i) for i in list(self._scala.rightSingularVectors())]

def predict(self, frame, mean_centered=True, t_squared_index=False, observation_columns=None, c=None):
"""
Expand Down