Friday, January 26, 2018

Azure Machine Learning Webinars

As some of you may know, we've been giving Azure Machine Learning presentations for about a year now. As promised, here are links to the videos, along with any supplemental material for the presentations.

Azure Machine Learning Studio: Making Data Science Easy(er)

https://www.youtube.com/watch?v=QMj_dL64xCA

There are no supplemental materials for this presentation.

Azure Machine Learning Studio: Four Tips from the Pros

https://www.youtube.com/watch?v=d25wmQ_dSQg
https://drive.google.com/open?id=12xodphzcK1Oy7TBDDSzHPXe8GIiBIgbr

R Code for Creating Interaction Features

<R CODE START>

#####################
## Import Data
#####################

## Script purpose: append pairwise interaction (product) features to the
## dataset arriving on input port 1 and send the widened dataset to the
## output port.

## Columns that must not take part in any interaction.
ignore <- c("income")

dat1 <- maml.mapInputPort(1)

## Drop the ignored columns with a logical mask.  The previous form,
## dat1[, -which(...)], selects ZERO columns whenever none of the ignored
## names is present, because -integer(0) is an empty index.
dat.full <- dat1[, !(names(dat1) %in% ignore), drop = FALSE]

## NOTE: the second input port was previously read only to compute a
## variable (vars.orig) that was never used, so that read has been removed.

vars.dummy <- names(dat.full)
n.vars <- length(vars.dummy)

## "Base" of each column = the text before the first "-" in its name.
## Columns whose name has no "-" all get the empty base "" and are therefore
## never combined with each other.
## NOTE(review): presumably names look like "<variable>-<level>" after
## indicator conversion; confirm that skipping dash-less pairs is intended.
var.base <- substr(vars.dummy, 1, regexpr("-", vars.dummy) - 1)

################################################
## Loop through all possible combinations
################################################

## Visit each unordered pair (i < j) exactly once.  The previous inner loop
## started at j = 2, so every pair was produced twice (a * b and b * a),
## yielding duplicated, perfectly collinear features.
int.cols <- list()

for (i in seq_len(max(n.vars - 1, 0))) {
    for (j in (i + 1):n.vars) {

        ## Only combine columns that come from different base variables.
        if (var.base[i] != var.base[j]) {
            ## paste() inserts its own separator around " * "; the resulting
            ## name is kept exactly as before so output column names of the
            ## surviving features do not change.
            int.name <- paste(vars.dummy[i], " * ", vars.dummy[j])
            int.cols[[int.name]] <- dat.full[[i]] * dat.full[[j]]
        }
    }
}

###################
## Output Data
###################

## Bind the interaction columns to the untouched input.  Passing the named
## list keeps the column name even when there is a single interaction (the
## previous dat.int[,-1] collapsed to an unnamed vector in that case).
if (length(int.cols) > 0) {
    dat.out <- data.frame(dat1, int.cols)
} else {
    dat.out <- data.frame(dat1)
}
maml.mapOutputPort("dat.out");

<R CODE END>

SQL Code for Combining Tune Model Hyperparameters Results

<SQL CODE 1 START>

-- Stack three hyperparameter-tuning result tables (t1, t2, t3) into one
-- common layout: a model label, seven parameter name/value pairs, the
-- metric columns, and Precision * Recall.
-- Unused parameter slots are filled with 'None' and 0; [Par 7 Value] uses ''
-- instead (presumably because it carries the text-valued LossFunction in the
-- neural-network branch -- confirm).

-- t1: Locally Deep SVM
SELECT
    'Two-Class Locally Deep Support Vector Machine - Binning' AS [Model Type],
    'LD-SVM Tree Depth' AS [Par 1 Name],
    [LD-SVM Tree Depth] AS [Par 1 Value],
    'Lambda W' AS [Par 2 Name],
    [Lambda W] AS [Par 2 Value],
    'Lambda Theta' AS [Par 3 Name],
    [Lambda Theta] AS [Par 3 Value],
    'Lambda Theta Prime' AS [Par 4 Name],
    [Lambda Theta Prime] AS [Par 4 Value],
    'Sigma' AS [Par 5 Name],
    [Sigma] AS [Par 5 Value],
    'Num Iterations' AS [Par 6 Name],
    [Num Iterations] AS [Par 6 Value],
    'None' AS [Par 7 Name],
    '' AS [Par 7 Value],
    [Accuracy],
    [Precision],
    [Recall],
    [F-Score],
    [AUC],
    [Average Log Loss],
    [Training Log Loss],
    [Precision] * [Recall] AS [Precision * Recall]
FROM t1

UNION ALL

-- t2: Neural Network
SELECT
    'Two-Class Neural Network - Binning' AS [Model Type],
    'Learning rate' AS [Par 1 Name],
    [Learning rate] AS [Par 1 Value],
    'None' AS [Par 2 Name],
    0 AS [Par 2 Value],
    'Number of iterations' AS [Par 3 Name],
    [Number of iterations] AS [Par 3 Value],
    'None' AS [Par 4 Name],
    0 AS [Par 4 Value],
    'None' AS [Par 5 Name],
    0 AS [Par 5 Value],
    'None' AS [Par 6 Name],
    0 AS [Par 6 Value],
    'LossFunction' AS [Par 7 Name],
    [LossFunction] AS [Par 7 Value],
    [Accuracy],
    [Precision],
    [Recall],
    [F-Score],
    [AUC],
    [Average Log Loss],
    [Training Log Loss],
    [Precision] * [Recall] AS [Precision * Recall]
FROM t2

UNION ALL

-- t3: Decision Jungle
SELECT
    'Two-Class Decision Jungle - Replicate' AS [Model Type],
    'Number of optimization steps per decision DAG layer' AS [Par 1 Name],
    [Number of optimization steps per decision DAG layer] AS [Par 1 Value],
    'Maximum width of the decision DAGs' AS [Par 2 Name],
    [Maximum width of the decision DAGs] AS [Par 2 Value],
    'Maximum depth of the decision DAGs' AS [Par 3 Name],
    [Maximum depth of the decision DAGs] AS [Par 3 Value],
    'Number of decision DAGs' AS [Par 4 Name],
    [Number of decision DAGs] AS [Par 4 Value],
    'None' AS [Par 5 Name],
    0 AS [Par 5 Value],
    'None' AS [Par 6 Name],
    0 AS [Par 6 Value],
    'None' AS [Par 7 Name],
    '' AS [Par 7 Value],
    [Accuracy],
    [Precision],
    [Recall],
    [F-Score],
    [AUC],
    [Average Log Loss],
    [Training Log Loss],
    [Precision] * [Recall] AS [Precision * Recall]
FROM t3

<SQL CODE 1 END>


<SQL CODE 2 START>

-- Stack three hyperparameter-tuning result tables (t1, t2, t3) into one
-- common layout: a model label, seven parameter name/value pairs, the
-- metric columns, and Precision * Recall.
-- Unused parameter slots are filled with 'None' and 0 ('' for [Par 7 Value]).

-- t1: Locally Deep SVM
SELECT
    'Two-Class Locally Deep Support Vector Machine - Gaussian' AS [Model Type],
    'LD-SVM Tree Depth' AS [Par 1 Name],
    [LD-SVM Tree Depth] AS [Par 1 Value],
    'Lambda W' AS [Par 2 Name],
    [Lambda W] AS [Par 2 Value],
    'Lambda Theta' AS [Par 3 Name],
    [Lambda Theta] AS [Par 3 Value],
    'Lambda Theta Prime' AS [Par 4 Name],
    [Lambda Theta Prime] AS [Par 4 Value],
    'Sigma' AS [Par 5 Name],
    [Sigma] AS [Par 5 Value],
    'Num Iterations' AS [Par 6 Name],
    [Num Iterations] AS [Par 6 Value],
    'None' AS [Par 7 Name],
    '' AS [Par 7 Value],
    [Accuracy],
    [Precision],
    [Recall],
    [F-Score],
    [AUC],
    [Average Log Loss],
    [Training Log Loss],
    [Precision] * [Recall] AS [Precision * Recall]
FROM t1

UNION ALL

-- t2: Neural Network
SELECT
    'Two-Class Neural Network - Gaussian' AS [Model Type],
    'Learning rate' AS [Par 1 Name],
    [Learning rate] AS [Par 1 Value],
    'None' AS [Par 2 Name],
    0 AS [Par 2 Value],
    'Number of iterations' AS [Par 3 Name],
    [Number of iterations] AS [Par 3 Value],
    'None' AS [Par 4 Name],
    0 AS [Par 4 Value],
    'None' AS [Par 5 Name],
    0 AS [Par 5 Value],
    'None' AS [Par 6 Name],
    0 AS [Par 6 Value],
    'LossFunction' AS [Par 7 Name],
    [LossFunction] AS [Par 7 Value],
    [Accuracy],
    [Precision],
    [Recall],
    [F-Score],
    [AUC],
    [Average Log Loss],
    [Training Log Loss],
    [Precision] * [Recall] AS [Precision * Recall]
FROM t2

UNION ALL

-- t3: Decision Jungle
SELECT
    'Two-Class Decision Jungle - Bagging' AS [Model Type],
    'Number of optimization steps per decision DAG layer' AS [Par 1 Name],
    [Number of optimization steps per decision DAG layer] AS [Par 1 Value],
    'Maximum width of the decision DAGs' AS [Par 2 Name],
    [Maximum width of the decision DAGs] AS [Par 2 Value],
    'Maximum depth of the decision DAGs' AS [Par 3 Name],
    [Maximum depth of the decision DAGs] AS [Par 3 Value],
    'Number of decision DAGs' AS [Par 4 Name],
    [Number of decision DAGs] AS [Par 4 Value],
    'None' AS [Par 5 Name],
    0 AS [Par 5 Value],
    'None' AS [Par 6 Name],
    0 AS [Par 6 Value],
    'None' AS [Par 7 Name],
    '' AS [Par 7 Value],
    [Accuracy],
    [Precision],
    [Recall],
    [F-Score],
    [AUC],
    [Average Log Loss],
    [Training Log Loss],
    [Precision] * [Recall] AS [Precision * Recall]
FROM t3

<SQL CODE 2 END>


<SQL CODE 3 START>

-- Stack three hyperparameter-tuning result tables (t1, t2, t3) into one
-- common layout: a model label, seven parameter name/value pairs, the
-- metric columns, and Precision * Recall.
-- Unused parameter slots are filled with 'None' and 0 ('' for [Par 7 Value]).

-- t1: Locally Deep SVM
SELECT
    'Two-Class Locally Deep Support Vector Machine - Min-Max' AS [Model Type],
    'LD-SVM Tree Depth' AS [Par 1 Name],
    [LD-SVM Tree Depth] AS [Par 1 Value],
    'Lambda W' AS [Par 2 Name],
    [Lambda W] AS [Par 2 Value],
    'Lambda Theta' AS [Par 3 Name],
    [Lambda Theta] AS [Par 3 Value],
    'Lambda Theta Prime' AS [Par 4 Name],
    [Lambda Theta Prime] AS [Par 4 Value],
    'Sigma' AS [Par 5 Name],
    [Sigma] AS [Par 5 Value],
    'Num Iterations' AS [Par 6 Name],
    [Num Iterations] AS [Par 6 Value],
    'None' AS [Par 7 Name],
    '' AS [Par 7 Value],
    [Accuracy],
    [Precision],
    [Recall],
    [F-Score],
    [AUC],
    [Average Log Loss],
    [Training Log Loss],
    [Precision] * [Recall] AS [Precision * Recall]
FROM t1

UNION ALL

-- t2: Neural Network
SELECT
    'Two-Class Neural Network - Min-Max' AS [Model Type],
    'Learning rate' AS [Par 1 Name],
    [Learning rate] AS [Par 1 Value],
    'None' AS [Par 2 Name],
    0 AS [Par 2 Value],
    'Number of iterations' AS [Par 3 Name],
    [Number of iterations] AS [Par 3 Value],
    'None' AS [Par 4 Name],
    0 AS [Par 4 Value],
    'None' AS [Par 5 Name],
    0 AS [Par 5 Value],
    'None' AS [Par 6 Name],
    0 AS [Par 6 Value],
    'LossFunction' AS [Par 7 Name],
    [LossFunction] AS [Par 7 Value],
    [Accuracy],
    [Precision],
    [Recall],
    [F-Score],
    [AUC],
    [Average Log Loss],
    [Training Log Loss],
    [Precision] * [Recall] AS [Precision * Recall]
FROM t2

UNION ALL

-- t3: Boosted Decision Tree
SELECT
    'Two-Class Boosted Decision Tree' AS [Model Type],
    'Number of leaves' AS [Par 1 Name],
    [Number of leaves] AS [Par 1 Value],
    'Minimum leaf instances' AS [Par 2 Name],
    [Minimum leaf instances] AS [Par 2 Value],
    'Learning rate' AS [Par 3 Name],
    [Learning rate] AS [Par 3 Value],
    'Number of trees' AS [Par 4 Name],
    [Number of trees] AS [Par 4 Value],
    'None' AS [Par 5 Name],
    0 AS [Par 5 Value],
    'None' AS [Par 6 Name],
    0 AS [Par 6 Value],
    'None' AS [Par 7 Name],
    '' AS [Par 7 Value],
    [Accuracy],
    [Precision],
    [Recall],
    [F-Score],
    [AUC],
    [Average Log Loss],
    [Training Log Loss],
    [Precision] * [Recall] AS [Precision * Recall]
FROM t3

<SQL CODE 3 END>


<SQL CODE 4 START>

-- Stack three hyperparameter-tuning result tables (t1, t2, t3) into one
-- common layout: a model label, seven parameter name/value pairs, the
-- metric columns, and Precision * Recall.
-- Unused parameter slots are filled with 'None' and 0 ('' for [Par 7 Value]).

-- t1: Decision Forest
SELECT
    'Two-Class Decision Forest - Replicate' AS [Model Type],
    'Minimum number of samples per leaf node' AS [Par 1 Name],
    [Minimum number of samples per leaf node] AS [Par 1 Value],
    'Number of random splits per node' AS [Par 2 Name],
    [Number of random splits per node] AS [Par 2 Value],
    'Maximum depth of the decision trees' AS [Par 3 Name],
    [Maximum depth of the decision trees] AS [Par 3 Value],
    'Number of decision trees' AS [Par 4 Name],
    [Number of decision trees] AS [Par 4 Value],
    'None' AS [Par 5 Name],
    0 AS [Par 5 Value],
    'None' AS [Par 6 Name],
    0 AS [Par 6 Value],
    'None' AS [Par 7 Name],
    '' AS [Par 7 Value],
    [Accuracy],
    [Precision],
    [Recall],
    [F-Score],
    [AUC],
    [Average Log Loss],
    [Training Log Loss],
    [Precision] * [Recall] AS [Precision * Recall]
FROM t1

UNION ALL

-- t2: Averaged Perceptron
SELECT
    'Two-Class Averaged Perceptron' AS [Model Type],
    'Learning rate' AS [Par 1 Name],
    [Learning rate] AS [Par 1 Value],
    'Maximum number of iterations' AS [Par 2 Name],
    [Maximum number of iterations] AS [Par 2 Value],
    'None' AS [Par 3 Name],
    0 AS [Par 3 Value],
    'None' AS [Par 4 Name],
    0 AS [Par 4 Value],
    'None' AS [Par 5 Name],
    0 AS [Par 5 Value],
    'None' AS [Par 6 Name],
    0 AS [Par 6 Value],
    'None' AS [Par 7 Name],
    '' AS [Par 7 Value],
    [Accuracy],
    [Precision],
    [Recall],
    [F-Score],
    [AUC],
    [Average Log Loss],
    [Training Log Loss],
    [Precision] * [Recall] AS [Precision * Recall]
FROM t2

UNION ALL

-- t3: Support Vector Machine
SELECT
    'Two-Class Support Vector Machine' AS [Model Type],
    'Number of iterations' AS [Par 1 Name],
    [Number of iterations] AS [Par 1 Value],
    'Lambda' AS [Par 2 Name],
    [Lambda] AS [Par 2 Value],
    'None' AS [Par 3 Name],
    0 AS [Par 3 Value],
    'None' AS [Par 4 Name],
    0 AS [Par 4 Value],
    'None' AS [Par 5 Name],
    0 AS [Par 5 Value],
    'None' AS [Par 6 Name],
    0 AS [Par 6 Value],
    'None' AS [Par 7 Name],
    '' AS [Par 7 Value],
    [Accuracy],
    [Precision],
    [Recall],
    [F-Score],
    [AUC],
    [Average Log Loss],
    [Training Log Loss],
    [Precision] * [Recall] AS [Precision * Recall]
FROM t3

<SQL CODE 4 END>


<SQL CODE 5 START>

-- Stack two hyperparameter-tuning result tables (t1, t2) into one common
-- layout: a model label, seven parameter name/value pairs, the metric
-- columns, and Precision * Recall.
-- Unused parameter slots are filled with 'None' and 0 ('' for [Par 7 Value]).

-- t1: Decision Forest
SELECT
    'Two-Class Decision Forest - Bagging' AS [Model Type],
    'Minimum number of samples per leaf node' AS [Par 1 Name],
    [Minimum number of samples per leaf node] AS [Par 1 Value],
    'Number of random splits per node' AS [Par 2 Name],
    [Number of random splits per node] AS [Par 2 Value],
    'Maximum depth of the decision trees' AS [Par 3 Name],
    [Maximum depth of the decision trees] AS [Par 3 Value],
    'Number of decision trees' AS [Par 4 Name],
    [Number of decision trees] AS [Par 4 Value],
    'None' AS [Par 5 Name],
    0 AS [Par 5 Value],
    'None' AS [Par 6 Name],
    0 AS [Par 6 Value],
    'None' AS [Par 7 Name],
    '' AS [Par 7 Value],
    [Accuracy],
    [Precision],
    [Recall],
    [F-Score],
    [AUC],
    [Average Log Loss],
    [Training Log Loss],
    [Precision] * [Recall] AS [Precision * Recall]
FROM t1

UNION ALL

-- t2: Logistic Regression
SELECT
    'Two-Class Logistic Regression' AS [Model Type],
    'OptimizationTolerance' AS [Par 1 Name],
    [OptimizationTolerance] AS [Par 1 Value],
    'L1Weight' AS [Par 2 Name],
    [L1Weight] AS [Par 2 Value],
    'L2Weight' AS [Par 3 Name],
    [L2Weight] AS [Par 3 Value],
    'MemorySize' AS [Par 4 Name],
    [MemorySize] AS [Par 4 Value],
    'None' AS [Par 5 Name],
    0 AS [Par 5 Value],
    'None' AS [Par 6 Name],
    0 AS [Par 6 Value],
    'None' AS [Par 7 Name],
    '' AS [Par 7 Value],
    [Accuracy],
    [Precision],
    [Recall],
    [F-Score],
    [AUC],
    [Average Log Loss],
    [Training Log Loss],
    [Precision] * [Recall] AS [Precision * Recall]
FROM t2

<SQL CODE 5 END>


<SQL CODE 6 START>

-- Append every row of t1, t2 and t3 into a single result, keeping duplicates.
-- The three inputs must expose the same number of columns, in the same order.
SELECT t1.*
FROM t1

UNION ALL

SELECT t2.*
FROM t2

UNION ALL

SELECT t3.*
FROM t3

<SQL CODE 6 END>


<SQL CODE 7 START>

-- Append every row of t1 and t2 into a single result, keeping duplicates.
-- Both inputs must expose the same number of columns, in the same order.
SELECT t1.*
FROM t1

UNION ALL

SELECT t2.*
FROM t2

<SQL CODE 7 END>


<SQL CODE 8 START>

-- Append every row of t1 and t2 into a single result, keeping duplicates.
-- Both inputs must expose the same number of columns, in the same order.
SELECT t1.*
FROM t1

UNION ALL

SELECT t2.*
FROM t2

<SQL CODE 8 END>

<SQL CODE 9 START>

-- Return every row and column of t1, sorted so the highest [AUC] comes first.
SELECT t1.*
FROM t1
ORDER BY t1.[AUC] DESC

<SQL CODE 9 END>

Brad Llewellyn
Data Science Consultant
Valorem
@BreakingBI
www.linkedin.com/in/bradllewellyn
llewellyn.wb@gmail.com

No comments:

Post a Comment