| Derived Variables

Derived variables (also called new features), as the name suggests, are input variables derived from the original variables. They can range from simple transformations, such as taking the log of an input variable, to complex functions involving all input variables. In GeneXproTools, derived variables are created and managed in the Static UDFs Tab in the Functions Panel. In Karva notation they are represented as UDF0, UDF1, …, UDFn.
 Derived variables are programmed in the UDF Editor and must be coded in 
							JavaScript. 
							GeneXproTools allows you to import the code of derived variables from other gep 
							files, 
							giving you easy access to your favorite UDFs.
 
 The output of all derived variables created in GeneXproTools can be analyzed and visualized in the Data Panel. You can analyze their scatter plots against any other variable, plot their histograms, analyze different line charts, compute and visualize summary statistics, and perform several other analyses.
 
 In modeling terms, derived variables are handled by the learning algorithms of GeneXproTools 
							in exactly the same way as the original variables, that is, during model evolution 
							GeneXproTools uses both variable types interchangeably, combining them in different ways 
							and integrating them in different model structures or modules. The 
							JavaScript code below 
							is a regression model that combines 3 derived variables and 7 of the original variables.
 
 
//------------------------------------------------------------------
// Regression model generated by GeneXproTools 5.0 on 5/20/2013 10:00:10 PM
// GEP File: D:\GeneXproTools\Version5.0\OnlineGuide\ConcreteStrength-UDFs_01a.gep
// Training Records:  687
// Validation Records:   343
// Fitness Function:  Positive Correl
// Training Fitness:  914.425772796843
// Training R-square: 0.836174493955104
// Validation Fitness:   919.274374939755
// Validation R-square:  0.845065376420878
//------------------------------------------------------------------
// Shared input record: gepModel stores its argument here so that the
// parameterless UDF functions can read the same values.
var terminals = [];
// Evaluates the evolved regression model for one input record.
//   d: array-like record of numeric inputs; indices 0-4, 6 and 7 are read
//      directly here, and every element is read by UDF1/UDF2/UDF3.
// Returns the sum of the six sub-expressions (genes) of the model.
// Side effect: stores d in the shared `terminals` variable.
function gepModel(d)
{
    // Numeric constants of the model, listed per gene (G<gene>C<index>).
    var G1C0 = -2.88029035889767;
    var G1C2 = -0.821369315164647;
    var G3C2 = 3.40349775688955;
    var G4C1 = 2.88776641366202;
    var G5C2 = 4.06598101748711;
    var G5C3 = 5.7177285881222;
    var G5C5 = 11.260200627471;
    var G5C9 = -10.2894261546678;
    var G6C0 = 13.2330739691954;
    var G6C3 = 6.55475921536912;

    // The UDFs take no arguments and read the record from `terminals`,
    // so it must be published before any of them is called.
    terminals = d;

    // Gene 1: Math.min caps the atan argument at G1C2 (negative), so the
    // squared denominator is never zero.
    var cappedArg = Math.min((G1C0 + d[7]) / 2.0, G1C2);
    var gene1 = (UDF1() / Math.pow(Math.atan(cappedArg), 2)) - d[6];

    // Gene 2
    var gene2 = 1 - UDF3();

    // Gene 3
    var gene3 = (gep3Rt(d[7]) + Math.min((d[2] + d[3]) / 2.0, G3C2 + d[7])) + UDF2();

    // Gene 4
    var gene4 = UDF1() - gep3Rt(Math.pow((Math.max(G4C1, d[1]) - d[7]) + (G4C1 - d[7]), 2));

    // Gene 5
    var gene5 = Math.min(
        ((((d[6] + G5C2) + d[0]) / 2.0) + (1 - d[7])) / 2.0,
        (G5C5 * d[7]) - (G5C3 * G5C9)
    );

    // Gene 6
    var gene6 = Math.min(((d[7] + d[7]) / 2.0) + G6C3, G6C0 * d[4])
        - (((d[7] + d[2]) / 2.0) + (d[3] + d[3]));

    // Genes are linked by addition, accumulated strictly left to right.
    return gene1 + gene2 + gene3 + gene4 + gene5 + gene6;
}
// Real cube root that also accepts negative inputs.
// Math.pow with a fractional exponent yields NaN for a negative base, so a
// negative x is negated before the power is taken and the sign is restored.
function gep3Rt(x)
{
    var oneThird = 1.0 / 3.0;
    if (x < 0.0)
    {
        return -Math.pow(-x, oneThird);
    }
    return Math.pow(x, oneThird);
}
// Derived variable UDF1 (Average Model): arithmetic mean of all values
// currently held in the shared `terminals` record.
// An empty record gives 0/0, i.e. NaN.
function UDF1()
{
    var count = terminals.length;
    var total = 0.0;
    var i = 0;
    // Accumulate in ascending index order.
    while (i < count)
    {
        total += terminals[i];
        i++;
    }
    return total / count;
}
// Derived variable UDF2 (Sum): total of all values currently held in the
// shared `terminals` record. An empty record gives 0.
function UDF2()
{
    var total = 0.0;
    var count = terminals.length;
    var idx = 0;
    // Accumulate in ascending index order.
    while (idx < count)
    {
        total = total + terminals[idx];
        idx += 1;
    }
    return total;
}
// Derived variable UDF3 (Max Model): largest value currently held in the
// shared `terminals` record.
// Starts from the first element, so an empty record gives undefined.
function UDF3()
{
    var count = terminals.length;
    var largest = terminals[0];
    for (var i = 1; i < count; i += 1)
    {
        var candidate = terminals[i];
        // Strict ">" keeps the earliest element on ties.
        largest = candidate > largest ? candidate : largest;
    }
    return largest;
}
GeneXproTools evaluates the variable importance of all the variables (original and derived) 
							in a model. The variable importance is also shown in the 
							Statistics Charts in the Data Panel.
 
 
 See Also:
 
 
 Related Tutorials:
 
 
 Related Videos:
 
 
 
 |