Normalisasi Pada Data Mining


Pada postingan kali ini saya akan berbagi mengenai normalisasi pada data mining. Normalisasi adalah proses penskalaan nilai atribut dari data sehingga nilainya jatuh pada range tertentu. Hal ini berguna ketika atribut-atribut data berada pada range yang berbeda-beda, sehingga sulit melihat apakah suatu atribut memiliki kontribusi penting pada proses learning selanjutnya.

Berikut metode normalisasi data yang saya gunakan:

1. MinMax
2. Zscore
3. DecimalScaling
4. Sigmoidal
5. SoftMax

Data newthyroid yang digunakan:
newthyroid.txt



107 10.1 2.2 0.9 2.7 1

113 9.9 3.1 2.0 5.9 1

127 12.9 2.4 1.4 0.6 1

109 5.3 1.6 1.4 1.5 1

105 7.3 1.5 1.5 -0.1 1

105 6.1 2.1 1.4 7.0 1

110 10.4 1.6 1.6 2.7 1

114 9.9 2.4 1.5 5.7 1

106 9.4 2.2 1.5 0.0 1

107 13.0 1.1 0.9 3.1 1

106 4.2 1.2 1.6 1.4 1

110 11.3 2.3 0.9 3.3 1

116 9.2 2.7 1.0 4.2 1

112 8.1 1.9 3.7 2.0 1

122 9.7 1.6 0.9 2.2 1

109 8.4 2.1 1.1 3.6 1

111 8.4 1.5 0.8 1.2 1

114 6.7 1.5 1.0 3.5 1

119 10.6 2.1 1.3 1.1 1

115 7.1 1.3 1.3 2.0 1

101 7.8 1.2 1.0 1.7 1

103 10.1 1.3 0.7 0.1 1

109 10.4 1.9 0.4 -0.1 1

102 7.6 1.8 2.0 2.5 1

121 10.1 1.7 1.3 0.1 1

100 6.1 2.4 1.8 3.8 1

106 9.6 2.4 1.0 1.3 1

116 10.1 2.2 1.6 0.8 1

105 11.1 2.0 1.0 1.0 1

110 10.4 1.8 1.0 2.3 1

120 8.4 1.1 1.4 1.4 1

116 11.1 2.0 1.2 2.3 1

110 7.8 1.9 2.1 6.4 1

90 8.1 1.6 1.4 1.1 1

117 12.2 1.9 1.2 3.9 1

117 11.0 1.4 1.5 2.1 1

113 9.0 2.0 1.8 1.6 1

106 9.4 1.5 0.8 0.5 1

130 9.5 1.7 0.4 3.2 1

100 10.5 2.4 0.9 1.9 1

121 10.1 2.4 0.8 3.0 1

110 9.2 1.6 1.5 0.3 1

129 11.9 2.7 1.2 3.5 1

121 13.5 1.5 1.6 0.5 1

123 8.1 2.3 1.0 5.1 1

107 8.4 1.8 1.5 0.8 1

109 10.0 1.3 1.8 4.3 1

120 6.8 1.9 1.3 1.9 1

100 9.5 2.5 1.3 -0.2 1

118 8.1 1.9 1.5 13.7 1

100 11.3 2.5 0.7 -0.3 1

103 12.2 1.2 1.3 2.7 1

115 8.1 1.7 0.6 2.2 1

119 8.0 2.0 0.6 3.2 1

106 9.4 1.7 0.9 3.1 1

114 10.9 2.1 0.3 1.4 1

93 8.9 1.5 0.8 2.7 1

120 10.4 2.1 1.1 1.8 1

106 11.3 1.8 0.9 1.0 1

110 8.7 1.9 1.6 4.4 1

103 8.1 1.4 0.5 3.8 1

101 7.1 2.2 0.8 2.2 1

115 10.4 1.8 1.6 2.0 1

116 10.0 1.7 1.5 4.3 1

117 9.2 1.9 1.5 6.8 1

106 6.7 1.5 1.2 3.9 1

118 10.5 2.1 0.7 3.5 1

97 7.8 1.3 1.2 0.9 1

113 11.1 1.7 0.8 2.3 1

104 6.3 2.0 1.2 4.0 1

96 9.4 1.5 1.0 3.1 1

120 12.4 2.4 0.8 1.9 1

133 9.7 2.9 0.8 1.9 1

126 9.4 2.3 1.0 4.0 1

113 8.5 1.8 0.8 0.5 1

109 9.7 1.4 1.1 2.1 1

119 12.9 1.5 1.3 3.6 1

101 7.1 1.6 1.5 1.6 1

108 10.4 2.1 1.3 2.4 1

117 6.7 2.2 1.8 6.7 1

115 15.3 2.3 2.0 2.0 1

91 8.0 1.7 2.1 4.6 1

103 8.5 1.8 1.9 1.1 1

98 9.1 1.4 1.9 -0.3 1

111 7.8 2.0 1.8 4.1 1

107 13.0 1.5 2.8 1.7 1

119 11.4 2.3 2.2 1.6 1

122 11.8 2.7 1.7 2.3 1

105 8.1 2.0 1.9 -0.5 1

109 7.6 1.3 2.2 1.9 1

105 9.5 1.8 1.6 3.6 1

112 5.9 1.7 2.0 1.3 1

112 9.5 2.0 1.2 0.7 1

98 8.6 1.6 1.6 6.0 1

109 12.4 2.3 1.7 0.8 1

114 9.1 2.6 1.5 1.5 1

114 11.1 2.4 2.0 -0.3 1

110 8.4 1.4 1.0 1.9 1

120 7.1 1.2 1.5 4.3 1

108 10.9 1.2 1.9 1.0 1

108 8.7 1.2 2.2 2.5 1

116 11.9 1.8 1.9 1.5 1

113 11.5 1.5 1.9 2.9 1

105 7.0 1.5 2.7 4.3 1

114 8.4 1.6 1.6 -0.2 1

114 8.1 1.6 1.6 0.5 1

105 11.1 1.1 0.8 1.2 1

107 13.8 1.5 1.0 1.9 1

116 11.5 1.8 1.4 5.4 1

102 9.5 1.4 1.1 1.6 1

116 16.1 0.9 1.3 1.5 1

118 10.6 1.8 1.4 3.0 1

109 8.9 1.7 1.0 0.9 1

110 7.0 1.0 1.6 4.3 1

104 9.6 1.1 1.3 0.8 1

105 8.7 1.5 1.1 1.5 1

102 8.5 1.2 1.3 1.4 1

112 6.8 1.7 1.4 3.3 1

111 8.5 1.6 1.1 3.9 1

111 8.5 1.6 1.2 7.7 1

103 7.3 1.0 0.7 0.5 1

98 10.4 1.6 2.3 -0.7 1

117 7.8 2.0 1.0 3.9 1

111 9.1 1.7 1.2 4.1 1

101 6.3 1.5 0.9 2.9 1

106 8.9 0.7 1.0 2.3 1

102 8.4 1.5 0.8 2.4 1

115 10.6 0.8 2.1 4.6 1

130 10.0 1.6 0.9 4.6 1

101 6.7 1.3 1.0 5.7 1

110 6.3 1.0 0.8 1.0 1

103 9.5 2.9 1.4 -0.1 1

113 7.8 2.0 1.1 3.0 1

112 10.6 1.6 0.9 -0.1 1

118 6.5 1.2 1.2 1.7 1

109 9.2 1.8 1.1 4.4 1

116 7.8 1.4 1.1 3.7 1

127 7.7 1.8 1.9 6.4 1

108 6.5 1.0 0.9 1.5 1

108 7.1 1.3 1.6 2.2 1

105 5.7 1.0 0.9 0.9 1

98 5.7 0.4 1.3 2.8 1

112 6.5 1.2 1.2 2.0 1

118 12.2 1.5 1.0 2.3 1

94 7.5 1.2 1.3 4.4 1

126 10.4 1.7 1.2 3.5 1

114 7.5 1.1 1.6 4.4 1

111 11.9 2.3 0.9 3.8 1

104 6.1 1.8 0.5 0.8 1

102 6.6 1.2 1.4 1.3 1

139 16.4 3.8 1.1 -0.2 2

111 16.0 2.1 0.9 -0.1 2

113 17.2 1.8 1.0 0.0 2

65 25.3 5.8 1.3 0.2 2

88 24.1 5.5 0.8 0.1 2

65 18.2 10.0 1.3 0.1 2

134 16.4 4.8 0.6 0.1 2

110 20.3 3.7 0.6 0.2 2

67 23.3 7.4 1.8 -0.6 2

95 11.1 2.7 1.6 -0.3 2

89 14.3 4.1 0.5 0.2 2

89 23.8 5.4 0.5 0.1 2

88 12.9 2.7 0.1 0.2 2

105 17.4 1.6 0.3 0.4 2

89 20.1 7.3 1.1 -0.2 2

99 13.0 3.6 0.7 -0.1 2

80 23.0 10.0 0.9 -0.1 2

89 21.8 7.1 0.7 -0.1 2

99 13.0 3.1 0.5 -0.1 2

68 14.7 7.8 0.6 -0.2 2

97 14.2 3.6 1.5 0.3 2

84 21.5 2.7 1.1 -0.6 2

84 18.5 4.4 1.1 -0.3 2

98 16.7 4.3 1.7 0.2 2

94 20.5 1.8 1.4 -0.5 2

99 17.5 1.9 1.4 0.3 2

76 25.3 4.5 1.2 -0.1 2

110 15.2 1.9 0.7 -0.2 2

144 22.3 3.3 1.3 0.6 2

105 12.0 3.3 1.1 0.0 2

88 16.5 4.9 0.8 0.1 2

97 15.1 1.8 1.2 -0.2 2

106 13.4 3.0 1.1 0.0 2

79 19.0 5.5 0.9 0.3 2

92 11.1 2.0 0.7 -0.2 2

125 2.3 0.9 16.5 9.5 3

120 6.8 2.1 10.4 38.6 3

108 3.5 0.6 1.7 1.4 3

120 3.0 2.5 1.2 4.5 3

119 3.8 1.1 23.0 5.7 3

141 5.6 1.8 9.2 14.4 3

129 1.5 0.6 12.5 2.9 3

118 3.6 1.5 11.6 48.8 3

120 1.9 0.7 18.5 24.0 3

119 0.8 0.7 56.4 21.6 3

123 5.6 1.1 13.7 56.3 3

115 6.3 1.2 4.7 14.4 3

126 0.5 0.2 12.2 8.8 3

121 4.7 1.8 11.2 53.0 3

131 2.7 0.8 9.9 4.7 3

134 2.0 0.5 12.2 2.2 3

141 2.5 1.3 8.5 7.5 3

113 5.1 0.7 5.8 19.6 3

136 1.4 0.3 32.6 8.4 3

120 3.4 1.8 7.5 21.5 3

125 3.7 1.1 8.5 25.9 3

123 1.9 0.3 22.8 22.2 3

112 2.6 0.7 41.0 19.0 3

134 1.9 0.6 18.4 8.2 3

119 5.1 1.1 7.0 40.8 3

118 6.5 1.3 1.7 11.5 3

139 4.2 0.7 4.3 6.3 3

103 5.1 1.4 1.2 5.0 3

97 4.7 1.1 2.1 12.6 3

102 5.3 1.4 1.3 6.7 3

Berikut adalah file Java-nya:
DataMining1.java

/*
 * To change this license header, choose License Headers in Project Properties.
 * To change this template file, choose Tools | Templates
 * and open the template in the editor.
 */

package datamining1;

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.io.PrintWriter;
import java.text.DecimalFormat;
import java.util.Scanner;

/**
 * Command-line demo of five normalisation methods used in data mining:
 * min-max, z-score, decimal scaling, sigmoidal and softmax.
 *
 * <p>The program reads a whitespace-separated numeric table from
 * {@code newthyroid.txt}. Every column except the last one is treated as a
 * feature and is normalised; the last column is treated as the class label
 * and is copied to the output unchanged. The normalised table is printed to
 * standard output and written to a method-specific text file.
 *
 * @author faisalkwok
 */
public class DataMining1 {

    /** Lower bound of the target range used by min-max normalisation. */
    private static final float NEW_MIN = 0f;

    /** Upper bound of the target range used by min-max normalisation. */
    private static final float NEW_MAX = 1f;

    /**
     * Width, in standard deviations, of the approximately linear region of
     * the softmax transformation (the "x" of the original implementation).
     */
    private static final double SOFTMAX_RESPONSE = 10.0;

    /**
     * Loads the dataset, asks the user which normalisation method to apply
     * and runs it.
     *
     * @param args the command line arguments (unused)
     */
    public static void main(String[] args) {
        float[][] data = parseData(bacaFile("newthyroid.txt"));
        if (data.length == 0) {
            System.out.println("Tidak ada data yang bisa dinormalisasi");
            return;
        }

        // System.in is deliberately not closed: closing the Scanner would
        // close the process-wide standard input as well.
        Scanner input = new Scanner(System.in);
        System.out.print("1. Metode Normalisasi MinMax  ");
        System.out.print("\n2. Metode Normalisasi Zscore  ");
        System.out.print("\n3. Metode Normalisasi Decimal Scaling ");
        System.out.print("\n4. Metode Normalisasi Sigmoidal ");
        System.out.print("\n5. Metode Normalisasi Softmax ");
        System.out.print("\nMasukan pilihan metode normalisasi : ");

        // Non-numeric input is treated like any other invalid choice instead
        // of crashing with an InputMismatchException.
        int pil = input.hasNextInt() ? input.nextInt() : -1;
        switch (pil) {
            case 1:
                minMax(data);
                break;
            case 2:
                zScore(data);
                break;
            case 3:
                decimalScaling(data);
                break;
            case 4:
                sigmoidal(data);
                break;
            case 5:
                softmax(data);
                break;
            default:
                System.out.println("pilihan yang anda masukan salah");
                break;
        }
    }

    /**
     * Converts the raw text of a dataset into a rectangular table.
     *
     * <p>Values may be separated by any run of spaces or tabs. Blank lines are
     * ignored. A line that contains a non-numeric token, or whose number of
     * columns differs from the first valid line, is reported on standard
     * output and skipped so that it cannot distort the column statistics.
     *
     * @param teks file content with rows separated by {@code '\n'}; may be
     *             {@code null} or empty
     * @return one {@code float[]} per valid row, all of the same length;
     *         an empty array when there is no valid row
     */
    static float[][] parseData(String teks) {
        if (teks == null || teks.isEmpty()) {
            return new float[0][];
        }
        String[] baris = teks.split("\n");
        float[][] sementara = new float[baris.length][];
        int jumlahBaris = 0;
        int jumlahKolom = -1; // unknown until the first valid row is seen

        for (int i = 0; i < baris.length; i++) {
            String bersih = baris[i].trim(); // also strips a trailing '\r'
            if (bersih.isEmpty()) {
                continue;
            }
            String[] nilai = bersih.split("\\s+");
            if (jumlahKolom >= 0 && nilai.length != jumlahKolom) {
                System.out.println("Baris " + (i + 1) + " dilewati: jumlah kolom "
                        + nilai.length + ", seharusnya " + jumlahKolom);
                continue;
            }
            float[] angka = new float[nilai.length];
            try {
                for (int j = 0; j < nilai.length; j++) {
                    angka[j] = Float.parseFloat(nilai[j]);
                }
            } catch (NumberFormatException e) {
                System.out.println("Baris " + (i + 1) + " dilewati: " + e.getMessage());
                continue;
            }
            jumlahKolom = nilai.length;
            sementara[jumlahBaris++] = angka;
        }

        float[][] data = new float[jumlahBaris][];
        System.arraycopy(sementara, 0, data, 0, jumlahBaris);
        return data;
    }

    /**
     * Appends one line of text to a file, creating the file if necessary.
     *
     * @param teks     the line to write (a line terminator is added)
     * @param namaFile path of the file to append to
     */
    public static void tulisFile(String teks, String namaFile) {
        try (PrintWriter out = new PrintWriter(new BufferedWriter(
                new FileWriter(namaFile, true)))) {
            out.println(teks);
            // PrintWriter never throws on write; failures are only visible
            // through checkError(), which also flushes the stream.
            if (out.checkError()) {
                System.out.println("Gagal menulis ke file " + namaFile);
            }
        } catch (IOException e) {
            System.out.println("Gagal menulis ke file " + namaFile);
        }
    }

    /** Returns the largest value in column {@code j}. */
    private static float max(float[][] data, int j) {
        // Start from -infinity so columns holding only negative values work.
        float max = Float.NEGATIVE_INFINITY;
        for (float[] baris : data) {
            if (baris[j] > max) {
                max = baris[j];
            }
        }
        return max;
    }

    /** Returns the smallest value in column {@code j}. */
    private static float min(float[][] data, int j) {
        // Start from +infinity so columns holding only large values work.
        float min = Float.POSITIVE_INFINITY;
        for (float[] baris : data) {
            if (baris[j] < min) {
                min = baris[j];
            }
        }
        return min;
    }

    /** Returns the largest absolute value in column {@code j}. */
    private static float maxAbs(float[][] data, int j) {
        float maxAbs = 0f;
        for (float[] baris : data) {
            float a = Math.abs(baris[j]);
            if (a > maxAbs) {
                maxAbs = a;
            }
        }
        return maxAbs;
    }

    /** Returns the arithmetic mean of column {@code j} in double precision. */
    private static double rataRata(float[][] data, int j) {
        double jumlah = 0.0;
        for (float[] baris : data) {
            jumlah += baris[j];
        }
        return data.length == 0 ? 0.0 : jumlah / data.length;
    }

    /** Returns the arithmetic mean of column {@code j}. */
    private static float mean(float[][] data, int j) {
        return (float) rataRata(data, j);
    }

    /**
     * Returns the sample standard deviation (divisor {@code n - 1}) of column
     * {@code j}, or 0 when there are fewer than two rows.
     */
    private static float std(float[][] data, int j) {
        int n = data.length;
        if (n < 2) {
            return 0f;
        }
        // Two-pass formula: accumulating squared deviations from the mean is
        // far less prone to cancellation than n*sum(x^2) - (sum x)^2.
        double rata = rataRata(data, j);
        double jumlahKuadrat = 0.0;
        for (float[] baris : data) {
            double selisih = baris[j] - rata;
            jumlahKuadrat += selisih * selisih;
        }
        return (float) Math.sqrt(jumlahKuadrat / (n - 1));
    }

    /** Number of feature columns, i.e. all columns except the trailing label. */
    private static int jumlahFitur(float[][] data) {
        if (data.length == 0) {
            return 0;
        }
        return Math.max(0, data[0].length - 1);
    }

    /**
     * Allocates a result table with the same shape as {@code data} and copies
     * the class label (last column) into it; feature cells are left at 0.
     */
    private static float[][] siapkanHasil(float[][] data) {
        float[][] hasil = new float[data.length][];
        for (int i = 0; i < data.length; i++) {
            int kolom = data[i].length;
            hasil[i] = new float[kolom];
            if (kolom > 0) {
                hasil[i][kolom - 1] = data[i][kolom - 1];
            }
        }
        return hasil;
    }

    /**
     * Min-max normalisation of every feature column into
     * [{@code NEW_MIN}, {@code NEW_MAX}]:
     * {@code v' = (v - min) * (newMax - newMin) / (max - min) + newMin}.
     * A constant column (max == min) maps to {@code NEW_MIN}.
     *
     * @param data rectangular table whose last column is the class label
     * @return new table of the same shape; the label column is copied as is
     */
    static float[][] hitungMinMax(float[][] data) {
        float[][] hasil = siapkanHasil(data);
        int fitur = jumlahFitur(data);
        for (int j = 0; j < fitur; j++) {
            float min = min(data, j);
            float rentang = max(data, j) - min;
            for (int i = 0; i < data.length; i++) {
                hasil[i][j] = rentang == 0f
                        ? NEW_MIN
                        : (data[i][j] - min) * (NEW_MAX - NEW_MIN) / rentang + NEW_MIN;
            }
        }
        return hasil;
    }

    /**
     * Z-score normalisation of every feature column:
     * {@code v' = (v - mean) / std}. A column with zero deviation maps to 0.
     *
     * @param data rectangular table whose last column is the class label
     * @return new table of the same shape; the label column is copied as is
     */
    static float[][] hitungZScore(float[][] data) {
        float[][] hasil = siapkanHasil(data);
        int fitur = jumlahFitur(data);
        for (int j = 0; j < fitur; j++) {
            float mean = mean(data, j);
            float std = std(data, j);
            for (int i = 0; i < data.length; i++) {
                hasil[i][j] = std == 0f ? 0f : (data[i][j] - mean) / std;
            }
        }
        return hasil;
    }

    /**
     * Decimal-scaling normalisation of every feature column:
     * {@code v' = v / 10^k}, where {@code k} is the smallest non-negative
     * integer for which the largest absolute value of the column becomes
     * strictly less than 1.
     *
     * @param data rectangular table whose last column is the class label
     * @return new table of the same shape; the label column is copied as is
     */
    static float[][] hitungDecimalScaling(float[][] data) {
        float[][] hasil = siapkanHasil(data);
        int fitur = jumlahFitur(data);
        for (int j = 0; j < fitur; j++) {
            float terbesar = maxAbs(data, j);
            double skala = 1.0;
            // Terminates for infinite/NaN input too: the comparison becomes
            // false once the quotient is NaN.
            while (terbesar / skala >= 1.0) {
                skala *= 10.0;
            }
            for (int i = 0; i < data.length; i++) {
                hasil[i][j] = (float) (data[i][j] / skala);
            }
        }
        return hasil;
    }

    /**
     * Sigmoidal normalisation of every feature column into (-1, 1):
     * {@code v' = (1 - e^-z) / (1 + e^-z)} with {@code z} the z-score.
     * A column with zero deviation maps to 0.
     *
     * @param data rectangular table whose last column is the class label
     * @return new table of the same shape; the label column is copied as is
     */
    static float[][] hitungSigmoidal(float[][] data) {
        float[][] hasil = siapkanHasil(data);
        int fitur = jumlahFitur(data);
        for (int j = 0; j < fitur; j++) {
            float mean = mean(data, j);
            float std = std(data, j);
            for (int i = 0; i < data.length; i++) {
                double z = std == 0f ? 0.0 : (data[i][j] - mean) / std;
                // (1 - e^-z) / (1 + e^-z) is exactly tanh(z / 2); tanh avoids
                // the inf/inf = NaN result for extreme z.
                hasil[i][j] = (float) Math.tanh(z / 2.0);
            }
        }
        return hasil;
    }

    /**
     * Softmax normalisation of every feature column into (0, 1):
     * {@code t = (v - mean) / (x * std / (2 * pi))},
     * {@code v' = 1 / (1 + e^-t)}, with {@code x = SOFTMAX_RESPONSE}.
     * A column with zero deviation maps to 0.5.
     *
     * @param data rectangular table whose last column is the class label
     * @return new table of the same shape; the label column is copied as is
     */
    static float[][] hitungSoftmax(float[][] data) {
        float[][] hasil = siapkanHasil(data);
        int fitur = jumlahFitur(data);
        for (int j = 0; j < fitur; j++) {
            float mean = mean(data, j);
            float std = std(data, j);
            double pembagi = SOFTMAX_RESPONSE * (std / (2.0 * Math.PI));
            for (int i = 0; i < data.length; i++) {
                double t = pembagi == 0.0 ? 0.0 : (data[i][j] - mean) / pembagi;
                hasil[i][j] = (float) (1.0 / (1.0 + Math.exp(-t)));
            }
        }
        return hasil;
    }

    /**
     * Formats one result row: the formatted features separated by two tabs,
     * followed by one tab and the unformatted class label.
     */
    private static String formatBaris(float[] baris, DecimalFormat df) {
        StringBuilder sb = new StringBuilder();
        int fitur = baris.length - 1;
        for (int j = 0; j < fitur; j++) {
            if (j > 0) {
                sb.append("\t\t");
            }
            sb.append(df.format(baris[j]));
        }
        if (baris.length > 0) {
            sb.append("\t").append(baris[fitur]);
        }
        return sb.toString();
    }

    /**
     * Prints every row of {@code hasil} to standard output and writes the
     * same rows to {@code namaFile}.
     *
     * <p>The file is overwritten rather than appended to, so running the
     * program twice does not leave two copies of the dataset in the file, and
     * it is opened once instead of once per row.
     *
     * @param hasil    normalised table, label in the last column
     * @param pola     {@link DecimalFormat} pattern for the feature values
     * @param namaFile path of the result file
     */
    private static void simpanHasil(float[][] hasil, String pola, String namaFile) {
        DecimalFormat df = new DecimalFormat(pola);
        String[] teks = new String[hasil.length];
        // Print first so the console output does not depend on the file
        // being writable.
        for (int i = 0; i < hasil.length; i++) {
            teks[i] = formatBaris(hasil[i], df);
            System.out.println(teks[i]);
        }
        try (PrintWriter out = new PrintWriter(new BufferedWriter(
                new FileWriter(namaFile, false)))) {
            for (String s : teks) {
                out.println(s);
            }
            if (out.checkError()) {
                System.out.println("Gagal menulis ke file " + namaFile);
            }
        } catch (IOException e) {
            System.out.println("Gagal menulis ke file " + namaFile);
        }
    }

    /**
     * Applies min-max normalisation and stores the result in
     * {@code minMax.txt}.
     *
     * @param data rectangular table whose last column is the class label
     */
    public static void minMax(float[][] data) {
        simpanHasil(hitungMinMax(data), "#.##", "minMax.txt");
    }

    /**
     * Applies z-score normalisation and stores the result in
     * {@code zscore.txt}.
     *
     * @param data rectangular table whose last column is the class label
     */
    public static void zScore(float[][] data) {
        simpanHasil(hitungZScore(data), "#.##", "zscore.txt");
    }

    /**
     * Applies decimal-scaling normalisation and stores the result in
     * {@code Dscaling.txt}.
     *
     * @param data rectangular table whose last column is the class label
     */
    public static void decimalScaling(float[][] data) {
        simpanHasil(hitungDecimalScaling(data), "#.###", "Dscaling.txt");
    }

    /**
     * Applies sigmoidal normalisation and stores the result in
     * {@code sigmoidal.txt}.
     *
     * @param data rectangular table whose last column is the class label
     */
    public static void sigmoidal(float[][] data) {
        simpanHasil(hitungSigmoidal(data), "#.##", "sigmoidal.txt");
    }

    /**
     * Applies softmax normalisation and stores the result in
     * {@code softmax.txt}.
     *
     * @param data rectangular table whose last column is the class label
     */
    public static void softmax(float[][] data) {
        simpanHasil(hitungSoftmax(data), "#.##", "softmax.txt");
    }

    /**
     * Reads a whole text file into a string, normalising every line ending to
     * {@code '\n'}.
     *
     * @param namaFile path of the file to read
     * @return the file content, or whatever was read before an I/O error
     *         occurred (an empty string if the file could not be opened)
     */
    public static String bacaFile(String namaFile) {
        // StringBuilder avoids the quadratic cost of repeated concatenation.
        StringBuilder stringHasil = new StringBuilder();
        try (BufferedReader br = new BufferedReader(new FileReader(namaFile))) {
            String sCurrentLine;
            while ((sCurrentLine = br.readLine()) != null) {
                stringHasil.append(sCurrentLine).append("\n");
            }
        } catch (IOException e) {
            System.out.println("Gagal membaca file " + namaFile);
        }
        return stringHasil.toString();
    }
}

video untuk Demo Normalisasi Data Mining

3 komentar

This code is not writing in file..

Reply

this code writing in file .txt but not replacing existing data.
if you running 2 times then the function writing again in below not replacing


here the function

/**
 * Appends one line of text to a file, creating the file if necessary.
 *
 * @param teks     the line to write (a line terminator is added)
 * @param namaFile path of the file to append to
 */
public static void tulisFile(String teks, String namaFile) {
    // The second FileWriter argument (true) selects append mode, which is why
    // running the program twice adds the output below the existing content.
    // try-with-resources closes the writer even if writing fails.
    try (PrintWriter out = new PrintWriter(new BufferedWriter(
            new FileWriter(namaFile, true)))) {
        out.println(teks);
        // PrintWriter never throws on write; failures are only visible
        // through checkError(), which also flushes the stream.
        if (out.checkError()) {
            System.out.println("Gagal menulis ke file " + namaFile);
        }
    } catch (IOException e) {
        System.out.println("Gagal menulis ke file " + namaFile);
    }
}

Reply

Bang x dalam metode softmax cara menentukannya gimana ya ?

Reply

Posting Komentar