标准化是指对R数据框中的向量或列进行变换,使其均值变为0、标准差变为1(即转换为z分数)。需要注意的是,标准化只改变数据的位置和尺度,不改变分布的形状;只有当原始数据服从正态分布时,标准化后的结果才服从标准正态分布。在R中,可以借助scale()函数轻松完成此操作。请看以下示例,了解其用法。
请看以下数据框:
> set.seed(3665)
> x1<-rnorm(20,1,0.35)
> x2<-rnorm(20,50,1.25)
> x3<-rnorm(20,125,10.27)
> x4<-rpois(20,5)
> x5<-runif(20,1,5)
> x6<-rexp(20,1.35)
> df<-data.frame(x1,x2,x3,x4,x5,x6)
> df
输出结果
x1 x2 x3 x4 x5 x6 1 1.3958185 49.39843 128.5224 3 4.183664 2.33406246 2 1.0467979 48.90103 120.5796 7 3.526731 0.02043217 3 0.9190516 50.74664 110.4765 6 2.145181 0.04268455 4 1.1196425 47.83063 126.3711 9 4.276084 0.87234197 5 1.0033896 51.31879 144.2594 5 3.308073 0.28540083 6 0.7571435 49.92559 109.9660 5 2.349070 0.09613835 7 0.8266129 48.93754 135.5895 3 2.479160 0.15018153 8 1.2786206 50.27384 122.8543 4 4.343062 1.26431542 9 0.8661156 50.36976 122.9482 7 3.517678 0.24045191 10 0.9237285 48.55069 121.6440 4 1.619902 0.72327013 11 0.8191029 49.27937 111.8696 3 4.760655 0.97199973 12 1.2619135 50.91131 129.0021 4 3.355301 1.42184615 13 1.5297983 49.38604 133.4756 1 2.977833 0.50042231 14 0.7858227 47.92899 142.0669 3 3.262058 0.37260602 15 0.5626517 51.22160 107.5586 2 3.194546 0.21176125 16 1.2106700 51.65911 132.4945 3 1.088987 0.78318970 17 1.5351378 48.60769 116.1427 7 3.423079 0.65904040 18 0.4134951 52.09415 125.1567 5 4.309763 0.03658430 19 1.0490230 49.87242 125.6695 6 2.255468 1.32173240 20 0.9521718 50.96409 131.8025 1 4.312514 0.26955446
标准化数据框df的列(每个输出末尾的"scaled:center"和"scaled:scale"属性分别是该列标准化之前的均值和标准差):
> scale(df$x1)
输出结果
[,1] [1,] 1.29357316 [2,] 0.11471258 [3,] -0.31676647 [4,] 0.36075419 [5,] -0.03190435 [6,] -0.86363126 [7,] -0.62898965 [8,] 0.89772243 [9,] -0.49556454 [10,] -0.30096951 [11,] -0.65435564 [12,] 0.84129223 [13,] 1.74610668 [14,] -0.76676371 [15,] -1.52055135 [16,] 0.66821113 [17,] 1.76414130 [18,] -2.02434637 [19,] 0.12222816 [20,] -0.20489902 attr(,"scaled:center") [1] 1.012835 attr(,"scaled:scale") [1] 0.2960661
> scale(df$x2)
输出结果
[,1] [1,] -0.41047444 [2,] -0.81044801 [3,] 0.67365908 [4,] -1.67118308 [5,] 1.13374554 [6,] 0.01343048 [7,] -0.78108246 [8,] 0.29346832 [9,] 0.37059777 [10,] -1.09216479 [11,] -0.50620790 [12,] 0.80607839 [13,] -0.42043493 [14,] -1.59208513 [15,] 1.05558665 [16,] 1.40740638 [17,] -1.04632509 [18,] 1.75723305 [19,] -0.02932018 [20,] 0.84852032 attr(,"scaled:center") [1] 49.90889 attr(,"scaled:scale") [1] 1.243585
> scale(df$x3)
输出结果
[,1] [1,] 0.34992259 [2,] -0.42214208 [3,] -1.40419396 [4,] 0.14081369 [5,] 1.87961161 [6,] -1.45381494 [7,] 1.03687396 [8,] -0.20103605 [9,] -0.19190685 [10,] -0.31867565 [11,] -1.26878469 [12,] 0.39654942 [13,] 0.83139424 [14,] 1.66649197 [15,] -1.68782930 [16,] 0.73602324 [17,] -0.85342655 [18,] 0.02276361 [19,] 0.07260960 [20,] 0.66875614 attr(,"scaled:center") [1] 124.9225 attr(,"scaled:scale") [1] 10.28772
> scale(df$x4)
输出结果
[,1] [1,] -0.6550055 [2,] 1.2164389 [3,] 0.7485778 [4,] 2.1521611 [5,] 0.2807167 [6,] 0.2807167 [7,] -0.6550055 [8,] -0.1871444 [9,] 1.2164389 [10,] -0.1871444 [11,] -0.6550055 [12,] -0.1871444 [13,] -1.5907277 [14,] -0.6550055 [15,] -1.1228666 [16,] -0.6550055 [17,] 1.2164389 [18,] 0.2807167 [19,] 0.7485778 [20,] -1.5907277 attr(,"scaled:center") [1] 4.4 attr(,"scaled:scale") [1] 2.137387
> scale(df$x5)
输出结果
[,1] [1,] 0.95270619 [2,] 0.29336256 [3,] -1.09325500 [4,] 1.04546450 [5,] 0.07390313 [6,] -0.88861840 [7,] -0.75805159 [8,] 1.11268817 [9,] 0.28427648 [10,] -1.62046133 [11,] 1.53181336 [12,] 0.12130380 [13,] -0.25754850 [14,] 0.02771942 [15,] -0.04004115 [16,] -2.15332455 [17,] 0.18933035 [18,] 1.07926704 [19,] -0.98256346 [20,] 1.08202896 attr(,"scaled:center") [1] 3.23444 attr(,"scaled:scale") [1] 0.9963449
> scale(df$x6)
输出结果
[,1] [1,] 2.8372424 [2,] -1.0124395 [3,] -0.9754135 [4,] 0.4050652 [5,] -0.5715544 [6,] -0.8864709 [7,] -0.7965477 [8,] 1.0572753 [9,] -0.6463456 [10,] 0.1570224 [11,] 0.5708871 [12,] 1.3193929 [13,] -0.2137772 [14,] -0.4264525 [15,] -0.6940844 [16,] 0.2567235 [17,] 0.0501497 [18,] -0.9855638 [19,] 1.1528122 [20,] -0.5979214 attr(,"scaled:center") [1] 0.6289008 attr(,"scaled:scale") [1] 0.6009926