我们往往无法预知待分析数据的特征，而且大多数情况下数据组织得并不规整，因此首要任务是对其进行整理。对于本应采用标题大小写（即每个单词首字母大写）的字符串值，如果它们尚未采用这种形式，也需要加以处理。为此，我们可以使用stringr包中的str_to_title函数。
> x1<-sample(c("india","indonesia","russia","canada","united kingdom"),100,replace=TRUE)
> x1
输出结果
[1] "india" "united kingdom" "indonesia" "canada" [5] "canada" "india" "united kingdom" "canada" [9] "indonesia" "united kingdom" "indonesia" "canada" [13] "russia" "indonesia" "canada" "russia" [17] "united kingdom" "russia" "russia" "india" [21] "united kingdom" "india" "india" "united kingdom" [25] "united kingdom" "india" "united kingdom" "canada" [29] "united kingdom" "indonesia" "united kingdom" "canada" [33] "canada" "russia" "united kingdom" "united kingdom" [37] "indonesia" "united kingdom" "united kingdom" "indonesia" [41] "indonesia" "canada" "india" "united kingdom" [45] "india" "india" "india" "indonesia" [49] "united kingdom" "canada" "indonesia" "india" [53] "canada" "canada" "india" "india" [57] "indonesia" "canada" "canada" "indonesia" [61] "united kingdom" "indonesia" "india" "india" [65] "canada" "india" "canada" "canada" [69] "india" "india" "united kingdom" "russia" [73] "united kingdom" "canada" "canada" "indonesia" [77] "indonesia" "india" "india" "india" [81] "india" "india" "india" "india" [85] "united kingdom" "canada" "indonesia" "india" [89] "indonesia" "canada" "indonesia" "russia" [93] "india" "india" "canada" "indonesia" [97] "united kingdom" "indonesia" "united kingdom" "russia"
加载stringr包并将x1中的值转换为标题大小写：
> library(stringr)
> str_to_title(x1)
输出结果
[1] "India" "United Kingdom" "Indonesia" "Canada" [5] "Canada" "India" "United Kingdom" "Canada" [9] "Indonesia" "United Kingdom" "Indonesia" "Canada" [13] "Russia" "Indonesia" "Canada" "Russia" [17] "United Kingdom" "Russia" "Russia" "India" [21] "United Kingdom" "India" "India" "United Kingdom" [25] "United Kingdom" "India" "United Kingdom" "Canada" [29] "United Kingdom" "Indonesia" "United Kingdom" "Canada" [33] "Canada" "Russia" "United Kingdom" "United Kingdom" [37] "Indonesia" "United Kingdom" "United Kingdom" "Indonesia" [41] "Indonesia" "Canada" "India" "United Kingdom" [45] "India" "India" "India" "Indonesia" [49] "United Kingdom" "Canada" "Indonesia" "India" [53] "Canada" "Canada" "India" "India" [57] "Indonesia" "Canada" "Canada" "Indonesia" [61] "United Kingdom" "Indonesia" "India" "India" [65] "Canada" "India" "Canada" "Canada" [69] "India" "India" "United Kingdom" "Russia" [73] "United Kingdom" "Canada" "Canada" "Indonesia" [77] "Indonesia" "India" "India" "India" [81] "India" "India" "India" "India" [85] "United Kingdom" "Canada" "Indonesia" "India" [89] "Indonesia" "Canada" "Indonesia" "Russia" [93] "India" "India" "Canada" "Indonesia" [97] "United Kingdom" "Indonesia" "United Kingdom" "Russia"
> x2<-sample(c("hot","cold","mild cold","mild hot"),100,replace=TRUE)
> x2
输出结果
[1] "mild cold" "hot" "cold" "mild hot" "mild cold" "mild cold" [7] "mild hot" "hot" "mild hot" "hot" "mild hot" "mild hot" [13] "cold" "mild hot" "mild hot" "mild cold" "cold" "cold" [19] "mild hot" "mild cold" "hot" "cold" "mild cold" "cold" [25] "cold" "cold" "mild hot" "mild cold" "hot" "hot" [31] "cold" "cold" "hot" "cold" "hot" "mild cold" [37] "cold" "hot" "mild cold" "mild hot" "hot" "hot" [43] "cold" "mild hot" "mild hot" "mild cold" "cold" "mild hot" [49] "mild hot" "mild hot" "mild hot" "mild cold" "mild cold" "hot" [55] "cold" "cold" "hot" "cold" "hot" "mild cold" [61] "cold" "mild hot" "cold" "hot" "mild hot" "hot" [67] "cold" "mild hot" "mild hot" "cold" "hot" "mild hot" [73] "mild hot" "mild cold" "mild cold" "mild cold" "mild hot" "mild hot" [79] "hot" "cold" "mild hot" "cold" "mild hot" "hot" [85] "cold" "mild cold" "hot" "hot" "hot" "hot" [91] "mild cold" "cold" "cold" "mild hot" "hot" "hot" [97] "mild cold" "mild hot" "hot" "cold"
> str_to_title(x2)
输出结果
[1] "Mild Cold" "Hot" "Cold" "Mild Hot" "Mild Cold" "Mild Cold" [7] "Mild Hot" "Hot" "Mild Hot" "Hot" "Mild Hot" "Mild Hot" [13] "Cold" "Mild Hot" "Mild Hot" "Mild Cold" "Cold" "Cold" [19] "Mild Hot" "Mild Cold" "Hot" "Cold" "Mild Cold" "Cold" [25] "Cold" "Cold" "Mild Hot" "Mild Cold" "Hot" "Hot" [31] "Cold" "Cold" "Hot" "Cold" "Hot" "Mild Cold" [37] "Cold" "Hot" "Mild Cold" "Mild Hot" "Hot" "Hot" [43] "Cold" "Mild Hot" "Mild Hot" "Mild Cold" "Cold" "Mild Hot" [49] "Mild Hot" "Mild Hot" "Mild Hot" "Mild Cold" "Mild Cold" "Hot" [55] "Cold" "Cold" "Hot" "Cold" "Hot" "Mild Cold" [61] "Cold" "Mild Hot" "Cold" "Hot" "Mild Hot" "Hot" [67] "Cold" "Mild Hot" "Mild Hot" "Cold" "Hot" "Mild Hot" [73] "Mild Hot" "Mild Cold" "Mild Cold" "Mild Cold" "Mild Hot" "Mild Hot" [79] "Hot" "Cold" "Mild Hot" "Cold" "Mild Hot" "Hot" [85] "Cold" "Mild Cold" "Hot" "Hot" "Hot" "Hot" [91] "Mild Cold" "Cold" "Cold" "Mild Hot" "Hot" "Hot" [97] "Mild Cold" "Mild Hot" "Hot" "Cold"
> x3<-sample(c("nhooo is an e-learning portal","they have courses","pdf documents","different tutorials"),30,replace=TRUE)
> x3
输出结果
[1] "they have courses" [2] "they have courses" [3] "different tutorials" [4] "pdf documents" [5] "different tutorials" [6] "nhooo is an e-learning portal" [7] "they have courses" [8] "they have courses" [9] "pdf documents" [10] "they have courses" [11] "different tutorials" [12] "pdf documents" [13] "different tutorials" [14] "pdf documents" [15] "they have courses" [16] "nhooo is an e-learning portal" [17] "they have courses" [18] "pdf documents" [19] "pdf documents" [20] "different tutorials" [21] "they have courses" [22] "pdf documents" [23] "they have courses" [24] "nhooo is an e-learning portal" [25] "they have courses" [26] "pdf documents" [27] "they have courses" [28] "pdf documents" [29] "they have courses" [30] "different tutorials"
> str_to_title(x3)
输出结果
[1] "They Have Courses" [2] "They Have Courses" [3] "Different Tutorials" [4] "Pdf Documents" [5] "Different Tutorials" [6] "Nhooo Is An E-Learning Portal" [7] "They Have Courses" [8] "They Have Courses" [9] "Pdf Documents" [10] "They Have Courses" [11] "Different Tutorials" [12] "Pdf Documents" [13] "Different Tutorials" [14] "Pdf Documents" [15] "They Have Courses" [16] "Nhooo Is An E-Learning Portal" [17] "They Have Courses" [18] "Pdf Documents" [19] "Pdf Documents" [20] "Different Tutorials" [21] "They Have Courses" [22] "Pdf Documents" [23] "They Have Courses" [24] "Nhooo Is An E-Learning Portal" [25] "They Have Courses" [26] "Pdf Documents" [27] "They Have Courses" [28] "Pdf Documents" [29] "They Have Courses" [30] "Different Tutorials"