R - Extract subset of Object


  • Get subset from Vector
    • [] returns an object of the same class as the object being subset (e.g. subsetting a vector returns a vector)
    •  > x <- 1:10  
       > x[x>5]  # extract the subset of elements greater than 5  
       [1] 6 7 8 9 10  
       > x[2]  
       [1] 2  
       > x[3]  
       [1] 3  
      
    • Get the 1st, 3rd, and 5th elements of x
    •  > x[c(1,3,5)]  
       [1] 1 3 5  
      
    • What if we want all elements except the 1st, 3rd, and 5th?
    •  > x[c(-1,-3,-5)]  # or simply use x[-c(1,3,5)]
       [1] 2 4 6 7 8 9 10  
      
  • Get subset from Data Frame
    • Examples
    •  > x  
        var1 var2 var3  
       1  3  10  11  
       2  2  6  15  
       3  4  8  12  
       4  5  7  14  
       5  1  9  13  
       > x[,1] # get first column data  
       [1] 3 2 4 5 1  
       > x[,"var3"] # get var3 column data  
       [1] 11 15 12 14 13  
       > x[2:3,"var3"] # get the 2nd and 3rd rows of column var3  
       [1] 15 12  
       > x[(x$var2 >8 & x$var3 <15),] # get all columns for the rows where var2 > 8 and var3 < 15  
        var1 var2 var3  
       1  3  10  11  
       5  1  9  13  
      
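    • Example for dealing with NA values (note: the rows of x appear in shuffled order here, so positions 1 and 4 are the rows labelled 3 and 4)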
    •  > x$var1[c(1,4)] = NA  
       > x  
        var1 var2 var3  
       3  NA  8  12  
       5  1  9  13  
       2  2  6  15  
       4  NA  7  14  
       1  3  10  11  
       > x[x$var1>1,]  # rows where var1 is NA show up as all-NA rows, because the comparison yields NA
          var1 var2 var3  
       NA   NA  NA  NA  
       2    2  6  15  
       NA.1  NA  NA  NA  
       1    3  10  11  
       > x[which(x$var1>1),]  # which() returns only the indices that are TRUE, so the NA rows are dropped
        var1 var2 var3  
       2  2  6  15  
       1  3  10  11 
    • Example of sorting
    •  > sort(x$var1)  
       [1] 1 2 3  
       > sort(x$var1, decreasing=TRUE)  
       [1] 3 2 1  
       > sort(x$var1, na.last = TRUE)  
       [1] 1 2 3 NA NA  
      
    • Example of reordering a data frame
    •  > x[order(x$var2),]  
        var1 var2 var3  
       2  2  6  15  
       4  NA  7  14  
       3  NA  8  12  
       5  1  9  13  
       1  3  10  11  
       > library(plyr)  # use plyr package
       > arrange(x,var3)  
        var1 var2 var3  
       1  3  10  11  
       2  NA  8  12  
       3  1  9  13  
       4  NA  7  14  
       5  2  6  15  
       > arrange(x,desc(var3)  
       + )  
        var1 var2 var3  
       1  2  6  15  
       2  NA  7  14  
       3  1  9  13  
       4  NA  8  12  
       5  3  10  11  
      
    • Example of adding column
    •  > x$var4 <- rnorm(5)  
       > x  
        var1 var2 var3    var4  
       3  NA  8  12 0.01046482  
       5  1  9  13 0.06659688  
       2  2  6  15 0.91059308  
       4  NA  7  14 1.26587778  
       1  3  10  11 -1.46815620  
       > y <- cbind(x, rnorm(5))  
       > y  
        var1 var2 var3    var4  rnorm(5)  
       3  NA  8  12 0.01046482 0.4359661  
       5  1  9  13 0.06659688 -0.7918177  
       2  2  6  15 0.91059308 -0.0485241  
       4  NA  7  14 1.26587778 -0.5299538  
       1  3  10  11 -1.46815620 0.1181559  
      
  • Get subset from Matrix
    • Example of getting a single element and a single row from a matrix
    •  > x <- matrix(1:6,2,3)  
       > x  
          [,1] [,2] [,3]  
       [1,]  1  3  5  
       [2,]  2  4  6  
       > x[1,2]  # get element in first row, second column
       [1] 3  
       > x[1,]  # get first row
       [1] 1 3 5  
      
    • Subsetting a matrix returns a vector by default; to get a matrix back, pass the argument drop = FALSE
    •  > x[1,2,drop = FALSE]  
          [,1]  
       [1,]  3  
      
  • Get subset from List
    • Example for getting subset from List
    •  > x <- list(male=c(1:4),female=c(5:10))  # create a list
       > x  # print
       $male  
       [1] 1 2 3 4  
       $female  
       [1] 5 6 7 8 9 10  
       > x[1]  # single bracket returns a list
       $male  
       [1] 1 2 3 4  
       > x[[1]]  # double bracket returns the element itself (here a vector)
       [1] 1 2 3 4  
       > x$male  # use the element name to return the vector
       [1] 1 2 3 4  
      
    • Use a variable as the index to get a subset (works with [[ ]], not with $)
    •  > var <- "male"  
       > x[[var]]  
       [1] 1 2 3 4  
      
  • Deal with NA values
    • Get the complete cases (positions with no NA) across two vectors
    •  > x <- c("male","female",NA,"male")  
       > y <- c("female",NA,NA,"female")  
       > completeflag <- complete.cases(x,y)  # TRUE where neither x nor y is NA: TRUE, FALSE, FALSE, TRUE
       > x[completeflag]  
       [1] "male" "male"  
      
    • Remove NA from vector
    •  > x <- c(1:10,NA)  
       > x  
        [1] 1 2 3 4 5 6 7 8 9 10 NA  
       > naflag <- is.na(x)  
       > x[naflag]  
       [1] NA  
       > x[!naflag]  
        [1] 1 2 3 4 5 6 7 8 9 10  
      
  • Sample data from vector(s)
    • Use the sample() function to draw a given number of random samples from a vector
    •  > x <- c(1:10)  
       > y <- c("a","b","c","d")  
       > sample(c(x,y),4)  
       [1] "3" "10" "2" "c"   
      
  • How to count the number of NA values?
    • R treats TRUE as 1 and FALSE as 0, so summing the logical vector returned by is.na() gives the number of NA values.
    •  > x <- rep(NA, 10)  
       > y <- c(1:10)  
       > z <- sample(c(x,y),5)  
       > z  
       [1] 9 NA 8 NA NA  
       > naflag <- is.na(z)  
       > naflag  
       [1] FALSE TRUE FALSE TRUE TRUE  
       > sum(naflag)  
       [1] 3