Translate

Tuesday, 29 November 2016

data.table

The data.table package allows R to handle very large data sets (typically tens or hundreds of millions of rows) efficiently. This includes loading/importing the data and aggregating the data.
 
To import a flat file with a very large number of rows, data.table provides the fread() function.
 
library(data.table)
# Load the comma-separated file, taking the column names from its first row.
Data <- fread(
  input = "data.csv",
  header = TRUE,
  sep = ","
)

  
To aggregate the data set: 
  
# Mean sepal length for each species.
Agg <- as.data.table(iris)[,
  .(Avg_Sepal_Length = mean(Sepal.Length)),
  by = Species
]
 
When aggregating multiple columns at the same time:
 
# Mean sepal length and mean petal length for each species.
AggMC <- as.data.table(iris)[,
  .(
    Avg_Sepal_Length = mean(Sepal.Length),
    Avg_Petal_Length = mean(Petal.Length)
  ),
  by = Species
]
 
When aggregating all columns other than the grouping column:
 
# .SD holds every column except the grouping column, so this averages them all
# within each species.
AggAC <- as.data.table(iris)[,
  lapply(.SD, mean),
  by = Species
]
 
   
When aggregating by multiple grouping columns:

# Mean concentration and total uptake for each Plant / Type combination.
AggMCMG <- as.data.table(CO2)[,
  .(
    Avg_Conc = mean(conc),
    Total_Uptake = sum(uptake)
  ),
  by = .(Plant, Type)
]





Tuesday, 25 October 2016

Passing parameters to R script from command line


To pass parameters to the R script when running the script from the command line, commandArgs( ) can be used.

Example: 

Save the below script in a file called 'DateRange.r'

# Build a table of N consecutive dates starting at DATE, each paired with a
# random normal value, and print it.
#
# Usage: Rscript DateRange.r <start date as yyyymmdd> <number of days>

# trailingOnly = TRUE keeps only the arguments written after the script name,
# so their positions do not shift when extra options are passed to Rscript.
Para <- commandArgs(trailingOnly = TRUE)
if (length(Para) < 2) {
  stop("Usage: Rscript DateRange.r <yyyymmdd> <N>", call. = FALSE)
}

DATE <- as.Date(as.character(Para[1]), format = "%Y%m%d")
if (is.na(DATE)) {
  stop("The start date must be given in yyyymmdd format, e.g. 20161005.",
       call. = FALSE)
}

N <- suppressWarnings(as.numeric(Para[2]))
if (is.na(N) || N < 1) {
  stop("The sequence length N must be a positive number.", call. = FALSE)
}

DateRge <- data.frame(
  Date = seq(from = DATE, length.out = N, by = 1),
  Value = rnorm(N)
)

# An assignment is not auto-printed when the script runs under Rscript, so the
# result has to be printed explicitly to appear on the console.
print(DateRge)

Then, run the below command with the parameters inserted at the end


For Windows

If the folder containing Rscript.exe is included in your PATH environment variable, you can simply use 'Rscript' without writing out the full path shown below.

"C:\Program Files\R\R-3.2.3\bin\Rscript.exe" DateRange.r [date in yyyymmdd format (DATE)] [length of sequence (N)] 

"C:\Program Files\R\R-3.2.3\bin\Rscript.exe" DateRange.r 20161005 5
will return:
       Date       Value
 2016-10-05  1.61637011
 2016-10-06 -0.08534756
 2016-10-07 -2.24108808
 2016-10-08  0.05773242
 2016-10-09  0.73725642 


For Linux
 
Similar to Windows, you can use Rscript command

Rscript DateRange.r yyyymmdd N 

Rscript DateRange.r 20161005 5 
        Date      Value
 2016-10-05 -0.7931385
 2016-10-06 -0.4229764
 2016-10-07 -0.3338677
 2016-10-08 -1.0844999








Friday, 20 May 2016

send emails from R through Outlook


This assumes Outlook Application is installed and your account is set up etc...

Also, you may need to restart Outlook after installing the package in R, if you get an error like 'Error: Exception occurred.'.


library(RDCOMClient)

# Connect to Outlook through COM and create a new mail item
# (0 is Outlook's olMailItem item type).
OutApp <- COMCreate("Outlook.Application")
outMail <- OutApp$CreateItem(0)

# Fill in the recipient, subject line and message text.
outMail[["To"]] <- "recipient's email address"
outMail[["subject"]] <- "subject"
outMail[["body"]] <- "body text"

# Hand the message to Outlook for sending.
outMail$Send()



To send emails to multiple recipients, use semicolon (;) to separate email addresses:

# Connect to Outlook through COM and create a new mail item.
OutApp <- COMCreate("Outlook.Application")
outMail <- OutApp$CreateItem(0)

# Several recipients go into the single "To" string, separated by semicolons.
outMail[["To"]] <- "recipient's email address 1; recipient's email address 2"
outMail[["subject"]] <- "subject"
outMail[["body"]] <- "body text"

# Hand the message to Outlook for sending.
outMail$Send()



To send emails with attachment(s):

# Connect to Outlook through COM and create a new mail item.
OutApp <- COMCreate("Outlook.Application")
outMail <- OutApp$CreateItem(0)

outMail[["To"]] <- "recipient's email address"
outMail[["subject"]] <- "subject"
outMail[["body"]] <- "body text"

# Attach a file by its full path, e.g. "C:/Users/Documents/someFile.txt".
# Note the forward slashes (not backslashes) in the path, as you would normally
# write a path in R.
outMail[["Attachments"]]$Add("full path to file")

# Hand the message to Outlook for sending.
outMail$Send()



To embed table within the body of the email:
library(pander) 

# Keep wide tables in one piece instead of splitting them across several blocks.
panderOptions("table.split.table", Inf)

# Connect to Outlook through COM and create a new mail item.
OutApp <- COMCreate("Outlook.Application")
outMail <- OutApp$CreateItem(0)

outMail[["To"]] <- "recipient's email address"
outMail[["subject"]] <- "subject"

# The body is plain text: a greeting, a blank line, an introduction and the
# table rendered as text by pander, joined with line breaks.
outMail[["body"]] <- paste(
  "Hello!",
  "",
  "The below summarises xxx:",
  pandoc.table.return(data.frame(V1 = 1:5, V2 = LETTERS[1:5])),
  sep = "\n"
)

# Hand the message to Outlook for sending.
outMail$Send()



Wednesday, 14 October 2015

radarchart - 'fmsb' package


radarchart() in the 'fmsb' package offers a plotting function that draws radar/spider charts, similar to stars() in the base graphics package.


library(fmsb) 

'zoo' package converts date values to year-month values.
 
library(zoo) 
 
# Monthly sunspot numbers as a one-column data frame.
dat <- as.data.frame(sunspot.month)

# Take the year-month of each observation from the time index of the series
# itself rather than from hard-coded start and end dates, so the column always
# has exactly one entry per row whatever period the dataset covers.
dat$TS <- as.yearmon(as.numeric(time(sunspot.month)))
colnames(dat)[1] <- "sunspot"


The decades and centuries were derived from the year-month field to be used for grouping variables as below.

# Round the calendar year down to the start of its decade and of its century.
dat$decade <- (as.numeric(format(dat$TS, "%Y")) %/% 10) * 10
dat$century <- (as.numeric(format(dat$TS, "%Y")) %/% 100) * 100

# Abbreviated month name as a factor whose levels follow the order in which the
# months first appear in the data, rather than alphabetical order.
dat$month <- factor(
  format(dat$TS, "%b"),
  levels = unique(format(dat$TS, "%b"))
)
  
 
I used 'reshape2' package to rearrange the data to the desired structure while doing the aggregation to obtain mean values.

library(reshape2) 

# One row per century and one column per month, each cell holding the mean
# sunspot value.
agg <- recast(
  data = dat,
  formula = century ~ month,
  measure.var = "sunspot",
  fun.aggregate = mean
)


radarchart() requires the input data to have the max value and min value in the first and second rows respectively.
  
# Overall maximum and minimum of the monthly means, repeated for every month
# column; the leading NA fills the century column.
MX <- c(NA, rep(max(agg[, -1]), times = ncol(agg) - 1))
MN <- c(NA, rep(min(agg[, -1]), times = ncol(agg) - 1))

# Put the max row first and the min row second, followed by the data rows.
agg <- rbind(MX, MN, agg)


To draw a radar chart:

# Plot the month columns only; the first column (century) is dropped.
radarchart(df = agg[, -1])
  



  
  





















To assign colours for different groups and to insert a legend:


# One colour per data row (all rows except the max/min rows), graded from red
# to blue.
COL <- colorRampPalette(c("red", "blue"))(nrow(agg) - 2)

radarchart(agg[, -1], pcol = COL)
legend(
  x = 2,
  y = 1,
  legend = levels(as.factor(agg$century)),
  title = "century",
  col = COL,
  seg.len = 2,
  border = "transparent",
  pch = 16,
  lty = 1
)




To change the line colour in the background:


# cglcol sets the colour of the background grid lines.
radarchart(
  agg[, -1],
  pcol = COL,
  cglcol = "grey80"
)
legend(
  x = 2,
  y = 1,
  legend = levels(as.factor(agg$century)),
  title = "century",
  col = COL,
  seg.len = 2,
  border = "transparent",
  pch = 16,
  lty = 1
)
























To modify number of layers in the background (in this example from 5 to 10):

# seg sets the number of segments (layers) drawn along each axis.
radarchart(
  agg[, -1],
  pcol = COL,
  cglcol = "grey80",
  seg = 10
)
legend(
  x = 2,
  y = 1,
  legend = levels(as.factor(agg$century)),
  title = "century",
  col = COL,
  seg.len = 2,
  border = "transparent",
  pch = 16,
  lty = 1
)
  






















To add a title:

# Same chart as before, now with a title above it.
radarchart(
  agg[, -1],
  pcol = COL,
  cglcol = "grey80",
  seg = 10,
  title = "sun spots"
)
legend(
  x = 2,
  y = 1,
  legend = levels(as.factor(agg$century)),
  title = "century",
  col = COL,
  seg.len = 2,
  border = "transparent",
  pch = 16,
  lty = 1
)























To split the groups into individual radar charts:


# Draw one radar chart per century on a 2 x 2 grid.
# par() returns the previous settings; they are restored after the loop so the
# grid layout does not carry over into later plots.
old_par <- par(mfrow = c(2, 2))

# Rows 1 and 2 hold the axis maximum and minimum, so the data rows start at 3.
# seq_len() yields no iterations (instead of counting backwards) if there are
# no data rows.
for (i in seq_len(nrow(agg) - 2) + 2) {
  radarchart(
    agg[c(1, 2, i), -1],
    pcol = COL[i - 2],
    cglcol = "grey80",
    seg = 10,
    title = paste("Century:", agg$century[i], sep = " ")
  )
}

par(old_par)









































To colour the regions/polygons of the above graphs:


# Draw one radar chart per century on a 2 x 2 grid, shading each polygon.
# par() returns the previous settings; they are restored after the loop so the
# grid layout does not carry over into later plots.
old_par <- par(mfrow = c(2, 2))

# Rows 1 and 2 hold the axis maximum and minimum, so the data rows start at 3.
# seq_len() yields no iterations (instead of counting backwards) if there are
# no data rows.
for (i in seq_len(nrow(agg) - 2) + 2) {
  radarchart(
    agg[c(1, 2, i), -1],
    pcol = COL[i - 2],
    cglcol = "grey80",
    seg = 10,
    title = paste("Century:", agg$century[i], sep = " "),
    # Fill the polygon with hatching lines in the same colour as its outline.
    pdensity = 20,
    pangle = 30,
    pfcol = COL[i - 2]
  )
}

par(old_par)




















































3D Graph - Lattice Package



cloud() function in 'lattice' package provides 3-dimensional visualisation option

library(lattice)

# 3-D scatter plot of three iris measurements, grouped by species.
cloud(
  Sepal.Length ~ Sepal.Width * Petal.Length,
  data = iris,
  groups = iris$Species,
  pch = 16,
  alpha = 0.7,
  cex = 1
)
  






















  
  

  
Adding a title
  
# Same plot with a main title.
cloud(
  Sepal.Length ~ Sepal.Width * Petal.Length,
  data = iris,
  groups = iris$Species,
  pch = 16,
  alpha = 0.7,
  cex = 1,
  main = "Iris Species"
)
 























  


Modify axis  
  
# Replace the default axis arrows with tick marks and labels, and set the label
# size and tick length for each of the three axes.
cloud(
  Sepal.Length ~ Sepal.Width * Petal.Length,
  data = iris,
  groups = iris$Species,
  pch = 16,
  alpha = 0.7,
  cex = 1,
  main = "Iris Species",
  scales = list(
    arrows = FALSE,
    x = list(cex = 0.9, tck = 0.5),
    y = list(cex = 0.9, tck = 0.5),
    z = list(cex = 0.9, tck = 0.5)
  )
)
  






















  
  


Modify frame style  
  
# Colour the axis annotation and the surrounding 3-D box.
cloud(
  Sepal.Length ~ Sepal.Width * Petal.Length,
  data = iris,
  groups = iris$Species,
  pch = 16,
  alpha = 0.7,
  cex = 1,
  main = "Iris Species",
  scales = list(
    arrows = FALSE,
    x = list(cex = 0.9, tck = 0.5, col = "wheat3"),
    y = list(cex = 0.9, tck = 0.5, col = "wheat3"),
    z = list(cex = 0.9, tck = 0.5, col = "wheat3")
  ),
  par.settings = list(
    box.3d = list(col = "wheat1")
  )
)
























  
  
Modify axis labels  
  
# Additionally set the size and colour of the axis labels; the z label is
# rotated by 90 degrees.
cloud(
  Sepal.Length ~ Sepal.Width * Petal.Length,
  data = iris,
  groups = iris$Species,
  pch = 16,
  alpha = 0.7,
  cex = 1,
  main = "Iris Species",
  scales = list(
    arrows = FALSE,
    x = list(cex = 0.9, tck = 0.5, col = "wheat3"),
    y = list(cex = 0.9, tck = 0.5, col = "wheat3"),
    z = list(cex = 0.9, tck = 0.5, col = "wheat3")
  ),
  par.settings = list(
    box.3d = list(col = "wheat1"),
    par.xlab.text = list(cex = 0.8, col = "tan3"),
    par.ylab.text = list(cex = 0.8, col = "tan3"),
    par.zlab.text = list(cex = 0.8, col = "tan3", rot = 90)
  )
)
    























      




Modify plotting style  
  
# Additionally give the three species their own point colours.
cloud(
  Sepal.Length ~ Sepal.Width * Petal.Length,
  data = iris,
  groups = iris$Species,
  pch = 16,
  alpha = 0.7,
  cex = 1,
  main = "Iris Species",
  scales = list(
    arrows = FALSE,
    x = list(cex = 0.9, tck = 0.5, col = "wheat3"),
    y = list(cex = 0.9, tck = 0.5, col = "wheat3"),
    z = list(cex = 0.9, tck = 0.5, col = "wheat3")
  ),
  par.settings = list(
    box.3d = list(col = "wheat1"),
    par.xlab.text = list(cex = 0.8, col = "tan3"),
    par.ylab.text = list(cex = 0.8, col = "tan3"),
    par.zlab.text = list(cex = 0.8, col = "tan3", rot = 90)
  ),
  col = c("tan", "goldenrod", "brown")
)
























  



Insert legend/key  
  
# Additionally draw a key to the right of the plot, pairing each point colour
# with its species name.
cloud(
  Sepal.Length ~ Sepal.Width * Petal.Length,
  data = iris,
  groups = iris$Species,
  pch = 16,
  alpha = 0.7,
  cex = 1,
  main = "Iris Species",
  scales = list(
    arrows = FALSE,
    x = list(cex = 0.9, tck = 0.5, col = "wheat3"),
    y = list(cex = 0.9, tck = 0.5, col = "wheat3"),
    z = list(cex = 0.9, tck = 0.5, col = "wheat3")
  ),
  par.settings = list(
    box.3d = list(col = "wheat1"),
    par.xlab.text = list(cex = 0.8, col = "tan3"),
    par.ylab.text = list(cex = 0.8, col = "tan3"),
    par.zlab.text = list(cex = 0.8, col = "tan3", rot = 90)
  ),
  col = c("tan", "goldenrod", "brown"),
  key = list(
    space = "right",
    title = "Species",
    cex.title = 1,
    points = list(
      col = c("tan", "goldenrod", "brown"),
      cex = 0.9,
      pch = 16
    ),
    text = list(lab = levels(iris$Species), cex = 0.9)
  )
)
  






















  
  




Rotating the graph (displayed using 'gridExtra' package for better comparison):
  
library(gridExtra)  
    
- around x-axis  
# Build the styled iris cloud plot for one viewing angle. Only the panel title
# and the `screen` rotation differ between the panels compared below.
#   title: text shown above the panel.
#   view:  list of rotation angles (z, x, y) passed to cloud() as `screen`.
# Returns the lattice plot object without drawing it.
iris_cloud <- function(title, view) {
  cloud(
    Sepal.Length ~ Sepal.Width * Petal.Length,
    data = iris,
    groups = iris$Species,
    pch = 16,
    alpha = 0.7,
    cex = 1,
    main = title,
    scales = list(
      arrows = FALSE,
      x = list(cex = 0.9, tck = 0.5, col = "wheat3"),
      y = list(cex = 0.9, tck = 0.5, col = "wheat3"),
      z = list(cex = 0.9, tck = 0.5, col = "wheat3")
    ),
    par.settings = list(
      box.3d = list(col = "wheat1"),
      par.xlab.text = list(cex = 0.8, col = "tan3"),
      par.ylab.text = list(cex = 0.8, col = "tan3"),
      par.zlab.text = list(cex = 0.8, col = "tan3", rot = 90)
    ),
    col = c("tan", "goldenrod", "brown"),
    screen = view
  )
}

# default
g1 <- iris_cloud("x = -60 (default)", list(z = 40, x = -60, y = 0))

# x rotates graph around x axis - i.e. moves viewing angle up and down
# - e.g. for view from top (x=0)
g2 <- iris_cloud("x = 0", list(z = 40, x = 0, y = 0))

g3 <- iris_cloud("x = -30", list(z = 40, x = -30, y = 0))

# Panels are ordered -60, -30, 0. The overall title is given through `top`,
# which is the argument gridExtra (>= 2.0) uses for it.
grid.arrange(g1, g3, g2, nrow = 1, top = "Rotating Around X-Axis")

  



























  

- around z-axis  
  
# Build the styled iris cloud plot for one viewing angle. Only the panel title
# and the `screen` rotation differ between the panels compared below.
#   title: text shown above the panel.
#   view:  list of rotation angles (z, x, y) passed to cloud() as `screen`.
# Returns the lattice plot object without drawing it.
iris_cloud <- function(title, view) {
  cloud(
    Sepal.Length ~ Sepal.Width * Petal.Length,
    data = iris,
    groups = iris$Species,
    pch = 16,
    alpha = 0.7,
    cex = 1,
    main = title,
    scales = list(
      arrows = FALSE,
      x = list(cex = 0.9, tck = 0.5, col = "wheat3"),
      y = list(cex = 0.9, tck = 0.5, col = "wheat3"),
      z = list(cex = 0.9, tck = 0.5, col = "wheat3")
    ),
    par.settings = list(
      box.3d = list(col = "wheat1"),
      par.xlab.text = list(cex = 0.8, col = "tan3"),
      par.ylab.text = list(cex = 0.8, col = "tan3"),
      par.zlab.text = list(cex = 0.8, col = "tan3", rot = 90)
    ),
    col = c("tan", "goldenrod", "brown"),
    screen = view
  )
}

# z rotates graph around z axis - i.e. moves viewing angle left and right
# - e.g. for view from front (z=0)
g1 <- iris_cloud("z = 40 (default)", list(z = 40, x = -60, y = 0))

g2 <- iris_cloud("z = 20", list(z = 20, x = -60, y = 0))

g3 <- iris_cloud("z = 0", list(z = 0, x = -60, y = 0))

# The overall title is given through `top`, which is the argument gridExtra
# (>= 2.0) uses for it.
grid.arrange(g1, g2, g3, nrow = 1, top = "Rotating Around Z-Axis")
  























  
- around y-axis  
  
# Build the styled iris cloud plot for one viewing angle. Only the panel title
# and the `screen` rotation differ between the panels compared below.
#   title: text shown above the panel.
#   view:  list of rotation angles (z, x, y) passed to cloud() as `screen`.
# Returns the lattice plot object without drawing it.
iris_cloud <- function(title, view) {
  cloud(
    Sepal.Length ~ Sepal.Width * Petal.Length,
    data = iris,
    groups = iris$Species,
    pch = 16,
    alpha = 0.7,
    cex = 1,
    main = title,
    scales = list(
      arrows = FALSE,
      x = list(cex = 0.9, tck = 0.5, col = "wheat3"),
      y = list(cex = 0.9, tck = 0.5, col = "wheat3"),
      z = list(cex = 0.9, tck = 0.5, col = "wheat3")
    ),
    par.settings = list(
      box.3d = list(col = "wheat1"),
      par.xlab.text = list(cex = 0.8, col = "tan3"),
      par.ylab.text = list(cex = 0.8, col = "tan3"),
      par.zlab.text = list(cex = 0.8, col = "tan3", rot = 90)
    ),
    col = c("tan", "goldenrod", "brown"),
    screen = view
  )
}

# y rotates graph around y axis - i.e. moves viewing angle up and down but
# hinges on y-axis
g1 <- iris_cloud("y = 0 (default)", list(z = 40, x = 0, y = 0))

g2 <- iris_cloud("y = 30", list(z = 40, x = 0, y = 30))

g3 <- iris_cloud("y = 60", list(z = 40, x = 0, y = 60))

# The overall title is given through `top`, which is the argument gridExtra
# (>= 2.0) uses for it.
grid.arrange(g1, g2, g3, nrow = 1, top = "Rotating Around Y-Axis")