R class 5 - Data Visualization
DESCRIPTION
RSVP for our 5-day intensive R beginner classes at www.nycdatascience.com
TRANSCRIPT
Data Visualization http://nycdatascience.com/part4_en/
1 of 98
Data Visualization http://nycdatascience.com/part4_en/
2 of 98
Data Visualization http://nycdatascience.com/part4_en/
3 of 98
# Read the Anscombe data; the first line of the file holds the column names.
# `header = TRUE` is spelled out because `T` is an ordinary, reassignable
# variable and the bare positional flag is easy to misread.
data <- read.table("data/anscombe.txt", header = TRUE)
# Drop the first column, keeping only the x1..x4 / y1..y4 columns.
data <- data[, -1]
head(data)
x1 x2 x3 x4 y1 y2 y3 y4
1 10 10 10 8 8.04 9.14 7.46 6.58
2 8 8 8 8 6.95 8.14 6.77 5.76
3 13 13 13 8 7.58 8.74 12.74 7.71
4 9 9 9 8 8.81 8.77 7.11 8.84
5 11 11 11 8 8.33 9.26 7.81 8.47
6 14 14 14 8 9.96 8.10 8.84 7.04
Data Visualization http://nycdatascience.com/part4_en/
4 of 98
# Mean of every column of `data` (the four x and four y series).
colMeans(data)
x1 x2 x3 x4 y1 y2 y3 y4
9.0 9.0 9.0 9.0 7.5 7.5 7.5 7.5
# Correlation between each x column (1-4) and its paired y column (5-8).
# vapply() guarantees exactly one numeric per pair, whereas sapply() would
# silently change its return type on unexpected input.
vapply(1:4, function(i) cor(data[, i], data[, i + 4]), numeric(1))
[1] 0.816 0.816 0.816 0.817
Data Visualization http://nycdatascience.com/part4_en/
5 of 98
Data Visualization http://nycdatascience.com/part4_en/
6 of 98
Data Visualization http://nycdatascience.com/part4_en/
7 of 98
Data Visualization http://nycdatascience.com/part4_en/
8 of 98
Data Visualization http://nycdatascience.com/part4_en/
9 of 98
Data Visualization http://nycdatascience.com/part4_en/
10 of 98
# Scatter plot of dist against speed from the built-in `cars` data set.
plot(cars$dist ~ cars$speed)
Data Visualization http://nycdatascience.com/part4_en/
11 of 98
# Line plot (type "l") of cars$dist against its row index.
plot(cars$dist, type = "l")
Data Visualization http://nycdatascience.com/part4_en/
12 of 98
# Vertical-line plot (type "h") of cars$dist against its row index.
plot(cars$dist, type = "h")
Data Visualization http://nycdatascience.com/part4_en/
13 of 98
# Histogram of cars$dist using the default binning.
hist(cars$dist)
Data Visualization http://nycdatascience.com/part4_en/
14 of 98
library(lattice)
# Draw 50 values from 1..3 with replacement, then chart how often each value
# occurs. `TRUE` is written out because `T` can be reassigned.
num <- sample(1:3, size = 50, replace = TRUE)
barchart(table(num))
Data Visualization http://nycdatascience.com/part4_en/
15 of 98
# Quantile-quantile plot (lattice) of 100 random standard-normal draws.
qqmath(rnorm(100))
Data Visualization http://nycdatascience.com/part4_en/
16 of 98
# Strip plot of Sepal.Length, one panel per Species, stacked in a single
# column of three rows.
stripplot(
  ~ Sepal.Length | Species,
  data = iris,
  layout = c(1, 3)
)
Data Visualization http://nycdatascience.com/part4_en/
17 of 98
# Density curves of Sepal.Length, one curve per Species in a single panel;
# the individual observations are not drawn.
densityplot(
  ~ Sepal.Length,
  groups = Species,
  data = iris,
  plot.points = FALSE
)
Data Visualization http://nycdatascience.com/part4_en/
18 of 98
# Box-and-whisker plot of Sepal.Length for each Species.
bwplot(Species ~ Sepal.Length, data = iris)
Data Visualization http://nycdatascience.com/part4_en/
19 of 98
# Scatter plot of Sepal.Width against Sepal.Length, grouped by Species.
xyplot(
  Sepal.Width ~ Sepal.Length,
  groups = Species,
  data = iris
)
Data Visualization http://nycdatascience.com/part4_en/
20 of 98
# Scatter-plot matrix of the first four columns of iris.
splom(iris[1:4])
Data Visualization http://nycdatascience.com/part4_en/
21 of 98
# Histogram of Sepal.Length, one panel per Species, stacked in a single
# column of three rows.
histogram(
  ~ Sepal.Length | Species,
  data = iris,
  layout = c(1, 3)
)
Data Visualization http://nycdatascience.com/part4_en/
22 of 98
library(plyr)
#' Example surface used for the 3-D wireframe demo.
#'
#' @param x Numeric x coordinate(s).
#' @param y Numeric y coordinate(s).
#' @return sin(x^2/2 - y^2/4) * cos(2*x - exp(y)), computed elementwise.
func3d <- function(x, y) {
  wave <- sin(x^2 / 2 - y^2 / 4)
  damp <- cos(2 * x - exp(y))
  wave * damp
}
# Evaluate func3d on a 30 x 30 grid covering [0, 2] in both x and y.
# `length.out` is spelled out; the original `length =` only worked through
# partial argument matching.
vec1 <- vec2 <- seq(0, 2, length.out = 30)
para <- expand.grid(x = vec1, y = vec2)
# mdply() calls func3d once per row of `para`, using the columns as arguments.
result6 <- mdply(.data = para, .fun = func3d)
Data Visualization http://nycdatascience.com/part4_en/
23 of 98
library(lattice)
# 3-D surface of V1 over the x-y grid in result6: plain tick labels instead of
# axis arrows, a colour-draped surface, and no colour key.
# `FALSE` is written out because `F` can be reassigned.
wireframe(
  V1 ~ x * y,
  data = result6,
  scales = list(arrows = FALSE),
  drape = TRUE,
  colorkey = FALSE
)
Data Visualization http://nycdatascience.com/part4_en/
24 of 98
library(ggplot2)
# Scatter plot of hwy against cty from mpg, built one layer at a time.
p <- ggplot(data = mpg, mapping = aes(x = cty, y = hwy))
p <- p + geom_point()
print(p)
Data Visualization http://nycdatascience.com/part4_en/
25 of 98
# Print a text description of the plot object: its data, aesthetic mapping,
# faceting and layers.
summary(p)
data: manufacturer, model, displ, year, cyl, trans, drv, cty, hwy, fl, class [234x11]
mapping: x = cty, y = hwy
faceting: facet_null()
-----------------------------------
geom_point: na.rm = FALSE
stat_identity:
position_identity: (width = NULL, height = NULL)
Data Visualization http://nycdatascience.com/part4_en/
26 of 98
# hwy vs cty scatter plot with point colour mapped to the year (as a factor).
p <- ggplot(
  data = mpg,
  mapping = aes(x = cty, y = hwy, colour = factor(year))
) +
  geom_point()
print(p)
Data Visualization http://nycdatascience.com/part4_en/
27 of 98
# Smoothed hwy ~ cty trend; colour is mapped at plot level, so the smoother
# inherits the factor(year) colour mapping.
p <- ggplot(
  data = mpg,
  mapping = aes(x = cty, y = hwy, colour = factor(year))
) +
  geom_smooth()
print(p)
Data Visualization http://nycdatascience.com/part4_en/
28 of 98
# Points coloured by year plus one smoother: colour is mapped only inside
# geom_point(), so geom_smooth() does not inherit it.
p <- ggplot(data = mpg, mapping = aes(x = cty, y = hwy))
p <- p + geom_point(aes(colour = factor(year)))
p <- p + geom_smooth()
Data Visualization http://nycdatascience.com/part4_en/
29 of 98
Data Visualization http://nycdatascience.com/part4_en/
30 of 98
# As before, but with the two year colours chosen by hand.
p <- ggplot(data = mpg, mapping = aes(x = cty, y = hwy))
p <- p + geom_point(aes(colour = factor(year)))
p <- p + geom_smooth()
p <- p + scale_color_manual(values = c("blue2", "red4"))
Data Visualization http://nycdatascience.com/part4_en/
31 of 98
Data Visualization http://nycdatascience.com/part4_en/
32 of 98
# Manually coloured points and smoother, split into one facet per year
# arranged in a single column.
p <- ggplot(data = mpg, mapping = aes(x = cty, y = hwy))
p <- p + geom_point(aes(colour = factor(year)))
p <- p + geom_smooth()
p <- p + scale_color_manual(values = c("blue2", "red4"))
p <- p + facet_wrap(~ year, ncol = 1)
Data Visualization http://nycdatascience.com/part4_en/
33 of 98
Data Visualization http://nycdatascience.com/part4_en/
34 of 98
# Bubble chart of hwy vs cty: colour shows vehicle class, point size shows
# displacement, points are jittered and semi-transparent, one facet per year.
# The title is set through labs(); opts() has been removed from ggplot2 and
# calling it raises an error.
p <- ggplot(data = mpg, mapping = aes(x = cty, y = hwy)) +
  geom_point(
    aes(colour = class, size = displ),
    alpha = 0.5, position = "jitter"
  ) +
  geom_smooth() +
  scale_size_continuous(range = c(4, 10)) +
  facet_wrap(~ year, ncol = 1) +
  labs(
    title = "Vehicle model and fuel consumption",
    y = "Highway miles per gallon",
    x = "Urban miles per gallon",
    size = "Displacement",
    colour = "Model"
  )
Data Visualization http://nycdatascience.com/part4_en/
35 of 98
Data Visualization http://nycdatascience.com/part4_en/
36 of 98
# Jittered, semi-transparent points coloured by year and sized by displ,
# with a smoother, hand-picked year colours and a 4-10 point-size range.
p <- ggplot(data = mpg, mapping = aes(x = cty, y = hwy))
p <- p + geom_point(
  aes(colour = factor(year), size = displ),
  alpha = 0.5,
  position = "jitter"
)
p <- p + stat_smooth()
p <- p + scale_color_manual(values = c("steelblue", "red4"))
p <- p + scale_size_continuous(range = c(4, 10))
Data Visualization http://nycdatascience.com/part4_en/
37 of 98
Data Visualization http://nycdatascience.com/part4_en/
38 of 98
library(ggplot2)
# Histogram of Sepal.Length with ggplot2's default binning.
p <- ggplot(data = iris, aes(x = Sepal.Length))
p <- p + geom_histogram()
print(p)
Data Visualization http://nycdatascience.com/part4_en/
39 of 98
# Histogram of Sepal.Length with explicit bin width and bar colours.
p <- ggplot(iris, aes(x = Sepal.Length))
p <- p + geom_histogram(
  binwidth = 0.1,    # width of each bin
  fill = "skyblue",  # bar fill colour
  colour = "black"   # bar outline colour
)
Data Visualization http://nycdatascience.com/part4_en/
40 of 98
Data Visualization http://nycdatascience.com/part4_en/
41 of 98
# Density-scaled histogram of Sepal.Length overlaid with a dashed kernel
# density curve (bandwidth adjustment 2).
p <- ggplot(iris, aes(x = Sepal.Length))
p <- p + geom_histogram(
  aes(y = ..density..),
  fill = "skyblue",
  color = "black"
)
p <- p + geom_density(color = "black", linetype = 2, adjust = 2)
Data Visualization http://nycdatascience.com/part4_en/
42 of 98
Data Visualization http://nycdatascience.com/part4_en/
43 of 98
# Density-scaled histogram with three kernel density curves that differ only
# in bandwidth adjustment (0.5, 1, 2) and line type (1, 2, 3).
p <- ggplot(iris, aes(x = Sepal.Length))
p <- p + geom_histogram(
  aes(y = ..density..),  # y is the density, not the raw count
  fill = "gray60",
  color = "gray"
)
p <- p + geom_density(color = "black", linetype = 1, adjust = 0.5)
p <- p + geom_density(color = "black", linetype = 2, adjust = 1)
p <- p + geom_density(color = "black", linetype = 3, adjust = 2)
Data Visualization http://nycdatascience.com/part4_en/
44 of 98
Data Visualization http://nycdatascience.com/part4_en/
45 of 98
p <- ggplot(iris,aes(x=Sepal.Length,fill=Species)) + geom_density(alpha=0.5,color='gra
print(p)
Data Visualization http://nycdatascience.com/part4_en/
46 of 98
# Box plot of Sepal.Length per Species, filled by Species.
p <- ggplot(iris, aes(x = Species, y = Sepal.Length, fill = Species))
p <- p + geom_boxplot()
print(p)
Data Visualization http://nycdatascience.com/part4_en/
47 of 98
# Violin plot of Sepal.Length per Species, filled by Species.
p <- ggplot(iris, aes(x = Species, y = Sepal.Length, fill = Species))
p <- p + geom_violin()
print(p)
Data Visualization http://nycdatascience.com/part4_en/
48 of 98
# Grey, semi-transparent violins with a centred dot plot binned along the
# y axis on top; the dots keep the per-Species fill.
p <- ggplot(iris, aes(x = Species, y = Sepal.Length, fill = Species))
p <- p + geom_violin(fill = "gray", alpha = 0.5)
p <- p + geom_dotplot(binaxis = "y", stackdir = "center")
print(p)
Data Visualization http://nycdatascience.com/part4_en/
49 of 98
Data Visualization http://nycdatascience.com/part4_en/
50 of 98
# Bar chart counting the rows of mpg in each vehicle class.
p <- ggplot(mpg, aes(x = class))
p <- p + geom_bar()
print(p)
Data Visualization http://nycdatascience.com/part4_en/
51 of 98
# Convert year to a factor so it can be used as a discrete fill, then draw
# bars per class split by year, outlined in black.
mpg$year <- factor(mpg$year)
p <- ggplot(mpg, aes(x = class, fill = year))
p <- p + geom_bar(color = "black")
Data Visualization http://nycdatascience.com/part4_en/
52 of 98
Data Visualization http://nycdatascience.com/part4_en/
53 of 98
# Same class-by-year bar chart with the year bars placed side by side
# (dodged) rather than stacked.
p <- ggplot(mpg, aes(x = class, fill = year))
p <- p + geom_bar(color = "black", position = position_dodge())
Data Visualization http://nycdatascience.com/part4_en/
54 of 98
Data Visualization http://nycdatascience.com/part4_en/
55 of 98
# Pie chart: a single full-width stacked bar of class counts, with the
# y axis wrapped around a circle.
p <- ggplot(mpg, aes(x = factor(1), fill = factor(class)))
p <- p + geom_bar(width = 1)
p <- p + coord_polar(theta = "y")
Data Visualization http://nycdatascience.com/part4_en/
56 of 98
Data Visualization http://nycdatascience.com/part4_en/
57 of 98
set.seed(1)
# Randomly generate 100 wind directions and divide them into 16 intervals.
dir <- cut_interval(runif(100, 0, 360), n = 16)
# Randomly generate 100 wind speeds and divide them into 4 intensities.
mag <- cut_interval(rgamma(100, 15), 4)
# Named `wind` rather than `sample` so the data frame does not shadow
# base::sample().
wind <- data.frame(dir = dir, mag = mag)
# Map wind direction to the x-axis, frequency to the y-axis and speed to the
# fill colour, then transform the bars to polar coordinates.
p <- ggplot(wind, aes(x = dir, fill = mag)) +
  geom_bar() +
  coord_polar()
Data Visualization http://nycdatascience.com/part4_en/
58 of 98
Data Visualization http://nycdatascience.com/part4_en/
59 of 98
Data Visualization http://nycdatascience.com/part4_en/
60 of 98
Data Visualization http://nycdatascience.com/part4_en/
61 of 98
# Read the CSV with its header row. `header = TRUE` is spelled out instead of
# the reassignable positional `T`.
data <- read.csv("data/soft_impact.csv", header = TRUE)
library(reshape2)
# Reshape to long form, keeping Year as the identifier. `id.vars` is written
# in full; the original `id =` only worked through partial argument matching.
data.melt <- melt(data, id.vars = "Year")
# Stacked area chart with each Year's values rescaled to proportions
# (position_fill), one filled band per variable.
p <- ggplot(
  data.melt,
  aes(x = Year, y = value, group = variable, fill = variable)
) +
  geom_area(color = "black", size = 0.3, position = position_fill()) +
  scale_fill_brewer()
Data Visualization http://nycdatascience.com/part4_en/
62 of 98
Data Visualization http://nycdatascience.com/part4_en/
63 of 98
Data Visualization http://nycdatascience.com/part4_en/
64 of 98
# Plain scatter plot of hwy against cty.
p <- ggplot(data = mpg, aes(x = cty, y = hwy))
p <- p + geom_point()
print(p)
Data Visualization http://nycdatascience.com/part4_en/
65 of 98
# Make year a factor, then colour the points by year.
mpg$year <- factor(mpg$year)
p <- ggplot(data = mpg, aes(x = cty, y = hwy))
p <- p + geom_point(aes(color = year))
print(p)
Data Visualization http://nycdatascience.com/part4_en/
66 of 98
# Make year a factor, then map it to both point colour and point shape.
mpg$year <- factor(mpg$year)
p <- ggplot(data = mpg, aes(x = cty, y = hwy))
p <- p + geom_point(aes(color = year, shape = year))
print(p)
Data Visualization http://nycdatascience.com/part4_en/
67 of 98
p <- ggplot(data=mpg,aes(x=cty,y=hwy)) + geom_point(aes(color=year),alpha=0.5,position
print(p)
Data Visualization http://nycdatascience.com/part4_en/
68 of 98
# Jittered, semi-transparent points coloured by year, with a linear-model
# fit line on top.
p <- ggplot(data = mpg, aes(x = cty, y = hwy))
p <- p + geom_point(aes(color = year), alpha = 0.5, position = "jitter")
p <- p + geom_smooth(method = "lm")
print(p)
Data Visualization http://nycdatascience.com/part4_en/
69 of 98
# As above, with point size mapped to displ and the size range set to 4-10.
p <- ggplot(data = mpg, aes(x = cty, y = hwy))
p <- p + geom_point(
  aes(color = year, size = displ),
  alpha = 0.5,
  position = "jitter"
)
p <- p + geom_smooth(method = "lm")
p <- p + scale_size_continuous(range = c(4, 10))
Data Visualization http://nycdatascience.com/part4_en/
70 of 98
Data Visualization http://nycdatascience.com/part4_en/
71 of 98
# Bubble chart coloured by class and sized by displ, with a default smoother
# and one facet per year in a single column.
p <- ggplot(data = mpg, aes(x = cty, y = hwy))
p <- p + geom_point(
  aes(colour = class, size = displ),
  alpha = 0.5,
  position = "jitter"
)
p <- p + geom_smooth()
p <- p + scale_size_continuous(range = c(4, 10))
p <- p + facet_wrap(~ year, ncol = 1)
Data Visualization http://nycdatascience.com/part4_en/
72 of 98
Data Visualization http://nycdatascience.com/part4_en/
73 of 98
Data Visualization http://nycdatascience.com/part4_en/
74 of 98
Data Visualization http://nycdatascience.com/part4_en/
75 of 98
Data Visualization http://nycdatascience.com/part4_en/
76 of 98
Data Visualization http://nycdatascience.com/part4_en/
77 of 98
Data Visualization http://nycdatascience.com/part4_en/
78 of 98
Data Visualization http://nycdatascience.com/part4_en/
79 of 98
Data Visualization http://nycdatascience.com/part4_en/
80 of 98
# One colour per row 440-470 of economics: steelblue where unemploy is below
# 8000, red4 otherwise. The column is indexed as a vector
# (`economics$unemploy[...]`) so the result is a plain character vector even
# when `economics` is a tibble; `economics[rows, "unemploy"]` would then be a
# one-column table and ifelse() would return a matrix.
fillcolor <- ifelse(economics$unemploy[440:470] < 8000, "steelblue", "red4")
# Bars use the unemploy values as heights (stat = "identity") and take their
# fill from fillcolor, which is in the same row order as the plotted data.
p <- ggplot(economics[440:470, ], aes(x = date, y = unemploy)) +
  geom_bar(stat = "identity", fill = fillcolor)
Data Visualization http://nycdatascience.com/part4_en/
81 of 98
Data Visualization http://nycdatascience.com/part4_en/
82 of 98
# Rows 300-470 of economics: a thin vertical line from 0 up to psavert for
# each date, capped by a red point, on the black-and-white theme.
p <- ggplot(economics[300:470, ], aes(x = date, ymax = psavert, ymin = 0))
p <- p + geom_linerange(color = "grey20", size = 0.5)
p <- p + geom_point(aes(y = psavert), color = "red4")
p <- p + theme_bw()
Data Visualization http://nycdatascience.com/part4_en/
83 of 98
Data Visualization http://nycdatascience.com/part4_en/
84 of 98
# Colour per row of economics: steelblue for dates strictly between
# 1980-01-01 and 1990-01-01, red4 otherwise. The bounds are explicit Date
# values rather than strings coerced during the comparison.
fill.color <- ifelse(
  economics$date > as.Date("1980-01-01") &
    economics$date < as.Date("1990-01-01"),
  "steelblue", "red4"
)
# annotate() draws the label once. geom_text() with constant aesthetics would
# draw it once per row of `economics`, stacking identical labels on top of
# each other.
p <- ggplot(economics, aes(x = date, ymax = psavert, ymin = 0)) +
  geom_linerange(color = fill.color, size = 0.9) +
  annotate("text", x = as.Date("1985-01-01"), y = 13, label = "1980'") +
  theme_bw()
Data Visualization http://nycdatascience.com/part4_en/
85 of 98
Data Visualization http://nycdatascience.com/part4_en/
86 of 98
Data Visualization http://nycdatascience.com/part4_en/
87 of 98
Data Visualization http://nycdatascience.com/part4_en/
88 of 98
Data Visualization http://nycdatascience.com/part4_en/
89 of 98
library(ggplot2)
# World outline drawn on an orthographic projection with orientation
# c(30, 120, 0), a graticule every 30 degrees of latitude and 45 degrees of
# longitude, and a single "*" marker at long 114, lat 30.
world <- map_data("world")
worldmap <- ggplot(world, aes(x = long, y = lat, group = group)) +
  geom_path(color = "gray10", size = 0.3) +
  # annotate() draws the marker once; a geom_point() layer with constant x/y
  # would redraw it for every row of `world`.
  annotate("point", x = 114, y = 30, size = 10, shape = "*") +
  scale_y_continuous(breaks = (-2:2) * 30) +
  scale_x_continuous(breaks = (-4:4) * 45) +
  coord_map("ortho", orientation = c(30, 120, 0)) +
  theme(
    panel.grid.major = element_line(colour = "gray50"),
    panel.background = element_rect(fill = "white"),
    axis.text = element_blank(),
    axis.ticks = element_blank(),
    axis.title = element_blank()
  )
Data Visualization http://nycdatascience.com/part4_en/
90 of 98
Data Visualization http://nycdatascience.com/part4_en/
91 of 98
# State outline polygons for the map layer.
map <- map_data("state")
# Copy USArrests with lower-case column names, and add a lower-case `region`
# column taken from the row names for use as the map id.
arrests <- setNames(USArrests, tolower(names(USArrests)))
arrests$region <- tolower(rownames(USArrests))
usmap <- ggplot(data=arrests) +
geom_map(map =map,aes(map_id = region,fill = murder),color='gray40' ) +
expand_limits(x = map$long, y = map$lat) +
scale_fill_continuous(high='red2',low='white') +
theme_bw() +
theme(panel.grid.major = element_blank(),
panel.background = element_blank(),
axis.text=element_blank(),
axis.ticks=element_blank(),
axis.title=element_blank(),
legend.position = c(0.95,0.28),
legend.background=element_rect(fill="white", colour="white"))+ coord_map('mercat
Data Visualization http://nycdatascience.com/part4_en/
92 of 98
Data Visualization http://nycdatascience.com/part4_en/
93 of 98
library(ggmap)
library(XML)
# Download every HTML table on the page, keeping text columns as character.
webpage <- "http://data.earthquake.cn/datashare/globeEarthquake_csn.html"
tables <- readHTMLTable(webpage, stringsAsFactors = FALSE)
# The sixth table is used; columns 1, 3 and 4 are kept and renamed.
# NOTE(review): `lan` is presumably latitude (it is plotted on the y axis
# later in the file) - confirm against the source table.
raw <- tables[[6]]
data <- setNames(raw[, c(1, 3, 4)], c("date", "lan", "lon"))
# Convert the coordinate columns to numeric and the date column to Date.
data$lan <- as.numeric(data$lan)
data$lon <- as.numeric(data$lon)
data$date <- as.Date(data$date, format = "%Y-%m-%d")
# Read the map data from Google with the ggmap package, and mark the previous data on the map.
earthquake <- ggmap(get_googlemap(center = 'china', zoom=4,maptype='terrain'),extent='
geom_point(data=data,aes(x=lon,y=lan),colour = 'red',alpha=0.7)+
theme(legend.position = "none")
Data Visualization http://nycdatascience.com/part4_en/
94 of 98
Data Visualization http://nycdatascience.com/part4_en/
95 of 98
library(googleVis)
library(WDI)
DF <- WDI(country=c("CN","RU","BR","ZA","IN",'DE','AU','CA','FR','IT','JP','MX','GB','
# Motion chart over DF: one entity per country, animated over year, with the
# EN.ATM.CO2E.KT column on the x axis and NY.GDP.MKTP.CD on the y axis.
M <- gvisMotionChart(
  DF,
  idvar = "country",
  timevar = "year",
  xvar = "EN.ATM.CO2E.KT",
  yvar = "NY.GDP.MKTP.CD"
)
plot(M)
Data Visualization http://nycdatascience.com/part4_en/
96 of 98
Data Visualization http://nycdatascience.com/part4_en/
97 of 98
Data Visualization http://nycdatascience.com/part4_en/
98 of 98