install.packages("sqldf")
library(sqldf)
sqldf("SELECT
day
, avg(temp) as avg_temp
FROM beaver2
GROUP BY
day;")# day avg_temp#1 307 37.57931#2 308 37.71308#beavers1 和 beavers2 是R base 自带的两个数据集,记录了两种海狸的体温序列
beavers <- sqldf("select * from beaver1
union all
select * from beaver2;")#head(beavers)# day time temp activ#1 346 840 36.33 0#2 346 850 36.34 0#3 346 900 36.35 0#4 346 910 36.42 0#5 346 920 36.55 0#6 346 930 36.69 0
movies <- data.frame(
title=c("The Great Outdoors","Caddyshack","Fletch","Days of Thunder","Crazy Heart"),year=c(1988,1980,1985,1990,2009))
boxoffice <- data.frame(
title=c("The Great Outdoors","Caddyshack","Fletch","Days of Thunder","Top Gun"),
revenue=c(43455230,39846344,59600000,157920733,353816701))
sqldf("SELECT
m.*
, b.revenue
FROM
movies m
INNER JOIN
boxoffice b
ON m.title = b.title;")# title year revenue#1 The Great Outdoors 1988 43455230#2 Caddyshack 1980 39846344#3 Fletch 1985 59600000#4 Days of Thunder 1990 157920733
install.packages("stringr")library(stringr)names(iris)#[1] "Sepal.Length""Sepal.Width""Petal.Length""Petal.Width""Species"names(iris)<- str_replace_all(names(iris),"[.]","_")names(iris)#[1] "Sepal_Length""Sepal_Width""Petal_Length""Petal_Width""Species"
s <- c("Go to Heaven for the climate, Hell for the company.")str_extract_all(s,"[H][a-z]+ ")#[[1]]#[1] "Heaven ""Hell "
每个人(包括我自己)开始的时候都会这样做。你刚在首选的SQL编辑器中写了一个很棒的查询。一切都是完美的—列名都是snake case(译者注:表示单词之间用下划线连接。单词要么全部大写,要么全部小写。),日期有正确的数据类型,最后调试出了"must appear in the GROUP BY clause or be used in an aggregate function"的问题。你现在准备在R中进行一些分析,因此你可以在SQL编辑器中运行查询,将结果复制到csv(或者……xlsx)并读入R,你并不需要这样做!
library(RPostgreSQL)
drv <- dbDriver("PostgreSQL")
db <- dbConnect(drv, dbname="ncaa",
user="YOUR USER NAME", password="YOUR PASSWORD")
q <-"SELECT
*
FROM
game_scores;"data<- dbGetQuery(db, q)
head(data)#id school game_date spread school_score opponent opp_score was_home#1 45111 Boston College 1985-11-16 6.0 21 Syracuse 41 False#2 45112 Boston College 1985-11-02 13.5 12 Penn State 16 False#3 45113 Boston College 1985-10-26 -11.0 17 Cincinnati 24 False#4 45114 Boston College 1985-10-12 -2.0 14 Army 45 False#5 45115 Boston College 1985-09-28 5.0 10 Miami 45 True#6 45116 Boston College 1985-09-21 6.5 29 Pittsburgh 22 False
nrow(data)#[1] 30932
ncol(data)#[1] 8