# 2.4 Using data in R

1. Exercise 2.4 - Download and extract zip folder into your preferred location
2. Set working directory to the extracted folder in R under File - Change dir...
3. First we need to load the packages needed for the exercise

library(rgdal)
library(gstat)
library(plyr)

4. Now open the script "Weather-Station-Code_Snowfall.R" and run code directly from the script

5. This code is designed to process data downloaded from NOAA Date. This version looks at weather stations that provide snowfall data in and around Pennsylvania. The code pulls out the desired data from the downloaded aggregate weather-station file and calculates the mean annual snowfall per weather station for 12/1/94 - 3/31/05. The data is then exported to a text file for interpolation in ArcGIS.

### remove other objects from the working session ###
rm(list=ls())

6. Read weather station data - note the use of stringsAsFactors=FALSE and na.strings=’-9999’

#Check data
dim(WS)
summary (WS)

#Reformat DATE and create Year Month Day columns from NewDate column
WS\$NewDate <- as.Date(as.character(WS\$DATE), format("%Y%m%d"))
WS\$Year = as.numeric(format(WS\$NewDate, format = "%Y"))
WS\$Month = as.numeric(format(WS\$NewDate, format = "%m"))
WS\$Day = as.numeric(format(WS\$NewDate, format = "%d"))

7. Make a subset of WS that includes only the months of Dec-March with further manipulation of the data for desired output of project objectives.

Winter <- WS[WS\$Month %in% c(1,2,3,12), ]

#For December, add 1 to Year so that Year matches Jan-March in that season
Winter <- within(Winter, Year[Month==12] <- Year[Month==12] +1)

#Check subset, including random row to make sure only selected months included
dim(Winter)
Winter[699,]

#Create a matrix of unique STATION values (GHCND ) with Lat/Long values for later reference. Data contains some multiple versions of individual GHCND coordinates. Only want 1 set per.
PulledCoords <- Winter[!duplicated(Winter[,1]),]
dim(PulledCoords)

CoordChart <- ddply(PulledCoords, c(’STATION’), function(x) c(Lat=x\$LATITUDE, Long=x\$LONGITUDE))

#Get the number of snowfall records for each STATION for each year and name it RecordTotal. Note that NA is omited from the length count #
WinterRecords <- ddply(Winter, .(STATION,Year), summarize, RecordTotal = length(na.omit(SNOW)))
tail(WinterRecords)
dim(WinterRecords)

#Get the total amount of snowfall per STATION per year and name it YearlySnow
YearlySnow <- ddply(Winter, .(STATION,Year), summarize, Snow = sum(SNOW, na.rm=TRUE))
tail(YearlySnow)
dim(YearlySnow)

#Combine WinterRecords and YearlySnow into one matrix
AllWinters <- cbind(WinterRecords,YearlySnow)
AllWinters <- AllWinters[,-4:-5]
tail(AllWinters)
dim(AllWinters)

#Only include years that have more than 75% of days recorded #
WinterDays <- 121
FullWinters <- AllWinters[AllWinters\$RecordTotal/WinterDays > 0.75, ]
tail(FullWinters)
dim(FullWinters)

#Get the number of years with more than 75% of days recorded for each STATION
WinterYears <- ddply(FullWinters, c(’STATION’), function(x) c(TotalYears=length(x\$Year)))
tail(WinterYears)
dim(WinterYears)

#Get the total amount of snow for each station for all years ###
TotalWinterSnow <- ddply(FullWinters, c(’STATION’), function(x) c(TotalWinterSnow=sum(x\$Snow)))
dim(TotalWinterSnow)

#Combine WinterYears and TotalWinterSnow into one matrix #
SnowCalc <- cbind(WinterYears,TotalWinterSnow)
SnowCalc <- SnowCalc[,-3]

#Get rid of the stations that don’t have at least 10 years recorded at >75% of days
Complete.Records <- SnowCalc[SnowCalc\$TotalYears > 9, ]
dim(Complete.Records)

#Calculate average annual snowfall and round to nearest mm
Complete.Records\$MeanAnnualSnowfall <- Complete.Records\$TotalWinterSnow/Complete.Records\$TotalYears
Complete.Records\$MeanAnnualSnowfall <-round (Complete.Records\$MeanAnnualSnowfall, digits = 0)

#Convert SnowDepth from mm to cm
Complete.Records\$MeanAnnualSnowfall <- Complete.Records\$MeanAnnualSnowfall/10

#Add a column to CoordChart showing whether each row matches a STATION #in Complete.Records. Use "NA" for value if no match, then delete rows with "NA" value.
#Number of rows in CoordChart should now equal number of rows in Complete.Records
CoordChart\$match <- match(CoordChart\$STATION, Complete.Records\$STATION, nomatch=NA)
CoordChart <- na.omit(CoordChart)
dim(CoordChart)
dim(Complete.Records)

#Combine Complete.Records and CoordChart. Make sure each STATION #matches in row. Delete any rows that don’t match. Shouldn’t be any. If # of rows in #Final.Values is less than number of rows in CoordChart, there is a problem (but note #that number of cols does change).
Final.Values <- cbind(Complete.Records,CoordChart)
Final.Values\$match2 <- match(Final.Values[ ,1], Final.Values[ ,5], nomatch=NA)
Final.Values <- na.omit(Final.Values)
dim(Final.Values)
dim(CoordChart)

#Take out unnecssary rows (2nd STATION, match, and match2) and round #MeanSnow to 2 decimal places
Final.Values[,5] <- Final.Values[,8] <- Final.Values[,9] <- NULL
8. Make data frame to get rid of lists (in R) so can export to text file to use to load weather station points into ArcGIS and skip to Section 2.5.

Final.Values <- as.data.frame(lapply(Final.Values,unlist))
write.table(Final.Values, "MeanSnowData_95-05.txt", sep="\t", row.names=F)
9. Alternatively we can conduct interpolation directly in R using the steps below.

#Need to convert factors to numeric
Final.Values\$Longitude <- as.numeric(as.character(Final.Values\$Long))
Final.Values\$Latitude <- as.numeric(as.character(Final.Values\$Lat))

#Here we need to create Spatial points, attach ID and Date Time sorted
data.xy = Final.Values[c("Longitude","Latitude")]
#Creates class Spatial Points for all locations
xysp <- SpatialPoints(data.xy)
#proj4string(xysp) <- CRS("+proj=longlat +ellps=WGS84")

#Creates a Spatial Data Frame from
sppt<-data.frame(xysp)

#Creates a spatial data frame of STATION
ID<-data.frame(Final.Values)
#Creates a spatial data frame of Mean Annual Snow Fall
MAS<-data.frame(Final.Values)
#Merges ID and Date into the same spatial data frame
merge<-data.frame(ID,MAS)
#Adds ID and Date data frame with locations data frame
coordinates(merge)<-sppt
proj4string(merge) <- CRS("+proj=longlat +ellps=WGS84")

#Import a county layer for study site and check projections
proj4string(counties)
plot(counties)

#Project Weather Stations to match Counties
Albers.crs <-CRS("+proj=aea +lat_1=29.3 +lat_2=45.3 +lat_0=23+lon_0=-96 +x_0=0 +y_0=0 +datum=NAD83 +units=m +no_defs +ellps=GRS80 +towgs84=0,0,0")
stations <- spTransform(merge, CRS=Albers.crs)

#Plot Weather Stations over counties
points(stations)

stations@data\$MAS <- stations@data\$MeanAnnualSnowfall
str(stations)

str(counties)

##Create a grid onto which we will interpolate:
xx = spsample(counties, type="regular", cellsize=10000)
class(xx)

points(xx, bg="red", cex=.5,col="red")

#Convert to a SpatialPixels class
gridded(xx) <- TRUE
class(xx)

plot(xx)
points(stations, bg="red", cex=.5,col="red")

#Plot out the MAS across the study region
bubble(stations, zcol=’MAS’, fill=FALSE, do.sqrt=FALSE, maxsize=2, add=T)

##Create a grid onto which we will interpolate:
##First get the range in data
x.range <- as.integer(range(stations@coords[,1]))
y.range <- as.integer(range(stations@coords[,2]))

##Now expand to a grid with 500 meter spacing:
grd <- expand.grid(x=seq(from=x.range, to=x.range, by=5000), y=seq(from=y.range, to=y.range, by=5000) )

#Convert to SpatialPixel class
coordinates(grd) <- ~ x+y
gridded(grd) <- TRUE

##Test it out:
plot(grd, cex=0.5)
points(stations, pch=1, col=’red’, cex=0.7)
title("Interpolation Grid and Sample Points")

x <- krige(stations@data\$MAS~1, stations, xx)
class(x)
image(x)
points(stations, pch=1, col=’blue’, cex=0.7)