# Data Preparation ----
# Load the log from books.csv; the `date` column is parsed as YYYY-MM-DD dates.
d = read_csv("books.csv", col_types = cols(date = col_date(format = "%Y-%m-%d")))

# Number of rows recorded for each date.
d = d %>%
  group_by(date) %>%
  summarize(count = n())

# Add zero-count rows on January 1st and December 31st of every year that
# appears in the data, so each year's cumulative curve starts and ends on the
# year boundaries.
zeroes.jan1 = tibble(date = unique(floor_date(d$date, "year")), count = 0)
zeroes.dec31 = tibble(date = unique(ceiling_date(d$date, "year")) - 1, count = 0)
d = bind_rows(zeroes.jan1, d, zeroes.dec31)

# Cumulative count within each year. Rows are sorted by date *before*
# accumulating so the running total does not depend on the row order left by
# the bind above; the grouping is dropped afterwards so later steps work on a
# plain (ungrouped) tibble.
d = d %>%
  arrange(date) %>%
  mutate(year = year(date)) %>%
  group_by(year) %>%
  mutate(cumcount = cumsum(count)) %>%
  ungroup()

# Map every date onto the same year so that years can be overlaid for
# comparison. 2016 is a leap year, so February 29th stays a valid date.
d$ndate = as.Date(format(d$date, "2016-%m-%d"))

# Year and month are treated as categories (not numbers) for the graphs.
d$year = factor(d$year)
d$month = factor(month(d$date), 1:12)

# One colour per year. brewer.pal() only supports 3 to 12 colours for the
# "Paired" palette: clamp the request to that range (extra colours are simply
# unused by a manual scale) and interpolate when there are more than 12 years,
# so the manual colour scale never runs out of values.
cols = local({
  n = nlevels(d$year)
  pal = brewer.pal(min(max(n, 3), 12), "Paired")
  if (n > 12) {
    pal = colorRampPalette(pal)(n)
  }
  pal
})
# Each Event as a Dot ----
# One semi-transparent point per date that has at least one event, placed on
# the shared (single-year) date axis. Each year gets its own row and colour;
# the year levels are reversed for the y axis.
d %>%
  filter(count > 0) %>%
  ggplot(aes(x = ndate, y = factor(year, levels = rev(levels(year))), colour = year)) +
  geom_point(size = 2, alpha = 0.5) +
  scale_colour_manual(values = cols) +
  scale_x_date(date_breaks = "1 month", labels = date_format("%b")) +
  labs(y = "")
# Cumulative Plots ----
# as geom_step: per-year cumulative count drawn as a step function over the
# shared (single-year) date axis, one coloured line per year.
# NOTE(review): ggplot2 >= 3.4 prefers `linewidth` over `size` for line geoms;
# `size` is kept here so the script also runs on older ggplot2 versions.
ggplot(d, aes(ndate, cumcount, color = year)) +
  geom_step(alpha = 0.5, size = 1) +
  ylab("books read") +
  scale_x_date(labels = date_format("%b"), date_breaks = "1 month") +
  # Breaks every 5, up to 100 or the largest cumulative count if that is
  # higher, so the axis keeps its labels when a year exceeds 100.
  scale_y_continuous(breaks = seq(0, max(100, d$cumcount, na.rm = TRUE), by = 5)) +
  scale_color_manual(values = cols)
# as geom_line: same per-year cumulative count, but with consecutive points
# joined by straight segments instead of steps.
# NOTE(review): ggplot2 >= 3.4 prefers `linewidth` over `size` for line geoms;
# `size` is kept here so the script also runs on older ggplot2 versions.
ggplot(d, aes(ndate, cumcount, color = year)) +
  geom_line(alpha = 0.5, size = 1) +
  ylab("books read") +
  scale_x_date(labels = date_format("%b"), date_breaks = "1 month") +
  # Breaks every 5, up to 100 or the largest cumulative count if that is
  # higher, so the axis keeps its labels when a year exceeds 100.
  scale_y_continuous(breaks = seq(0, max(100, d$cumcount, na.rm = TRUE), by = 5)) +
  scale_color_manual(values = cols)