Seit Juni 2014 melden Tankstellen in Deutschland ihre aktuellen Kraftstoffpreise an die Markttransparenzstelle für Kraftstoffe (MTS-K), eine Abteilung des Bundeskartellamtes. Die rechtliche Verpflichtung zur Meldung der Preisinformationen bietet eine seltene Möglichkeit die Entwicklung von Preisen zeitnah flächendeckend zu analysieren. Datenquelle für die Kraftstoffpreise und Tankstellen Informationen ist Tankerkönig.
# packages
library(tidyverse)
library(ggthemes)
library(gridExtra)
library(tmap)
library(leaflet)
library(leaflet.extras)
library(rgdal)
library(DBI)
library(odbc)
# themes
source(file = "code/functions/theme_base_FUN.R", echo = F, encoding = "UTF-8")
# plot elements
colors = c("#FF8F00", "#5A8F29", "#3C7DC4", "#2B2B2B")
# DB connection
dbcon <- dbConnect(odbc(), "fuelprice", timeout = 10, encoding = "windows-1252", bigint = "integer")
Für die Übersicht der in Deutschland aktiven Tankstellen, wurden die Daten sämtlicher Tankstellen ausgewertet, welche im Beobachtungszeitraum (01.07. bis 31.12.) mindestens eine Preisinformation übermittelt haben.
Die räumliche Zuordnung der Tankstellen erfolgt auf Basis der im Datensatz enthalten Angaben zu Längen- und Breitengrad einzelner Tankstellen.
# query active stations in year
year <- 2014
query_stations_year <- paste0("
SELECT
distinct(stid)
FROM
price
WHERE
time BETWEEN '", year,"-07-01 00:00:01'
AND '", year,"-12-31 23:59:59'
GROUP BY
stid;")
act_stations <- dbGetQuery(dbcon, query_stations_year) %>%
as_tibble()
act_stations_string <- paste(act_stations$stid, collapse = ",")
# Query stations data
query_states <- paste0("
SELECT
stations.bl_kz,
stations.bl_name,
count,
area.area_bl,
pop.pop_bl,
round(count/area.area_bl,4) AS a_dens,
round(count*1000/(pop.pop_bl),4) AS p_dens
FROM
(SELECT
bl_kz,
bl_name,
count(*) AS count
FROM
stations
WHERE
bl_kz IS NOT NULL
AND
uuid = ANY('{",act_stations_string ,"}')
GROUP BY bl_kz, bl_name) stations
LEFT JOIN
(SELECT
bl_kz,
area_bl
FROM
area_bl
WHERE
date = '", year, "-12-31') area
ON
stations.bl_kz = area.bl_kz
LEFT JOIN
(SELECT
bl_kz,
pop_bl
FROM
pop_bl
WHERE
date = '", year, "-12-31') pop
ON
stations.bl_kz = pop.bl_kz
ORDER BY
bl_kz ASC;")
stations_states <- dbGetQuery(dbcon, query_states) %>%
as_tibble() %>%
mutate(bl_name = factor(bl_name, bl_name))
# plot number of stations by state
stations_states_plot <- ggplot(data = stations_states, aes(x = bl_name, y = count)) +
geom_bar(stat = "identity",
show.legend = F,
width = 0.9,
fill = colors[3],
alpha = 1) +
#geom_text(aes(label = count), # add for printing actual numbers to bars
# hjust = if_else(stations_states$count >= 0.7*max(stations_states$count), 1.2, -0.1),
# color = if_else(stations_states$count >= 0.7*max(stations_states$count), "white", "black"),
# position = position_dodge(0.9), size = 3.5) +
theme_base() +
labs(x = NULL, y = "Anzahl Tankstellen") +
coord_flip()
# plot number of stations per square kilometer by state
area_states_plot <- ggplot(data = stations_states, mapping = aes(x = bl_name, y = a_dens)) +
geom_bar(stat = "identity",
show.legend = F,
width = 0.9,
fill = colors[1],
alpha = 1) +
#geom_text(aes(label = round(a_dens, 2)), # add for printing actual numbers to bars
# hjust = if_else(stations_states$a_dens >= 0.7*max(stations_states$a_dens), 1.2, -0.1),
# color = "black",
# position = position_dodge(0.9), size = 3.5) +
theme_base() +
labs(x = NULL, y = "... pro Quadratkilometer") +
coord_flip() +
scale_x_discrete(labels = rep("",dim(stations_states)[1]))
# plot number of stations per population by state
pop_states_plot <- ggplot(data = stations_states, mapping = aes(x = bl_name, y = p_dens)) +
geom_bar(stat = "identity",
show.legend = F,
width = 0.9,
fill = colors[2],
alpha = 1) +
#geom_text(aes(label = round(p_dens, 2)), # add for printing actual numbers to bars
# hjust = if_else(stations_states$p_dens >= 0.7*max(stations_states$p_dens), 1.2, -0.1),
# color = if_else(stations_states$p_dens >= 0.7*max(stations_states$p_dens), "white", "black"),
# position = position_dodge(0.9), size = 3.5) +
theme_base() +
labs(x = NULL, y = "... pro 1.000 Einwohner") +
coord_flip() +
scale_x_discrete(labels = rep("",dim(stations_states)[1]))
# arrange plots
grid.arrange(stations_states_plot, area_states_plot, pop_states_plot,
ncol = 3,
widths = c(4.5,3,3))
# query data
query_city7 <- paste0("
SELECT
pop.kreis_kz,
stations.kreis_name,
count,
pop.pop_kreis,
area.area_kreis,
round(count/area.area_kreis, 4) AS a_dens,
round(count*1000/pop.pop_kreis, 4) AS p_dens
FROM
(SELECT
kreis_kz,
pop_kreis
FROM
pop_kreis
WHERE
date = '", year, "-12-31'
AND
pop_kreis IS NOT NULL
ORDER BY
pop_kreis DESC
LIMIT 7) pop
LEFT JOIN
(SELECT
kreis_kz,
kreis_name,
count(*) AS count
FROM
stations
WHERE
uuid = ANY('{",act_stations_string ,"}')
GROUP BY
kreis_kz,
kreis_name) stations
ON
pop.kreis_kz = stations.kreis_kz
LEFT JOIN
(SELECT
kreis_kz,
area_kreis
FROM
area_kreis
WHERE
date = '", year, "-12-31') area
ON
pop.kreis_kz = area.kreis_kz
ORDER BY
pop.kreis_kz ASC;")
stations_city7 <- dbGetQuery(dbcon, query_city7) %>%
as_tibble() %>%
mutate(kreis_name = factor(kreis_name, kreis_name))
# plot number of stations by city
stations_city7_plot <- ggplot(data = stations_city7, mapping = aes(x = kreis_name, y = count)) +
geom_bar(stat = "identity",
show.legend = F,
width = 0.9,
fill = colors[3],
alpha = 1) +
#geom_text(aes(label = count), # add for printing actual numbers to bars
# hjust = if_else(stations_city7$count >= 0.7*max(stations_city7$count), 1.2, -0.1),
# color = if_else(stations_city7$count >= 0.7*max(stations_city7$count), "white", "black"),
# position = position_dodge(0.9), size = 3.5) +
theme_base() +
labs(x = NULL, y = "Anzahl Tankstellen") +
coord_flip()
# plot number of stations per square kilometer by city
area_city7_plot <- ggplot(data = stations_city7, mapping = aes(x = kreis_name, y = a_dens)) +
geom_bar(stat = "identity",
show.legend = F,
width = 0.9,
fill = colors[1],
alpha = 1) +
#geom_text(aes(label = round(a_dens, 2)), # add for printing actual numbers to bars
# hjust = if_else(stations_city7$a_dens >= 0.7*max(stations_city7$a_dens), 1.2, -0.1),
# color = "black",
# position = position_dodge(0.9), size = 3.5) +
theme_base() +
labs(x = NULL, y = "... pro Quadratkilometer") +
coord_flip() +
scale_x_discrete(labels = rep("",dim(stations_city7)[1]))
# plot number of stations per population by city
pop_city7_plot <- ggplot(data = stations_city7, mapping = aes(x = kreis_name, y = p_dens)) +
geom_bar(stat = "identity",
show.legend = F,
width = 0.9,
fill = colors[2],
alpha = 1) +
#geom_text(aes(label = round(p_dens, 2)), # add for printing actual numbers to bars
# hjust = if_else(stations_city7$p_dens >= 0.7*max(stations_city7$p_dens), 1.2, -0.1),
# color = if_else(stations_city7$p_dens >= 0.7*max(stations_city7$p_dens), "white", "black"),
# position = position_dodge(0.9), size = 3.5) +
theme_base() +
labs(x = NULL, y = "... pro 1.000 Einwohner") +
coord_flip() +
scale_x_discrete(labels = rep("",dim(stations_city7)[1]))
# arrange plots
grid.arrange(stations_city7_plot, area_city7_plot, pop_city7_plot,
ncol = 3,
widths = c(4,3,3))
# load map county map data
county_shp_in <- readOGR(dsn = "data/map2019/vg250_ebenen",
layer="VG250_KRS", encoding = "UTF-8",
stringsAsFactors = F, verbose = F, use_iconv = T)
# query county data
query_county <- paste0("
SELECT
stations.kreis_kz,
stations.kreis_name,
count,
area.area_kreis,
pop.pop_kreis,
cars.kfz,
cars.lkw,
round(count/area.area_kreis, 4) AS a_dens,
round(count*1000/pop.pop_kreis, 4) AS p_dens,
round(count*1000/cars.kfz::decimal, 4) AS kfz_dens,
round(count*1000/cars.lkw::decimal, 4) AS lkw_dens
FROM
(SELECT
kreis_kz,
kreis_name,
count(*) AS count
FROM
stations_test
WHERE
kreis_kz IS NOT NULL
AND
uuid = ANY('{",act_stations_string ,"}')
GROUP BY
kreis_kz,
kreis_name) stations
LEFT JOIN
(SELECT
kreis_kz,
area_kreis
FROM
area_kreis
WHERE
date = '", year, "-12-31') area
ON
stations.kreis_kz = area.kreis_kz
LEFT JOIN
(SELECT
kreis_kz,
pop_kreis
FROM
pop_kreis
WHERE
date = '", year, "-12-31') pop
ON
stations.kreis_kz = pop.kreis_kz
LEFT JOIN
(SELECT
kreis_kz,
kfz,
lkw
FROM
car_numbers
WHERE
date = '", year, "-01-01') cars
ON
stations.kreis_kz = cars.kreis_kz
ORDER BY
kreis_kz ASC;")
stations_county <- dbGetQuery(dbcon, query_county) %>%
as_tibble()
# join map data and queried data
county_shp <- county_shp_in
county_shp@data <- county_shp_in@data %>%
left_join(stations_county, by = c("AGS" = "kreis_kz"))
# map number of stations
map_kreise_count <- tm_shape(shp = county_shp, name = "Anzahl Tankstellen") +
tm_polygons("count",
style = "jenks",
palette = "Blues", n = 6, contrast = c(0.4, 0.95),
border.col = "white",
border.alpha = 0.5,
title = "",
id = "GEN",
legend.format = list(digits = 0),
showNA = F,
colorNA = "grey90",
popup.vars = c("Number stations:" = "count"),
popup.format = list(digits = 0)) +
tm_layout(title = "Anzahl Tankstellen",
title.size = 1.1,
title.position = c("center","top"),
inner.margins = c(0.05,0.10,0.10,0.10),
legend.position = c("left","bottom"),
frame = F) +
tm_scale_bar(color.dark = "gray70",
position = c("right","bottom"))
# map number of station per square kilometer
map_kreise_area <- tm_shape(shp = county_shp, name = "Tankstellen pro Quadratkilometer") +
tm_polygons("a_dens",
style = "jenks",
palette = "Oranges", n = 6, contrast = c(0.4, 0.95),
border.col = "white",
border.alpha = 0.5,
title = "",
id = "GEN",
legend.format = list(digits = 2),
showNA = F,
colorNA = "grey90",
popup.vars = c("Number stations:" = "a_dens"),
popup.format = list(digits = 2)) +
tm_layout(title = "Tankstellen pro Quadratkilometer",
title.size = 1.1,
title.position = c("center","top"),
inner.margins = c(0.05,0.10,0.10,0.10),
legend.position = c("left","bottom"),
frame = F) +
tm_scale_bar(color.dark = "gray70",
position = c("right","bottom"))
# map number of stations per 1.000 population
map_kreise_pop <- tm_shape(shp = county_shp, name = "Tankstellen pro 1.000 Einwohner") +
tm_polygons("p_dens",
style = "jenks",
palette = "Greens", n = 6, contrast = c(0.4, 0.95),
border.col = "white",
border.alpha = 0.5,
title = "",
id = "GEN",
legend.format = list(digits = 2),
showNA = F,
colorNA = "grey90",
popup.vars = c("Number stations:" = "p_dens"),
popup.format = list(digits = 2)) +
tm_layout(title = "Tankstellen pro 1.000 Einwohner",
title.size = 1.1,
title.position = c("center","top"),
inner.margins = c(0.05,0.10,0.10,0.10),
legend.position = c("left","bottom"),
frame = F) +
tm_scale_bar(color.dark = "gray70",
position = c("right","bottom"))
# map number of stations per 1.000 cars
map_kreise_cars <- tm_shape(shp = county_shp, name = "Tankstellen pro 1.000 zugelassene PKW") +
tm_polygons("kfz_dens",
style = "jenks",
palette = "PuRd", n = 6, contrast = c(0.4, 0.95),
border.col = "white",
border.alpha = 0.5,
title = "",
id = "GEN",
legend.format = list(digits = 2),
showNA = F,
colorNA = "grey90",
popup.vars = c("Number stations:" = "kfz_dens"),
popup.format = list(digits = 2)) +
tm_layout(title = "Tankstellen pro 1.000 zugelassene PKW",
title.size = 1.1,
title.position = c("center","top"),
inner.margins = c(0.05,0.10,0.10,0.10),
legend.position = c("left","bottom"),
frame = F) +
tm_scale_bar(color.dark = "gray70",
position = c("right","bottom"))
# arrange maps
tmap_arrange(map_kreise_count, map_kreise_area, map_kreise_pop, map_kreise_cars,
ncol = 2, nrow = 2)
Die Marken der einzelnen Tankstellen sind im Datensatz von Tankerkönig nicht immer eindeutig benannt. Die Zuordnung der Tankstellen zu den jeweiligen Marken erfolgte daher mittels automatischer Textmustererkennung. Daher sind gewisse Fehlzuordnungen nicht auszuschließen. Ein stichprobenartiger Vergleich zeigt jedoch, dass die automatische Zuordnung nahe an der Realität liegt.
# query brands in dataset
query_brands <- paste0("
SELECT
distinct(brand_cat),
count(*)
FROM
stations_test
WHERE
uuid = ANY('{",act_stations_string ,"}')
GROUP BY
brand_cat;")
# tidy data, assign brand colors, calc shares
brands <- dbGetQuery(dbcon, query_brands) %>%
as_tibble() %>%
arrange(brand_cat) %>%
add_column(color = c("yellow","blue","blueviolet","orange",
"lightblue","green","gold","deepskyblue",
"darkblue","lightgreen","red","grey",
"hotpink","white")) %>%
arrange(desc(count)) %>%
mutate(brand_cat = factor(brand_cat, brand_cat)) %>%
mutate(ypos = sum(count) - cumsum(count) + count - 0.5*count) %>%
mutate(share = count/sum(count)*100)
# plot brand shares
ggplot(data = brands, mapping = aes(x = "", y = count, fill = brand_cat)) +
geom_bar(stat = "identity",
color = "black",
show.legend = T,
width = 1) +
labs(x = NULL, y = NULL) +
scale_fill_manual(values = brands$color, name = "",
labels = paste(brands$brand_cat, " - ", brands$count, " (", round(brands$share,0), "%)", sep = ""),
guide = guide_legend(ncol = 1)) +
coord_polar(theta = "y", start = 0) +
geom_text(aes(y = ypos, label = brand_cat, x = 1.75),
check_overlap = T) +
theme_base() +
theme(panel.grid.major.x = element_blank(),
axis.text = element_blank(),
legend.position = "right")