day2 intermediate code
This commit is contained in:
parent
23f03e90d1
commit
7d515e7032
@ -1,11 +1,99 @@
|
||||
import pandas
|
||||
from typing import Optional
|
||||
|
||||
import pandas as pd
|
||||
from numpy.f2py.crackfortran import true_intent_list
|
||||
|
||||
|
||||
def get_population_of_country(df, country):
    """
    Sum the population of every city that belongs to one country.

    :param df: DataFrame that provides the columns ``country`` and
        ``population``.
    :param country: Name of the country. The comparison uses ``==`` and is
        therefore case-sensitive.
    :return: Sum of the ``population`` values of all matching rows; ``0``
        when no row matches.
    :raises KeyError: If one of the required columns is missing.
    """
    in_country = df["country"] == country
    return df.loc[in_country, "population"].sum()
|
||||
|
||||
def get_cities_beyond_latitude(df, lat, north=True):
    """
    Select the rows lying on one side of a reference latitude.

    :param df: DataFrame that provides a ``lat`` column.
    :param lat: Reference latitude to compare against.
    :param north: When truthy, keep rows with ``lat >= lat`` (the boundary
        itself counts as north); otherwise keep rows with ``lat < lat``.
    :return: DataFrame containing only the selected rows.
    """
    latitudes = df['lat']
    keep = (latitudes >= lat) if north else (latitudes < lat)
    return df[keep]
|
||||
|
||||
|
||||
def get_larger_cities_north_of_city_conditions(df, city: str, **cmp_cols):
    """
    Return the cities at or north of a reference city that are more populous.

    The reference city is the single row whose ``city`` column equals
    *city* and whose further columns equal the values passed in *cmp_cols*.

    :param df: DataFrame that provides the columns ``city``, ``lat`` and
        ``population`` plus every column named in *cmp_cols*.
    :param city: Name of the reference city (exact, case-sensitive match).
    :param cmp_cols: Additional ``column=value`` equality filters used to
        make the reference city unique, e.g. ``country="Germany"``.
    :return: DataFrame with all rows whose ``lat`` is ``>=`` the reference
        latitude and whose ``population`` is ``>`` the reference population.
    :raises ValueError: If more than one row matches the search.
    :raises IndexError: If no row matches the search.
    :raises KeyError: If a required column is missing.
    """
    mask = df["city"] == city
    for col_name, col_value in cmp_cols.items():
        mask &= df[col_name] == col_value
    row = df[mask]

    # The reference must be exactly one row: reject ambiguous searches ...
    if len(row) > 1:
        raise ValueError(f"Search not unique. found: {row}")
    # ... and empty ones. IndexError is kept because that is what the
    # unguarded ``.iloc[0]`` raised before, only without a usable message.
    if row.empty:
        raise IndexError(
            f"No row found for city {city!r} with conditions {cmp_cols}"
        )

    latitude = row['lat'].iloc[0]
    population = row['population'].iloc[0]
    cities_north = get_cities_beyond_latitude(df, latitude, north=True)
    cities_north = cities_north[cities_north['population'] > population]
    return cities_north
|
||||
|
||||
def get_larger_cities_north_of_city(df, city: str, country: Optional[str]=None):
    """
    Return the more populous cities at or north of *city*.

    Convenience wrapper around
    ``get_larger_cities_north_of_city_conditions`` that only knows the
    optional ``country`` filter.

    :param df: DataFrame that is passed through unchanged.
    :param city: Name of the reference city.
    :param country: Optional country used to narrow down the reference
        city. A falsy value (``None`` or ``""``) disables the filter.
    :return: Whatever ``get_larger_cities_north_of_city_conditions``
        returns for the resulting search.
    """
    # Only forward the country filter when one was actually given.
    filters = {"country": country} if country else {}
    return get_larger_cities_north_of_city_conditions(df, city, **filters)
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Horizontal rule printed between the sections of the demo output.
    separator = "-" * 100

    # --- Section 1: population per country --------------------------------
    print("Country Population")
    world_cities = pd.read_excel("../data/worldcities.xlsx")
    print(world_cities)
    print(world_cities.columns)
    print(f"{world_cities['city']}")

    german_population = get_population_of_country(world_cities, "Germany")
    print(f"Population of Germany: {german_population}")
    # A country name that matches no row still yields a sum.
    narnia_population = get_population_of_country(world_cities, "Narnia")
    print(f"Population of Narnia: {narnia_population}")

    # --- Section 2: population north / south of latitude 0 ----------------
    print(separator)
    print("North-South Population")
    north = get_cities_beyond_latitude(world_cities, 0, north=True)
    print(north["population"].sum())
    south = get_cities_beyond_latitude(world_cities, 0, north=False)
    print(south["population"].sum())

    # --- Section 3: larger cities north of a reference city ---------------
    print(separator)
    print("Cities north of")
    cities_north_of_berlin = get_larger_cities_north_of_city(
        world_cities, "Berlin", "Germany"
    )
    print(cities_north_of_berlin)

    cities = ["New York", "Istanbul"]
    for city in cities:
        northern_cities = get_larger_cities_north_of_city(world_cities, city)
        print(f"Cities north of: {city}")
        print(northern_cities)

    # Searching for "Rome" by name alone is expected to be ambiguous.
    try:
        northern_cities = get_larger_cities_north_of_city(world_cities, "Rome")
    except ValueError:
        print("Expected ValueError: Rome multiple times in world_cities")
    else:
        print("Error should have happened")

    print(world_cities[world_cities["city"] == "Rome"])
    # Narrow the search with an additional column filter instead.
    northern_cities = get_larger_cities_north_of_city_conditions(
        world_cities, "Rome", capital="primary"
    )
    print(northern_cities)
|
||||
# conda install openpyxl
|
||||
# 1) Wie viele Einwohner haben alle deutschen Städte?
|
||||
# XYZ
|
||||
@ -20,6 +108,3 @@ print(f"{world_cities['city']}")
|
||||
# funktion
|
||||
# -> raise
|
||||
# -> assert
|
||||
|
||||
# Look up the population of the row for Berlin, Germany and show the type
# of the extracted scalar.
is_berlin = world_cities["city"] == "Berlin"
is_german = world_cities["country"] == "Germany"
val = world_cities[is_berlin & is_german]["population"].iloc[0]
print(type(val))
|
||||
45
src/T09_GroupBy.py
Normal file
45
src/T09_GroupBy.py
Normal file
@ -0,0 +1,45 @@
|
||||
import pandas as pd
|
||||
|
||||
beverages = pd.read_csv("../data/beverages.csv")
print(beverages)

# 1) Aggregate in one step: group by "Name" and take the maximum per group.
groups = beverages.groupby("Name").max()
print(groups)

# Keep the GroupBy object itself; "Name" serves as the categorical key.
groups = beverages.groupby("Name")
print(groups)

print(groups.max())
print(groups.min())
print()

# Iterating a GroupBy object yields (key, sub-frame) pairs.
for category, rows in groups:
    # category is the value of the column that was grouped by
    # rows are all lines that belong to this category
    print(category)
    print(rows)
    print("-"*100)
    print()

print()
# print(help(pd.read_csv))
donations_df = pd.read_csv("../data/donations.csv")
print(donations_df)

# Mean income and donations per (job, city) pair, rounded.
subset = donations_df[["city", "job", "income", "donations"]]
by_job_and_city = subset.groupby(["job", "city"])
grouped_donations = by_job_and_city.mean().round()
print(grouped_donations)
print("-"*100)

# Selecting the outer index level "Student" yields a DataFrame again,
# indexed by city.
students = grouped_donations.loc["Student"]
print(students.loc[["Hamburg", "Köln"]])
print(students.loc["Hamburg"])
print("-"*100)

# Two-level index: unstack() moves the inner level into the columns.
info = grouped_donations.unstack()
income = info["income"]
don = info["donations"]
print(income)
print(don)
print()
|
||||
16
src/T10_ex_Energy.py
Normal file
16
src/T10_ex_Energy.py
Normal file
@ -0,0 +1,16 @@
|
||||
import pandas as pd
|
||||
|
||||
energy_df = pd.read_csv("../data/germany_energy_mix_2019_2024.csv")
# Show the data and its column labels, one after the other.
print(energy_df, energy_df.columns, sep="\n")

# Exercise notes
# Green, Non-Green

# 1) How much renewable and non-renewable energy was produced in total?
# 2) The same per year (unstack)

# 3) Report the yearly values per Energy_Source
# 4) Which energy source has the largest/smallest growth? (idxmax)
# 5) As percentages
# Monthly progression (Jan 2019 and Jan 2020 are different months)
# In which months was more green than non-green electricity produced?
|
||||
Loading…
Reference in New Issue
Block a user