# Reconstructed from patch 7d515e7 ("day2 intermediate code", Philip,
# 2025-07-16): new contents of src/T08_WorldCities.py.
from typing import Optional

import pandas as pd
# NOTE(review): `true_intent_list` is not used anywhere in the visible code and
# looks like an accidental IDE auto-import; kept because parts of the file are
# not visible in the patch -- confirm and remove.
from numpy.f2py.crackfortran import true_intent_list


def get_population_of_country(df, country):
    """
    Return the summed population of all cities in a given country.

    :param df: DataFrame; must contain the columns "country" and "population".
    :param country: name of the country; the comparison is case-sensitive.
    :return: sum of the "population" column over all matching rows
        (0 if no row matches).
    :raises KeyError: if one of the required columns is missing.
    """
    cities = df[df["country"] == country]
    return cities["population"].sum()


def get_cities_beyond_latitude(df, lat, north=True):
    """
    Return all rows whose latitude is north or south of a given latitude.

    :param df: DataFrame; must contain the column "lat".
    :param lat: reference latitude.
    :param north: if True keep rows with ``lat >= lat``, otherwise keep rows
        with ``lat < lat``.
    :return: DataFrame with the selected rows.
    :raises KeyError: if the column "lat" is missing.
    """
    if north:
        return df[df["lat"] >= lat]
    return df[df["lat"] < lat]


def get_larger_cities_north_of_city_conditions(df, city: str, **cmp_cols):
    """
    Return all cities at or north of a reference city that have a strictly
    larger population than the reference city.

    The reference city is the single row whose "city" column equals ``city``
    and whose other columns equal the values passed in ``cmp_cols``.

    :param df: DataFrame; must contain the columns "city", "lat",
        "population" and every column named in ``cmp_cols``.
    :param city: name of the reference city (exact match).
    :param cmp_cols: additional ``column=value`` equality filters used to make
        the reference city unique, e.g. ``country="Germany"``.
    :return: DataFrame with the matching rows.
    :raises ValueError: if more than one row matches the search.
    :raises IndexError: if no row matches the search.
    :raises KeyError: if a required column is missing.
    """
    mask = df["city"] == city
    for col_name, col_value in cmp_cols.items():
        mask &= df[col_name] == col_value
    row = df[mask]
    if row.empty:
        # Without this guard `.iloc[0]` below fails with an opaque
        # "single positional indexer is out-of-bounds" IndexError; the
        # exception type is kept, only the message is made useful.
        raise IndexError(
            f"No row found for city {city!r} with conditions {cmp_cols}"
        )
    if len(row) > 1:
        raise ValueError(f"Search not unique. found: {row}")
    # Exactly one row is left at this point.
    latitude = row["lat"].iloc[0]
    population = row["population"].iloc[0]
    cities_north = get_cities_beyond_latitude(df, latitude, north=True)
    return cities_north[cities_north["population"] > population]


def get_larger_cities_north_of_city(df, city: str, country: Optional[str]=None):
    """
    Return all larger cities at or north of a city, optionally identifying the
    reference city by its country as well.

    :param df: DataFrame; see get_larger_cities_north_of_city_conditions.
    :param city: name of the reference city.
    :param country: optional country of the reference city; a falsy value
        (None or "") means the search is done by city name only.
    :return: DataFrame with the matching rows.
    :raises ValueError: if the reference city is not unique.
    :raises IndexError: if the reference city is not found.
    """
    conditions = {"country": country} if country else {}
    return get_larger_cities_north_of_city_conditions(df, city, **conditions)


if __name__ == "__main__":
    print("Country Population")
    # Reading .xlsx files with pandas needs the openpyxl package installed.
    world_cities = pd.read_excel("../data/worldcities.xlsx")
    print(world_cities.columns)
    german_population = get_population_of_country(world_cities, "Germany")
    print(f"Population of Germany: {german_population}")
    narnia_population = get_population_of_country(world_cities, "Narnia")
    print(f"Population of Narnia: {narnia_population}")

    print("-"*100)
    print("North-South Population")
    # Northern vs. southern hemisphere (split at the equator).
    north = get_cities_beyond_latitude(world_cities, 0, north=True)
    print(north["population"].sum())
    south = get_cities_beyond_latitude(world_cities, 0, north=False)
    print(south["population"].sum())
    print("-" * 100)
    print("Cities north of")
    cities_north_of_berlin = get_larger_cities_north_of_city(
        world_cities, "Berlin", "Germany"
    )
    print(cities_north_of_berlin)
    cities = ["New York", "Istanbul"]
    for city in cities:
        northern_cities = get_larger_cities_north_of_city(world_cities, city)
        print(f"Cities north of: {city}")
        print(northern_cities)
    # "Rome" is expected to occur several times, so the lookup by name alone
    # must be rejected as ambiguous.
    try:
        northern_cities = get_larger_cities_north_of_city(world_cities, "Rome")
    except ValueError:
        print("Expected ValueError: Rome multiple times in world_cities")
    else:
        print("Error should have happened")

    print(world_cities[world_cities["city"] == "Rome"])
    northern_cities = get_larger_cities_north_of_city_conditions(
        world_cities,
        "Rome",
        capital="primary",
    )
    print(northern_cities)
# --- src/T08_WorldCities.py (trailing notes, continued from the patch) ---
# conda install openpyxl
# 1) How many inhabitants do all German cities have in total?
# XYZ
# ... (lines of the file that are not shown in the patch hunks) ...
# function
# -> raise
# -> assert


# --- src/T09_GroupBy.py (new file) ---
import pandas as pd

beverages = pd.read_csv("../data/beverages.csv")
print(beverages)


# 1) Aggregate directly on the grouped frame.
groups = beverages.groupby("Name").max()
print(groups)

# Grouping by a categorical column yields a GroupBy object, not a DataFrame.
groups = beverages.groupby("Name")
print(groups)

print(groups.max())
print(groups.min())
print()
for name, info in groups:
    # name: the category value the rows were grouped by
    # info: all rows that belong to that category
    print(name)
    print(info)
    print("-" * 100)
    print()

print()
donations_df = pd.read_csv("../data/donations.csv")
print(donations_df)

subset = donations_df[["city", "job", "income", "donations"]]
# Rounded mean per (job, city) pair; the result carries a two-level index.
mean_by_job_and_city = subset.groupby(["job", "city"]).mean()
grouped_donations = mean_by_job_and_city.round()
print(grouped_donations)
print("-" * 100)
# Selecting the outer level "Student" yields a DataFrame indexed by city.
students = grouped_donations.loc["Student"]
print(students.loc[["Hamburg", "Köln"]])
print(students.loc["Hamburg"])
print("-" * 100)
# Two-level index: unstack moves the inner level (city) into the columns.
info = grouped_donations.unstack()
income = info["income"]
don = info["donations"]
print(income)
print(don)
print()


# --- src/T10_ex_Energy.py (new file) ---
import pandas as pd

energy_df = pd.read_csv("../data/germany_energy_mix_2019_2024.csv")
print(energy_df)
print(energy_df.columns)

# Green, Non-Green

# 1) How much renewable vs. non-renewable energy was produced in total?
# 2) Per year (unstack)

# 3) Report yearly values per Energy_Source
# 4) Which energy source has the largest/smallest growth? (idxmax)
# 5) As percentages
# Monthly trend (Jan 2019 and Jan 2020 are different months)
# In which months was more green than non-green electricity produced?