day2 intermediate code

This commit is contained in:
Philip 2025-07-16 10:08:11 +02:00
parent 23f03e90d1
commit 7d515e7032
3 changed files with 155 additions and 9 deletions

View File

@ -1,11 +1,99 @@
import pandas from typing import Optional
import pandas as pd import pandas as pd
from numpy.f2py.crackfortran import true_intent_list
# Load the world-cities spreadsheet (path is relative to the working
# directory) into a DataFrame that the statements below inspect.
world_cities = pd.read_excel("../data/worldcities.xlsx")
# Quick look at the data: the whole table, its column labels, and the
# "city" column on its own.
print(world_cities)
print(world_cities.columns)
print(f"{world_cities['city']}")
def get_population_of_country(df, country):
    """
    Sum the population over every city row that belongs to one country.

    :param df: Dataframe, must contain country and population as columns.
    :param country: string, name of the country, case-sensitive (compared
        with ``==`` against the country column)
    :return: summed population of the matching rows; 0 when no row matches
    :raises: KeyError if a required column is missing
    """
    in_country = df["country"] == country
    return df.loc[in_country, "population"].sum()
def get_cities_beyond_latitude(df, lat, north=True):
    """
    Select the rows lying on one side of a given latitude.

    :param df: Dataframe, must contain a lat column.
    :param lat: latitude used as the dividing line
    :param north: if truthy keep rows with lat >= the given latitude,
        otherwise keep rows with lat < the given latitude
    :return: Dataframe holding only the selected rows
    """
    latitudes = df['lat']
    # The dividing line itself counts as "north" (>=), never as "south" (<).
    selected = latitudes >= lat if north else latitudes < lat
    return df[selected]
def get_larger_cities_north_of_city_conditions(df, city: str, **cmp_cols):
    """
    Get all cities at or north of a reference city that have a larger population.

    The reference city is looked up by name. Every extra ``column=value``
    keyword argument must also match, which allows disambiguating city names
    that occur more than once (e.g. ``country="Germany"``).

    :param df: Dataframe, must contain city, lat and population as columns
        plus every column named in cmp_cols.
    :param city: string, name of the reference city (exact match)
    :param cmp_cols: additional column=value equality conditions for the lookup
    :return: Dataframe with the rows whose lat is >= the reference latitude
        and whose population is > the reference population
    :raises ValueError: if more than one row matches the lookup
    :raises IndexError: if no row matches the lookup
    :raises KeyError: if a required column is missing
    """
    mask = df["city"] == city
    for col_name, col_value in cmp_cols.items():
        mask &= df[col_name] == col_value
    row = df[mask]

    if len(row) > 1:
        raise ValueError(f"Search not unique. found: {row}")
    if row.empty:
        # Without this guard .iloc[0] fails with an opaque
        # "single positional indexer is out-of-bounds" message; keep the
        # exception type but say what was actually searched for.
        raise IndexError(
            f"No row found for city {city!r} with conditions {cmp_cols}"
        )

    # Exactly one row is left at this point.
    latitude = row['lat'].iloc[0]
    population = row['population'].iloc[0]

    cities_north = get_cities_beyond_latitude(df, latitude, north=True)
    # Strict comparison: the reference city itself is never part of the result.
    return cities_north[cities_north['population'] > population]
def get_larger_cities_north_of_city(df, city: str, country: Optional[str]=None):
    """
    Convenience wrapper around get_larger_cities_north_of_city_conditions.

    :param df: Dataframe that is passed through unchanged
    :param city: string, name of the reference city
    :param country: optional country name used to narrow the city lookup;
        a falsy value (None or an empty string) adds no country condition
    :return: whatever get_larger_cities_north_of_city_conditions returns
    """
    extra_conditions = {"country": country} if country else {}
    return get_larger_cities_north_of_city_conditions(
        df, city, **extra_conditions
    )
if __name__ == "__main__":
    # --- Total population of a country -------------------------------
    print("Country Population")
    world_cities = pd.read_excel("../data/worldcities.xlsx")
    print(world_cities.columns)

    german_population = get_population_of_country(world_cities, "Germany")
    print(f"Population of Germany: {german_population}")
    # A country that is not in the table.
    narnia_population = get_population_of_country(world_cities, "Narnia")
    print(f"Population of Narnia: {narnia_population}")
    print("-"*100)

    # --- Population split at latitude 0 ------------------------------
    print("North-South Population")
    # north first, then south
    for northern_side in (True, False):
        hemisphere = get_cities_beyond_latitude(
            world_cities, 0, north=northern_side
        )
        print(hemisphere["population"].sum())
    print("-" * 100)

    # --- Larger cities north of a reference city ---------------------
    print("Cities north of")
    cities_north_of_berlin = get_larger_cities_north_of_city(
        world_cities, "Berlin", "Germany"
    )
    print(cities_north_of_berlin)

    for city in ("New York", "Istanbul"):
        northern_cities = get_larger_cities_north_of_city(world_cities, city)
        print(f"Cities north of: {city}")
        print(northern_cities)

    # Looking up "Rome" by name alone is expected to be rejected as ambiguous.
    try:
        get_larger_cities_north_of_city(world_cities, "Rome")
        print("Error should have happened")
    except ValueError:
        print("Expected ValueError: Rome multiple times in world_cities")
        # NOTE(review): indentation was lost in the reviewed source; this
        # print is assumed to belong to the except branch - confirm.
        print(world_cities[world_cities["city"] == "Rome"])

    # An extra column condition makes the lookup unique again.
    northern_cities = get_larger_cities_north_of_city_conditions(
        world_cities, "Rome", capital="primary"
    )
    print(northern_cities)
# conda install openpyxl
# 1) What is the total population of all German cities?
# XYZ
@ -20,6 +108,3 @@ print(f"{world_cities['city']}")
# function
# -> raise
# -> assert
# Pick the population value of Berlin (Germany) and show which Python type
# a single cell taken out of the table has.
is_berlin_in_germany = (
    (world_cities["city"] == "Berlin")
    & (world_cities["country"] == "Germany")
)
val = world_cities.loc[is_berlin_in_germany, "population"].iloc[0]
print(type(val))

45
src/T09_GroupBy.py Normal file
View File

@ -0,0 +1,45 @@
import pandas as pd
# Load the beverage table and show it in full.
beverages = pd.read_csv("../data/beverages.csv")
print(beverages)

# 1) Aggregate in one go: group by "Name" and take the maximum per group.
print(beverages.groupby("Name").max())

# Keep the GroupBy object itself so several aggregations can reuse it.
groups = beverages.groupby("Name")  # "Name" is the categorical key
print(groups)
for aggregate in (groups.max, groups.min):
    print(aggregate())
print()

# Iterating a GroupBy yields (key, rows) pairs.
# NOTE(review): indentation was lost in the reviewed source; the separator
# and the two blank lines are assumed to be printed once per group - confirm.
for category, rows in groups:
    # category: the value of "Name" the group was built for
    # rows: all rows of the table that belong to this category
    print(category, rows, "-"*100, "", "", sep="\n")
# Tip: help(pd.read_csv) lists every option of the CSV reader.
donations_df = pd.read_csv("../data/donations.csv")
print(donations_df)

# Rounded mean of income and donations for every (job, city) combination.
grouped_donations = (
    donations_df[["city", "job", "income", "donations"]]
    .groupby(["job", "city"])
    .mean()
    .round()
)
print(grouped_donations)
print("-"*100)

# Selecting the outer index level "Student" yields a dataframe again,
# now indexed by city only.
students = grouped_donations.loc["Student"]
print(students.loc[["Hamburg", "Köln"]])
print(students.loc["Hamburg"])
print("-"*100)

# Two-level index: unstack() moves the inner level (city) into the columns,
# so each metric can be picked out as its own table.
by_metric = grouped_donations.unstack()
for metric in ("income", "donations"):
    print(by_metric[metric])
print()

16
src/T10_ex_Energy.py Normal file
View File

@ -0,0 +1,16 @@
import pandas as pd
# Load the energy-mix CSV (path is relative to the working directory) and
# show the table and its column labels as a starting point for the exercises.
energy_df = pd.read_csv("../data/germany_energy_mix_2019_2024.csv")
print(energy_df)
print(energy_df.columns)
# Exercises - Green vs. Non-Green:
# 1) How much renewable and non-renewable energy was produced in total?
# 2) The same per year (unstack)
# 3) Report the yearly figures per Energy_Source
# 4) Which energy source has the largest/smallest growth? (idxmax)
# 5) The same as percentages
# Monthly trend (Jan 2019 and Jan 2020 are different months)
# In which months was more green than non-green electricity produced?