day2 intermediate code2

This commit is contained in:
Philip 2025-07-16 14:03:00 +02:00
parent 7d515e7032
commit 331e55f987
4 changed files with 178 additions and 2 deletions

View File

@ -35,6 +35,11 @@ print(grouped_donations.loc["Student"].loc[["Hamburg", "Köln"]])
# The "Student" row is itself a DataFrame again
students = grouped_donations.loc["Student"]
print(students.loc["Hamburg"])
print(grouped_donations)
# idxmin() yields the index label of the smallest entry ...
print(grouped_donations.idxmin())
# ... which can be fed straight back into .loc to fetch that entry
print(grouped_donations.loc[grouped_donations.idxmin()])
# Chained .loc: first the "Student" level, then "Hamburg"
print(grouped_donations.loc["Student"].loc["Hamburg"])
print("-"*100)
# Double (two-level) indices
info = grouped_donations.unstack()

View File

@ -7,10 +7,60 @@ print(energy_df.columns)
# Energy_Type values: Green, Non-Green
# 1) How much renewable vs. non-renewable energy was produced in total?
total_energy_production = (
    energy_df.groupby("Energy_Type")[["Generation_TWh"]].sum()
)
print(total_energy_production)

# 2) The same per year (unstack moves Energy_Type into the columns)
yearly_production = (
    energy_df.groupby(["Year", "Energy_Type"])[["Generation_TWh"]]
    .sum()
    .unstack()
)
print(yearly_production)

# 3) Yearly production broken down by Energy_Source
yearly_production = (
    energy_df.groupby(["Year", "Energy_Source"])[["Generation_TWh"]]
    .sum()
    .unstack()
)
print(yearly_production)

# 4) Which energy source shows the largest / smallest growth (idxmax)?
# Strip the outer "Generation_TWh" column level first.
yearly_production = yearly_production["Generation_TWh"]
print(yearly_production)
# a = yearly_production["Biomass"]
# a.sort_values(inplace=True)  -> forbidden, because a is a view on yearly_production!
print("-" * 100)
diff = yearly_production.loc[2024] - yearly_production.loc[2019]
sorted_diffs = diff.sort_values()
print(diff)
print(sorted_diffs)
# Largest growth, once via idxmax/max and once via the sorted series
print(f"Größter Wachstum: {diff.idxmax()}, {diff.max()}")
print(f"Größter Wachstum: {sorted_diffs.index[-1]}, {sorted_diffs.iloc[-1]}")
# Smallest growth, once via idxmin/min and once via the sorted series
print(f"Kleinster Wachstum: {diff.idxmin()}, {diff.min()}")
print(f"Kleinster Wachstum: {sorted_diffs.index[0]}, {sorted_diffs.iloc[0]}")

# 5) As shares: divide every row by its row total
yearly_totals = yearly_production.sum(axis=1)
percentages = yearly_production.divide(yearly_totals, axis=0)
print(percentages)
print(percentages.mean(axis=0))
perc_diff = (percentages.loc[2024] - percentages.loc[2019]).sort_values()
print(perc_diff)

# Monthly progression (Jan 2019 and Jan 2020 are different months)
verlauf = (
    energy_df.groupby(["Year", "Month", "Energy_Type"])[["Generation_TWh"]]
    .sum()
    .unstack()
)
print(verlauf)
# In which months was more green than non-green electricity produced?
green_ahead = verlauf["Generation_TWh", "Green"] > verlauf["Generation_TWh", "Non-Green"]
print(verlauf[green_ahead])
verlauf = verlauf["Generation_TWh"]
print(verlauf["Green"] > verlauf["Non-Green"])
# Sketch of the table layouts (rows are year-month):
#          Biomass, CoalHard,
# 2019-1   100,     2321,
# 2019-2   100,     2321,
#          Green,   NonGreen,
# 2019-1   100,     2321,
# 2019-2   110,     2121,
# ....
# 2024-2   2313,    111,

76
src/T11_Pivotieren.py Normal file
View File

@ -0,0 +1,76 @@
import pandas as pd
from pandas.core.dtypes.missing import construct_1d_array_from_inferred_fill_value
# Small product table: one row per (Product, Color) combination.
data = {
    "Product": [
        "Fancy Chair",
        "Fancy Chair",
        "Luxury Sofa",
        "Designer Table",
        "Luxury Sofa",
    ],
    "Color": ["Blue", "Green", "Blue", "Green", "Red"],
    "Customer Price": [2345.89, 2390.50, 1820.00, 3100.00, 2750.00],
    "Non-Customer Price": [2445.89, 2495.50, 1980.00, 3400.00, 2850.00],
}
df = pd.DataFrame(data)
print(df)

# pivot() does not allow duplicate (index, columns) pairs!
pivot_axes = {"index": "Product", "columns": "Color"}

# One value column -> plain columns, one per colour.
pivoted_df = df.pivot(values="Non-Customer Price", **pivot_axes)
print(pivoted_df)

# Several value columns -> two-level columns (value column, colour).
pivoted_df = df.pivot(values=["Non-Customer Price", "Customer Price"], **pivot_axes)
print(pivoted_df)
beverages = pd.read_csv("../data/beverages.csv")
# Label the rows with repeating working days. The labels are derived from the
# actual row count instead of a hard-coded length, so the assignment cannot
# fail with a length mismatch when the CSV grows or shrinks.
weekdays = ["Monday", "Tuesday", "Wednesday", "Thursday", "Friday"]
beverages["Day"] = [weekdays[i % len(weekdays)] for i in range(len(beverages))]
print(beverages)

# pivot_table takes an aggfunc to resolve duplicate (index, columns) pairs
# pivot_table ~= groupby().aggfunc().unstack()
coffe_pivot = beverages.pivot_table(
    index="Name",
    columns="Day",
    values="Coffee",
    aggfunc="mean",
    fill_value=0,
).round(1)
print(coffe_pivot)

# The same result built with groupby
coffees = (
    beverages[["Name", "Day", "Coffee"]]
    .groupby(["Name", "Day"])
    .mean()
    .unstack()
    .round(1)
)
print(coffees)
# Missing (Name, Day) combinations become 0.0; this mirrors fill_value above
coffees = coffees.fillna(0.0)
print(coffees)
# 1. Energy data
#    ["Year_Quarter"] = ["Year"].astype(str) + ["Quarter"]   # 2019 + "Q1"
#    Group the production per type for every quarter
#    Pivot -> rows: (year, Q1), columns: Biomass, Wind
#    df.index <- name of the index column
print("\n" * 3)
energy_df = pd.read_csv("../data/germany_energy_mix_2019_2024.csv")

# 1) New column combining year and quarter
year_text = energy_df["Year"].astype(str)
energy_df["Year Quarter"] = year_text + " " + energy_df["Quarter"]
# drop: axis=0 deletes the rows named [Year, Quarter],
#       axis=1 deletes the columns named [Year, Quarter]
energy_df = energy_df.drop(["Year", "Quarter"], axis=1)
print(energy_df)

# 1) Group by 'Year Quarter'
quarterly_data = (
    energy_df.groupby(["Year Quarter", "Energy_Source"])[["Generation_TWh"]]
    .sum()
    .unstack()
)
print(quarterly_data)

# 2) The same via pivot_table
pivot_options = {
    "index": "Year Quarter",
    "columns": "Energy_Source",
    "values": "Generation_TWh",
    "aggfunc": "sum",
    # so that a value exists for every energy type in every quarter
    "fill_value": 0,
}
quarterly_data = energy_df.pivot_table(**pivot_options)
print(quarterly_data)

45
src/T12_Datetimes.py Normal file
View File

@ -0,0 +1,45 @@
import pandas as pd
# Date notations: year-month-day
#                 month/day/year (US notation)
beverages_by_date = pd.read_csv("../data/beverages_by_date.csv", index_col=0)
# Convert the index to datetimes
beverages_by_date.index = pd.to_datetime(
    beverages_by_date.index,
    format="%Y-%m-%d",  # normally not needed
)
print(beverages_by_date)
print(beverages_by_date.index.dtype)
print()

# Iterating a resampler yields one (bin label, sub-frame) pair per 2-week bin.
sampler = beverages_by_date.resample("2W")
# NOTE(review): indentation was lost in the source; assumed that only
# print(el) forms the loop body and the resampler is printed once afterwards.
for el in sampler:
    print(el)
print(sampler)

# Label-based slicing on the datetime index
print(beverages_by_date.loc["2024-02-8":"2024-02-14"])

# Several aggregations of the coffee column per 2-week bin
by_weekly = beverages_by_date.resample("2W").agg({
    'coffee': ["sum", "mean", "std", "count"]
})
print(by_weekly)

# Filling the gaps created by upsampling: bfill and ffill
# interpolate = linear
daily = beverages_by_date.resample("8h").bfill()
print(daily.loc["2024-02-8":"2024-02-14"])
# Exercise with timestamps
solar_csv = "../data/Balkonkraftwerk.csv"
solar_df = pd.read_csv(solar_csv, index_col=0)
solar_df.index = pd.to_datetime(solar_df.index)
# Show the frame first, then its column labels
print(solar_df, solar_df.columns, sep="\n")
# 1) What does it look like on average for each day (D)
# 2) On which days was the efficiency > 35%
# 3) Interpolate hourly values (h) (1h), (3h)
#    - odd