day2 intermediate code2
This commit is contained in:
parent
7d515e7032
commit
331e55f987
@ -35,6 +35,11 @@ print(grouped_donations.loc["Student"].loc[["Hamburg", "Köln"]])
|
|||||||
# zeile students ist wieder ein dataframe
|
# zeile students ist wieder ein dataframe
|
||||||
students = grouped_donations.loc["Student"]
|
students = grouped_donations.loc["Student"]
|
||||||
print(students.loc["Hamburg"])
|
print(students.loc["Hamburg"])
|
||||||
|
print(grouped_donations)
|
||||||
|
print(grouped_donations.idxmin())
|
||||||
|
print(grouped_donations.loc[grouped_donations.idxmin()])
|
||||||
|
print(grouped_donations.loc["Student"].loc["Hamburg"])
|
||||||
|
|
||||||
print("-"*100)
|
print("-"*100)
|
||||||
# doppelte indizes
|
# doppelte indizes
|
||||||
info = grouped_donations.unstack()
|
info = grouped_donations.unstack()
|
||||||
|
|||||||
@ -7,10 +7,60 @@ print(energy_df.columns)
|
|||||||
# Green, Non-Green
|
# Green, Non-Green
|
||||||
|
|
||||||
# 1) Wie viel erneuerbare bzw. nicht-erneuerbare Energie wurde insgesamt produziert
|
# 1) Wie viel erneuerbare bzw. nicht-erneuerbare Energie wurde insgesamt produziert
|
||||||
|
total_energy_production = energy_df[["Generation_TWh", "Energy_Type"]].groupby("Energy_Type").sum()
|
||||||
|
print(total_energy_production)
|
||||||
|
|
||||||
# 2) Pro Jahr (unstack)
|
# 2) Pro Jahr (unstack)
|
||||||
|
yearly_production = energy_df[["Year", "Generation_TWh", "Energy_Type"]].groupby(["Year", "Energy_Type"]).sum()
|
||||||
|
yearly_production = yearly_production.unstack()
|
||||||
|
print(yearly_production)
|
||||||
|
|
||||||
|
|
||||||
# 3) jährlich nach Energy_Source angeben
|
# 3) jährlich nach Energy_Source angeben
|
||||||
|
yearly_production = energy_df[["Year", "Generation_TWh", "Energy_Source"]].groupby(["Year", "Energy_Source"]).sum()
|
||||||
|
yearly_production = yearly_production.unstack()
|
||||||
|
print(yearly_production)
|
||||||
|
|
||||||
# 4) welche energiequelle hat den größten/kleinsten wachstum (idxmax)
|
# 4) welche energiequelle hat den größten/kleinsten wachstum (idxmax)
|
||||||
|
yearly_production = yearly_production["Generation_TWh"]
|
||||||
|
print(yearly_production)
|
||||||
|
# a = yearly_production["Biomass"]
|
||||||
|
# a.sort_values(inplace=True) Verboten, da a eine View auf yearly_production ist!
|
||||||
|
print("-"*100)
|
||||||
|
diff = yearly_production.loc[2024] - yearly_production.loc[2019]
|
||||||
|
sorted_diffs = diff.sort_values()
|
||||||
|
print(diff)
|
||||||
|
print(sorted_diffs)
|
||||||
|
|
||||||
|
# größtes Wachstum
|
||||||
|
print(f"Größter Wachstum: {diff.idxmax()}, {diff.max()}")
|
||||||
|
print(f"Größter Wachstum: {sorted_diffs.index[-1]}, {sorted_diffs.iloc[-1]}")
|
||||||
|
print(f"Kleinster Wachstum: {diff.idxmin()}, {diff.min()}")
|
||||||
|
print(f"Kleinster Wachstum: {sorted_diffs.index[0]}, {sorted_diffs.iloc[0]}")
|
||||||
|
|
||||||
# 5) Prozentual
|
# 5) Prozentual
|
||||||
|
percentages = yearly_production.divide(yearly_production.sum(axis=1), axis=0)
|
||||||
|
print(percentages)
|
||||||
|
print(percentages.mean(axis=0))
|
||||||
|
perc_diff = percentages.loc[2024] - percentages.loc[2019]
|
||||||
|
perc_diff.sort_values(inplace=True)
|
||||||
|
print(perc_diff)
|
||||||
# Monatlichen Verlauf (Jan 2019 und Jan 2020 sind verschiedene)
|
# Monatlichen Verlauf (Jan 2019 und Jan 2020 sind verschiedene)
|
||||||
|
verlauf = energy_df[["Year", "Month", "Energy_Type", "Generation_TWh"]].groupby(["Year", "Month", "Energy_Type"]).sum().unstack()
|
||||||
|
print(verlauf)
|
||||||
|
|
||||||
# In welchen monaten wurde mehr grüner als nicht-grüner strom produziert
|
# In welchen monaten wurde mehr grüner als nicht-grüner strom produziert
|
||||||
|
print(verlauf[verlauf["Generation_TWh", "Green"] > verlauf["Generation_TWh", "Non-Green"]])
|
||||||
|
|
||||||
|
verlauf = verlauf["Generation_TWh"]
|
||||||
|
print(verlauf["Green"] > verlauf["Non-Green"])
|
||||||
|
|
||||||
|
# Biomass, CoalHard,
|
||||||
|
#2019-1 100, 2321,
|
||||||
|
#2019-2 100, 2321,
|
||||||
|
|
||||||
|
# Green, NonGreen,
|
||||||
|
#2019-1 100, 2321,
|
||||||
|
#2019-2 110, 2121,
|
||||||
|
# ....
|
||||||
|
#2024-2 2313, 111,
|
||||||
|
|||||||
76
src/T11_Pivotieren.py
Normal file
76
src/T11_Pivotieren.py
Normal file
@ -0,0 +1,76 @@
|
|||||||
|
import pandas as pd
|
||||||
|
from pandas.core.dtypes.missing import construct_1d_array_from_inferred_fill_value
|
||||||
|
|
||||||
|
data = {
|
||||||
|
'Product': ['Fancy Chair', 'Fancy Chair', 'Luxury Sofa', 'Designer Table', 'Luxury Sofa'],
|
||||||
|
'Color': ['Blue', 'Green', 'Blue', 'Green', 'Red'],
|
||||||
|
'Customer Price': [2345.89, 2390.50, 1820.00, 3100.00, 2750.00],
|
||||||
|
'Non-Customer Price': [2445.89, 2495.50, 1980.00, 3400.00, 2850.00]
|
||||||
|
}
|
||||||
|
|
||||||
|
df = pd.DataFrame(data)
|
||||||
|
print(df)
|
||||||
|
|
||||||
|
# pivot erlaubt keine Duplikate!
|
||||||
|
pivoted_df = df.pivot(index="Product",
|
||||||
|
columns="Color",
|
||||||
|
values="Non-Customer Price")
|
||||||
|
|
||||||
|
print(pivoted_df)
|
||||||
|
|
||||||
|
pivoted_df = df.pivot(index="Product",
|
||||||
|
columns="Color",
|
||||||
|
values=["Non-Customer Price", "Customer Price"])
|
||||||
|
|
||||||
|
print(pivoted_df)
|
||||||
|
|
||||||
|
beverages = pd.read_csv("../data/beverages.csv")
|
||||||
|
# Assign weekdays cyclically (Mon..Fri, Mon..Fri, ...) to every row.
# The length is derived from the frame itself instead of a hard-coded 103,
# so the assignment keeps working when the CSV gains or loses rows.
weekdays = ["Monday", "Tuesday", "Wednesday", "Thursday", "Friday"]
beverages["Day"] = [weekdays[i % len(weekdays)] for i in range(len(beverages))]
|
||||||
|
print(beverages)
|
||||||
|
|
||||||
|
# eine aggfunc für duplikate mitgeben
|
||||||
|
# pivot_table ~= groupby().aggfunc().unstack()
|
||||||
|
coffe_pivot = beverages.pivot_table(
|
||||||
|
index="Name",
|
||||||
|
columns="Day",
|
||||||
|
values="Coffee",
|
||||||
|
aggfunc="mean",
|
||||||
|
fill_value=0
|
||||||
|
).round(1)
|
||||||
|
print(coffe_pivot)
|
||||||
|
# als groupby
|
||||||
|
coffees = beverages[["Name", "Day", "Coffee"]].groupby(["Name", "Day"]).mean().unstack().round(1)
|
||||||
|
print(coffees)
|
||||||
|
coffees[coffees.isna()] = 0.0
|
||||||
|
print(coffees)
|
||||||
|
# 1. Energiedaten
|
||||||
|
# ["Year_Quarter"] = ["Year"].astype(str) + ["Quarter"] # 2019 + "Q1"
|
||||||
|
# Pro Type für jedes Quartal die Produktion gruppieren
|
||||||
|
# Pivot -> Zeilen: (Jahr, Q1) Spalten: Biomass, Wind
|
||||||
|
|
||||||
|
# df.index <- name der index-spalte
|
||||||
|
print("\n"*3)
|
||||||
|
energy_df = pd.read_csv("../data/germany_energy_mix_2019_2024.csv")
|
||||||
|
# 1) Neue spalte
|
||||||
|
energy_df["Year Quarter"] = energy_df["Year"].astype(str) + " " + energy_df['Quarter']
|
||||||
|
# drop: axis=0 die zeilen namens [Year, quarter] gelöscht
|
||||||
|
# axis=1 die spalten namens [Year, quarter] gelöscht
|
||||||
|
energy_df.drop(["Year", "Quarter"], axis=1, inplace=True)
|
||||||
|
print(energy_df)
|
||||||
|
|
||||||
|
# 1) Mit 'Year Quarter' gruppieren
|
||||||
|
quarterly_data = energy_df[["Year Quarter", "Energy_Source", "Generation_TWh"]].groupby(
|
||||||
|
["Year Quarter", "Energy_Source"]).sum().unstack()
|
||||||
|
print(quarterly_data)
|
||||||
|
|
||||||
|
# 2) Pivot_table
|
||||||
|
quarterly_data = energy_df.pivot_table(
|
||||||
|
index="Year Quarter",
|
||||||
|
columns="Energy_Source",
|
||||||
|
values="Generation_TWh",
|
||||||
|
aggfunc="sum",
|
||||||
|
fill_value=0, # für jeden energietyp in jedem quartal ein wert existiert
|
||||||
|
)
|
||||||
|
print(quarterly_data)
|
||||||
|
|
||||||
|
|
||||||
45
src/T12_Datetimes.py
Normal file
45
src/T12_Datetimes.py
Normal file
@ -0,0 +1,45 @@
|
|||||||
|
import pandas as pd
|
||||||
|
|
||||||
|
# jahr-monat-tag
|
||||||
|
# monat/tag/jahr (us-schreibweise)
|
||||||
|
|
||||||
|
beverages_by_date = pd.read_csv("../data/beverages_by_date.csv",
|
||||||
|
index_col=0)
|
||||||
|
|
||||||
|
# zum datum konvertiert
|
||||||
|
beverages_by_date.index = pd.to_datetime(
|
||||||
|
beverages_by_date.index,
|
||||||
|
format="%Y-%m-%d" # normalerweise nicht
|
||||||
|
)
|
||||||
|
|
||||||
|
print(beverages_by_date)
|
||||||
|
print(beverages_by_date.index.dtype)
|
||||||
|
print()
|
||||||
|
sampler = beverages_by_date.resample("2W")
|
||||||
|
for el in sampler:
|
||||||
|
print(el)
|
||||||
|
print(sampler)
|
||||||
|
|
||||||
|
print(beverages_by_date.loc["2024-02-8":"2024-02-14"])
|
||||||
|
|
||||||
|
by_weekly = beverages_by_date.resample("2W").agg({
|
||||||
|
'coffee': ["sum", "mean", "std", "count"]
|
||||||
|
})
|
||||||
|
print(by_weekly)
|
||||||
|
|
||||||
|
# bfill und ffill
|
||||||
|
# interpolate = linear
|
||||||
|
#
|
||||||
|
daily = beverages_by_date.resample("8h").bfill()
|
||||||
|
print(daily.loc["2024-02-8":"2024-02-14"])
|
||||||
|
|
||||||
|
# übung mit zeiten
|
||||||
|
solar_df = pd.read_csv("../data/Balkonkraftwerk.csv", index_col=0)
|
||||||
|
solar_df.index = pd.to_datetime(solar_df.index)
|
||||||
|
print(solar_df)
|
||||||
|
print(solar_df.columns)
|
||||||
|
|
||||||
|
# 1) Wie sieht es im durchschnitt jeden Tag aus (D)
|
||||||
|
# 2) An welchen Tagen war die Effizienz > 35%
|
||||||
|
# 3) Stündliche Werte interpolieren (h) (1h), (3h)
|
||||||
|
# - Komisch
|
||||||
Loading…
Reference in New Issue
Block a user