Wind and water data#

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
from matplotlib.dates import DateFormatter
from datetime import datetime

import os
from io import StringIO
import requests

from windrose import WindroseAxes
from scipy.stats import circmean, mode

1. Download data#

1.1 Download all files#

Information about the selected station:
https://www.ndbc.noaa.gov/station_page.php?station=42003

Sample url:
https://www.ndbc.noaa.gov/download_data.php?filename=42003h1990.txt.gz&dir=data/historical/stdmet/

# Toggle for re-fetching the raw NDBC files; when False, previously
# saved files under input/station_{ID} are reused.
download = False

if download:
    # Station to fetch ('42003' is an NDBC buoy in the Gulf of Mexico;
    # see the station page linked above) and the year range to cover.
    station_id = '42003'
    start_year = 1990
    current_year = 2022  # alternatively: datetime.now().year

    # Make sure the destination folder for the raw files exists.
    raw_data_dir = f'input/station_{station_id}'
    os.makedirs(raw_data_dir, exist_ok=True)

    # Fetch one whitespace-delimited text file per year and store it as CSV.
    for year in range(start_year, current_year + 1):
        url = f'https://www.ndbc.noaa.gov/view_text_file.php?filename={station_id}h{year}.txt.gz&dir=data/historical/stdmet/'
        print(f"Downloading data for year {year}...")

        # Parse the remote text file; malformed rows are skipped.
        yearly_df = pd.read_csv(url, sep=r'\s+', header=[0], on_bad_lines='skip', low_memory=False)

        # Persist each year as its own CSV and preview the first rows.
        yearly_df.to_csv(os.path.join(raw_data_dir, f"wind_data_{year}.csv"), index=False)
        display(yearly_df.head(2))
else:
    print('Skipping download and reading existing files from input/station_{ID}')
Skipping download and reading existing files from input/station_{ID}

1.2 File headers#

The file headers change across years: column names are revised and new columns are added over time.

# Summarise the header row of every yearly CSV so that header changes
# across years (renamed or newly added columns) can be inspected in one
# table.

# Define input and output paths
station_id = '42003'
input_dir = f'input/station_{station_id}'
output_file = 'input/headers_summary.csv'

# Rows for the summary table: one dict per yearly file
header_data = []

# Loop through each file in the input directory (sorted so the rows come
# out in chronological order, since filenames end with the year)
for file in sorted(os.listdir(input_dir)):
    if not file.endswith('.csv'):
        continue
    file_path = os.path.join(input_dir, file)

    try:
        # Read only the first line of the file (the column-name header).
        # next() raises on an empty file, which is caught below.
        with open(file_path, 'r') as f:
            header_line = next(f).strip()

        # File names look like 'wind_data_<year>.csv' -> extract <year>
        header_data.append({
            'Year': file.split('_')[-1].split('.')[0],
            'Header_1': header_line,
        })

    except Exception as e:
        # Best-effort: report and keep going so one unreadable file does
        # not abort the whole summary
        print(f"Failed to extract headers for {file}: {e}")

# Convert the list to a DataFrame
header_df = pd.DataFrame(header_data)

# Save the headers to a CSV file
header_df.to_csv(output_file, index=False)

print(f"\nHeaders saved to '{output_file}'")
Headers saved to 'input/headers_summary.csv'

1.3 Merge all files#

# Merge the per-year CSVs into one cleaned time series.
# Header layouts by era (see input/headers_summary.csv):
# 1990-1999: 'YY',   'MM', 'DD', 'hh',       'WD',   'WSPD', 'GST', 'WVHT', 'DPD', 'APD','MWD', 'BAR',  'ATMP', 'WTMP', 'DEWP', 'VIS'
# 2000-2004: 'YYYY', 'MM', 'DD', 'hh',       'WD',   'WSPD', 'GST', 'WVHT', 'DPD', 'APD','MWD', 'BAR',  'ATMP', 'WTMP', 'DEWP', 'VIS', 'TIDE'
# 2005-2006: 'YYYY', 'MM', 'DD', 'hh', 'mm', 'WD',   'WSPD', 'GST', 'WVHT', 'DPD','APD', 'MWD', 'BAR',  'ATMP', 'WTMP', 'DEWP', 'VIS', 'TIDE'
# 2007-Now : 'YYYY', 'MM', 'DD', 'hh', 'mm', 'WDIR', 'WSPD', 'GST', 'WVHT', 'DPD','APD', 'MWD', 'PRES', 'ATMP', 'WTMP', 'DEWP', 'VIS', 'TIDE'
# 2007-Now: Contain commented header row ('#YY ...') followed by a units row

# Station and year settings
STATION_ID = '42003'
START_YEAR = 1990
END_YEAR = 2022
raw_data_dir = f'input/station_{STATION_ID}'

# One cleaned DataFrame per year, concatenated at the end
all_data = []

# Loop through saved files
for year in range(START_YEAR, END_YEAR + 1):
    file_path = os.path.join(raw_data_dir, f"wind_data_{year}.csv")
    
    if not os.path.exists(file_path):
        print(f" File for year {year} not found, skipping...")
        continue
    
    try:
        #print(f"Processing data for year {year}...")

        # For 2007 only, read and show the raw file (with its units row)
        # for inspection; this df is immediately overwritten below
        if year == 2007:
            df = pd.read_csv(file_path, low_memory=False)
            display(df.head(2))
            
        # Read the file directly (use first row as header); skiprows=[1]
        # drops the units row present in 2007+ files.
        # NOTE(review): pre-2007 files have no units row, so skiprows=[1]
        # also discards the first real observation of those years —
        # confirm this loss is acceptable.
        df = pd.read_csv(file_path, skiprows=[1], low_memory=False)

        # Add missing 'minute' column if not present (pre-2005 files are hourly)
        if 'mm' not in df.columns:
            df['mm'] = 0
        
        # Add missing 'TIDE' column if not present (pre-2000 files)
        if 'TIDE' not in df.columns:
            df['TIDE'] = np.nan

        # Rename columns to match pd.to_datetime requirements and
        # normalize era-specific names
        df.rename(columns={
            'YY': 'year',
            '#YY': 'year',
            'YYYY': 'year',
            'MM': 'month',
            'DD': 'day',
            'hh': 'hour',
            'mm': 'minute',
            'WD': 'WDIR',   # Normalize wind direction column
            'BAR': 'PRES'   # Normalize pressure column
        }, inplace=True)
        
        # Convert 2-digit years: 51-99 -> 19xx, 00-50 -> 20xx;
        # 4-digit years pass through unchanged
        if 'year' in df.columns:
            df['year'] = df['year'].apply(lambda x: x + 1900 if x < 100 and x > 50 else (x + 2000 if x < 100 else x))

        # # Ensure that the 'year' column is numeric
        # if 'year' in df.columns:
        #     df['year'] = pd.to_numeric(df['year'], errors='coerce')  # Convert to numeric and set invalid entries to NaN
        #     df.dropna(subset=['year'], inplace=True)  # Drop invalid rows
        #     df['year'] = df['year'].astype(int)  # Convert back to integer

        # Create a single 'Timestamp' column (if time columns are available);
        # errors='coerce' turns unparseable rows into NaT
        if all(col in df.columns for col in ['year', 'month', 'day', 'hour', 'minute']):
            df['Timestamp'] = pd.to_datetime(
                df[['year', 'month', 'day', 'hour', 'minute']],
                errors='coerce'
            )
            #df.dropna(subset=['Timestamp'], inplace=True)
        else:
            print(f"Missing time columns for year {year}, skipping...")
            continue

        # Preview the first and last processed years only
        if (year == END_YEAR) | (year == START_YEAR):
            display(df.head(2))
        
        # Keep only necessary columns (strip stray whitespace from names first)
        df.columns = df.columns.str.strip()
        cols_to_keep = ['Timestamp', 'WDIR', 'WSPD', 'ATMP', 'WTMP']
        existing_cols = [col for col in cols_to_keep if col in df.columns]
        df = df[existing_cols]

        # # Drop rows with missing values
        # df.dropna(inplace=True)

        # Set timestamp as the index
        df.set_index('Timestamp', inplace=True)

        # Append to combined data
        all_data.append(df)

        #print(f"Processed data for year {year}: {list(df.columns)}")

    except Exception as e:
        # Best-effort: report and continue with the remaining years
        print(f" Failed to process data for year {year}: {e}")
        continue

# Combine all processed data
if all_data:
    final_data = pd.concat(all_data)
    final_data.sort_index(inplace=True)

    # Replace placeholder values with NaN.
    # NOTE(review): the replacement applies to every remaining column,
    # so a legitimate wind direction of exactly 99 degrees would also be
    # erased — confirm this trade-off is intended.
    print('Replacing placeholder values including 999 and 99 with NaN')
    final_data.replace([999, 999.0, 99, 99.0], np.nan, inplace=True)

    # Save to CSV
    output_file = 'output/wind_data_cleaned.csv'
    final_data.to_csv(output_file)
    print(f"\n Final combined data saved to '{output_file}'")

    # Display the final processed data preview
    print("\n Final data preview:")
    display(final_data)

    # Quick sanity summary of the value ranges per column
    display(final_data.describe().loc[['min', 'mean', 'max'],:])

else:
    print("\n No valid data to combine!")
year month day hour WDIR WSPD GST WVHT DPD APD MWD PRES ATMP WTMP DEWP VIS minute TIDE Timestamp
0 1990 1 1 1 211 6.4 7.5 1.5 7.1 5.6 999 1017.3 25.3 26.1 999.0 99.0 0 NaN 1990-01-01 01:00:00
1 1990 1 1 2 220 5.5 6.5 1.7 6.7 5.8 999 1017.9 25.3 26.1 999.0 99.0 0 NaN 1990-01-01 02:00:00
#YY MM DD hh mm WDIR WSPD GST WVHT DPD APD MWD PRES ATMP WTMP DEWP VIS TIDE
0 #yr mo dy hr mn degT m/s m/s m sec sec degT hPa degC degC degC nmi ft
1 2007 01 01 00 00 185 5.7 6.9 1.56 6.67 5.45 143 1017.6 26.5 26.9 24.5 99.0 99.00
year month day hour minute WDIR WSPD GST WVHT DPD APD MWD PRES ATMP WTMP DEWP VIS TIDE Timestamp
0 2022 8 6 21 30 108 6.3 8.2 99.00 99.00 99.00 999 1016.1 999.0 30.1 999.0 99.0 99.0 2022-08-06 21:30:00
1 2022 8 6 21 40 103 6.4 9.0 1.01 5.56 4.67 119 1016.4 29.4 30.0 25.2 99.0 99.0 2022-08-06 21:40:00
Replacing placeholder values including 999 and 99 with NaN
 Final combined data saved to 'output/wind_data_cleaned.csv'

 Final data preview:
WDIR WSPD ATMP WTMP
Timestamp
1990-01-01 01:00:00 211.0 6.4 25.3 26.1
1990-01-01 02:00:00 220.0 5.5 25.3 26.1
1990-01-01 03:00:00 201.0 6.0 25.3 26.1
1990-01-01 04:00:00 202.0 6.3 25.2 26.1
1990-01-01 05:00:00 199.0 6.4 25.1 26.1
... ... ... ... ...
2022-09-21 13:30:00 29.0 4.2 28.4 29.4
2022-09-21 13:40:00 37.0 4.3 28.5 29.4
2022-09-21 13:50:00 53.0 3.7 28.5 29.4
2022-09-21 14:00:00 54.0 3.4 28.7 29.4
2022-09-21 14:10:00 51.0 3.2 28.7 29.4

418087 rows × 4 columns

WDIR WSPD ATMP WTMP
min 0.000000 0.000000 11.200000 20.00000
mean 136.314668 5.734006 25.467262 27.34473
max 360.000000 28.600000 34.300000 34.50000

2. Plot data#

2.1 Wind speed over time and wind rose#

#  Create 'figures' folder if it doesn't exist
if not os.path.exists('figures'):
    os.makedirs('figures')


#  Load the cleaned data produced by the merge step above
file_path = 'output/wind_data_cleaned.csv'
data = pd.read_csv(file_path, parse_dates=['Timestamp'], index_col='Timestamp')


# Year range to plot (hard-coded; there is no interactive prompt)
start_year = 1990
end_year = 2022

# Filter data by the selected years
data = data[(data.index.year >= start_year) & (data.index.year <= end_year)]



#  Wind Speed Over Time
def plot_wind_speed_over_time(data):
    """Line plot of the WSPD column against the datetime index.

    Saves the figure to figures/wind_speed_over_time.png and shows it.
    """
    fig, axis = plt.subplots(figsize=(14, 6))
    axis.plot(data.index, data['WSPD'], color='blue', label='Wind Speed (m/s)', alpha=0.7)

    axis.set_title('Wind Speed Over Time')
    axis.set_xlabel('Date')
    axis.set_ylabel('Wind Speed (m/s)')

    # One labelled tick per year along the x-axis
    axis.xaxis.set_major_locator(mdates.YearLocator(1))
    axis.xaxis.set_major_formatter(mdates.DateFormatter('%Y'))
    plt.xticks(rotation=90)

    axis.grid(True, linestyle='--', alpha=0.7)
    axis.legend()
    plt.tight_layout()

    # Persist, then display
    plt.savefig('figures/wind_speed_over_time.png', dpi=300)
    plt.show()

#  Wind Rose Plot
def plot_wind_rose(data):
    """Wind-rose plot (direction/speed frequency) of WDIR vs WSPD.

    Saves the figure to figures/wind_rose.png and shows it.
    """
    # Drop missing pairs and keep strictly positive speeds only
    valid = data[['WDIR', 'WSPD']].dropna()
    valid = valid[valid['WSPD'] > 0]

    fig = plt.figure(figsize=(8, 8))
    ax = fig.add_subplot(111, projection='windrose')

    # Normalised stacked bars, one sector per direction bin
    ax.bar(
        valid['WDIR'],
        valid['WSPD'],
        normed=True,
        opening=0.8,
        edgecolor='white',
    )

    ax.set_title('Wind Rose')
    ax.set_legend(title="Wind Speed (m/s)")
    plt.tight_layout()

    # Persist, then display
    plt.savefig('figures/wind_rose.png', dpi=300)
    plt.show()

#  Run both plotting functions on the year-filtered data
plot_wind_speed_over_time(data)
plot_wind_rose(data)
../_images/a0a56beef1fff38ffc799a758299650972397e9369418717f0377539f89a2da9.png ../_images/813982d27eee44d8e911e3604936e92179936e5bef8c8ee011ab9244d75fd657.png

2.2 Seasonal patterns#

#  Create 'figures' folder if it doesn't exist
if not os.path.exists('figures'):
    os.makedirs('figures')

#  Load the cleaned data
file_path = 'output/wind_data_cleaned.csv'
data = pd.read_csv(file_path, parse_dates=['Timestamp'], index_col='Timestamp')

#  Year range to analyse (hard-coded; there is no interactive prompt)
start_year = 1990
end_year = 2022

#  Filter by year, then reduce to monthly maxima
#  ('MS' labels each month by its first day)
data = data[(data.index.year >= start_year) & (data.index.year <= end_year)]
data = data.resample('MS').max()

#  1. Compute Baseline Seasonal Mean
def compute_baseline(data):
    """Return mean ATMP/WTMP/WSPD per calendar month over all years.

    Side effect: adds a 'Month' column (1-12) to *data*, which
    compute_anomalies() relies on.
    """
    data['Month'] = data.index.month
    return data.groupby('Month')[['ATMP', 'WTMP', 'WSPD']].mean()

#  2. Compute Anomalies
def compute_anomalies(data, baseline):
    """Attach per-month baselines and anomaly columns to *data* (in place).

    *baseline* is indexed by month (1-12); *data* must already carry the
    'Month' column added by compute_baseline(). Returns *data*.
    """
    # First the three baseline columns, then the three anomaly columns,
    # so the resulting column order matches downstream expectations.
    for var in ('ATMP', 'WTMP', 'WSPD'):
        data[f'{var}_Baseline'] = data['Month'].map(baseline[var])
    for var in ('ATMP', 'WTMP', 'WSPD'):
        data[f'{var}_Anomaly'] = data[var] - data[f'{var}_Baseline']
    return data

#  3. Plot Temperature and Wind Speed Anomalies (Subplots)
def plot_temperature_and_wind_anomalies(data):
    """Two stacked panels sharing the time axis: temperature anomalies
    (top) and wind-speed anomalies (bottom).

    Reads module-level start_year/end_year for the titles and output
    file name; saves under figures/ and shows the figure.

    NOTE(review): the title f-strings render as e.g. '19902022' — a dash
    between the years may have been lost; confirm the intended text.
    """
    fig, (temp_ax, wind_ax) = plt.subplots(2, 1, figsize=(14, 10), sharex=True)

    # Top panel: air and water temperature anomalies
    temp_ax.plot(data.index, data['ATMP_Anomaly'], label='Air Temp Anomaly (°C)', color='red', alpha=0.7)
    temp_ax.plot(data.index, data['WTMP_Anomaly'], label='Water Temp Anomaly (°C)', color='blue', alpha=0.7)
    temp_ax.axhline(0, color='black', linestyle='--', linewidth=1)  # zero = baseline
    temp_ax.set_ylabel('Temperature Anomaly (°C)')
    temp_ax.set_title(f'Temperature Anomalies ({start_year}{end_year})')
    temp_ax.legend(loc='upper left')
    temp_ax.grid(True, linestyle='--', alpha=0.7)

    # Bottom panel: wind-speed anomalies
    wind_ax.plot(data.index, data['WSPD_Anomaly'], label='Wind Speed Anomaly (m/s)', color='green', alpha=0.7)
    wind_ax.axhline(0, color='black', linestyle='--', linewidth=1)  # zero = baseline
    wind_ax.set_ylabel('Wind Speed Anomaly (m/s)')
    wind_ax.set_title(f'Wind Speed Anomalies ({start_year}{end_year})')
    wind_ax.legend(loc='upper left')
    wind_ax.grid(True, linestyle='--', alpha=0.7)

    # Shared x-axis: one labelled tick per year
    wind_ax.xaxis.set_major_locator(plt.matplotlib.dates.YearLocator(1))
    wind_ax.xaxis.set_major_formatter(plt.matplotlib.dates.DateFormatter('%Y'))
    plt.xticks(rotation=90)

    plt.tight_layout()

    # Persist, display, and report the output location
    output_path = f'figures/temperature_wind_anomalies_{start_year}_{end_year}.png'
    plt.savefig(output_path, dpi=300)
    plt.show()

    print(f"\nAnomaly plot saved to: {output_path}")

#  Run the pipeline: monthly baselines -> anomalies -> plot
if not data.empty:
    baseline = compute_baseline(data)
    data = compute_anomalies(data, baseline)
    plot_temperature_and_wind_anomalies(data)
else:
    print(f"No data available between {start_year} and {end_year}.")
../_images/2ecacdaf6aaf6a5c58965fa9a2d788527cedc87a1085947f26e33edfc030c66c.png
Anomaly plot saved to: figures/temperature_wind_anomalies_1990_2022.png
#  Create 'figures' folder if it doesn't exist
if not os.path.exists('figures'):
    os.makedirs('figures')

#  Load the cleaned data
file_path = 'output/wind_data_cleaned.csv'
data = pd.read_csv(file_path, parse_dates=['Timestamp'], index_col='Timestamp')

#  Year range to analyse (hard-coded)
start_year = 1990
end_year = 2022

#  Filter data for the selected range
data = data[(data.index.year >= start_year) & (data.index.year <= end_year)]

#  Extreme-event threshold: the 99th percentile of wind speed
value = 0.99
threshold = data['WSPD'].quantile(value)
print(f"{value*100:.0f}th percentile wind speed threshold: {threshold:.2f} m/s")

#  Identify extreme events (observations strictly above the threshold)
extreme_events = data[data['WSPD'] > threshold]

#  Optional storm overlay: CSV is expected to provide 'Timestamp' and
#  'Event' columns (see usage below); rows outside the data range are dropped
storm_data_path = 'input/storm_data.csv'
if os.path.exists(storm_data_path):
    storms = pd.read_csv(storm_data_path, parse_dates=['Timestamp'])
    storms = storms[(storms['Timestamp'] >= data.index.min()) & (storms['Timestamp'] <= data.index.max())]
else:
    storms = None

#  Plot extreme wind events
fig, ax = plt.subplots(figsize=(14, 6))

# Plot all wind speeds
ax.plot(data.index, data['WSPD'], color='blue', alpha=0.5, label='Wind Speed (m/s)')

# Highlight extreme events
ax.scatter(
    extreme_events.index, 
    extreme_events['WSPD'], 
    color='red', 
    s=10, 
    label=f'Extreme Events (>{threshold:.2f} m/s)'
)

# Overlay storm events if available: one dashed vertical line each;
# duplicate legend labels are collapsed further below
if storms is not None:
    for _, row in storms.iterrows():
        ax.axvline(row['Timestamp'], color='black', linestyle='--', alpha=0.7, label=row['Event'])

#  Format x-axis to have ticks every 4 years
ax.xaxis.set_major_locator(mdates.YearLocator(4))  # Every 4 years
ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y'))  # Format as year
plt.xticks(rotation=0, fontsize=14)  # Rotation and font size for x-ticks

#  Increase font sizes
#  NOTE(review): the title renders as '19902022' — a dash between the
#  years may have been lost; confirm the intended separator
ax.set_title(f'Extreme Wind Events ({start_year}{end_year})', fontsize=16)
ax.set_xlabel('Date', fontsize=14)
ax.set_ylabel('Wind Speed (m/s)', fontsize=14)

#  De-duplicate legend entries (repeated storm labels collapse to one
#  because the dict keeps only the last handle per label)
handles, labels = plt.gca().get_legend_handles_labels()
unique_labels = dict(zip(labels, handles))
ax.legend(unique_labels.values(), unique_labels.keys(), loc='upper right', fontsize=14)

#  Grid and layout (this xticks call repeats the font-size setting above)
ax.grid(True, linestyle='--', alpha=0.7)
plt.xticks(fontsize=14)
plt.yticks(fontsize=14)
plt.tight_layout()

#  Save and show plot
output_path = f'figures/extreme_events_{start_year}_{end_year}.png'
plt.savefig(output_path, dpi=300)
plt.show()

print(f"\n Extreme event plot saved to: {output_path}")
99th percentile wind speed threshold: 13.10 m/s
../_images/940c9402bf538f852af60f8227b2057339771a845dafa5ae822cee7849b0a54c.png
 Extreme event plot saved to: figures/extreme_events_1990_2022.png
#  Create 'figures' folder if it doesn't exist
if not os.path.exists('figures'):
    os.makedirs('figures')

#  Load the cleaned data
file_path = 'output/wind_data_cleaned.csv'
data = pd.read_csv(file_path, parse_dates=['Timestamp'], index_col='Timestamp')

#  Year range to analyse (hard-coded)
start_year = 1990
end_year = 2022

#  Filter data for the selected range
data = data[(data.index.year >= start_year) & (data.index.year <= end_year)]

#  Average climatology: group by calendar month (1-12) across all years
monthly_cycle = data.groupby(data.index.month)[['WSPD', 'ATMP', 'WTMP']].mean()

#  Plot monthly wind and temperature cycle on twin y-axes
fig, ax1 = plt.subplots(figsize=(14, 6))  # Increased width for better readability

# Plot wind speed (left axis, blue)
ax1.plot(
    monthly_cycle.index, 
    monthly_cycle['WSPD'], 
    label='Wind Speed (m/s)', 
    color='blue', 
    marker='o'
)

# Format left y-axis for wind speed
ax1.set_ylabel('Wind Speed (m/s)', color='blue', fontsize=14)
ax1.tick_params(axis='y', labelcolor='blue')
ax1.set_ylim(0, monthly_cycle['WSPD'].max() + 2)  # headroom above the peak

#  Create a second y-axis for temperature
ax2 = ax1.twinx()

# Plot air temperature (right axis, red)
ax2.plot(
    monthly_cycle.index, 
    monthly_cycle['ATMP'], 
    label='Air Temperature (°C)', 
    color='red', 
    marker='o'
)

# Plot water temperature (right axis, green)
ax2.plot(
    monthly_cycle.index, 
    monthly_cycle['WTMP'], 
    label='Water Temperature (°C)', 
    color='green', 
    marker='o'
)

# Format right y-axis for temperature
ax2.set_ylabel('Temperature (°C)', color='black', fontsize=14)
ax2.tick_params(axis='y', labelcolor='black')
ax2.set_ylim(0, monthly_cycle[['ATMP', 'WTMP']].max().max() + 2)

#  Add labels and legend
#  NOTE(review): the title renders as '19902022' — a dash between the
#  years may have been lost; confirm the intended separator
ax1.set_title(f'Monthly Wind and Temperature Cycle ({start_year}{end_year})', fontsize=16)
ax1.set_xlabel('Month', fontsize=14)
ax1.set_xticks(range(1, 13))
ax1.set_xticklabels([
    'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 
    'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'
], fontsize=14)  # Increased font size for month labels

#  Merge the legend entries from both axes into one box
lines, labels = ax1.get_legend_handles_labels()
lines2, labels2 = ax2.get_legend_handles_labels()
ax1.legend(lines + lines2, labels + labels2, loc='lower right', fontsize=14)

#  Grid and layout
ax1.grid(True, linestyle='--', alpha=0.7)
ax1.tick_params(axis='both', labelsize=14)  # Increased font size for tick labels
plt.tight_layout()

#  Save and show plot
output_path = f'figures/monthly_cycle_{start_year}_{end_year}.png'
plt.savefig(output_path, dpi=300)
plt.show()

print(f"\n Monthly cycle plot saved to: {output_path}")
../_images/66564c92bed53c06b345cc90e74c3a70ef173bed12993af034def043951ab6e9.png
 Monthly cycle plot saved to: figures/monthly_cycle_1990_2022.png
# Create 'output' folder if it doesn't exist
if not os.path.exists('output'):
    os.makedirs('output')

# Load the cleaned data
file_path = 'output/wind_data_cleaned.csv'
data = pd.read_csv(file_path, parse_dates=['Timestamp'], index_col='Timestamp')

# Drop NaNs to avoid invalid math operations
data = data.dropna(subset=['WDIR', 'WSPD'])

# Daily speed-weighted mean wind direction via vector averaging:
# decompose each observation into sin/cos components scaled by its
# speed, sum per day, and recombine with atan2.

# Convert wind direction to radians
wind_direction_radians = np.radians(data['WDIR'])

# Compute speed-weighted sine and cosine components
sin_component = np.sin(wind_direction_radians) * data['WSPD']
cos_component = np.cos(wind_direction_radians) * data['WSPD']

# Sum the weighted components per day
mean_sin = sin_component.resample('D').sum()
mean_cos = cos_component.resample('D').sum()

# Total daily wind speed, used as the normalizing divisor
total_wind_speed = data['WSPD'].resample('D').sum()

# Normalize by total wind speed. The common divisor cancels inside
# arctan2 below; NOTE(review): a day with zero total speed divides by
# zero here — confirm such days cannot occur or are acceptable as NaN.
mean_sin /= total_wind_speed
mean_cos /= total_wind_speed

# Use atan2 to calculate the directional mean (in degrees)
mean_direction = np.degrees(np.arctan2(mean_sin, mean_cos))
mean_direction = (mean_direction + 360) % 360  # Ensure values between 0–360
# Daily "median" direction helper.
# NOTE(review): despite its name, this returns the direction of the
# summed unit vectors — i.e. the (unweighted) circular mean, not a true
# circular median. The name is kept because callers below use it.
def circular_median(angles):
    """Resultant direction (degrees in [0, 360)) of *angles* (degrees);
    NaN for empty input."""
    if len(angles) == 0:
        return np.nan
    rad = np.radians(angles)
    resultant_angle = np.arctan2(np.sin(rad).sum(), np.cos(rad).sum())
    return np.degrees(resultant_angle) % 360

# Daily "median" direction.
# NOTE(review): circular_median() actually returns the resultant-vector
# direction (a circular mean), not a true median — rename or replace it
# if a genuine median is required.
median_direction = data['WDIR'].resample('D').apply(circular_median)

# Daily modal direction via scipy.stats.mode; np.atleast_1d guards
# against scalar/empty mode results across scipy versions.
# NOTE(review): days with very few samples trigger scipy's
# SmallSampleWarning and produce NaN (seen in the notebook output).
mode_direction = data['WDIR'].resample('D').apply(
    lambda x: np.atleast_1d(mode(x, nan_policy='omit').mode)[0] if np.size(mode(x, nan_policy='omit').mode) > 0 else np.nan
)

# Combine the three direction statistics with daily maxima of the
# remaining parameters
resampled_data = pd.DataFrame({
    'WDIR_mean': mean_direction,
    'WDIR_median': median_direction,
    'WDIR_mode': mode_direction,
    'WSPD': data['WSPD'].resample('D').max(),
    'ATMP': data['ATMP'].resample('D').max(),
    'WTMP': data['WTMP'].resample('D').max(),
})

# Save to CSV
output_file = 'output/wind_data_daily.csv'
resampled_data.to_csv(output_file)

print(f"\nResampled data saved to '{output_file}'")
display(resampled_data)
C:\Users\mgebremedhin\AppData\Local\Temp\ipykernel_25320\2048784597.py:49: SmallSampleWarning: One or more sample arguments is too small; all returned values will be NaN. See documentation for sample size requirements.
  lambda x: np.atleast_1d(mode(x, nan_policy='omit').mode)[0] if np.size(mode(x, nan_policy='omit').mode) > 0 else np.nan
Resampled data saved to 'output/wind_data_daily.csv'
WDIR_mean WDIR_median WDIR_mode WSPD ATMP WTMP
Timestamp
1990-01-01 9.895843 328.282768 15.0 12.2 25.3 26.1
1990-01-02 64.344269 66.745525 41.0 10.5 19.8 26.1
1990-01-03 107.297128 104.599761 121.0 8.9 23.6 26.1
1990-01-04 118.146457 118.821916 113.0 8.6 25.0 26.1
1990-01-05 147.639847 148.166277 127.0 6.1 25.8 26.1
... ... ... ... ... ... ...
2022-09-17 130.276905 131.706764 140.0 8.5 28.9 30.3
2022-09-18 43.688991 48.731482 32.0 7.3 28.9 29.8
2022-09-19 11.151731 12.074958 1.0 7.7 29.0 29.8
2022-09-20 39.544991 57.747172 21.0 7.9 29.4 30.1
2022-09-21 55.286527 55.417258 58.0 4.5 28.9 29.7

11952 rows × 6 columns