Red tides data#

Binder


import numpy as np
import pandas as pd

import os
from pathlib import Path
from scipy.io import loadmat  # this is the SciPy module that loads mat-files

import matplotlib.pyplot as plt
import cartopy.crs as ccrs
import cartopy.feature as cfeature

import warnings
warnings.filterwarnings("ignore", message="facecolor will have no effect as it has been defined as \"never\"")

1. Red tide data for study area#

Data download: https://www.ncei.noaa.gov/archive/archive-management-system/OAS/bin/prd/jquery/accession/details/120767

Direct access to current dataset (habsos_20240430): https://www.ncei.noaa.gov/data/oceans/archive/arc0069/0120767/8.8/data/0-data/

Data documentation: https://www.ncei.noaa.gov/data/oceans/archive/arc0069/0120767/8.8/data/0-data/Support Documents/AL Data Documentation 2016.pdf

# Read a csv file with Pandas
df = pd.read_csv('input/habsos_20240430.csv', low_memory=False)

# Select column names that you need for your analysis (e.g., 'LATITUDE', 'LONGITUDE', 'SAMPLE_DATE', 'CELLCOUNT', etc.) 
selected_columns = ['STATE_ID','DESCRIPTION','LATITUDE', 'LONGITUDE', 'SAMPLE_DATE', 'CELLCOUNT',
                   'SALINITY','WATER_TEMP','WIND_DIR','WIND_SPEED']

# Filter the DataFrame to include only the selected columns
df = df[selected_columns]

# Display your DataFrame
df

# Create a new column 'region' with default value 'Other'
df['REGION'] = 'Other'

# Mask for dicing: Define mask for Tampa Bay region based on latitude and longitude values
tampa_bay_mask = (df['LATITUDE'] >= 27) & (df['LATITUDE'] <= 28) & (df['LONGITUDE'] >= -85)

# Assign value 'Tampa Bay' to rows matching Tampa Bay mask 
df.loc[tampa_bay_mask, 'REGION'] = 'Tampa Bay'

# Mask for decing: Define mask for Charlotte Harbor estuary region  based on latitude and longitude values
charlotte_harbor_mask = (df['LATITUDE'] >= 25.5) & (df['LATITUDE'] < 27) & (df['LONGITUDE'] >= -85)

# Assign value 'Charlotte Harbor' to rows matching Charlotte Harbor mask
df.loc[charlotte_harbor_mask, 'REGION'] = 'Charlotte Harbor'

#Display dataframe
df
STATE_ID DESCRIPTION LATITUDE LONGITUDE SAMPLE_DATE CELLCOUNT SALINITY WATER_TEMP WIND_DIR WIND_SPEED REGION
0 TX Copano Bay 4 28.07237 -97.21968 2023/09/27 0 NaN NaN NaN NaN Other
1 TX Copano Bay 2 27.99567 -97.16831 2023/09/27 0 NaN NaN NaN NaN Other
2 TX Copano Bay 3 28.03930 -97.15836 2023/09/27 0 NaN NaN NaN NaN Other
3 TX Copano Bay 1 28.06491 -97.11270 2023/09/27 0 NaN NaN NaN NaN Other
4 FL off New Smyrna Beach; Atlantic 29.17460 -78.27890 1990/12/20 0 NaN 24.2 NaN NaN Other
... ... ... ... ... ... ... ... ... ... ... ...
211829 TX Nueces Bay, East of Nueces River Delta 27.86611 -97.51636 2015/10/14 0 NaN NaN NaN NaN Other
211830 TX Viola Turning Basin (Sample ID 1230-1) 27.84455 -97.52228 2009/12/30 1000000 NaN NaN NaN NaN Other
211831 TX South Padre Island, Brazos Santiago Pass south... 26.18451 -97.66535 2015/09/25 922000 34.0 29.0 NaN NaN Other
211832 TX South Padre Island, Brazos Santiago Pass south... 26.18451 -97.66535 2015/10/02 1513000 NaN NaN NaN NaN Other
211833 TX South Padre Island, Brazos Santiago Pass south... 26.18451 -97.66535 2015/09/25 248000 32.0 29.0 NaN NaN Other

211834 rows × 11 columns

2. Plot red tides#

2.1 Bathymetry data#

#[1] Load zos data 
member='AVISO-1-1_phy-001-030_r0'

#[2] Load Bathymetry_AVISO data
#Bathymetry=['NOAA','B-50','B-300','B-1000','B-2000','B-3000']
Bathymetry=['B300']
for bathymetry in Bathymetry: 
    yx=np.loadtxt('input/DigitizeSegments_Bathymetry_AVISO_{}.csv'.format(bathymetry),delimiter=',')
    print(yx.shape)
    
# Create coordinates for the four labels
latitudes= yx[:,0]  # latitude values 
longitudes= yx[:,1]  # longitude values 
(119, 2)
##### [1] Display processed results
#Section information (KB_Data_ESM)
#m=loadmat('zos_data_B300/Optimization/R3210.mat')
m=loadmat('input/zos_data_B300_segments.mat')

ndata = {n: m['row'][n][0,0] for n in m['row'].dtype.names}
dfm=pd.DataFrame.from_dict(dict((column, ndata[column][0]) for column in [n for n, v in ndata.items() if v.size == 1]))
#display(dfm)

#display(dfm)
NSeg=dfm['N'][0]; ns=dfm['Nstr'][0]; ne=dfm['Nend'][0];SSeg=dfm['S'][0]; ss=dfm['Sstr'][0]; se=dfm['Send'][0];
print('{} ({},{}) and {} ({},{}) north and south segments with {} and {} cells, respectively'.format(NSeg,ns,ne,SSeg,ss,se,ne-ns+1,se-ss+1))
B-300 (9,22) and B-300 (59,78) north and south segments with 14 and 20 cells, respectively

2.2 Convert lat and lon degree to decimal#

# Function to convert DMS to decimal degrees
def dms_to_decimal(degrees, minutes, seconds, direction):
    decimal = degrees + minutes / 60 + seconds / 3600
    if direction in ['S', 'W']:
        decimal = -decimal
    return decimal

# Input values
lat_dms = (25, 42, 1, 'N')
lon_dms = (83, 38, 28, 'W')

# Convert to decimal degrees
lat_decimal = dms_to_decimal(*lat_dms)
lon_decimal = dms_to_decimal(*lon_dms)

print(f"Latitude: {lat_decimal:.6f}, Longitude: {lon_decimal:.6f}")
Latitude: 25.700278, Longitude: -83.641111

2.3 Plot study area#

# 0) Select data to plot
Plot_Num= 2  #Type 1 to plot SLA data for the first time step or 2 to plot bathymetry lines

# 1) Select plot extend: Lon_West, Lon_East, Lat_South, Lat_North in Degrees
extent = [-75, -100, 18, 31] #Gulf of Mexico
#extent = [-80, -100, 20, 31] 

# 2) Select projection
projPC = ccrs.PlateCarree()

# 3) Create figure
fig = plt.figure(figsize=(11, 8.5))
#plt.subplots_adjust(hspace=0.1, wspace=0.2)  # Adjust the height and width spaces between subplots

# 4) Loop to plot variables 
variable = 'sla'
colormap = 'coolwarm'
title=''

# 5) Create Axes     
ax = plt.subplot(1, 1, 1, projection=projPC)

# 6) Setting the extent of the plot
ax.set_extent(extent, crs=projPC)

# 7) Add plot title 
ax.set_title(title, fontsize=10, loc='left')

# 8) Add gridlines (we need to cutomize grid lines to improve visualization)                              
gl = ax.gridlines(draw_labels=True, xlocs=np.arange(extent[1],extent[0], 5), ylocs= np.arange(extent[2], extent[3], 5),
                  linewidth=1, color='gray', alpha=0.5, linestyle='--',  zorder=40)

gl.right_labels = False  # 
gl.top_labels = False    # Set the font size for x-axis labels
gl.xlabel_style = {'size': 10}  # Set the font size for x-axis labels
gl.ylabel_style = {'size': 10}  # Set the font size for y-axis labels

# 9) Adding Natural Earth features
## Pre-defined features
ax.set_facecolor(cfeature.COLORS['water'])  #Blackgroundwater color for the ocean
#ax.add_feature(cfeature.LAND.with_scale('10m'), zorder=20) 
ax.add_feature(cfeature.LAKES.with_scale('10m'), color='aqua', zorder=21) 
ax.add_feature(cfeature.COASTLINE.with_scale('10m'), color='gray', zorder=21) 
ax.add_feature(cfeature.BORDERS, edgecolor='red',facecolor='none', linewidth=2, zorder=22) 
ax.stock_img()

## General features (not pre-defined)
ax.add_feature(cfeature.NaturalEarthFeature(
    'physical', 'rivers_lake_centerlines', scale='10m', edgecolor='blue', facecolor='none') ,zorder=21)
ax.add_feature(cfeature.NaturalEarthFeature(
    'physical','rivers_north_america', scale='10m', edgecolor='blue', facecolor='none', alpha=0.5),zorder=21)

# 10) Adding user-defined features 
# Plot site area on GeoAxes given lon and lat coordinates in first subplot 
#ax.plot([-85.0, -85.0], [27, 28.0], color='black', linewidth=1, zorder = 15)    #Tampa Bay region
ax.plot([-85.0, -85.0], [25.5, 27.0], color='black', linewidth=1, zorder = 15)  # Charlotte Harbor est uary region 
#ax.plot([-85.0, -82.85], [28.0, 28.0], color='black', linewidth=1, zorder = 15)
ax.plot([-85.0, -82.4], [27, 27], color='black', linewidth=1, zorder = 15)
ax.plot([-85.0, -81.2], [25.5, 25.5], color='black', linewidth=1, zorder = 15)


## 11) Plot Track 026 line on GeoAxex and annotate with text using axes cooridnates in second subplot
#points = np.array([[-85.7940, 28.4487], [-83.6190, 23.8411]]) 
#ax.plot(longitudes, latitudes, color='black', linewidth=2, zorder = 15)
ax.plot(longitudes[ns:ne], latitudes[ns:ne], color='red', linewidth=2, zorder = 15)
ax.plot(longitudes[ss:se], latitudes[ss:se], color='red', linewidth=2, zorder = 15)
ax.text(0.75, 0.48, ' Bathymetry 300', transform=ax.transAxes,   #x,y 
        color='black', fontsize=8,  ha='center',  va='center',  rotation=0, zorder = 15)

## Adding names and locations of major cities
locations_coordinates = {"Tampa": [27.9506, -82.4572], 
                     #"Sarasota": [27.3364, -82.5307], 
                     #"Fort Myers": [26.6406, -81.8723], 
                     "Naples": [26.142, -81.7948],  
                        }
for location, coordinates in locations_coordinates.items():
    ax.plot(coordinates[1], coordinates[0],  
            transform=ccrs.PlateCarree(),  marker='o', color='black', markersize=3, zorder = 28)
    ax.text(coordinates[1] + 0.1, coordinates[0] + 0.1, 
            location, transform=ccrs.PlateCarree(), color='black', fontsize=8, zorder = 28)

locations_coordinates = {
                     "Buoy 42003": [25.9253, -85.6161],
                    #"Buoy 42097": [25.700278, -83.641111]
                        }
for location, coordinates in locations_coordinates.items():
    ax.plot(coordinates[1], coordinates[0],  
            transform=ccrs.PlateCarree(),  marker='o', color='black', markersize=3, zorder = 22)
    ax.text(coordinates[1] - 2, coordinates[0] + 0.1, 
            location, transform=ccrs.PlateCarree(), color='black', fontsize=8, zorder = 22)


# 11) Adding raster data and colorbar for the first three subplots
if Plot_Num ==1:
    # dataplot = ax.contourf(da.latitude, da.longitude, da.sla[0,:,:], 
    #                        cmap=colormap, zorder=12, transform=ccrs.PlateCarree())
    dataplot = ax.contourf(ds.longitude, ds.latitude, ds.sla[0,:,:], 
                           cmap=colormap, zorder=12, transform=ccrs.PlateCarree())
    colorbar = plt.colorbar(dataplot, orientation='horizontal', pad=0.1, shrink=1)  
    colorbar.set_label(ds.sla.attrs['standard_name'], fontsize=10)

#12) Plot 6 bathymetry lines using a loop
if Plot_Num == 2:
    # Names of bathymetry line 
    bathymetry_names = ['bathymetry_L_0', 'bathymetry_K_200', 
                        'bathymetry_J_1000', 'bathymetry_I_2000', 
                        'bathymetry_H_3000', 'bathymetry_G_4000'] 
    
    # Very light blue to light blue generated by ChatGPT 3.5 Turbo
    colors = [(0.9, 0.95, 1), (0.7, 0.85, 1), 
              (0.5, 0.75, 1), (0.3, 0.65, 1), 
              (0.1, 0.55, 1), (0, 0.45, 1), ]  
    
    # Loop to plot each bathymetry line
    for bathymetry_name, color in zip(bathymetry_names, colors):
        ax.add_feature(
            cfeature.NaturalEarthFeature(
                category='physical', 
                name=bathymetry_name, 
                scale='10m', 
                edgecolor='none', 
                facecolor=color
            ), 
            zorder = 11)


# Plot red dots for CELLCOUNT >= 1e6 in the 'Charlotte Harbor' region with SAMPLE_DATE >= 1992-01-01
charlotte_harbor = df[
    (df['CELLCOUNT'] >= 1e5) & 
    (df['REGION'] == 'Charlotte Harbor') & 
    (df['LONGITUDE'] <= -81) & 
    (pd.to_datetime(df['SAMPLE_DATE']) >= pd.to_datetime('1992-01-01'))
]

# Plot red dots for CELLCOUNT >= 1e6
high = charlotte_harbor[charlotte_harbor['CELLCOUNT'] >= 1e6]
sc_high = ax.scatter(high['LONGITUDE'], high['LATITUDE'], 
                     color='red', s=10, edgecolor='none',
                     transform=ccrs.PlateCarree(), zorder=25)

# Plot orange dots for 1e5 <= CELLCOUNT < 1e6
medium = charlotte_harbor[(charlotte_harbor['CELLCOUNT'] >= 1e5) & (charlotte_harbor['CELLCOUNT'] < 1e6)]
sc_medium = ax.scatter(medium['LONGITUDE'], medium['LATITUDE'], 
                       color='orange', s=10, edgecolor='none',
                       transform=ccrs.PlateCarree(), zorder=24)

# Plot yellow dots for 1e4 <= CELLCOUNT < 1e5
low = charlotte_harbor[(charlotte_harbor['CELLCOUNT'] >= 1e4) & (charlotte_harbor['CELLCOUNT'] < 1e5)]
sc_low = ax.scatter(low['LONGITUDE'], low['LATITUDE'], 
                    color='yellow', s=10, edgecolor='none',
                    transform=ccrs.PlateCarree(), zorder=23)


# Add legend with defined handles
legend = ax.legend(
    handles=[sc_high, sc_medium, sc_low],
    labels=['Cellcount ≥ 1e6', '1e5 ≤ Cellcount < 1e6', '1e4 ≤ Cellcount < 1e5'],
    loc='upper left', fontsize=8, frameon=True
)
legend.set_zorder(30)

# Save the figure
output_path = 'figures/plot_output.png'  # Specify the desired output path and filename
plt.savefig(output_path, dpi=300)  # Save with high resolution
print(f"\n Plot saved to: {output_path}")

# Show the plot
plt.show()
 Plot saved to: figures/plot_output.png
../_images/ec38ecf9a55b0362f33e5f05c31dd6e16f9c6a91c5009eb7fed194da4d77d416.png

3. HAB data for study area#

# Select HAB data for study area (filtering and copying in one step)
charlotte_harbor = (
    df.loc[
        (df['REGION'] == 'Charlotte Harbor') & 
        (df['LONGITUDE'] <= -81) & 
        (pd.to_datetime(df['SAMPLE_DATE'], errors='coerce') >= '1990-01-01')
    ]
    .copy()
)

# Convert 'SAMPLE_DATE' to datetime and set as index (directly sorted)
charlotte_harbor['SAMPLE_DATE'] = pd.to_datetime(charlotte_harbor['SAMPLE_DATE'])
charlotte_harbor.set_index('SAMPLE_DATE', inplace=True)
charlotte_harbor.sort_index(inplace=True)

# Drop unnecessary columns safely
columns_to_drop = ['STATE_ID', 'DESCRIPTION', 'REGION', 'WIND_DIR', 'WIND_SPEED']
charlotte_harbor.drop(columns=columns_to_drop, axis=1, inplace=True, errors='ignore')

# Define the output directory using pathlib
output_dir = Path('output')
output_dir.mkdir(parents=True, exist_ok=True)

# Save the DataFrame to a CSV file
file_path = output_dir / 'hab_charlotte_harbor_all.csv'
charlotte_harbor.to_csv(file_path)

# Print the **relative path** instead of the full path
print(f"File saved to: {file_path}")

# Display the cleaned DataFrame
display(charlotte_harbor)

# Step 1: Reset index to make 'SAMPLE_DATE' a column
charlotte_harbor_reset = charlotte_harbor.reset_index()

# Step 2: Sort the DataFrame by 'SAMPLE_DATE' and 'CELLCOUNT' (highest first)
charlotte_harbor_sorted = charlotte_harbor_reset.sort_values(by=['SAMPLE_DATE', 'CELLCOUNT'], ascending=[True, False])

# Step 3: Drop duplicate dates (keep the highest CELLCOUNT for each day)
charlotte_harbor_daily = charlotte_harbor_sorted.drop_duplicates(subset='SAMPLE_DATE', keep='first')

# Step 4: Set 'SAMPLE_DATE' back as index and sort
charlotte_harbor_daily.set_index('SAMPLE_DATE', inplace=True)
charlotte_harbor_daily = charlotte_harbor_daily.sort_index()

# Save the DataFrame to a CSV file
file_path = output_dir / 'hab_charlotte_harbor_daily.csv'
charlotte_harbor_daily.to_csv(file_path)

# Print the **relative path** instead of the full path
print(f"File saved to: {file_path}")

# Display the cleaned DataFrame
display(charlotte_harbor_daily)
File saved to: output\hab_charlotte_harbor_all.csv
LATITUDE LONGITUDE CELLCOUNT SALINITY WATER_TEMP
SAMPLE_DATE
1990-02-26 26.89867 -82.33933 0 34.00 NaN
1990-02-26 26.81617 -82.28050 0 35.00 NaN
1990-02-26 26.71170 -82.25830 0 35.00 NaN
1990-02-27 26.92540 -82.52430 0 NaN NaN
1990-02-27 26.92720 -82.92080 0 NaN NaN
... ... ... ... ... ...
2024-03-18 26.13160 -81.80670 0 32.00 25.99
2024-03-18 26.30481 -81.83620 0 NaN NaN
2024-03-18 26.25375 -81.82377 0 32.35 28.09
2024-03-18 25.91170 -81.72940 0 31.00 26.00
2024-03-18 26.20730 -81.81690 0 34.63 28.29

49077 rows × 5 columns

File saved to: output\hab_charlotte_harbor_daily.csv
LATITUDE LONGITUDE CELLCOUNT SALINITY WATER_TEMP
SAMPLE_DATE
1990-02-26 26.89867 -82.33933 0 34.0 NaN
1990-02-27 26.61050 -82.38330 6000 NaN NaN
1990-03-01 26.53330 -82.33330 0 NaN NaN
1990-03-08 26.71170 -82.25830 0 36.0 NaN
1990-07-19 26.05760 -84.60810 3000 NaN 28.90
... ... ... ... ... ...
2024-03-11 25.98020 -81.70330 0 31.0 22.00
2024-03-12 26.93070 -82.34980 0 33.6 22.10
2024-03-13 26.57670 -82.13470 0 NaN NaN
2024-03-14 26.34700 -82.24600 0 NaN NaN
2024-03-18 26.13160 -81.80670 0 32.0 25.99

5212 rows × 5 columns