"""Download the rainfall table from the Hugging Face Hub and reshape it.

At import time the module fetches ``rainFallData.csv`` from the
``Navanihk/rainfallPrediction`` repository, loads it into the module-level
DataFrame ``df`` and prints it.  ``calculate_rainfall`` turns that wide table
(one column per day) into a long table with one row per district and date.
"""
import calendar
import os
import re

import numpy as np
import pandas as pd
from huggingface_hub import hf_hub_download

# Calendar year used to validate day numbers and to build the ``date`` column.
DEFAULT_YEAR = 2025

# Columns that identify a row; every other column is treated as a day column.
ID_COLUMNS = ['state', 'district', 'month']

repo_id = "Navanihk/rainfallPrediction"
cache_dir = '/tmp/hf_cache'
os.makedirs(cache_dir, exist_ok=True)

csv_path = hf_hub_download(
    repo_id=repo_id,
    filename="rainFallData.csv",
    cache_dir=cache_dir,
)

# Load as DataFrame (the file is semicolon-separated).
df = pd.read_csv(csv_path, sep=";")
# Local alternative: df = pd.read_csv('./rainFallData.csv', sep=";")
print(df)


def valid_day(row, year=DEFAULT_YEAR):
    """Return whether ``row['day']`` exists in ``row['month']`` of ``year``.

    Args:
        row: Mapping-like object (e.g. a DataFrame row) with integer
            ``'month'`` and ``'day'`` entries.
        year: Calendar year used to look up the month length.

    Returns:
        True when ``1 <= day <= number of days in the month``.

    Raises:
        calendar.IllegalMonthError: If the month is outside 1..12.
    """
    max_day = calendar.monthrange(year, row['month'])[1]  # days in the month
    return 1 <= row['day'] <= max_day


def calculate_rainfall(data=None, year=DEFAULT_YEAR):
    """Reshape the wide rainfall table into one row per district and date.

    Args:
        data: Wide DataFrame with ``state``, ``district`` and ``month``
            columns plus one column per day whose name contains the day
            number.  Defaults to the module-level ``df``.
        year: Calendar year used to validate days and build dates.

    Returns:
        A DataFrame with columns ``state``, ``district``, ``month``, ``day``,
        ``rainfall`` and ``date``, sorted by district and date with a fresh
        index, or ``None`` when a required column is missing.

    Raises:
        calendar.IllegalMonthError: If a month value is outside 1..12.
    """
    frame = df if data is None else data

    # Debug: Print actual column names
    print("Available columns in DataFrame:", frame.columns.tolist())
    print("DataFrame shape:", frame.shape)
    print("First few rows:")
    print(frame.head())

    # Check if required columns exist
    missing_cols = [col for col in ID_COLUMNS if col not in frame.columns]
    if missing_cols:
        print(f"ERROR: Missing columns: {missing_cols}")
        print("Please check the actual column names in your CSV file")
        return None

    # Parse the day number once per column name instead of once per melted
    # row.  Columns without any digits cannot be a day column and would
    # otherwise break the integer conversion, so they are reported and skipped.
    day_numbers = {}
    ignored_cols = []
    for col in frame.columns:
        if col in ID_COLUMNS:
            continue
        match = re.search(r'\d+', str(col))
        if match is None:
            ignored_cols.append(col)
        else:
            day_numbers[col] = int(match.group())
    if ignored_cols:
        print(f"Ignoring columns without a day number: {ignored_cols}")

    # Melt to long format
    df_long = frame.melt(
        id_vars=list(ID_COLUMNS),
        value_vars=list(day_numbers),
        var_name="day",
        value_name="rainfall",
    )

    # Replace the day column name by its numeric day
    df_long['day'] = df_long['day'].map(day_numbers).astype(int)

    # Keep only days that exist in their month.  The month length is looked up
    # once per distinct month (invalid months still raise here) and compared
    # column-wise rather than calling a Python function for every row.
    month_lengths = {
        month: calendar.monthrange(year, month)[1]
        for month in df_long['month'].unique().tolist()
    }
    max_day = df_long['month'].map(month_lengths)
    is_valid = (df_long['day'] >= 1) & (df_long['day'] <= max_day)
    # Copy so that adding the date column below does not write into a slice.
    df_long = df_long[is_valid].copy()

    # Create proper date
    df_long['date'] = pd.to_datetime(
        dict(year=year, month=df_long['month'], day=df_long['day'])
    )

    return df_long.sort_values(['district', 'date']).reset_index(drop=True)