Let me join the chorus!
It seems that Cursor (and maybe LLMs in general) is amazingly good at coming up with new code, i.e. nearly green-field development. But I am often frustrated by Cursor's lack of understanding of, or respect for, the existing context. When I ask it to modify some functions to add new functionality, it produces an OK implementation, but destroys existing functionality in the process.
Here is an example. First, the code co-written by me and Cursor, which passed my review and tests:
```python
import logging
from functools import partial
from typing import Dict, List, Optional

import numpy as np
import pandas as pd

logger = logging.getLogger(__name__)

# Project-specific helpers defined elsewhere in the codebase:
# get_overheat_date_range, calculate_predictions, calculate_error,
# plot_predictions_and_actual, PredictionResult, ErrorMetrics, and dft.


def process_location_date(location: str, df: pd.DataFrame, date: pd.Timestamp,
                          window_size: int, overheat: float,
                          prediction_horizon_list: List[int],
                          scatter: bool = False,
                          overall_error_metrics: Optional[Dict[int, List[float]]] = None
                          ) -> Dict[int, List[float]]:
    """
    Process and plot data for a specific location and date.
    """
    # Avoid a mutable default argument; create a fresh dict if none is passed.
    if overall_error_metrics is None:
        overall_error_metrics = {}
    start_date, end_date = get_overheat_date_range(date)
    actual = df[df['Time'].between(start_date, end_date)].sort_values(by='Time')
    logger.info(f"Processing location: {location}, date: {date}, data points: {len(actual)}")
    predictions = []
    for index, horizon in enumerate(prediction_horizon_list):
        # Calculate predictions only for the relevant part of the data
        pred_end = actual['Time'].max() - pd.Timedelta(seconds=horizon)
        actual_for_pred = actual[actual['Time'] < pred_end]
        pred = calculate_predictions(actual_for_pred, window_size, horizon)
        # Trim actual data to match the prediction time range
        pred_start = actual['Time'].min() + pd.Timedelta(seconds=horizon)
        actual_for_merge = actual[actual['Time'] > pred_start]
        # Merge prediction with actual data.
        # Note: We use pred as the left DataFrame to ensure we match as many prediction rows as possible.
        # This is important because pred may be a subset of actual_for_merge in terms of timestamps.
        merged_df = pd.merge_asof(pred[['PredictionTime', 'Predicted']], actual_for_merge,
                                  left_on='PredictionTime',
                                  right_on='Time',
                                  direction='nearest',
                                  tolerance=pd.Timedelta(minutes=5))
        logger.info(f"Prediction horizon: {horizon}, merged data points: {len(merged_df)}")
        if merged_df.empty:
            logger.warning(f"No overlapping data for horizon {horizon}")
            logger.warning(f"Actual data: {actual}")
            logger.warning(f"Pred data: {pred}")
            continue
        error_metrics, additional_metrics = calculate_error(merged_df, overheat)
        true_pos, false_pos, false_neg, true_neg, sum_residual_squared, max_error = additional_metrics
        # Accumulate the first five counts across calls; keep the running max for max_error.
        prior_metrics = overall_error_metrics.get(horizon, [0, 0, 0, 0, 0, 0])
        overall_error_metrics[horizon] = [
            prior_metrics[i] + additional_metrics[i] for i in range(5)
        ] + [
            max(prior_metrics[5], additional_metrics[5])  # max_error
        ]
        annotation_x_coord = index / max(len(prediction_horizon_list) - 1, 1)
        predictions.append(PredictionResult(
            dataframe=pred,
            prediction_horizon_seconds=horizon,
            error_metrics=error_metrics,
            annotation_x_coord=annotation_x_coord
        ))
    plot_description = {
        'location': location,
        'window_size': window_size,
        'overheat': overheat if actual['Value'].max() >= overheat else None,
        'plot_type': 'scatter' if scatter else 'line',
        'same_y_axis': False
    }
    plot_predictions_and_actual(actual, predictions, plot_description)
    return overall_error_metrics


def overheats_prediction_studies(dict_location_df: Dict[str, pd.DataFrame],
                                 overheat: float, window_size: int,
                                 prediction_horizon_list: List[int],
                                 scatter: bool = False) -> Dict[int, ErrorMetrics]:
    """
    Apply the linear model to predict overheat events for multiple locations and dates.
    """
    dict_location_df = {
        location: dft.convert_column_to_float(df, 'Value')
        for location, df in dict_location_df.items() if df is not None
    }
    overheat_location_date_pairs = [
        (location, date)
        for location, df in dict_location_df.items()
        for date in pd.to_datetime(df[df['Value'] >= overheat]['Time'].dt.date.unique())
    ]
    process_func = partial(process_location_date,
                           window_size=window_size,
                           overheat=overheat,
                           prediction_horizon_list=prediction_horizon_list,
                           scatter=scatter)
    overall_error_metrics = {}
    for location, date in overheat_location_date_pairs:
        overall_error_metrics = process_func(location, dict_location_df[location], date,
                                             overall_error_metrics=overall_error_metrics)
    # Compute the final error metrics
    final_error_metrics = {}
    for horizon, metrics in overall_error_metrics.items():
        total_true_pos, total_false_pos, total_false_neg, total_true_neg, total_residual_squared, max_error = metrics
        total_predictions = total_true_pos + total_false_pos + total_false_neg + total_true_neg
        overall_precision = total_true_pos / (total_true_pos + total_false_pos) if (total_true_pos + total_false_pos) > 0 else 0
        overall_recall = total_true_pos / (total_true_pos + total_false_neg) if (total_true_pos + total_false_neg) > 0 else 0
        overall_rmse = np.sqrt(total_residual_squared / total_predictions) if total_predictions > 0 else 0
        final_error_metrics[horizon] = ErrorMetrics(
            rmse=overall_rmse,
            max_error=max_error,
            recall=overall_recall,
            precision=overall_precision
        )
    return final_error_metrics
```
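For anyone curious about the merge step (the part Cursor kept breaking for me): `pd.merge_asof` with `direction='nearest'` and a `tolerance` keeps every left row and pairs it with the closest right row within the window, or NaN if none qualifies. A minimal self-contained sketch with toy timestamps (not my actual data):

```python
import pandas as pd

# Toy data: predictions every 90 s, actuals every 60 s (hypothetical, for illustration only)
pred = pd.DataFrame({
    'PredictionTime': pd.date_range('2024-01-01 00:00', periods=4, freq='90s'),
    'Predicted': [1.0, 2.0, 3.0, 4.0],
})
actual = pd.DataFrame({
    'Time': pd.date_range('2024-01-01 00:00', periods=6, freq='60s'),
    'Value': [0.9, 1.8, 2.7, 3.6, 4.5, 5.4],
})

# pred on the left: every prediction row is kept and matched to the nearest
# actual row within 5 minutes; unmatched rows get NaN instead of being dropped.
merged = pd.merge_asof(pred, actual,
                       left_on='PredictionTime', right_on='Time',
                       direction='nearest', tolerance=pd.Timedelta(minutes=5))
print(merged)
```

This is exactly why `pred` has to stay on the left: flip the arguments and you keep every actual row instead, which changes what the error metrics are computed over.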
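And since the aggregation at the end of `overheats_prediction_studies` can look opaque, here is the same final-metrics math on made-up counts (numbers are hypothetical, just to show the formulas):

```python
import numpy as np

# Hypothetical accumulated list for one horizon:
# [true_pos, false_pos, false_neg, true_neg, sum_residual_squared, max_error]
tp, fp, fn, tn, sum_residual_squared, max_error = 8, 2, 4, 86, 12.5, 1.9

total = tp + fp + fn + tn                     # 100 merged prediction rows
precision = tp / (tp + fp)                    # 8 / 10 = 0.8
recall = tp / (tp + fn)                       # 8 / 12 ≈ 0.667
rmse = np.sqrt(sum_residual_squared / total)  # sqrt(0.125) ≈ 0.354
```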