# bounce/src/analysis/bounce_detection.py

"""
Bounce detection module for the bouncing ball analysis project.
"""

import numpy as np
from scipy.signal import find_peaks
import pandas as pd

# Fallback detection settings, used whenever a caller does not pass its own.
DEFAULT_PROMINENCE = 0.1     # required peak prominence in the signal
DEFAULT_MIN_DISTANCE = 10    # minimum number of data points between peaks
DEFAULT_MIN_TIME_DIFF = 0.2  # minimum time difference (seconds) between bounces
DEFAULT_MAX_BOUNCES = 7      # maximum number of bounces reported per trial

# Per-ball tuning consumed by calculate_effective_prominence():
#   relative_prominence - multiplied by the initial height to get the base prominence
#   low_threshold       - initial heights strictly below this use low_adjustment
#   low_adjustment      - factor applied to the base prominence for low drops
#   high_threshold      - initial heights strictly above this use high_adjustment
#   high_adjustment     - factor applied to the base prominence for high drops
BALL_PARAMS = {
    'Golf': dict(
        relative_prominence=0.15,
        low_threshold=12.5,
        low_adjustment=1.1,
        high_threshold=14.5,
        high_adjustment=1.3,
    ),
    'Lacrosse': dict(
        relative_prominence=0.05,
        low_threshold=18.5,
        low_adjustment=1.1,
        high_threshold=21.5,
        high_adjustment=1.3,
    ),
    'Metal': dict(
        relative_prominence=0.05,
        low_threshold=5,
        low_adjustment=0.8,
        high_threshold=8,
        high_adjustment=1.2,
    ),
}

def detect_bounces(df,
                   prominence=DEFAULT_PROMINENCE,
                   min_distance=DEFAULT_MIN_DISTANCE,
                   min_time_diff=DEFAULT_MIN_TIME_DIFF,
                   max_bounces=DEFAULT_MAX_BOUNCES,
                   fs=None):
    """
    Detects bounce events by locating peaks in the 'Position' signal.

    Candidate peaks come from scipy.signal.find_peaks. Candidates are then
    merged into groups: a group opens at one candidate and absorbs every
    later candidate that lies less than `min_time_diff` seconds after that
    opening candidate. Only the highest candidate of each group is reported.

    NOTE(review): the merge window is anchored at the first candidate of a
    group, not at the candidate that ends up being reported, so two reported
    bounces can still be closer together than `min_time_diff`.

    Parameters:
      - df: DataFrame with 'Time' and 'Position'
      - prominence: required prominence for peaks (in the signal)
      - min_distance: minimum number of data points between peaks
      - min_time_diff: minimum time difference (seconds) between bounces
      - max_bounces: maximum bounces to report
      - fs: sampling frequency (Hz). Accepted for backward compatibility
        only; detection does not use it (min_distance is given in samples
        and min_time_diff is compared against 'Time' directly).

    Returns:
      - peak_indices (integer array of row positions, empty if none found),
      - bounce_heights (array of position values at these bounces),
      - bounce_times (array of times of these bounces),
      - signal_data (the data used for detection, here the raw Position values)
    """
    # Pull both columns out once instead of going through .iloc per peak.
    times = df['Time'].to_numpy()
    signal_data = df['Position'].values

    peaks, _ = find_peaks(signal_data, prominence=prominence, distance=min_distance)

    kept = []
    group_start_time = None
    group_best_idx = None

    for idx in peaks:
        current_time = times[idx]

        in_current_group = (
            group_start_time is not None
            and (current_time - group_start_time) < min_time_diff
        )
        if in_current_group:
            # Same group: remember this candidate only if it is strictly higher.
            if signal_data[idx] > signal_data[group_best_idx]:
                group_best_idx = idx
            continue

        # A new group starts here; flush the winner of the previous one.
        if group_best_idx is not None:
            kept.append(group_best_idx)
        group_start_time = current_time
        group_best_idx = idx

    # Flush the last open group.
    if group_best_idx is not None:
        kept.append(group_best_idx)

    # Explicit integer dtype so an empty result is still usable as an index.
    peak_indices = np.array(kept[:max_bounces], dtype=np.intp)
    bounce_heights = signal_data[peak_indices]
    bounce_times = times[peak_indices]

    return peak_indices, bounce_heights, bounce_times, signal_data


def compute_cor(bounce_heights):
    """
    Computes the Coefficient of Restitution (COR) for consecutive bounces:
      e = sqrt( h_{n+1} / h_n )

    A pair yields NaN when h_n is not strictly positive, when h_{n+1} is
    negative, or when either height is NaN.

    Parameters:
        bounce_heights (array-like): Sequence of bounce heights, in bounce order

    Returns:
        numpy.ndarray: Float array of COR values, one per consecutive pair
            (empty when fewer than two heights are given)
    """
    heights = np.asarray(bounce_heights, dtype=float)
    previous, following = heights[:-1], heights[1:]

    cor_values = np.full(previous.shape, np.nan)
    # Only evaluate pairs with a real-valued result; this keeps the division
    # and the square root away from zero/negative operands (no warnings).
    valid = (previous > 0) & (following >= 0)
    cor_values[valid] = np.sqrt(following[valid] / previous[valid])
    return cor_values


def process_trial(df,
                  initial_height=None,
                  ball_type=None,
                  prominence=None,
                  min_distance=DEFAULT_MIN_DISTANCE,
                  min_time_diff=DEFAULT_MIN_TIME_DIFF,
                  max_bounces=DEFAULT_MAX_BOUNCES,
                  fs=None):
    """
    Processes a trial by detecting bounces and computing COR values.

    Parameters:
        df (pandas.DataFrame): DataFrame with 'Time' and 'Position' columns
        initial_height (float): Initial height of the ball
        ball_type (str): Type of ball ('Golf', 'Lacrosse', or 'Metal')
        prominence (float): Prominence for peak detection. If None, it is
            calculated from initial_height and ball_type when both are given;
            DEFAULT_PROMINENCE is used when either is missing or when the
            calculated value is NaN (e.g. a NaN initial_height).
        min_distance (int): Minimum distance between peaks
        min_time_diff (float): Minimum time difference between bounces
        max_bounces (int): Maximum number of bounces to detect
        fs (float): Sampling frequency (Hz), forwarded to detect_bounces

    Returns:
        dict: Dictionary containing analysis results with the keys
            'peak_indices', 'bounce_heights', 'bounce_times', 'cor_values',
            'Average COR', 'signal_data', 'Initial Height' and 'Num Bounces'

    Raises:
        ValueError: If prominence has to be calculated and ball_type is unknown
    """
    # Resolve the prominence only when the caller did not supply one.
    if prominence is None:
        if initial_height is not None and ball_type is not None:
            prominence = calculate_effective_prominence(initial_height, ball_type)
            # A NaN height gives a NaN prominence; comparisons against NaN
            # are always false, so no peak could satisfy it. Use the default.
            if np.isnan(prominence):
                prominence = DEFAULT_PROMINENCE
        else:
            prominence = DEFAULT_PROMINENCE

    # Detect bounces (keywords keep this call independent of parameter order)
    peak_indices, bounce_heights, bounce_times, signal_data = detect_bounces(
        df,
        prominence=prominence,
        min_distance=min_distance,
        min_time_diff=min_time_diff,
        max_bounces=max_bounces,
        fs=fs,
    )

    # Calculate COR values; the average is NaN when there is no bounce pair
    cor_values = compute_cor(bounce_heights)
    avg_cor = np.mean(cor_values) if cor_values.size > 0 else np.nan

    # Return results as a dictionary
    return {
        'peak_indices': peak_indices,
        'bounce_heights': bounce_heights,
        'bounce_times': bounce_times,
        'cor_values': cor_values,
        'Average COR': avg_cor,
        'signal_data': signal_data,
        'Initial Height': initial_height,
        'Num Bounces': len(peak_indices)
    }


def calculate_effective_prominence(initial_height, ball_type):
    """
    Derive the peak prominence to use for a trial from its drop height and ball.

    The base value is the ball's 'relative_prominence' times the initial
    height. It is scaled by 'low_adjustment' when the height is strictly below
    'low_threshold', by 'high_adjustment' when it is strictly above
    'high_threshold', and left unscaled otherwise.

    Parameters:
        initial_height (float): Initial height of the ball
        ball_type (str): Type of ball; must be a key of BALL_PARAMS

    Returns:
        float: Effective prominence value

    Raises:
        ValueError: If ball_type is not a key of BALL_PARAMS
    """
    settings = BALL_PARAMS.get(ball_type)
    if settings is None:
        raise ValueError(f"Unknown ball type: {ball_type}. Must be one of: {list(BALL_PARAMS.keys())}")

    base = settings['relative_prominence'] * initial_height

    # Low drops take precedence; the two ranges are checked in this order.
    if initial_height < settings['low_threshold']:
        return base * settings['low_adjustment']
    if initial_height > settings['high_threshold']:
        return base * settings['high_adjustment']
    return base


def process_trials(file_paths, ball_type, min_distance=DEFAULT_MIN_DISTANCE,
                  min_time_diff=DEFAULT_MIN_TIME_DIFF, max_bounces=DEFAULT_MAX_BOUNCES, fs=None):
    """
    Process multiple trials and return a summary DataFrame.

    The initial height of each trial is parsed from the first
    whitespace-separated token of its label. A label that does not start with
    a number gets an initial height of NaN and is analysed with
    DEFAULT_PROMINENCE instead of a height-derived prominence.

    Parameters:
        file_paths (dict): Dictionary mapping labels to file paths
        ball_type (str): Type of ball ('Golf', 'Lacrosse', or 'Metal')
        min_distance (int): Minimum distance between peaks
        min_time_diff (float): Minimum time difference between bounces
        max_bounces (int): Maximum number of bounces to detect
        fs (float): Sampling frequency (Hz)

    Returns:
        pandas.DataFrame: One row per trial (the process_trial result dict),
            sorted by 'Initial Height'; an empty DataFrame when file_paths
            is empty

    Raises:
        ValueError: If ball_type is unknown (raised while processing the
            first trial)
    """
    from src.data.loader import load_trial

    results = []

    for init_label, path in file_paths.items():
        # str() lets numeric labels through; IndexError covers empty labels.
        try:
            initial_height = float(str(init_label).split()[0])
        except (ValueError, IndexError):
            initial_height = np.nan

        # Compute effective prominence (this also validates ball_type)
        effective_prominence = calculate_effective_prominence(initial_height, ball_type)
        if np.isnan(effective_prominence):
            # Unknown height: a NaN prominence could never be satisfied by
            # any peak, so fall back to the module default.
            effective_prominence = DEFAULT_PROMINENCE

        # Load and process the trial
        df = load_trial(path)
        results.append(process_trial(
            df,
            initial_height=initial_height,
            ball_type=ball_type,
            prominence=effective_prominence,
            min_distance=min_distance,
            min_time_diff=min_time_diff,
            max_bounces=max_bounces,
            fs=fs,
        ))

    summary_df = pd.DataFrame(results)
    # With no trials there is no 'Initial Height' column to sort by.
    if results:
        summary_df.sort_values(by='Initial Height', inplace=True)

    return summary_df