Provided modular architecture for animated bouncing ball analysis

- Organized project into src directory with subpackages (analysis, data, visualization, utils)
- Added comprehensive README with project overview and structure
- Implemented data loading, bounce detection, and visualization modules
- Created example scripts and Jupyter notebook for project usage
- Added requirements.txt for dependency management
- Included output files for different ball types (golf, lacrosse, metal)
2025-03-01 16:55:29 -07:00
parent 3cf0e16c35
commit c6b08a089d
75 changed files with 3198 additions and 2 deletions
--- a/src/analysis/bounce_detection.py
+++ b/src/analysis/bounce_detection.py
@@ -0,0 +1,260 @@
+"""
+Bounce detection module for the bouncing ball analysis project.
+"""
+
+import numpy as np
+from scipy.signal import find_peaks
+import pandas as pd
+
+# Default parameters for bounce detection
+# Default `prominence` for detect_bounces; process_trial also falls back to it
+# when no prominence is given and none can be derived from height/ball type.
+DEFAULT_PROMINENCE = 0.1
+# Minimum number of data points between peaks (forwarded to find_peaks as
+# `distance`).
+DEFAULT_MIN_DISTANCE = 10
+# Minimum time between reported bounces (seconds, per the detect_bounces
+# docstring); closer peaks are merged and only the tallest is kept.
+DEFAULT_MIN_TIME_DIFF = 0.2
+# Maximum number of bounces reported per trial.
+DEFAULT_MAX_BOUNCES = 7
+
+# Ball-specific parameters
+# Read by calculate_effective_prominence, keyed by ball type:
+#   relative_prominence - multiplied by the initial height to get the base
+#                         prominence
+#   low_threshold       - initial heights strictly below this use
+#                         low_adjustment
+#   low_adjustment      - multiplier applied to the base prominence in that case
+#   high_threshold      - initial heights strictly above this use
+#                         high_adjustment
+#   high_adjustment     - multiplier applied to the base prominence in that case
+# NOTE(review): thresholds are in the same units as the initial height; the
+# unit itself is not stated in this module - confirm against the data files.
+BALL_PARAMS = {
+    'Golf': {
+        'relative_prominence': 0.15,
+        'low_threshold': 12.5,
+        'low_adjustment': 1.1,
+        'high_threshold': 14.5,
+        'high_adjustment': 1.3
+    },
+    'Lacrosse': {
+        'relative_prominence': 0.05,
+        'low_threshold': 18.5,
+        'low_adjustment': 1.1,
+        'high_threshold': 21.5,
+        'high_adjustment': 1.3
+    },
+    'Metal': {
+        'relative_prominence': 0.05,
+        'low_threshold': 5,
+        'low_adjustment': 0.8,
+        'high_threshold': 8,
+        'high_adjustment': 1.2
+    }
+}
+
+def detect_bounces(df,
+                   prominence=DEFAULT_PROMINENCE,
+                   min_distance=DEFAULT_MIN_DISTANCE,
+                   min_time_diff=DEFAULT_MIN_TIME_DIFF,
+                   max_bounces=DEFAULT_MAX_BOUNCES,
+                   fs=None):
+    """
+    Detects bounce events by locating peaks in the raw position signal.
+
+    Candidate peaks come from scipy.signal.find_peaks. They are then grouped
+    in time: a peak closer than ``min_time_diff`` to the first peak of the
+    current group is merged into that group, and only the tallest peak of
+    each group is reported.
+
+    Parameters:
+      - df: DataFrame with 'Time' and 'Position' columns
+      - prominence: required prominence for peaks (in the signal)
+      - min_distance: minimum number of data points between peaks
+      - min_time_diff: minimum time difference (seconds) between bounces
+      - max_bounces: maximum bounces to report (None reports all of them)
+      - fs: sampling frequency (Hz). Kept for backward compatibility only;
+            detection works on sample indices and the 'Time' values, so the
+            value is not used.
+
+    Returns:
+      - peak_indices (integer array of row positions; empty if no bounce),
+      - bounce_heights (array of position values at these bounces),
+      - bounce_times (array of times of these bounces),
+      - signal_data (the data used for detection, here the raw Position values)
+    """
+    # Pull both columns out once instead of going through .iloc per peak.
+    times = df['Time'].values
+    signal_data = df['Position'].values
+
+    peaks, _ = find_peaks(signal_data, prominence=prominence, distance=min_distance)
+
+    filtered_peaks = []
+    group_start_time = None
+    group_best_idx = None
+
+    for idx in peaks:
+        in_current_group = (
+            group_best_idx is not None
+            and (times[idx] - group_start_time) < min_time_diff
+        )
+        if in_current_group:
+            # Same bounce: keep whichever peak is taller.
+            if signal_data[idx] > signal_data[group_best_idx]:
+                group_best_idx = idx
+            continue
+
+        # The previous group (if any) is complete; emit it and start a new one.
+        if group_best_idx is not None:
+            filtered_peaks.append(group_best_idx)
+        group_start_time = times[idx]
+        group_best_idx = idx
+
+    # Flush the last open group.
+    if group_best_idx is not None:
+        filtered_peaks.append(group_best_idx)
+
+    # An explicit integer dtype keeps the result usable as an index array even
+    # when no peak was found (np.array([]) would default to float64).
+    peak_indices = np.array(filtered_peaks[:max_bounces], dtype=np.intp)
+
+    bounce_heights = signal_data[peak_indices]
+    bounce_times = times[peak_indices]
+
+    return peak_indices, bounce_heights, bounce_times, signal_data
+
+
+def compute_cor(bounce_heights):
+    """
+    Computes the Coefficient of Restitution (COR) for consecutive bounces:
+      e = sqrt( h_{n+1} / h_n )
+
+    Parameters:
+        bounce_heights (numpy.ndarray): Array of bounce heights
+
+    Returns:
+        numpy.ndarray: Array of COR values, one per consecutive pair (so one
+        element shorter than the input; empty for fewer than two heights).
+        A pair yields NaN when the earlier height is not strictly positive
+        or when the ratio would be negative or NaN.
+    """
+    heights = np.asarray(bounce_heights, dtype=float)
+    if heights.size < 2:
+        return np.array([])
+
+    previous = heights[:-1]
+    following = heights[1:]
+
+    # Only take the square root where it is defined; everything else stays
+    # NaN, which avoids divide-by-zero and invalid-value runtime warnings.
+    cor_values = np.full(previous.shape, np.nan)
+    valid = (previous > 0) & (following >= 0)
+    cor_values[valid] = np.sqrt(following[valid] / previous[valid])
+    return cor_values
+
+
+def process_trial(df,
+                  initial_height=None,
+                  ball_type=None,
+                  prominence=None,
+                  min_distance=DEFAULT_MIN_DISTANCE,
+                  min_time_diff=DEFAULT_MIN_TIME_DIFF,
+                  max_bounces=DEFAULT_MAX_BOUNCES,
+                  fs=None):
+    """
+    Runs bounce detection on one trial and derives its COR statistics.
+
+    Parameters:
+        df (pandas.DataFrame): DataFrame with 'Time' and 'Position' columns
+        initial_height (float): Initial height of the ball
+        ball_type (str): Type of ball ('Golf', 'Lacrosse', or 'Metal')
+        prominence (float): Prominence for peak detection. When None it is
+            derived from initial_height and ball_type if both are given,
+            otherwise DEFAULT_PROMINENCE is used.
+        min_distance (int): Minimum distance between peaks
+        min_time_diff (float): Minimum time difference between bounces
+        max_bounces (int): Maximum number of bounces to detect
+        fs (float): Sampling frequency (Hz), forwarded to detect_bounces
+
+    Returns:
+        dict: Analysis results with keys 'peak_indices', 'bounce_heights',
+        'bounce_times', 'cor_values', 'Average COR', 'signal_data',
+        'Initial Height' and 'Num Bounces'. 'Average COR' is NaN when no
+        COR value could be computed.
+    """
+    # Resolve the prominence only when the caller did not supply one.
+    if prominence is None:
+        if initial_height is None or ball_type is None:
+            prominence = DEFAULT_PROMINENCE
+        else:
+            prominence = calculate_effective_prominence(initial_height, ball_type)
+
+    detection = detect_bounces(
+        df, prominence, min_distance, min_time_diff, max_bounces, fs
+    )
+    peak_indices, bounce_heights, bounce_times, signal_data = detection
+
+    cor_values = compute_cor(bounce_heights)
+    if cor_values.size > 0:
+        avg_cor = np.mean(cor_values)
+    else:
+        avg_cor = np.nan
+
+    # Key order is kept stable because it determines column order when the
+    # results are collected into a DataFrame.
+    summary = {}
+    summary['peak_indices'] = peak_indices
+    summary['bounce_heights'] = bounce_heights
+    summary['bounce_times'] = bounce_times
+    summary['cor_values'] = cor_values
+    summary['Average COR'] = avg_cor
+    summary['signal_data'] = signal_data
+    summary['Initial Height'] = initial_height
+    summary['Num Bounces'] = len(peak_indices)
+    return summary
+
+
+def calculate_effective_prominence(initial_height, ball_type):
+    """
+    Derive the peak prominence to use for a trial from its drop height and ball type.
+
+    The base value is the ball's relative prominence times the initial
+    height. It is scaled by the ball's low adjustment when the height is
+    below its low threshold, or by its high adjustment when the height is
+    above its high threshold; heights in between leave it unscaled.
+
+    Parameters:
+        initial_height (float): Initial height of the ball
+        ball_type (str): Type of ball ('Golf', 'Lacrosse', or 'Metal')
+
+    Returns:
+        float: Effective prominence value
+
+    Raises:
+        ValueError: If ball_type is not a key of BALL_PARAMS
+    """
+    if ball_type not in BALL_PARAMS:
+        raise ValueError(f"Unknown ball type: {ball_type}. Must be one of: {list(BALL_PARAMS.keys())}")
+
+    settings = BALL_PARAMS[ball_type]
+    base = settings['relative_prominence'] * initial_height
+
+    # Short drops and tall drops each get their own correction factor.
+    if initial_height < settings['low_threshold']:
+        return base * settings['low_adjustment']
+    if initial_height > settings['high_threshold']:
+        return base * settings['high_adjustment']
+    return base
+
+
+def process_trials(file_paths, ball_type, min_distance=DEFAULT_MIN_DISTANCE,
+                  min_time_diff=DEFAULT_MIN_TIME_DIFF, max_bounces=DEFAULT_MAX_BOUNCES, fs=None):
+    """
+    Process multiple trials and return a summary DataFrame.
+
+    Each key of ``file_paths`` is expected to start with the numeric initial
+    height (e.g. a label whose first whitespace-separated token is a number).
+    When that token is missing or not numeric, the trial is still processed:
+    its 'Initial Height' is recorded as NaN and DEFAULT_PROMINENCE is used
+    for peak detection.
+
+    Parameters:
+        file_paths (dict): Dictionary mapping labels to file paths
+        ball_type (str): Type of ball ('Golf', 'Lacrosse', or 'Metal')
+        min_distance (int): Minimum distance between peaks
+        min_time_diff (float): Minimum time difference between bounces
+        max_bounces (int): Maximum number of bounces to detect
+        fs (float): Sampling frequency (Hz)
+
+    Returns:
+        pandas.DataFrame: One row per trial (the dict returned by
+        process_trial), sorted by 'Initial Height'. An empty DataFrame is
+        returned when ``file_paths`` is empty.
+
+    Raises:
+        ValueError: If ball_type is unknown (from calculate_effective_prominence)
+    """
+    from src.data.loader import load_trial
+
+    results = []
+
+    for init_label, path in file_paths.items():
+        # An empty label makes split() return [], hence IndexError as well.
+        try:
+            initial_height = float(str(init_label).split()[0])
+        except (ValueError, IndexError):
+            initial_height = np.nan
+
+        # Compute effective prominence (this also validates ball_type).
+        effective_prominence = calculate_effective_prominence(initial_height, ball_type)
+        if np.isnan(effective_prominence):
+            # A NaN prominence cannot satisfy any peak comparison, so the
+            # trial would silently report no bounces; use the default instead.
+            effective_prominence = DEFAULT_PROMINENCE
+
+        # Load and process the trial
+        df = load_trial(path)
+        results.append(process_trial(
+            df, initial_height, ball_type, effective_prominence, min_distance, min_time_diff, max_bounces, fs
+        ))
+
+    summary_df = pd.DataFrame(results)
+    # With no trials there is no 'Initial Height' column to sort on.
+    if 'Initial Height' in summary_df.columns:
+        summary_df = summary_df.sort_values(by='Initial Height')
+
+    return summary_df