""" Helper functions for the bouncing ball analysis project. """ import os import numpy as np import pandas as pd import matplotlib.pyplot as plt from matplotlib.ticker import MaxNLocator def ensure_directory(directory): """ Ensure that a directory exists, creating it if necessary. Parameters: directory (str): Path to the directory Returns: str: Path to the directory """ os.makedirs(directory, exist_ok=True) return directory def extract_height_from_path(path): """ Extract the initial height from a file path. Parameters: path (str): Path to the file Returns: float: Initial height in inches """ # Extract the filename without extension filename = os.path.basename(path).split('.')[0] # Extract the height value for part in filename.split('_'): try: return float(part) except ValueError: continue # If no height found, return None return None def smooth_data(data, window_size=5): """ Apply a simple moving average to smooth data. Parameters: data (numpy.ndarray): Data to smooth window_size (int): Size of the moving average window Returns: numpy.ndarray: Smoothed data """ return np.convolve(data, np.ones(window_size)/window_size, mode='valid') def calculate_statistics(values): """ Calculate basic statistics for a set of values. Parameters: values (list or numpy.ndarray): Values to analyze Returns: dict: Dictionary containing statistics """ values = np.array(values) return { 'mean': np.mean(values), 'median': np.median(values), 'std': np.std(values), 'min': np.min(values), 'max': np.max(values), 'count': len(values) } def format_statistics(stats): """ Format statistics as a string. Parameters: stats (dict): Dictionary containing statistics Returns: str: Formatted statistics string """ return ( f"Mean: {stats['mean']:.4f}\n" f"Median: {stats['median']:.4f}\n" f"Std Dev: {stats['std']:.4f}\n" f"Min: {stats['min']:.4f}\n" f"Max: {stats['max']:.4f}\n" f"Count: {stats['count']}" ) def create_bar_chart(data, x_label, y_label, title, integer_ticks=True): """ Create a bar chart from data. Parameters: data (dict): Dictionary mapping categories to values x_label (str): Label for the x-axis y_label (str): Label for the y-axis title (str): Title for the chart integer_ticks (bool): Whether to use integer ticks on the y-axis Returns: matplotlib.figure.Figure: The figure object """ fig, ax = plt.subplots(figsize=(10, 6)) categories = list(data.keys()) values = list(data.values()) bars = ax.bar(categories, values, color='skyblue', edgecolor='black') # Add value labels on top of bars for bar in bars: height = bar.get_height() ax.text( bar.get_x() + bar.get_width() / 2., height + 0.02 * max(values), f'{height:.3f}', ha='center', va='bottom', fontsize=10 ) ax.set_xlabel(x_label, fontsize=12) ax.set_ylabel(y_label, fontsize=12) ax.set_title(title, fontsize=14, pad=15) if integer_ticks: ax.yaxis.set_major_locator(MaxNLocator(integer=True)) plt.tight_layout() return fig def save_dataframe_to_csv(df, filename, index=False): """ Save a DataFrame to a CSV file. Parameters: df (pandas.DataFrame): DataFrame to save filename (str): Path to the output file index (bool): Whether to include the index in the output Returns: str: Path to the saved file """ # Ensure the directory exists directory = os.path.dirname(filename) if directory: ensure_directory(directory) # Save the DataFrame df.to_csv(filename, index=index) return filename