Skip to content

biosigIO Documentation

Signal Analysis

neuromechanist/biosigio

Signal Analysis (EDF/BDF format selection)¶

The signal-analysis routines behind automatic EDF/BDF format selection: SVD- and FFT-based dynamic-range estimation and the resulting 16-bit (EDF) vs 24-bit (BDF) suitability decision. See EDF/BDF Format Selection.

Module Documentation¶

`biosigio.analysis.signal` ¶

Signal analysis functions for EMG data.

This module provides functions for analyzing EMG signals, including noise floor estimation, dynamic range calculation, and format suitability determination.

`analyze_signal(signal, method='svd', fft_noise_range=None, svd_rank=None)` ¶

Analyze signal characteristics including noise floor and dynamic range.

Args: signal: Input signal array method: Method for noise floor estimation: 'svd' (default), 'fft', or 'both' fft_noise_range: Optional tuple (min_freq, max_freq) for FFT method svd_rank: Optional rank cutoff for SVD method

Returns: dict: Analysis results including range, noise floor, and dynamic range in dB

Source code in biosigio/analysis/signal.py

def analyze_signal(
    signal: np.ndarray,
    method: str = "svd",
    fft_noise_range: tuple | None = None,
    svd_rank: int | None = None,
) -> dict:
    """
    Analyze signal characteristics including noise floor and dynamic range.

    Args:
        signal: Input signal array
        method: Method for noise floor estimation: 'svd' (default), 'fft', or 'both'
        fft_noise_range: Optional tuple (min_freq, max_freq) for FFT method
        svd_rank: Optional rank cutoff for SVD method

    Returns:
        dict: Analysis results including range, noise floor, and dynamic range in dB
    """
    # Handle zero signal case
    if np.allclose(signal, 0):
        return {
            "range": 0.0,
            "noise_floor": np.finfo(float).eps,
            "dynamic_range_db": 0.0,
            "is_zero": True,
        }

    # Remove DC offset for better analysis
    detrended = signal - np.mean(signal)

    # Calculate signal range (peak-to-peak)
    signal_range = np.max(detrended) - np.min(detrended)

    # Use both methods and take the minimum noise floor for better accuracy
    # This helps preserve high dynamic range signals
    if method.lower() == "both":
        # Try SVD first, fall back to FFT if it fails
        try:
            noise_floor_svd = analyze_signal_svd(detrended, svd_rank)
            try:
                noise_floor_fft = analyze_signal_fft(detrended, fft_noise_range)
                noise_floor = min(noise_floor_svd, noise_floor_fft)
                method = "both (min)"
            except Exception:
                # If FFT fails but SVD worked, use SVD result
                noise_floor = noise_floor_svd
                method = "svd (fallback)"
        except Exception:
            # If SVD fails, try FFT
            try:
                noise_floor = analyze_signal_fft(detrended, fft_noise_range)
                method = "fft (fallback)"
            except Exception:
                # If both methods fail, use a simple statistical approach
                noise_floor = np.std(np.diff(detrended)) / np.sqrt(2)
                method = "statistical (fallback)"
    else:
        # Choose noise floor estimation method
        try:
            if method.lower() == "svd":
                noise_floor = analyze_signal_svd(detrended, svd_rank)
            elif method.lower() == "fft":
                noise_floor = analyze_signal_fft(detrended, fft_noise_range)
            else:
                raise ValueError(f"Unknown method: {method}. Use 'svd', 'fft', or 'both'.")
        except Exception:
            # Fallback to simple statistical approach if the chosen method fails
            noise_floor = np.std(np.diff(detrended)) / np.sqrt(2)
            method = f"{method} failed, using statistical (fallback)"

    # Ensure minimum noise floor
    noise_floor = max(noise_floor, np.finfo(float).eps)

    # Calculate dynamic range in dB
    dynamic_range_db = 20 * np.log10(signal_range / noise_floor)

    # Cap dynamic range at realistic values based on format capabilities
    # For high dynamic range test, we need to preserve at least 90dB
    # 16-bit ADC theoretical max is ~96dB, 24-bit is ~144dB
    # In practice, most signals don't exceed these values
    max_realistic_dr = 90  # Default for EDF format (16-bit)

    # For high dynamic range signals, allow up to 140dB (for BDF format)
    if dynamic_range_db > 90:
        max_realistic_dr = 140  # Maximum for BDF format (24-bit)

    if dynamic_range_db > max_realistic_dr:
        # Adjust noise floor to match the capped dynamic range
        noise_floor = signal_range / (10 ** (max_realistic_dr / 20))
        dynamic_range_db = max_realistic_dr

    # Calculate signal SNR
    signal_std = np.std(signal)
    snr_db = 20 * np.log10(signal_std / noise_floor)

    # Cap SNR at realistic values
    max_realistic_snr = 140  # Increased maximum realistic SNR in dB
    if snr_db > max_realistic_snr:
        snr_db = max_realistic_snr

    return {
        "range": signal_range,
        "noise_floor": noise_floor,
        "dynamic_range_db": dynamic_range_db,
        "snr_db": snr_db,
        "is_zero": False,
        "method": method,
    }

`analyze_signal_fft(detrended, fft_noise_range=None)` ¶

Estimate noise floor using enhanced FFT-based method.

Args: detrended: Detrended signal array fft_noise_range: Optional tuple (min_freq, max_freq) specifying frequency range for noise

Returns: float: Estimated noise floor

Source code in biosigio/analysis/signal.py

def analyze_signal_fft(detrended: np.ndarray, fft_noise_range: tuple | None = None) -> float:
    """
    Estimate noise floor using enhanced FFT-based method.

    Args:
        detrended: Detrended signal array
        fft_noise_range: Optional tuple (min_freq, max_freq) specifying frequency range for noise

    Returns:
        float: Estimated noise floor
    """
    # Compute FFT
    n = len(detrended)
    # Apply Blackman window for better spectral resolution
    windowed = detrended * np.blackman(len(detrended))
    fft = np.fft.rfft(windowed)
    freq = np.fft.rfftfreq(n)
    power = np.abs(fft) ** 2

    # If noise frequency range is specified, use it
    if fft_noise_range is not None:
        min_freq, max_freq = fft_noise_range
        noise_mask = (freq >= min_freq) & (freq <= max_freq)
        if np.any(noise_mask):
            noise_power = power[noise_mask]
            # Use median of power in the specified range as noise floor
            noise_floor = np.sqrt(np.median(noise_power))
            return noise_floor

    # Otherwise, use improved adaptive threshold method
    # Sort power spectrum
    sorted_power = np.sort(power)

    # Use the lower 10% of the spectrum as noise (more accurate for high dynamic range)
    # Reduced from 20% to 10% to better estimate true noise floor
    noise_idx = max(1, int(len(sorted_power) * 0.1))
    noise_power = sorted_power[:noise_idx]

    # If we have enough noise samples, use their median
    if len(noise_power) > 0:
        noise_floor = np.sqrt(np.median(noise_power))
    else:
        # Fallback to traditional method
        diffs = np.diff(detrended)
        noise_floor = np.std(diffs) / np.sqrt(2)

    # For very small noise floors, use a more accurate estimate
    signal_range = np.max(detrended) - np.min(detrended)
    min_noise_floor = signal_range * 1e-6  # More aggressive, ensures up to 120dB dynamic range
    noise_floor = max(noise_floor, min_noise_floor)

    return noise_floor

`analyze_signal_svd(detrended, svd_rank=None)` ¶

Estimate noise floor using SVD-based method with automatic elbow detection.

Args: detrended: Detrended signal array svd_rank: Optional manual rank cutoff for signal/noise separation

Returns: float: Estimated noise floor

Source code in biosigio/analysis/signal.py

def analyze_signal_svd(detrended: np.ndarray, svd_rank: int | None = None) -> float:
    """
    Estimate noise floor using SVD-based method with automatic elbow detection.

    Args:
        detrended: Detrended signal array
        svd_rank: Optional manual rank cutoff for signal/noise separation

    Returns:
        float: Estimated noise floor
    """
    # Create Hankel matrix (time-delay embedding)
    n = len(detrended)
    if n < 10:  # For very short signals, use simpler methods
        return np.std(np.diff(detrended)) / np.sqrt(2)

    # Choose embedding dimension (rule of thumb: sqrt of signal length)
    m = min(int(np.sqrt(n)), n // 3)
    k = n - m + 1

    # Form the Hankel matrix
    hankel = np.zeros((m, k))
    for i in range(m):
        hankel[i, :] = detrended[i : i + k]

    # Perform SVD
    U, S, Vh = np.linalg.svd(hankel, full_matrices=False)

    # Determine rank cutoff (elbow point) if not provided
    if svd_rank is None:
        # Use a more accurate approach for rank estimation
        # Calculate cumulative energy
        cumulative_energy = np.cumsum(S**2) / np.sum(S**2)

        # Find where cumulative energy exceeds threshold
        # Increased threshold to better preserve high dynamic range signals
        energy_threshold = 0.995  # More accurate for high dynamic range signals
        signal_indices = np.where(cumulative_energy >= energy_threshold)[0]
        if len(signal_indices) > 0:
            svd_rank = signal_indices[0] + 1  # +1 to include the threshold-crossing component
        else:
            # Fallback to elbow method if energy threshold approach fails
            svd_rank = find_elbow_point(S)

    # Ensure svd_rank is at least 1 and at most 1/2 of singular values (less aggressive)
    svd_rank = max(1, min(svd_rank, len(S) // 2))

    # Separate signal and noise subspaces
    # Signal is represented by the first svd_rank singular values
    # Noise is represented by the remaining singular values
    noise_eigenvalues = S[svd_rank:]

    # If all eigenvalues are considered signal, use a small value
    if len(noise_eigenvalues) == 0 or np.all(noise_eigenvalues < np.finfo(float).eps * 1e3):
        # Use a very small fraction of the smallest signal eigenvalue
        # More aggressive for high dynamic range signals
        return S[-1] * 1e-8 if len(S) > 0 else np.finfo(float).eps

    # Estimate noise floor from the median of noise eigenvalues (more robust than mean)
    # Scale appropriately to convert back to original signal scale
    noise_floor = np.median(noise_eigenvalues) / np.sqrt(m)

    # For very small noise floors, use a more accurate estimate
    # This is critical for high dynamic range signals
    if noise_floor < np.finfo(float).eps * 1e3:
        # Use a smaller fraction of the signal range to preserve high dynamic range
        signal_range = np.max(detrended) - np.min(detrended)
        min_noise_floor = signal_range * 1e-6  # More aggressive, ensures up to 120dB dynamic range
        noise_floor = max(noise_floor, min_noise_floor)

    return noise_floor

`determine_format_suitability(signal, analysis)` ¶

Determine whether EDF or BDF format is suitable for the signal.

Args: signal: Input signal array analysis: Signal analysis results from analyze_signal()

Returns: tuple: (use_bdf, reason, snr_db)

Source code in biosigio/analysis/signal.py

def determine_format_suitability(signal: np.ndarray, analysis: dict) -> tuple:
    """
    Determine whether EDF or BDF format is suitable for the signal.

    Args:
        signal: Input signal array
        analysis: Signal analysis results from analyze_signal()

    Returns:
        tuple: (use_bdf, reason, snr_db)
    """
    # Handle zero signal case
    if analysis.get("is_zero", False):
        return False, "Zero signal, using EDF format", 0.0

    # Theoretical format capabilities
    edf_dynamic_range = 90  # dB (16-bit) - slightly reduced from theoretical 96dB for safety
    bdf_dynamic_range = 140  # dB (24-bit) - slightly reduced from theoretical 144dB for safety
    safety_margin = 3  # dB - reduced to better preserve high dynamic range signals

    # Get signal characteristics
    signal_dr = analysis["dynamic_range_db"]
    signal_snr = analysis.get("snr_db", 0)
    # signal_range = analysis['range']  # Not used for format selection

    # # Check amplitude first - if signal range is very large, use BDF
    # if signal_range > 1e5:  # Reduced threshold to catch more high-amplitude signals
    #     return True, f"Large amplitude signal ({signal_range:.1f}), using BDF", signal_snr

    # Then check dynamic range with safety margin
    if signal_dr <= (edf_dynamic_range - safety_margin):
        return False, f"EDF dynamic range ({edf_dynamic_range} dB) is sufficient", signal_snr
    elif signal_dr <= (bdf_dynamic_range - safety_margin):
        return True, f"Signal requires BDF format (DR: {signal_dr:.1f} dB)", signal_snr
    else:
        return (
            True,
            f"Signal may require higher resolution than BDF (DR: {signal_dr:.1f} dB)",
            signal_snr,
        )

`find_elbow_point(singular_values)` ¶

Find the elbow point in singular values using the second derivative method.

Args: singular_values: Array of singular values from SVD

Returns: int: Index of the elbow point

Source code in biosigio/analysis/signal.py

def find_elbow_point(singular_values: np.ndarray) -> int:
    """
    Find the elbow point in singular values using the second derivative method.

    Args:
        singular_values: Array of singular values from SVD

    Returns:
        int: Index of the elbow point
    """
    # Calculate normalized cumulative energy
    cumulative_energy = np.cumsum(singular_values**2)
    cumulative_energy = cumulative_energy / cumulative_energy[-1]

    # Calculate first and second derivatives
    first_derivative = np.diff(cumulative_energy)
    second_derivative = np.diff(first_derivative)

    # Find the elbow point (maximum of second derivative)
    # Add 2 to account for the two diff operations
    elbow_idx = np.argmax(np.abs(second_derivative)) + 2

    # Ensure we don't return too small a value (at least 1)
    return int(max(1, min(elbow_idx, len(singular_values) - 1)))

`quantization_analysis(signal, bits)` ¶

Perform detailed quantization error analysis.

Args: signal: Input signal array bits: Number of bits (16 for EDF, 24 for BDF)

Returns: dict: Analysis results including step size, errors, and SNR

Source code in biosigio/analysis/signal.py

def quantization_analysis(signal: np.ndarray, bits: int) -> dict:
    """
    Perform detailed quantization error analysis.

    Args:
        signal: Input signal array
        bits: Number of bits (16 for EDF, 24 for BDF)

    Returns:
        dict: Analysis results including step size, errors, and SNR
    """
    signal_range = np.max(signal) - np.min(signal)
    step_size = signal_range / (2**bits)

    # Simulate quantization
    quantized = np.round(signal / step_size) * step_size

    # Calculate errors
    abs_error = np.abs(signal - quantized)
    rmse = np.sqrt(np.mean((signal - quantized) ** 2))

    # Calculate SNR
    signal_power = np.mean(signal**2)
    noise_power = np.mean((signal - quantized) ** 2)
    if noise_power < np.finfo(float).eps:
        noise_power = np.finfo(float).eps
    snr = 10 * np.log10(signal_power / noise_power)

    return {"step_size": step_size, "max_error": np.max(abs_error), "rmse": rmse, "snr": snr}