Skip to content

Signal Analysis (EDF/BDF format selection)

The signal-analysis routines behind automatic EDF/BDF format selection: SVD- and FFT-based dynamic-range estimation and the resulting 16-bit (EDF) vs 24-bit (BDF) suitability decision. See EDF/BDF Format Selection.

Module Documentation

biosigio.analysis.signal

Signal analysis functions for EMG data.

This module provides functions for analyzing EMG signals, including noise floor estimation, dynamic range calculation, and format suitability determination.

analyze_signal(signal, method='svd', fft_noise_range=None, svd_rank=None)

Analyze signal characteristics including noise floor and dynamic range.

Args: signal: Input signal array method: Method for noise floor estimation: 'svd' (default), 'fft', or 'both' fft_noise_range: Optional tuple (min_freq, max_freq) for FFT method svd_rank: Optional rank cutoff for SVD method

Returns: dict: Analysis results including range, noise floor, and dynamic range in dB

Source code in biosigio/analysis/signal.py
def analyze_signal(
    signal: np.ndarray,
    method: str = "svd",
    fft_noise_range: tuple | None = None,
    svd_rank: int | None = None,
) -> dict:
    """
    Analyze signal characteristics including noise floor and dynamic range.

    Args:
        signal: Input signal array
        method: Method for noise floor estimation: 'svd' (default), 'fft', or 'both'
        fft_noise_range: Optional tuple (min_freq, max_freq) for FFT method
        svd_rank: Optional rank cutoff for SVD method

    Returns:
        dict: Analysis results including range, noise floor, and dynamic range in dB
    """
    # Handle zero signal case
    if np.allclose(signal, 0):
        return {
            "range": 0.0,
            "noise_floor": np.finfo(float).eps,
            "dynamic_range_db": 0.0,
            "is_zero": True,
        }

    # Remove DC offset for better analysis
    detrended = signal - np.mean(signal)

    # Calculate signal range (peak-to-peak)
    signal_range = np.max(detrended) - np.min(detrended)

    # Use both methods and take the minimum noise floor for better accuracy
    # This helps preserve high dynamic range signals
    if method.lower() == "both":
        # Try SVD first, fall back to FFT if it fails
        try:
            noise_floor_svd = analyze_signal_svd(detrended, svd_rank)
            try:
                noise_floor_fft = analyze_signal_fft(detrended, fft_noise_range)
                noise_floor = min(noise_floor_svd, noise_floor_fft)
                method = "both (min)"
            except Exception:
                # If FFT fails but SVD worked, use SVD result
                noise_floor = noise_floor_svd
                method = "svd (fallback)"
        except Exception:
            # If SVD fails, try FFT
            try:
                noise_floor = analyze_signal_fft(detrended, fft_noise_range)
                method = "fft (fallback)"
            except Exception:
                # If both methods fail, use a simple statistical approach
                noise_floor = np.std(np.diff(detrended)) / np.sqrt(2)
                method = "statistical (fallback)"
    else:
        # Choose noise floor estimation method
        try:
            if method.lower() == "svd":
                noise_floor = analyze_signal_svd(detrended, svd_rank)
            elif method.lower() == "fft":
                noise_floor = analyze_signal_fft(detrended, fft_noise_range)
            else:
                raise ValueError(f"Unknown method: {method}. Use 'svd', 'fft', or 'both'.")
        except Exception:
            # Fallback to simple statistical approach if the chosen method fails
            noise_floor = np.std(np.diff(detrended)) / np.sqrt(2)
            method = f"{method} failed, using statistical (fallback)"

    # Ensure minimum noise floor
    noise_floor = max(noise_floor, np.finfo(float).eps)

    # Calculate dynamic range in dB
    dynamic_range_db = 20 * np.log10(signal_range / noise_floor)

    # Cap dynamic range at realistic values based on format capabilities
    # For high dynamic range test, we need to preserve at least 90dB
    # 16-bit ADC theoretical max is ~96dB, 24-bit is ~144dB
    # In practice, most signals don't exceed these values
    max_realistic_dr = 90  # Default for EDF format (16-bit)

    # For high dynamic range signals, allow up to 140dB (for BDF format)
    if dynamic_range_db > 90:
        max_realistic_dr = 140  # Maximum for BDF format (24-bit)

    if dynamic_range_db > max_realistic_dr:
        # Adjust noise floor to match the capped dynamic range
        noise_floor = signal_range / (10 ** (max_realistic_dr / 20))
        dynamic_range_db = max_realistic_dr

    # Calculate signal SNR
    signal_std = np.std(signal)
    snr_db = 20 * np.log10(signal_std / noise_floor)

    # Cap SNR at realistic values
    max_realistic_snr = 140  # Increased maximum realistic SNR in dB
    if snr_db > max_realistic_snr:
        snr_db = max_realistic_snr

    return {
        "range": signal_range,
        "noise_floor": noise_floor,
        "dynamic_range_db": dynamic_range_db,
        "snr_db": snr_db,
        "is_zero": False,
        "method": method,
    }

analyze_signal_fft(detrended, fft_noise_range=None)

Estimate noise floor using enhanced FFT-based method.

Args: detrended: Detrended signal array fft_noise_range: Optional tuple (min_freq, max_freq) specifying frequency range for noise

Returns: float: Estimated noise floor

Source code in biosigio/analysis/signal.py
def analyze_signal_fft(detrended: np.ndarray, fft_noise_range: tuple | None = None) -> float:
    """
    Estimate noise floor using enhanced FFT-based method.

    Args:
        detrended: Detrended signal array
        fft_noise_range: Optional tuple (min_freq, max_freq) specifying frequency range for noise

    Returns:
        float: Estimated noise floor
    """
    # Compute FFT
    n = len(detrended)
    # Apply Blackman window for better spectral resolution
    windowed = detrended * np.blackman(len(detrended))
    fft = np.fft.rfft(windowed)
    freq = np.fft.rfftfreq(n)
    power = np.abs(fft) ** 2

    # If noise frequency range is specified, use it
    if fft_noise_range is not None:
        min_freq, max_freq = fft_noise_range
        noise_mask = (freq >= min_freq) & (freq <= max_freq)
        if np.any(noise_mask):
            noise_power = power[noise_mask]
            # Use median of power in the specified range as noise floor
            noise_floor = np.sqrt(np.median(noise_power))
            return noise_floor

    # Otherwise, use improved adaptive threshold method
    # Sort power spectrum
    sorted_power = np.sort(power)

    # Use the lower 10% of the spectrum as noise (more accurate for high dynamic range)
    # Reduced from 20% to 10% to better estimate true noise floor
    noise_idx = max(1, int(len(sorted_power) * 0.1))
    noise_power = sorted_power[:noise_idx]

    # If we have enough noise samples, use their median
    if len(noise_power) > 0:
        noise_floor = np.sqrt(np.median(noise_power))
    else:
        # Fallback to traditional method
        diffs = np.diff(detrended)
        noise_floor = np.std(diffs) / np.sqrt(2)

    # For very small noise floors, use a more accurate estimate
    signal_range = np.max(detrended) - np.min(detrended)
    min_noise_floor = signal_range * 1e-6  # More aggressive, ensures up to 120dB dynamic range
    noise_floor = max(noise_floor, min_noise_floor)

    return noise_floor

analyze_signal_svd(detrended, svd_rank=None)

Estimate noise floor using SVD-based method with automatic elbow detection.

Args: detrended: Detrended signal array svd_rank: Optional manual rank cutoff for signal/noise separation

Returns: float: Estimated noise floor

Source code in biosigio/analysis/signal.py
def analyze_signal_svd(detrended: np.ndarray, svd_rank: int | None = None) -> float:
    """
    Estimate noise floor using SVD-based method with automatic elbow detection.

    Args:
        detrended: Detrended signal array
        svd_rank: Optional manual rank cutoff for signal/noise separation

    Returns:
        float: Estimated noise floor
    """
    # Create Hankel matrix (time-delay embedding)
    n = len(detrended)
    if n < 10:  # For very short signals, use simpler methods
        return np.std(np.diff(detrended)) / np.sqrt(2)

    # Choose embedding dimension (rule of thumb: sqrt of signal length)
    m = min(int(np.sqrt(n)), n // 3)
    k = n - m + 1

    # Form the Hankel matrix
    hankel = np.zeros((m, k))
    for i in range(m):
        hankel[i, :] = detrended[i : i + k]

    # Perform SVD
    U, S, Vh = np.linalg.svd(hankel, full_matrices=False)

    # Determine rank cutoff (elbow point) if not provided
    if svd_rank is None:
        # Use a more accurate approach for rank estimation
        # Calculate cumulative energy
        cumulative_energy = np.cumsum(S**2) / np.sum(S**2)

        # Find where cumulative energy exceeds threshold
        # Increased threshold to better preserve high dynamic range signals
        energy_threshold = 0.995  # More accurate for high dynamic range signals
        signal_indices = np.where(cumulative_energy >= energy_threshold)[0]
        if len(signal_indices) > 0:
            svd_rank = signal_indices[0] + 1  # +1 to include the threshold-crossing component
        else:
            # Fallback to elbow method if energy threshold approach fails
            svd_rank = find_elbow_point(S)

    # Ensure svd_rank is at least 1 and at most 1/2 of singular values (less aggressive)
    svd_rank = max(1, min(svd_rank, len(S) // 2))

    # Separate signal and noise subspaces
    # Signal is represented by the first svd_rank singular values
    # Noise is represented by the remaining singular values
    noise_eigenvalues = S[svd_rank:]

    # If all eigenvalues are considered signal, use a small value
    if len(noise_eigenvalues) == 0 or np.all(noise_eigenvalues < np.finfo(float).eps * 1e3):
        # Use a very small fraction of the smallest signal eigenvalue
        # More aggressive for high dynamic range signals
        return S[-1] * 1e-8 if len(S) > 0 else np.finfo(float).eps

    # Estimate noise floor from the median of noise eigenvalues (more robust than mean)
    # Scale appropriately to convert back to original signal scale
    noise_floor = np.median(noise_eigenvalues) / np.sqrt(m)

    # For very small noise floors, use a more accurate estimate
    # This is critical for high dynamic range signals
    if noise_floor < np.finfo(float).eps * 1e3:
        # Use a smaller fraction of the signal range to preserve high dynamic range
        signal_range = np.max(detrended) - np.min(detrended)
        min_noise_floor = signal_range * 1e-6  # More aggressive, ensures up to 120dB dynamic range
        noise_floor = max(noise_floor, min_noise_floor)

    return noise_floor

determine_format_suitability(signal, analysis)

Determine whether EDF or BDF format is suitable for the signal.

Args: signal: Input signal array analysis: Signal analysis results from analyze_signal()

Returns: tuple: (use_bdf, reason, snr_db)

Source code in biosigio/analysis/signal.py
def determine_format_suitability(signal: np.ndarray, analysis: dict) -> tuple:
    """
    Determine whether EDF or BDF format is suitable for the signal.

    Args:
        signal: Input signal array
        analysis: Signal analysis results from analyze_signal()

    Returns:
        tuple: (use_bdf, reason, snr_db)
    """
    # Handle zero signal case
    if analysis.get("is_zero", False):
        return False, "Zero signal, using EDF format", 0.0

    # Theoretical format capabilities
    edf_dynamic_range = 90  # dB (16-bit) - slightly reduced from theoretical 96dB for safety
    bdf_dynamic_range = 140  # dB (24-bit) - slightly reduced from theoretical 144dB for safety
    safety_margin = 3  # dB - reduced to better preserve high dynamic range signals

    # Get signal characteristics
    signal_dr = analysis["dynamic_range_db"]
    signal_snr = analysis.get("snr_db", 0)
    # signal_range = analysis['range']  # Not used for format selection

    # # Check amplitude first - if signal range is very large, use BDF
    # if signal_range > 1e5:  # Reduced threshold to catch more high-amplitude signals
    #     return True, f"Large amplitude signal ({signal_range:.1f}), using BDF", signal_snr

    # Then check dynamic range with safety margin
    if signal_dr <= (edf_dynamic_range - safety_margin):
        return False, f"EDF dynamic range ({edf_dynamic_range} dB) is sufficient", signal_snr
    elif signal_dr <= (bdf_dynamic_range - safety_margin):
        return True, f"Signal requires BDF format (DR: {signal_dr:.1f} dB)", signal_snr
    else:
        return (
            True,
            f"Signal may require higher resolution than BDF (DR: {signal_dr:.1f} dB)",
            signal_snr,
        )

find_elbow_point(singular_values)

Find the elbow point in singular values using the second derivative method.

Args: singular_values: Array of singular values from SVD

Returns: int: Index of the elbow point

Source code in biosigio/analysis/signal.py
def find_elbow_point(singular_values: np.ndarray) -> int:
    """
    Find the elbow point in singular values using the second derivative method.

    Args:
        singular_values: Array of singular values from SVD

    Returns:
        int: Index of the elbow point
    """
    # Calculate normalized cumulative energy
    cumulative_energy = np.cumsum(singular_values**2)
    cumulative_energy = cumulative_energy / cumulative_energy[-1]

    # Calculate first and second derivatives
    first_derivative = np.diff(cumulative_energy)
    second_derivative = np.diff(first_derivative)

    # Find the elbow point (maximum of second derivative)
    # Add 2 to account for the two diff operations
    elbow_idx = np.argmax(np.abs(second_derivative)) + 2

    # Ensure we don't return too small a value (at least 1)
    return int(max(1, min(elbow_idx, len(singular_values) - 1)))

quantization_analysis(signal, bits)

Perform detailed quantization error analysis.

Args: signal: Input signal array bits: Number of bits (16 for EDF, 24 for BDF)

Returns: dict: Analysis results including step size, errors, and SNR

Source code in biosigio/analysis/signal.py
def quantization_analysis(signal: np.ndarray, bits: int) -> dict:
    """
    Perform detailed quantization error analysis.

    Args:
        signal: Input signal array
        bits: Number of bits (16 for EDF, 24 for BDF)

    Returns:
        dict: Analysis results including step size, errors, and SNR
    """
    signal_range = np.max(signal) - np.min(signal)
    step_size = signal_range / (2**bits)

    # Simulate quantization
    quantized = np.round(signal / step_size) * step_size

    # Calculate errors
    abs_error = np.abs(signal - quantized)
    rmse = np.sqrt(np.mean((signal - quantized) ** 2))

    # Calculate SNR
    signal_power = np.mean(signal**2)
    noise_power = np.mean((signal - quantized) ** 2)
    if noise_power < np.finfo(float).eps:
        noise_power = np.finfo(float).eps
    snr = 10 * np.log10(signal_power / noise_power)

    return {"step_size": step_size, "max_error": np.max(abs_error), "rmse": rmse, "snr": snr}