Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 6 additions & 31 deletions stumpy/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,9 @@
from numba import cuda, njit, prange
from scipy import linalg
from scipy.ndimage import maximum_filter1d, minimum_filter1d
from scipy.signal import convolve
from scipy.spatial.distance import cdist

from . import config
from . import config, sdp

try:
from numba.cuda.cudadrv.driver import _raise_driver_not_found
Expand Down Expand Up @@ -652,7 +651,8 @@ def check_window_size(m, max_size=None, n=None):
@njit(fastmath=config.STUMPY_FASTMATH_TRUE)
def _sliding_dot_product(Q, T):
"""
A Numba JIT-compiled implementation of the sliding window dot product.
A wrapper for numba JIT-compiled implementation of
the sliding dot product.

Parameters
----------
Expand All @@ -667,18 +667,12 @@ def _sliding_dot_product(Q, T):
out : numpy.ndarray
Sliding dot product between `Q` and `T`.
"""
m = Q.shape[0]
l = T.shape[0] - m + 1
out = np.empty(l)
for i in range(l):
out[i] = np.dot(Q, T[i : i + m])

return out
return sdp._njit_sliding_dot_product(Q, T)


def sliding_dot_product(Q, T):
"""
Use FFT convolution to calculate the sliding window dot product.
A wrapper for convolution implementation of the sliding dot product.

Parameters
----------
Expand All @@ -692,27 +686,8 @@ def sliding_dot_product(Q, T):
-------
output : numpy.ndarray
Sliding dot product between `Q` and `T`.

Notes
-----
Calculate the sliding dot product

`DOI: 10.1109/ICDM.2016.0179 \
<https://www.cs.ucr.edu/~eamonn/PID4481997_extend_Matrix%20Profile_I.pdf>`__

See Table I, Figure 4

Following the inverse FFT, Fig. 4 states that only cells [m-1:n]
contain valid dot products

Padding is done automatically in fftconvolve step
"""
n = T.shape[0]
m = Q.shape[0]
Qr = np.flipud(Q) # Reverse/flip Q
QT = convolve(Qr, T)

return QT.real[m - 1 : n]
return sdp._convolve_sliding_dot_product(Q, T)


@njit(
Expand Down
292 changes: 292 additions & 0 deletions stumpy/sdp.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,292 @@
import numpy as np
from numba import njit
from scipy.fft import next_fast_len
from scipy.fft._pocketfft.basic import c2r, r2c
from scipy.signal import convolve

from . import config

try: # pragma: no cover
import pyfftw

FFTW_IS_AVAILABLE = True
except ImportError: # pragma: no cover
FFTW_IS_AVAILABLE = False


@njit(fastmath=config.STUMPY_FASTMATH_TRUE)
def _njit_sliding_dot_product(Q, T):
"""
A Numba JIT-compiled implementation of the sliding dot product.

Parameters
----------
Q : numpy.ndarray
Query array or subsequence

T : numpy.ndarray
Time series or sequence

Returns
-------
out : numpy.ndarray
Sliding dot product between `Q` and `T`.
"""
m = len(Q)
l = T.shape[0] - m + 1
out = np.empty(l)
for i in range(l):
result = 0.0
for j in range(m):
result += Q[j] * T[i + j]
out[i] = result

return out


def _convolve_sliding_dot_product(Q, T):
"""
Use FFT or direct convolution to calculate the sliding dot product.

Parameters
----------
Q : numpy.ndarray
Query array or subsequence

T : numpy.ndarray
Time series or sequence

Returns
-------
output : numpy.ndarray
Sliding dot product between `Q` and `T`.

Notes
-----
Calculate the sliding dot product

`DOI: 10.1109/ICDM.2016.0179 \
<https://www.cs.ucr.edu/~eamonn/PID4481997_extend_Matrix%20Profile_I.pdf>`__

See Table I, Figure 4
"""
# mode='valid' returns output of convolution where the two
# sequences fully overlap.

return convolve(np.flipud(Q), T, mode="valid")


def _pocketfft_sliding_dot_product(Q, T):
"""
Use scipy.fft._pocketfft to compute
the sliding dot product.

Parameters
----------
Q : numpy.ndarray
Query array or subsequence

T : numpy.ndarray
Time series or sequence

Returns
-------
output : numpy.ndarray
Sliding dot product between `Q` and `T`.
"""
n = len(T)
m = len(Q)
next_fast_n = next_fast_len(n, real=True)

tmp = np.empty((2, next_fast_n))
tmp[0, :m] = Q[::-1]
tmp[0, m:] = 0.0
tmp[1, :n] = T
tmp[1, n:] = 0.0
fft_2d = r2c(True, tmp, axis=-1)

return c2r(False, np.multiply(fft_2d[0], fft_2d[1]), n=next_fast_n)[m - 1 : n]


class _PYFFTW_SLIDING_DOT_PRODUCT: # pragma: no cover
"""
A class to compute the sliding dot product using FFTW via pyfftw.

This class uses FFTW (via pyfftw) to efficiently compute the sliding dot product
between a query sequence Q and a time series T. It preallocates arrays and caches
FFTW objects to optimize repeated computations with similar-sized inputs.

Parameters
----------
max_n : int, default=2**20
Maximum length to preallocate arrays for. This will be the size of the
real-valued array. A complex-valued array of size `1 + (max_n // 2)`
will also be preallocated. If inputs exceed this size, arrays will be
reallocated to accommodate larger sizes.

Attributes
----------
real_arr : pyfftw.empty_aligned
Preallocated real-valued array for FFTW computations.

complex_arr : pyfftw.empty_aligned
Preallocated complex-valued array for FFTW computations.

rfft_objects : dict
Cache of FFTW forward transform objects, keyed by
(next_fast_n, n_threads, planning_flag).

irfft_objects : dict
Cache of FFTW inverse transform objects, keyed by
(next_fast_n, n_threads, planning_flag).

Notes
-----
The class maintains internal caches of FFTW objects to avoid redundant planning
operations when called multiple times with similar-sized inputs and parameters.

Examples
--------
>>> sdp_obj = _PYFFTW_SLIDING_DOT_PRODUCT(max_n=1000)
>>> Q = np.array([1, 2, 3])
>>> T = np.array([4, 5, 6, 7, 8])
>>> result = sdp_obj(Q, T)

References
----------
`FFTW documentation <http://www.fftw.org/>`__

`pyfftw documentation <https://pyfftw.readthedocs.io/>`__
"""

def __init__(self, max_n=2**20):
"""
Initialize the `_PYFFTW_SLIDING_DOT_PRODUCT` object, which can be called
to compute the sliding dot product using FFTW via pyfftw.

Parameters
----------
max_n : int, default=2**20
Maximum length to preallocate arrays for. This will be the size of the
real-valued array. A complex-valued array of size `1 + (max_n // 2)`
will also be preallocated.

Returns
-------
None
"""
# Preallocate arrays
self.real_arr = pyfftw.empty_aligned(max_n, dtype="float64")
self.complex_arr = pyfftw.empty_aligned(1 + (max_n // 2), dtype="complex128")

# Store FFTW objects, keyed by (next_fast_n, n_threads, planning_flag)
self.rfft_objects = {}
self.irfft_objects = {}

def __call__(self, Q, T, n_threads=1, planning_flag="FFTW_ESTIMATE"):
"""
Compute the sliding dot product between `Q` and `T` using FFTW via pyfftw,
and cache FFTW objects if not already cached.

Parameters
----------
Q : numpy.ndarray
Query array or subsequence.

T : numpy.ndarray
Time series or sequence.

n_threads : int, default=1
Number of threads to use for FFTW computations.

planning_flag : str, default="FFTW_ESTIMATE"
The planning flag that will be used in FFTW for planning.
See pyfftw documentation for details. Current options, ordered
ascendingly by the level of aggressiveness in planning, are:
"FFTW_ESTIMATE", "FFTW_MEASURE", "FFTW_PATIENT", and "FFTW_EXHAUSTIVE".
The more aggressive the planning, the longer the planning time, but
the faster the execution time.

Returns
-------
out : numpy.ndarray
Sliding dot product between `Q` and `T`.

Notes
-----
The planning_flag is defaulted to "FFTW_ESTIMATE" to be aligned with
MATLAB's FFTW usage (as of version R2025b)
See: https://www.mathworks.com/help/matlab/ref/fftw.html

This implementation is inspired by the answer on StackOverflow:
https://stackoverflow.com/a/30615425/2955541
"""
m = Q.shape[0]
n = T.shape[0]
next_fast_n = pyfftw.next_fast_len(n)

# Update preallocated arrays if needed
if next_fast_n > len(self.real_arr):
self.real_arr = pyfftw.empty_aligned(next_fast_n, dtype="float64")
self.complex_arr = pyfftw.empty_aligned(
1 + (next_fast_n // 2), dtype="complex128"
)

real_arr = self.real_arr[:next_fast_n]
complex_arr = self.complex_arr[: 1 + (next_fast_n // 2)]

# Get or create FFTW objects
key = (next_fast_n, n_threads, planning_flag)

rfft_obj = self.rfft_objects.get(key, None)
if rfft_obj is None:
rfft_obj = pyfftw.FFTW(
input_array=real_arr,
output_array=complex_arr,
direction="FFTW_FORWARD",
flags=(planning_flag,),
threads=n_threads,
)
self.rfft_objects[key] = rfft_obj
else:
rfft_obj.update_arrays(real_arr, complex_arr)

irfft_obj = self.irfft_objects.get(key, None)
if irfft_obj is None:
irfft_obj = pyfftw.FFTW(
input_array=complex_arr,
output_array=real_arr,
direction="FFTW_BACKWARD",
flags=(planning_flag, "FFTW_DESTROY_INPUT"),
threads=n_threads,
)
self.irfft_objects[key] = irfft_obj
else:
irfft_obj.update_arrays(complex_arr, real_arr)

# RFFT(T)
real_arr[:n] = T
real_arr[n:] = 0.0
rfft_obj.execute() # output is in complex_arr
complex_arr_T = complex_arr.copy()

# RFFT(Q)
# Scale by 1/next_fast_n to account for
# FFTW's unnormalized inverse FFT via execute()
np.multiply(Q[::-1], 1.0 / next_fast_n, out=real_arr[:m])
real_arr[m:] = 0.0
rfft_obj.execute() # output is in complex_arr

# RFFT(T) * RFFT(Q)
np.multiply(complex_arr, complex_arr_T, out=complex_arr)

# IRFFT (input is in complex_arr)
irfft_obj.execute() # output is in real_arr

return real_arr[m - 1 : n]


if FFTW_IS_AVAILABLE: # pragma: no cover
_pyfftw_sliding_dot_product = _PYFFTW_SLIDING_DOT_PRODUCT()
else: # pragma: no cover
_pyfftw_sliding_dot_product = None
Loading