Base Calculator¤

flatten(input: dict | list | str) -> Iterable ¤

Yield items from any nested iterable.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `input` | `dict \| list \| str` | A possibly nested structure of lists, dicts, and strings. | *required* |

Yields:

| Name | Type | Description |
| --- | --- | --- |
| `Iterable` | `Iterable` | Items from the nested iterable. |

Notes

See https://stackoverflow.com/a/40857703.

Source code in lexos/rolling_windows/calculators/base_calculator.py
def flatten(input: dict | list | str) -> Iterable:
    """Yield items from any nested iterable.

    Args:
        input (dict | list | str): A possibly nested structure of lists, dicts, and strings.

    Yields:
        Iterable: Items from the nested iterable.

    Notes:
        See https://stackoverflow.com/a/40857703.
    """
    for x in input:
        if isinstance(x, Iterable) and not isinstance(x, str):
            if isinstance(x, list):
                # Recurse into nested lists
                yield from flatten(x)
            elif isinstance(x, dict):
                # Yield only the first value of each dict (e.g. the attribute
                # value of a one-key spaCy rule pattern)
                yield list(x.values())[0]
        else:
            yield x
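
As a quick illustration (assuming `flatten` is imported from `lexos.rolling_windows.calculators.base_calculator`), nested lists are recursed into, each dict contributes its first value, and bare strings pass through unchanged:

```python
from lexos.rolling_windows.calculators.base_calculator import flatten

# A nested structure shaped like a spaCy Rule Matcher pattern
pattern = [[{"TEXT": "hobbits"}], [{"TEXT": "Frodo"}, {"OP": "?"}], "ring"]
print(list(flatten(pattern)))
# ['hobbits', 'Frodo', '?', 'ring']
```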

regex_escape(s: str | bytes) -> str | bytes ¤

Escape only regex special characters.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `s` | `str \| bytes` | A string or bytes object. | *required* |

Returns:

| Type | Description |
| --- | --- |
| `str \| bytes` | An escaped string or bytes object. |

Note

See https://stackoverflow.com/a/78136529/22853742.

Source code in lexos/rolling_windows/calculators/base_calculator.py
def regex_escape(s: str | bytes) -> str | bytes:
    """Escape only regex special characters.

    Args:
        s (str | bytes): A string or bytes object.

    Returns:
        An escaped string or bytes object.

    Note:
        See https://stackoverflow.com/a/78136529/22853742.
    """
    # Escape only the metacharacters in the character class below; note
    # that this set (from the cited answer) omits "\" and "|".
    if isinstance(s, bytes):
        return re.sub(rb"[][(){}?*+.^$]", lambda m: b"\\" + m.group(), s)
    return re.sub(r"[][(){}?*+.^$]", lambda m: "\\" + m.group(), s)
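
For example, only the metacharacters in the character class above are escaped; letters, digits, and whitespace pass through unchanged:

```python
print(regex_escape("Dr. Strange (2016)?"))
# Dr\. Strange \(2016\)\?
```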

spacy_rule_to_lower(patterns: dict | list[dict], old_key: Optional[list[str] | str] = ['TEXT', 'ORTH'], new_key: Optional[str] = 'LOWER') -> dict | list ¤

Convert spaCy Rule Matcher patterns to lowercase.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `patterns` | `dict \| list[dict]` | A list of spaCy Rule Matcher patterns. | *required* |
| `old_key` | `list[str] \| str` | A dictionary key or list of keys to rename. | `['TEXT', 'ORTH']` |
| `new_key` | `Optional[str]` | The new key name. | `'LOWER'` |

Returns:

| Type | Description |
| --- | --- |
| `dict \| list` | A dict or list of spaCy Rule Matcher patterns. |

Source code in lexos/rolling_windows/calculators/base_calculator.py
def spacy_rule_to_lower(
    patterns: dict | list[dict],
    old_key: Optional[list[str] | str] = ["TEXT", "ORTH"],
    new_key: Optional[str] = "LOWER",
) -> dict | list:
    """Convert spaCy Rule Matcher patterns to lowercase.

    Args:
        patterns (dict | list[dict]): A list of spaCy Rule Matcher patterns.
        old_key (list[str] | str): A dictionary key or list of keys to rename.
        new_key (Optional[str]): The new key name.

    Returns:
        A dict or list of spaCy Rule Matcher patterns.
    """

    def convert(key):
        """Rename the key if it matches old_key."""
        return new_key if key in old_key else key

    if isinstance(patterns, dict):
        new_dict = {}
        for key, value in patterns.items():
            key = convert(key)
            if key == new_key and isinstance(value, str):
                # spaCy's LOWER attribute compares against the lowercased
                # token text, so the pattern value must be lowercased too
                value = value.lower()
            new_dict[key] = value
        return new_dict

    if isinstance(patterns, list):
        # Recurse, preserving any custom old_key/new_key arguments
        return [spacy_rule_to_lower(value, old_key, new_key) for value in patterns]
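
A short example: `TEXT` and `ORTH` keys are renamed to `LOWER` and their string values lowercased, while unrelated keys such as `OP` are left untouched:

```python
patterns = [{"TEXT": "Gandalf"}, {"ORTH": "Grey", "OP": "?"}]
print(spacy_rule_to_lower(patterns))
# [{'LOWER': 'gandalf'}, {'LOWER': 'grey', 'OP': '?'}]
```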

BaseCalculator pydantic-model ¤

Bases: ABC, BaseModel

An abstract base class for calculators.

Source code in lexos/rolling_windows/calculators/base_calculator.py
class BaseCalculator(ABC, BaseModel):
    """An abstract base class for calculators."""

    id: ClassVar[str] = "base_calculator"

    patterns: Optional[list | str] = Field(
        default=None, description="A pattern or list of patterns to search in windows."
    )
    windows: Optional[Windows] = Field(
        default=None, description="A Windows object containing the windows to search."
    )
    mode: Optional[bool | str] = Field(
        default="exact",
        description="The search method to use ('regex', 'spacy_rule', 'multi_token', 'multi_token_exact').",
    )
    case_sensitive: Optional[bool] = Field(
        default=False, description="Whether to make searches case-sensitive."
    )
    alignment_mode: Optional[str] = Field(
        default="strict",
        description="Whether to snap searches to token boundaries. Values are 'strict', 'contract', and 'expand'.",
    )
    model: Optional[str] = Field(
        default="xx_sent_ud_sm",
        description="The language model to be used for searching spaCy tokens/spans.",
    )
    nlp: Optional[Language] = Field(default=None, description="The spaCy nlp object.")
    data: Optional[list] = Field(
        default_factory=list, description="A container for the calculated data."
    )

    model_config = validation_config

    @property
    def metadata(self) -> dict:
        """Return metadata for the calculator."""
        # Note: model_dump() evaluates computed fields on this model.
        # Subclasses whose computed fields are expensive or rely on
        # external state should override this property and exclude them
        # explicitly (e.g. model_dump(exclude={...})).
        return self.model_dump()

    @property
    def n(self):
        """Get the number of units per window."""
        if self.windows is not None:
            return self.windows.n
        return None

    @property
    def regex_flags(self):
        """Return regex flags based on case_sensitive setting."""
        if not self.case_sensitive:
            return re.IGNORECASE | re.UNICODE
        else:
            return re.UNICODE

    @property
    def window_type(self):
        """Get the type of units in the windows."""
        if self.windows is not None:
            return self.windows.window_type
        return None

    @abstractmethod
    def __call__(self, *args, **kwargs):
        """Call the instance."""
        ...

    def _count_character_patterns_in_character_windows(
        self, window: str, pattern: str
    ) -> int:
        """Use Python count() to count exact character matches in a character window.

        Args:
            window (str): A string window.
            pattern (str): A string pattern to search for.

        Returns:
            The number of occurrences of the pattern in the window.
        """
        if self.mode == "regex":
            return len(re.findall(pattern, window, self.regex_flags))
        else:
            if not self.case_sensitive:
                window = window.lower()
                pattern = pattern.lower()
            return window.count(pattern)

    def _count_in_character_window(self, window: str, pattern: str) -> int:
        """Choose function for counting matches in character windows.

        Args:
            window (str): A string window.
            pattern (str): A string pattern to search for.

        Returns:
            The number of occurrences of the pattern in the window.
        """
        if self.mode in ["exact", "regex"]:
            return self._count_character_patterns_in_character_windows(window, pattern)
        else:
            raise LexosException("Invalid mode for character windows.")

    def _count_token_patterns_in_token_lists(
        self, window: list[str], pattern: str
    ) -> int:
        """Count patterns in lists of token strings.

        Args:
            window (list[str]): A window consisting of a list of strings.
            pattern (str): A string pattern to search for.

        Returns:
            The number of occurrences of the pattern in the window.
        """
        if self.mode == "regex":
            return sum(
                [len(re.findall(pattern, token, self.regex_flags)) for token in window]
            )
        else:
            if not self.case_sensitive:
                window = [token.lower() for token in window]
                pattern = pattern.lower()
            return window.count(pattern)

    def _count_token_patterns_in_span(self, window: Span, pattern: list | str) -> int:
        """Count patterns in spans or docs.

        Args:
            window (Span): A window consisting of a list of spaCy spans or a spaCy doc.
            pattern (list | str): A string pattern or spaCy rule to search for.

        Returns:
            The number of occurrences of the pattern in the window.
        """
        if self.mode == "exact":
            if not self.case_sensitive:
                window = [token.lower_ for token in window]
                pattern = pattern.lower()
            else:
                window = [token.text for token in window]
            return window.count(pattern)
        elif self.mode == "regex":
            return sum(
                [
                    len(re.findall(pattern, token.text, self.regex_flags))
                    for token in window
                ]
            )
        elif self.mode == "spacy_rule":
            if not self.case_sensitive:
                pattern = spacy_rule_to_lower(pattern)
            matcher = Matcher(self.nlp.vocab)
            matcher.add("Pattern", [pattern])
            return len(matcher(window))

    def _count_token_patterns_in_span_text(self, window: Span, pattern: str) -> int:
        """Count patterns in span or doc text with token alignment.

        Args:
            window (Span): A Span window.
            pattern (str): A string pattern to search for.

        Returns:
            The number of occurrences of the pattern in the window.
        """
        count = 0
        if self.mode == "multi_token_exact":
            pattern = regex_escape(pattern)
        for match in re.finditer(pattern, window.text, self.regex_flags):
            start, end = match.span()
            span = window.char_span(start, end, alignment_mode=self.alignment_mode)
            if span is not None:
                count += 1
        return count

    def _count_in_token_window(
        self, window: list[str] | list[Token] | Doc | Span, pattern: list | str
    ) -> int:
        """Choose function for counting matches in token windows.

        Args:
            window (list[str] | list[Token] | Doc | Span): A window consisting of a list of token strings, a list of spaCy spans, or a spaCy doc.
            pattern (list | str): A string pattern or spaCy rule to search for.

        Returns:
            The number of occurrences of the pattern in the window.
        """
        # spaCy rules and multi-token searches require a spaCy Doc or Span
        if self.mode in ["multi_token", "spacy_rule"] and not isinstance(
            window, (Doc, Span)
        ):
            raise LexosException(
                "You cannot use spaCy rules or perform multi-token searches with a string or list of token strings."
            )
        if isinstance(window, list) and all(isinstance(i, str) for i in window):
            # Match in single tokens
            return self._count_token_patterns_in_token_lists(window, pattern)
        elif isinstance(window, (Doc, Span)):
            # Iterate over the full text with token boundary alignment
            if self.mode.startswith("multi_token"):
                return self._count_token_patterns_in_span_text(window, pattern)
            # Match in single tokens
            else:
                return self._count_token_patterns_in_span(window, pattern)

    def _extract_string_pattern(self, pattern: list[list[dict[str, Any]]]) -> str:
        """Extract a string pattern from a spaCy rule.

        Args:
            pattern (list[list[dict[str, Any]]]): A list of spaCy rule patterns to search.

        Returns:
            str: A string pattern.
        """
        return "|".join(
            [
                item if isinstance(item, str) else list(item.values())[0]
                for item in list(flatten(pattern))
            ]
        )

    def _get_window_count(
        self, window: list[str] | Span | str, pattern: list | str
    ) -> int:
        """Call character or token window methods, as appropriate.

        Args:
            window (list[str] | Span | str): A window consisting of a list of token strings, a list of spaCy spans, a spaCy doc, or a string.
            pattern (list | str): A string pattern or spaCy rule to search for.

        Returns:
            The number of occurrences of the pattern in the window.
        """
        if self.window_type == "characters":
            return self._count_in_character_window(window, pattern)
        else:
            return self._count_in_token_window(window, pattern)

    def _set_attrs(self, attrs: dict) -> None:
        """Set instance attributes when public method is called.

        Args:
            attrs (dict): A dict of keyword arguments and their values.
        """
        for key, value in attrs.items():
            if value is not None:
                setattr(self, key, value)
                if key == "model":
                    self.nlp = spacy.load(self.model)

    @abstractmethod
    def to_df(self, *args, **kwargs) -> pd.DataFrame:
        """Output the calcualtions as a pandas DataFrame."""
        ...
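
A hedged sketch of a concrete subclass may clarify the intended workflow: implement `__call__` to populate `self.data` and `to_df` to expose it. The `TermCounter` name is invented for illustration, the sketch assumes a `Windows` object can be iterated directly, and the imports (`ClassVar`, `pd`) match the module context:

```python
import pandas as pd


class TermCounter(BaseCalculator):
    """A minimal, hypothetical counting calculator (illustration only)."""

    id: ClassVar[str] = "term_counter"

    def __call__(self, patterns=None, windows=None) -> None:
        """Count each pattern in each window; store one row per window."""
        self._set_attrs({"patterns": patterns, "windows": windows})
        pats = self.patterns if isinstance(self.patterns, list) else [self.patterns]
        self.data = [
            [self._get_window_count(window, pat) for pat in pats]
            for window in self.windows  # assumes Windows is iterable
        ]

    def to_df(self) -> pd.DataFrame:
        """Return the counts with one column per pattern."""
        pats = self.patterns if isinstance(self.patterns, list) else [self.patterns]
        return pd.DataFrame(self.data, columns=[str(p) for p in pats])
```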

alignment_mode: Optional[str] = 'strict' pydantic-field ¤

Whether to snap searches to token boundaries. Values are 'strict', 'contract', and 'expand'.

case_sensitive: Optional[bool] = False pydantic-field ¤

Whether to make searches case-sensitive.

data: Optional[list] = [] pydantic-field ¤

A container for the calculated data.

metadata: dict property ¤

Return metadata for the calculator.

mode: Optional[bool | str] = 'exact' pydantic-field ¤

The search method to use ('exact', 'regex', 'spacy_rule', 'multi_token', 'multi_token_exact').

model: Optional[str] = 'xx_sent_ud_sm' pydantic-field ¤

The language model to be used for searching spaCy tokens/spans.

n property ¤

Get the number of units per window.

nlp: Optional[Language] = None pydantic-field ¤

The spaCy nlp object.

patterns: Optional[list | str] = None pydantic-field ¤

A pattern or list of patterns to search in windows.

regex_flags property ¤

Return regex flags based on case_sensitive setting.

window_type property ¤

Get the type of units in the windows.

windows: Optional[Windows] = None pydantic-field ¤

A Windows object containing the windows to search.

__call__(*args, **kwargs) abstractmethod ¤

Call the instance.

Source code in lexos/rolling_windows/calculators/base_calculator.py
@abstractmethod
def __call__(self, *args, **kwargs):
    """Call the instance."""
    ...

to_df(*args, **kwargs) -> pd.DataFrame abstractmethod ¤

Output the calculations as a pandas DataFrame.

Source code in lexos/rolling_windows/calculators/base_calculator.py
@abstractmethod
def to_df(self, *args, **kwargs) -> pd.DataFrame:
    """Output the calcualtions as a pandas DataFrame."""
    ...

_count_character_patterns_in_character_windows(window: str, pattern: str) -> int ¤

Use Python count() to count exact character matches in a character window.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `window` | `str` | A string window. | *required* |
| `pattern` | `str` | A string pattern to search for. | *required* |

Returns:

| Type | Description |
| --- | --- |
| `int` | The number of occurrences of the pattern in the window. |

Source code in lexos/rolling_windows/calculators/base_calculator.py
def _count_character_patterns_in_character_windows(
    self, window: str, pattern: str
) -> int:
    """Use Python count() to count exact character matches in a character window.

    Args:
        window (str): A string window.
        pattern (str): A string pattern to search for.

    Returns:
        The number of occurrences of the pattern in the window.
    """
    if self.mode == "regex":
        return len(re.findall(pattern, window, self.regex_flags))
    else:
        if not self.case_sensitive:
            window = window.lower()
            pattern = pattern.lower()
        return window.count(pattern)
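
Side by side, the two branches behave quite differently: 'exact' mode counts raw substring occurrences, while 'regex' mode can constrain matches, for example to word boundaries. A standalone sketch of the distinction:

```python
import re

window = "the theatre of the absurd"
print(window.count("the"))  # 3: substring count also hits "theatre"
print(len(re.findall(r"\bthe\b", window, re.IGNORECASE | re.UNICODE)))  # 2
```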

_count_in_character_window(window: str, pattern: str) -> int ¤

Choose function for counting matches in character windows.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `window` | `str` | A string window. | *required* |
| `pattern` | `str` | A string pattern to search for. | *required* |

Returns:

| Type | Description |
| --- | --- |
| `int` | The number of occurrences of the pattern in the window. |

Source code in lexos/rolling_windows/calculators/base_calculator.py
def _count_in_character_window(self, window: str, pattern: str) -> int:
    """Choose function for counting matches in character windows.

    Args:
        window (str): A string window.
        pattern (str): A string pattern to search for.

    Returns:
        The number of occurrences of the pattern in the window.
    """
    if self.mode in ["exact", "regex"]:
        return self._count_character_patterns_in_character_windows(window, pattern)
    else:
        raise LexosException("Invalid mode for character windows.")

_count_token_patterns_in_token_lists(window: list[str], pattern: str) -> int ¤

Count patterns in lists of token strings.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `window` | `list[str]` | A window consisting of a list of strings. | *required* |
| `pattern` | `str` | A string pattern to search for. | *required* |

Returns:

| Type | Description |
| --- | --- |
| `int` | The number of occurrences of the pattern in the window. |

Source code in lexos/rolling_windows/calculators/base_calculator.py
def _count_token_patterns_in_token_lists(
    self, window: list[str], pattern: str
) -> int:
    """Count patterns in lists of token strings.

    Args:
        window (list[str]): A window consisting of a list of strings.
        pattern (str): A string pattern to search for.

    Returns:
        The number of occurrences of the pattern in the window.
    """
    if self.mode == "regex":
        return sum(
            [len(re.findall(pattern, token, self.regex_flags)) for token in window]
        )
    else:
        if not self.case_sensitive:
            window = [token.lower() for token in window]
            pattern = pattern.lower()
        return window.count(pattern)
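
A standalone sketch of the difference between the two branches: in 'exact' mode the pattern must equal an entire token, while 'regex' mode counts every match inside each token:

```python
import re

window = ["The", "cat", "concatenates", "cats"]
print(window.count("cat"))  # 1: only whole-token matches count
print(sum(len(re.findall("cat", token, re.IGNORECASE)) for token in window))  # 3
```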

_count_token_patterns_in_span(window: Span, pattern: list | str) -> int ¤

Count patterns in spans or docs.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `window` | `Span` | A window consisting of a list of spaCy spans or a spaCy doc. | *required* |
| `pattern` | `list \| str` | A string pattern or spaCy rule to search for. | *required* |

Returns:

| Type | Description |
| --- | --- |
| `int` | The number of occurrences of the pattern in the window. |

Source code in lexos/rolling_windows/calculators/base_calculator.py
def _count_token_patterns_in_span(self, window: Span, pattern: list | str) -> int:
    """Count patterns in spans or docs.

    Args:
        window (Span): A window consisting of a list of spaCy spans or a spaCy doc.
        pattern (list | str): A string pattern or spaCy rule to search for.

    Returns:
        The number of occurrences of the pattern in the window.
    """
    if self.mode == "exact":
        if not self.case_sensitive:
            window = [token.lower_ for token in window]
            pattern = pattern.lower()
        else:
            window = [token.text for token in window]
        return window.count(pattern)
    elif self.mode == "regex":
        return sum(
            [
                len(re.findall(pattern, token.text, self.regex_flags))
                for token in window
            ]
        )
    elif self.mode == "spacy_rule":
        if not self.case_sensitive:
            pattern = spacy_rule_to_lower(pattern)
        matcher = Matcher(self.nlp.vocab)
        matcher.add("Pattern", [pattern])
        return len(matcher(window))
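
The 'spacy_rule' branch delegates to spaCy's `Matcher`. A self-contained illustration using a blank pipeline (so no model download is required):

```python
import spacy
from spacy.matcher import Matcher

nlp = spacy.blank("en")
doc = nlp("Frodo and Sam followed Frodo.")
matcher = Matcher(nlp.vocab)
matcher.add("Pattern", [[{"LOWER": "frodo"}]])
print(len(matcher(doc)))  # 2
```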

_count_token_patterns_in_span_text(window: Span, pattern: str) -> int ¤

Count patterns in span or doc text with token alignment.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `window` | `Span` | A Span window. | *required* |
| `pattern` | `str` | A string pattern to search for. | *required* |

Returns:

| Type | Description |
| --- | --- |
| `int` | The number of occurrences of the pattern in the window. |

Source code in lexos/rolling_windows/calculators/base_calculator.py
def _count_token_patterns_in_span_text(self, window: Span, pattern: str) -> int:
    """Count patterns in span or doc text with token alignment.

    Args:
        window (Span): A Span window.
        pattern (str): A string pattern to search for.

    Returns:
        The number of occurrences of the pattern in the window.
    """
    count = 0
    if self.mode == "multi_token_exact":
        pattern = regex_escape(pattern)
    for match in re.finditer(pattern, window.text, self.regex_flags):
        start, end = match.span()
        span = window.char_span(start, end, alignment_mode=self.alignment_mode)
        if span is not None:
            count += 1
    return count
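
The `alignment_mode` setting decides what happens when a regex match falls inside a token rather than on its boundaries. A small illustration with `Doc.char_span` (blank pipeline, no model needed):

```python
import spacy

nlp = spacy.blank("en")
doc = nlp("The hobbits walked")
# Characters 4-10 cover "hobbit", which sits inside the token "hobbits"
print(doc.char_span(4, 10, alignment_mode="strict"))  # None (match discarded)
print(doc.char_span(4, 10, alignment_mode="expand"))  # hobbits
```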

_count_in_token_window(window: list[str] | list[Token] | Doc | Span, pattern: list | str) -> int ¤

Choose function for counting matches in token windows.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `window` | `list[str] \| list[Token] \| Doc \| Span` | A window consisting of a list of token strings, a list of spaCy spans, or a spaCy doc. | *required* |
| `pattern` | `list \| str` | A string pattern or spaCy rule to search for. | *required* |

Returns:

| Type | Description |
| --- | --- |
| `int` | The number of occurrences of the pattern in the window. |

Source code in lexos/rolling_windows/calculators/base_calculator.py
def _count_in_token_window(
    self, window: list[str] | list[Token] | Doc | Span, pattern: list | str
) -> int:
    """Choose function for counting matches in token windows.

    Args:
        window (list[str] | list[Token] | Doc | Span): A window consisting of a list of token strings, a list of spaCy spans, or a spaCy doc.
        pattern (list | str): A string pattern or spaCy rule to search for.

    Returns:
        The number of occurrences of the pattern in the window.
    """
    # spaCy rules and multi-token searches require a spaCy Doc or Span
    if self.mode in ["multi_token", "spacy_rule"] and not isinstance(
        window, (Doc, Span)
    ):
        raise LexosException(
            "You cannot use spaCy rules or perform multi-token searches with a string or list of token strings."
        )
    if isinstance(window, list) and all(isinstance(i, str) for i in window):
        # Match in single tokens
        return self._count_token_patterns_in_token_lists(window, pattern)
    elif isinstance(window, (Doc, Span)):
        # Iterate over the full text with token boundary alignment
        if self.mode.startswith("multi_token"):
            return self._count_token_patterns_in_span_text(window, pattern)
        # Match in single tokens
        else:
            return self._count_token_patterns_in_span(window, pattern)

_extract_string_pattern(pattern: list[list[dict[str, Any]]]) -> str ¤

Extract a string pattern from a spaCy rule.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `pattern` | `list[list[dict[str, Any]]]` | A list of spaCy rule patterns to search. | *required* |

Returns:

| Name | Type | Description |
| --- | --- | --- |
| `str` | `str` | A string pattern. |

Source code in lexos/rolling_windows/calculators/base_calculator.py
def _extract_string_pattern(self, pattern: list[list[dict[str, Any]]]) -> str:
    """Extract a string pattern from a spaCy rule.

    Args:
        pattern (list[list[dict[str, Any]]]): A list of spaCy rule patterns to search.

    Returns:
        str: A string pattern.
    """
    return "|".join(
        [
            item if isinstance(item, str) else list(item.values())[0]
            for item in list(flatten(pattern))
        ]
    )
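
For example, a rule with two alternatives collapses into a regex alternation; the same expression is shown here with the module-level `flatten` so it can run standalone:

```python
rule = [[{"TEXT": "Bilbo"}], [{"TEXT": "Frodo"}]]
print("|".join(
    item if isinstance(item, str) else list(item.values())[0]
    for item in flatten(rule)
))
# Bilbo|Frodo
```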

_get_window_count(window: list[str] | Span | str, pattern: list | str) -> int ¤

Call character or token window methods, as appropriate.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `window` | `list[str] \| Span \| str` | A window consisting of a list of token strings, a list of spaCy spans, a spaCy doc, or a string. | *required* |
| `pattern` | `list \| str` | A string pattern or spaCy rule to search for. | *required* |

Returns:

| Type | Description |
| --- | --- |
| `int` | The number of occurrences of the pattern in the window. |

Source code in lexos/rolling_windows/calculators/base_calculator.py
def _get_window_count(
    self, window: list[str] | Span | str, pattern: list | str
) -> int:
    """Call character or token window methods, as appropriate.

    Args:
        window (list[str] | Span | str): A window consisting of a list of token strings, a list of spaCy spans, a spaCy doc, or a string.
        pattern (list | str): A string pattern or spaCy rule to search for.

    Returns:
        The number of occurrences of the pattern in the window.
    """
    if self.window_type == "characters":
        return self._count_in_character_window(window, pattern)
    else:
        return self._count_in_token_window(window, pattern)

_set_attrs(attrs: dict) -> None ¤

Set instance attributes when public method is called.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `attrs` | `dict` | A dict of keyword arguments and their values. | *required* |
Source code in lexos/rolling_windows/calculators/base_calculator.py
def _set_attrs(self, attrs: dict) -> None:
    """Set instance attributes when public method is called.

    Args:
        attrs (dict): A dict of keyword arguments and their values.
    """
    for key, value in attrs.items():
        if value is not None:
            setattr(self, key, value)
            if key == "model":
                self.nlp = spacy.load(self.model)
