Skip to content

Counts¤

Counts pydantic-model ¤

Bases: BaseCalculator

A calculator for counting patterns in rolling windows.

Config:

  • default: validation_config

Fields:

Source code in lexos/rolling_windows/calculators/counts.py
class Counts(BaseCalculator):
    """A calculator for counting patterns in rolling windows.

    Calling an instance builds a matrix with one row per window and one
    column per pattern, stores it in `self.data`, and returns it. `to_df()`
    turns that matrix into a pandas DataFrame with one column per pattern.
    """

    _id: ClassVar[str] = "counts"

    def _pattern_list(self) -> Optional[list]:
        """Return `self.patterns` in iterable form.

        `patterns` is typed `list | str`. Iterating a bare string would yield
        its individual characters rather than the pattern itself, so a single
        string is wrapped in a one-element list. Any other value (including
        `None`) is returned unchanged.
        """
        if isinstance(self.patterns, str):
            return [self.patterns]
        return self.patterns

    @validate_call(config=validation_config)
    def __call__(
        self,
        patterns: Optional[list | str] = None,
        windows: Optional[Windows] = None,
        mode: Optional[bool | str] = None,
        case_sensitive: Optional[bool] = None,
        alignment_mode: Optional[str] = None,
        model: Optional[str] = None,
    ):
        """Count every pattern in every window.

        The supplied arguments are handed to `_set_attrs` before counting.

        Args:
            patterns (Optional[list | str]): A pattern or list of patterns to search for.
            windows (Optional[Windows]): The windows to search.
            mode (Optional[bool | str]): The search method to use.
            case_sensitive (Optional[bool]): Whether searches are case-sensitive.
            alignment_mode (Optional[str]): How searches snap to token boundaries.
            model (Optional[str]): The language model used for spaCy-based searching.

        Returns:
            list: One inner list per window, holding the value returned by
                `_get_window_count` for each pattern, in pattern order.

        Raises:
            LexosException: If the calculator's `windows` attribute is None.
        """
        attrs = {
            "patterns": patterns,
            "windows": windows,
            "mode": mode,
            "case_sensitive": case_sensitive,
            "alignment_mode": alignment_mode,
            "model": model,
        }
        self._set_attrs(attrs)
        if self.windows is None:
            raise LexosException("Calculator `windows` attribute is empty.")
        # Normalise once so a single string pattern is counted as a whole
        # instead of character by character.
        pattern_list = self._pattern_list()
        self.data = [
            [self._get_window_count(window, pattern) for pattern in pattern_list]
            for window in self.windows
        ]
        return self.data

    @validate_call(config=validation_config)
    def to_df(self, show_spacy_rules: Optional[bool] = False) -> pd.DataFrame:
        """Convert the data to a pandas dataframe.

        Args:
            show_spacy_rules (Optional[bool]): If True, use full spaCy rules for labels; otherwise use only the string pattern.

        Returns:
            pd.DataFrame: A DataFrame built from `self.data` with one column per pattern.
        """
        # Use the same normalisation as `__call__` so the labels line up with
        # the columns of `self.data`.
        patterns = self._pattern_list()
        if not show_spacy_rules:
            # Reduce list-form (spaCy rule) patterns to their string form.
            patterns = [
                self._extract_string_pattern(pattern)
                if isinstance(pattern, list)
                else pattern
                for pattern in patterns
            ]
        # Assign column labels, lower-casing them for case-insensitive searches
        cols = []
        for pattern in patterns:
            if not self.case_sensitive:
                if isinstance(pattern, str):
                    pattern = pattern.lower()
                elif isinstance(pattern, list):
                    pattern = spacy_rule_to_lower(pattern)
            cols.append(str(pattern))
        # Generate dataframe
        return pd.DataFrame(self.data, columns=cols)

alignment_mode: Optional[str] = 'strict' pydantic-field ¤

How searches are snapped to token boundaries. Values are 'strict', 'contract', and 'expand'.

case_sensitive: Optional[bool] = False pydantic-field ¤

Whether to make searches case-sensitive.

data: Optional[list] = [] pydantic-field ¤

A container for the calculated data.

metadata: dict property ¤

Return metadata for the calculator.

mode: Optional[bool | str] = 'exact' pydantic-field ¤

The search method to use ('exact', 'regex', 'spacy_rule', 'multi_token', 'multi_token_exact').

model: Optional[str] = 'xx_sent_ud_sm' pydantic-field ¤

The language model to be used for searching spaCy tokens/spans.

n property ¤

Get the number of units per window.

nlp: Optional[Language] = None pydantic-field ¤

The spaCy nlp object.

patterns: Optional[list | str] = None pydantic-field ¤

A pattern or list of patterns to search in windows.

regex_flags property ¤

Return regex flags based on case_sensitive setting.

window_type property ¤

Get the type of units in the windows.

windows: Optional[Windows] = None pydantic-field ¤

A Windows object containing the windows to search.

__call__(patterns: Optional[list | str] = None, windows: Optional[Windows] = None, mode: Optional[bool | str] = None, case_sensitive: Optional[bool] = None, alignment_mode: Optional[str] = None, model: Optional[str] = None) ¤

Call the calculator.

Source code in lexos/rolling_windows/calculators/counts.py
@validate_call(config=validation_config)
def __call__(
    self,
    patterns: Optional[list | str] = None,
    windows: Optional[Windows] = None,
    mode: Optional[bool | str] = None,
    case_sensitive: Optional[bool] = None,
    alignment_mode: Optional[str] = None,
    model: Optional[str] = None,
):
    """Count each pattern in each window and store the result in `self.data`.

    The keyword arguments are passed to `_set_attrs` before counting.

    Returns:
        The list stored in `self.data`: one inner list per window, with one
        `_get_window_count` result per pattern.

    Raises:
        LexosException: If the `windows` attribute is None.
    """
    call_args = {
        "patterns": patterns,
        "windows": windows,
        "mode": mode,
        "case_sensitive": case_sensitive,
        "alignment_mode": alignment_mode,
        "model": model,
    }
    self._set_attrs(call_args)
    # Fail early when there is nothing to search.
    if self.windows is None:
        raise LexosException("Calculator `windows` attribute is empty.")
    rows = []
    for window in self.windows:
        row = []
        for pattern in self.patterns:
            row.append(self._get_window_count(window, pattern))
        rows.append(row)
    self.data = rows
    return self.data

to_df(show_spacy_rules: Optional[bool] = False) -> pd.DataFrame ¤

Convert the data to a pandas dataframe.

Parameters:

Name Type Description Default
show_spacy_rules Optional[bool]

If True, use full spaCy rules for labels; otherwise use only the string pattern.

False

Returns:

Type Description
DataFrame

pd.DataFrame: A pandas DataFrame.

Source code in lexos/rolling_windows/calculators/counts.py
@validate_call(config=validation_config)
def to_df(self, show_spacy_rules: Optional[bool] = False) -> pd.DataFrame:
    """Build a pandas DataFrame from `self.data`, one column per pattern.

    Args:
        show_spacy_rules (Optional[bool]): If True, label columns with the full spaCy rules; otherwise label them with the extracted string pattern.

    Returns:
        pd.DataFrame: A pandas DataFrame.
    """

    def as_label(item) -> str:
        # Lower-case the label when searches are not case-sensitive.
        if not self.case_sensitive and isinstance(item, str):
            item = item.lower()
        elif not self.case_sensitive and isinstance(item, list):
            item = str(spacy_rule_to_lower(item))
        return str(item)

    if show_spacy_rules:
        label_source = self.patterns
    else:
        # List-form patterns are reduced to their string form.
        label_source = [
            self._extract_string_pattern(item) if isinstance(item, list) else item
            for item in self.patterns
        ]
    cols = [as_label(item) for item in label_source]
    return pd.DataFrame(self.data, columns=cols)

__call__(patterns: Optional[list | str] = None, windows: Optional[Windows] = None, mode: Optional[bool | str] = None, case_sensitive: Optional[bool] = None, alignment_mode: Optional[str] = None, model: Optional[str] = None) ¤

Call the calculator.

Source code in lexos/rolling_windows/calculators/counts.py
@validate_call(config=validation_config)
def __call__(
    self,
    patterns: Optional[list | str] = None,
    windows: Optional[Windows] = None,
    mode: Optional[bool | str] = None,
    case_sensitive: Optional[bool] = None,
    alignment_mode: Optional[str] = None,
    model: Optional[str] = None,
):
    """Count each pattern in each window and store the result in `self.data`.

    The keyword arguments are passed to `_set_attrs` before counting.

    Returns:
        The list stored in `self.data`: one inner list per window, with one
        `_get_window_count` result per pattern.

    Raises:
        LexosException: If the `windows` attribute is None.
    """
    call_args = {
        "patterns": patterns,
        "windows": windows,
        "mode": mode,
        "case_sensitive": case_sensitive,
        "alignment_mode": alignment_mode,
        "model": model,
    }
    self._set_attrs(call_args)
    # Fail early when there is nothing to search.
    if self.windows is None:
        raise LexosException("Calculator `windows` attribute is empty.")
    rows = []
    for window in self.windows:
        row = []
        for pattern in self.patterns:
            row.append(self._get_window_count(window, pattern))
        rows.append(row)
    self.data = rows
    return self.data

to_df(show_spacy_rules: Optional[bool] = False) -> pd.DataFrame ¤

Convert the data to a pandas dataframe.

Parameters:

Name Type Description Default
show_spacy_rules Optional[bool]

If True, use full spaCy rules for labels; otherwise use only the string pattern.

False

Returns:

Type Description
DataFrame

pd.DataFrame: A pandas DataFrame.

Source code in lexos/rolling_windows/calculators/counts.py
@validate_call(config=validation_config)
def to_df(self, show_spacy_rules: Optional[bool] = False) -> pd.DataFrame:
    """Build a pandas DataFrame from `self.data`, one column per pattern.

    Args:
        show_spacy_rules (Optional[bool]): If True, label columns with the full spaCy rules; otherwise label them with the extracted string pattern.

    Returns:
        pd.DataFrame: A pandas DataFrame.
    """

    def as_label(item) -> str:
        # Lower-case the label when searches are not case-sensitive.
        if not self.case_sensitive and isinstance(item, str):
            item = item.lower()
        elif not self.case_sensitive and isinstance(item, list):
            item = str(spacy_rule_to_lower(item))
        return str(item)

    if show_spacy_rules:
        label_source = self.patterns
    else:
        # List-form patterns are reduced to their string form.
        label_source = [
            self._extract_string_pattern(item) if isinstance(item, list) else item
            for item in self.patterns
        ]
    cols = [as_label(item) for item in label_source]
    return pd.DataFrame(self.data, columns=cols)