Skip to content

String Milestones¤

Class for handling text string milestones.

StringMilestones pydantic-model ¤

Bases: BaseModel

String Milestones class.

Milestones object for text strings or spaCy Doc objects to be treated as strings.

Config:

  • arbitrary_types_allowed: True
  • json_schema_extra: DocJSONSchema.schema()

Fields:

  • doc (Doclike)
  • patterns (str | list[str])
  • case_sensitive (bool)
  • flags (Enum)
  • _spans (list)
Source code in lexos/milestones/string_milestones.py
class StringMilestones(BaseModel):
    """String Milestones class.

    Milestones object for text strings or spaCy Doc objects to
    be treated as strings. Each regex match of ``patterns`` in the text is
    stored as a ``StringSpan``; the stored spans are available through the
    ``spans`` property and by iterating over the object.
    """

    doc: Doclike = Field(
        json_schema_extra={"description": "A string or spaCy Doc object."}
    )

    # A default of None means "no patterns supplied": __init__ then skips the
    # initial search and ``spans`` stays empty until ``set`` is given patterns.
    patterns: str | list[str] = Field(
        default=None,
        json_schema_extra={"description": "The pattern(s) used to match milestones."},
    )
    case_sensitive: bool = Field(
        default=True,
        json_schema_extra={
            "description": "Whether to perform case-sensitive searches."
        },
    )
    flags: Enum = Field(
        default=case_sensitive_flags,
        json_schema_extra={"description": "The regex flags to use."},
    )

    model_config = ConfigDict(
        arbitrary_types_allowed=True, json_schema_extra=DocJSONSchema.schema()
    )

    def __init__(self, **data) -> None:
        """Validate the fields, normalise the patterns and run the first search.

        Args:
            **data: Field values for the model (``doc``, ``patterns``,
                ``case_sensitive``, ``flags``).
        """
        super().__init__(**data)
        self._spans: list = []
        if not self.case_sensitive:
            self.flags = case_insensitive_flags
        self.patterns = ensure_list(self.patterns)
        # NOTE(review): ensure_list(None) presumably yields [None]; that value
        # marks "no patterns supplied", so no search is run yet.
        if self.patterns != [None]:
            self.set()

    @property
    def spans(self) -> list[StringSpan]:
        """Return the Spans.

        Returns:
            list[StringSpan]: The spans found by the most recent call to
                ``set``, or an empty list if nothing has been found.
        """
        return self._spans or []

    def __iter__(self) -> Iterator:
        """Make the class iterable.

        Returns:
            Iterator: A generator containing the object's spans.
        """
        return (span for span in self.spans)

    def _set_case_sensitivity(self, case_sensitive: Optional[bool] = None) -> None:
        """Set the object's case sensitivity and the matching regex flags.

        ``flags`` is always reassigned here to either the case-sensitive or
        the case-insensitive flag set, replacing whatever value it held.

        Args:
            case_sensitive (optional, bool): Whether or not to use
                case-sensitive searching. If None, the object's current
                ``case_sensitive`` value is kept.
        """
        if case_sensitive is not None:
            self.case_sensitive = case_sensitive
        if self.case_sensitive is True:
            self.flags = case_sensitive_flags
        else:
            self.flags = case_insensitive_flags

    @validate_call()
    def set(
        self,
        patterns: Optional[str | list[str]] = None,
        case_sensitive: Optional[bool] = None,
    ) -> None:
        """Find the milestones in the text and store them as spans.

        The result is not returned; read it from ``spans`` or iterate over
        the object. Spans from all patterns are merged and sorted by their
        start index.

        Args:
            patterns (Optional[str | list[str]]): The pattern(s) used to match
                milestones. If omitted (or empty), the object's current
                patterns are used.
            case_sensitive (bool, optional): Whether to perform case-sensitive
                searches. If None, the object's current setting is kept.

        Note:
            If no parameters are set, the method will use the object's current
            patterns and case sensitivity. If the object has no patterns at
            all, the stored spans are reset to an empty list.
        """
        if patterns:
            self.patterns = ensure_list(patterns)
        self._set_case_sensitivity(case_sensitive)
        text = self.doc if isinstance(self.doc, str) else self.doc.text
        all_matches = []
        for pattern in self.patterns:
            # Skip the None placeholder left when no pattern was ever
            # supplied; re.finditer(None, ...) would raise a TypeError.
            if pattern is None:
                continue
            matches = re.finditer(pattern, text, self.flags)
            all_matches.extend(
                StringSpan(text=match.group(), start=match.start(), end=match.end())
                for match in matches
            )
        all_matches.sort(key=lambda span: span.start)
        self._spans = all_matches

spans: list[StringSpan] property ¤

Return the Spans.

Returns:

Type Description
list[StringSpan]

list[StringSpan]: A list of StringSpans.

__init__(**data) -> None ¤

Set the regex flags, normalise the patterns to a list, and run the initial milestone search after initialization.

Source code in lexos/milestones/string_milestones.py
def __init__(self, **data) -> None:
    """Finish construction after the parent initialiser has processed the fields.

    Clears the stored spans, switches to the case-insensitive flags when
    ``case_sensitive`` is false, normalises ``patterns`` to a list and, if
    any patterns were supplied, runs the initial search.
    """
    super().__init__(**data)
    self._spans: list = []
    if not self.case_sensitive:
        self.flags = case_insensitive_flags
    self.patterns = ensure_list(self.patterns)
    if self.patterns == [None]:
        # No patterns were supplied, so there is nothing to search for yet.
        return
    self.set()

__iter__() -> Iterator ¤

Make the class iterable.

Returns:

Name Type Description
Iterator Iterator

A generator containing the object's spans.

Source code in lexos/milestones/string_milestones.py
def __iter__(self) -> Iterator:
    """Iterate over the object's milestone spans.

    Returns:
        Iterator: A generator yielding each item of ``self.spans`` in order.
    """
    found = self.spans
    return (item for item in found)

set(patterns: Optional[str | list[str]] = None, case_sensitive: Optional[bool] = None) -> None ¤

Find the milestones in the text and store them as spans (nothing is returned; read the result from the spans property).

Parameters:

Name Type Description Default
patterns Optional[str | list[str]]

The pattern(s) used to match milestones.

None
case_sensitive bool

Whether to perform case-sensitive searches. If None, the object's current setting is kept.

None
Note

If no parameters are set, the method will use the object's current patterns and case sensitivity.

Source code in lexos/milestones/string_milestones.py
@validate_call()
def set(
    self,
    patterns: Optional[str | list[str]] = None,
    case_sensitive: Optional[bool] = None,
) -> None:
    """Find the milestones in the text and store them as spans.

    The result is not returned; it is stored on the object as ``_spans``.
    Spans from all patterns are merged and sorted by their start index.

    Args:
        patterns (Optional[str | list[str]]): The pattern(s) used to match
            milestones. If omitted (or empty), the object's current patterns
            are used.
        case_sensitive (bool, optional): Whether to perform case-sensitive
            searches. If None, the object's current setting is kept.

    Note:
        If no parameters are set, the method will use the object's current
        patterns and case sensitivity. If the object has no patterns at all,
        the stored spans are reset to an empty list.
    """
    if patterns:
        self.patterns = ensure_list(patterns)
    self._set_case_sensitivity(case_sensitive)
    text = self.doc if isinstance(self.doc, str) else self.doc.text
    all_matches = []
    for pattern in self.patterns:
        # Skip a None placeholder (no pattern ever supplied);
        # re.finditer(None, ...) would raise a TypeError.
        if pattern is None:
            continue
        matches = re.finditer(pattern, text, self.flags)
        all_matches.extend(
            StringSpan(text=match.group(), start=match.start(), end=match.end())
            for match in matches
        )
    all_matches.sort(key=lambda span: span.start)
    self._spans = all_matches

__init__(**data) -> None ¤

Set the regex flags, normalise the patterns to a list, and run the initial milestone search after initialization.

Source code in lexos/milestones/string_milestones.py
def __init__(self, **data) -> None:
    """Finish construction after the parent initialiser has processed the fields.

    Clears the stored spans, switches to the case-insensitive flags when
    ``case_sensitive`` is false, normalises ``patterns`` to a list and, if
    any patterns were supplied, runs the initial search.
    """
    super().__init__(**data)
    self._spans: list = []
    if not self.case_sensitive:
        self.flags = case_insensitive_flags
    self.patterns = ensure_list(self.patterns)
    if self.patterns == [None]:
        # No patterns were supplied, so there is nothing to search for yet.
        return
    self.set()

__iter__() -> Iterator ¤

Make the class iterable.

Returns:

Name Type Description
Iterator Iterator

A generator containing the object's spans.

Source code in lexos/milestones/string_milestones.py
def __iter__(self) -> Iterator:
    """Iterate over the object's milestone spans.

    Returns:
        Iterator: A generator yielding each item of ``self.spans`` in order.
    """
    found = self.spans
    return (item for item in found)

spans: list[StringSpan] property ¤

Return the Spans.

Returns:

Type Description
list[StringSpan]

list[StringSpan]: A list of StringSpans.

_set_case_sensitivity(case_sensitive: Optional[bool] = None) -> None ¤

Set the object's case sensitivity.

Parameters:

Name Type Description Default
case_sensitive (optional, bool)

Whether or not to use case-sensitive searching.

None
Source code in lexos/milestones/string_milestones.py
def _set_case_sensitivity(self, case_sensitive: Optional[bool] = None) -> None:
    """Update the case-sensitivity setting and pick the matching regex flags.

    Args:
        case_sensitive (optional, bool): Whether or not to use case-sensitive
            searching. If None, the current ``case_sensitive`` value is kept;
            ``flags`` is reassigned from it either way.
    """
    if case_sensitive is not None:
        self.case_sensitive = case_sensitive
    self.flags = (
        case_sensitive_flags
        if self.case_sensitive is True
        else case_insensitive_flags
    )

set(patterns: Optional[str | list[str]] = None, case_sensitive: Optional[bool] = None) -> None ¤

Find the milestones in the text and store them as spans (nothing is returned; read the result from the spans property).

Parameters:

Name Type Description Default
patterns Optional[str | list[str]]

The pattern(s) used to match milestones.

None
case_sensitive bool

Whether to perform case-sensitive searches. If None, the object's current setting is kept.

None
Note

If no parameters are set, the method will use the object's current patterns and case sensitivity.

Source code in lexos/milestones/string_milestones.py
@validate_call()
def set(
    self,
    patterns: Optional[str | list[str]] = None,
    case_sensitive: Optional[bool] = None,
) -> None:
    """Find the milestones in the text and store them as spans.

    The result is not returned; it is stored on the object as ``_spans``.
    Spans from all patterns are merged and sorted by their start index.

    Args:
        patterns (Optional[str | list[str]]): The pattern(s) used to match
            milestones. If omitted (or empty), the object's current patterns
            are used.
        case_sensitive (bool, optional): Whether to perform case-sensitive
            searches. If None, the object's current setting is kept.

    Note:
        If no parameters are set, the method will use the object's current
        patterns and case sensitivity. If the object has no patterns at all,
        the stored spans are reset to an empty list.
    """
    if patterns:
        self.patterns = ensure_list(patterns)
    self._set_case_sensitivity(case_sensitive)
    text = self.doc if isinstance(self.doc, str) else self.doc.text
    all_matches = []
    for pattern in self.patterns:
        # Skip a None placeholder (no pattern ever supplied);
        # re.finditer(None, ...) would raise a TypeError.
        if pattern is None:
            continue
        matches = re.finditer(pattern, text, self.flags)
        all_matches.extend(
            StringSpan(text=match.group(), start=match.start(), end=match.end())
            for match in matches
        )
    all_matches.sort(key=lambda span: span.start)
    self._spans = all_matches

StringSpan pydantic-model ¤

Bases: BaseModel

StringSpan class.

A Pydantic model containing the milestone text, and the start and end character indices of the milestone in the original text.

Fields:

  • text (str)
  • start (int)
  • end (int)
Source code in lexos/milestones/string_milestones.py
class StringSpan(BaseModel):
    """StringSpan class.

    A Pydantic model containing the milestone text, and the start
    and end character indices of the milestone in the original text.
    """

    # The matched milestone text.
    text: str
    # Character index in the original text where the milestone starts.
    start: int
    # Character index in the original text where the milestone ends.
    end: int