Skip to content

Scrubber¤

The scrubber component of Scrubber contains a class for managing scrubbing pipelines.

lexos.scrubber.scrubber.Scrubber ¤

Scrubber class.

Sample usage

    scrubber = Scrubber()
    scrubber.to_lower(doc)

Source code in lexos\scrubber\scrubber.py
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
class Scrubber:
    """Manage a scrubbing pipeline and collect the texts it produces.

    Instance attributes:
        texts: List that `scrub()` appends every result to.
        pipeline: Whatever `pipeline.make_pipeline()` returned, or `None`
            until `add_pipeline()` or `set_pipeline()` has been called.

    Sample usage:

        scrubber = Scrubber()
        scrubber.add_pipeline(to_lower)  # any callables taking and returning str
        scrubbed = scrubber.scrub(doc)
    """

    def __init__(self):
        """Initialize the Scrubber class."""
        self.texts = []
        self.pipeline = None

    def add_pipeline(self, *funcs: Callable[[str], str]):
        """Add a pipeline.

        The collected functions are forwarded to `pipeline.make_pipeline()`
        as a single tuple argument and the result is stored on
        `self.pipeline`, replacing any previous pipeline.

        Args:
            *funcs: The functions to add to the pipeline.
        """
        self.pipeline = pipeline.make_pipeline(funcs)

    def get_pipeline(self) -> tuple:
        """Return a tuple representation of the pipeline.

        Each item is a function's `__name__`. When the function has
        keyword-only defaults, the item is instead a
        `(name, keyword-only defaults dict)` pair.

        Returns:
            tuple: One item per function found by iterating `self.pipeline`,
                or an empty tuple when no pipeline has been set yet.
        """
        # A freshly constructed Scrubber has no pipeline; report that as
        # "nothing in the pipeline" instead of failing on iterating None.
        if self.pipeline is None:
            return ()
        described = []
        for func in self.pipeline:
            # Inspect each function once and reuse the result.
            kwonly_defaults = getfullargspec(func).kwonlydefaults
            if kwonly_defaults:
                described.append((func.__name__, kwonly_defaults))
            else:
                described.append(func.__name__)
        return tuple(described)

    def set_pipeline(self, pipeline: tuple):
        """Set the pipeline.

        This is a variant of add_pipeline that takes a tuple describing the
        functions. The difference is that function names are given as
        strings and keyword arguments as a dictionary. This is useful if you
        wanted to modify the pipeline after initialisation based on the
        output of `get_pipeline()`, rather than passing callables.

        Args:
            pipeline (tuple): Items are either a function name (str) or a
                `(function name, keyword-arguments dict)` pair.

        Raises:
            KeyError: If no module-level `pipeline` object is available.
        """
        # The `pipeline` parameter shadows the module-level `pipeline` object
        # that add_pipeline() uses, so look that object up explicitly.
        pipeline_module = globals()["pipeline"]
        new_pipeline = []
        for x in pipeline:
            # NOTE(security): eval() evaluates the given string as a Python
            # expression in this module's namespace. Only pass trusted names.
            if isinstance(x, tuple):
                # NOTE(review): assumes the `pipeline` object provides
                # `pipe(func, **kwargs)`; the previous code called `.pipe` on
                # the local list, which always raised AttributeError - confirm.
                new_pipeline.append(pipeline_module.pipe(eval(x[0]), **x[1]))
            else:
                new_pipeline.append(eval(x))
        self.pipeline = pipeline_module.make_pipeline(new_pipeline)

    def scrub(self, data: Union[List[str], str]) -> List[str]:
        """Scrub a text or list of texts.

        Each text is passed to `self.pipeline[0]` and the result is appended
        to `self.texts`, so results accumulate across calls on one instance.

        Args:
            data (Union[List[str], str]): The text or list of texts to scrub.

        Returns:
            list: `self.texts`, holding every scrubbed text appended so far.
        """
        for text in utils.ensure_list(data):
            self.texts.append(self.pipeline[0](text))
        return self.texts

__init__() ¤

Initialize the Scrubber class.

Source code in lexos\scrubber\scrubber.py
22
23
24
25
def __init__(self):
    """Create a scrubber with no pipeline and no scrubbed texts."""
    # No pipeline exists until add_pipeline() or set_pipeline() is called.
    self.pipeline = None
    # scrub() appends its results to this list.
    self.texts = []

add_pipeline(*funcs) ¤

Add a pipeline.

Parameters:

Name Type Description Default
*funcs Callable[[str], str]

The functions to add to the pipeline.

()
Source code in lexos\scrubber\scrubber.py
27
28
29
30
31
32
33
def add_pipeline(self, *funcs: Callable[[str], str]):
    """Build a pipeline from the given functions and store it.

    Args:
        *funcs: The functions to add to the pipeline. They are forwarded
            to `pipeline.make_pipeline()` together, as one tuple argument.
    """
    built = pipeline.make_pipeline(funcs)
    # Replaces whatever pipeline was stored before.
    self.pipeline = built

get_pipeline() ¤

Return a tuple representation of the pipeline.

Source code in lexos\scrubber\scrubber.py
35
36
37
38
39
40
41
42
43
def get_pipeline(self) -> tuple:
    """Return a tuple representation of the pipeline.

    Each item is a function's `__name__`. When the function has
    keyword-only defaults, the item is instead a
    `(name, keyword-only defaults dict)` pair.

    Returns:
        tuple: One item per function found by iterating `self.pipeline`,
            or an empty tuple when no pipeline has been set yet.
    """
    # A freshly constructed Scrubber has no pipeline; report that as
    # "nothing in the pipeline" instead of failing on iterating None.
    if self.pipeline is None:
        return ()
    described = []
    for func in self.pipeline:
        # Inspect each function once and reuse the result.
        kwonly_defaults = getfullargspec(func).kwonlydefaults
        if kwonly_defaults:
            described.append((func.__name__, kwonly_defaults))
        else:
            described.append(func.__name__)
    return tuple(described)

scrub(data) ¤

Scrub a text or list of texts.

Parameters:

Name Type Description Default
data Union[List[str], str]

The text or list of texts to scrub.

required

Returns:

Name Type Description
list List[str]

A list of scrubbed texts.

Source code in lexos\scrubber\scrubber.py
65
66
67
68
69
70
71
72
73
74
75
76
def scrub(self, data: Union[List[str], str]) -> List[str]:
    """Run the pipeline over one text or several and collect the results.

    Args:
        data (Union[List[str], str]): A single text or a list of texts.

    Returns:
        list: `self.texts`, which holds every result appended so far,
            including those from earlier calls on the same instance.
    """
    # extend() consumes the generator item by item, so each result is
    # stored as soon as it is produced, just as an append loop would do.
    self.texts.extend(
        self.pipeline[0](item) for item in utils.ensure_list(data)
    )
    return self.texts

set_pipeline(pipeline) ¤

Set the pipeline.

This is a variant of `add_pipeline` that takes a tuple describing the functions rather than the callables themselves: function names are given as strings, and keyword arguments as a dictionary paired with the name. This is useful if you want to modify the pipeline after initialisation based on the output of `get_pipeline()`, rather than passing callables.

Parameters:

Name Type Description Default
pipeline tuple

A tuple whose items are either a function name (string) or a (function name, keyword-arguments dictionary) pair.

required
Source code in lexos\scrubber\scrubber.py
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
def set_pipeline(self, pipeline: tuple):
    """Set the pipeline.

    This is a variant of add_pipeline that takes a tuple describing the
    functions. The difference is that function names are given as strings
    and keyword arguments as a dictionary. This is useful if you wanted to
    modify the pipeline after initialisation based on the output of
    `get_pipeline()`, rather than passing callables.

    Args:
        pipeline (tuple): Items are either a function name (str) or a
            `(function name, keyword-arguments dict)` pair.

    Raises:
        KeyError: If no module-level `pipeline` object is available.
    """
    # The `pipeline` parameter shadows the module-level `pipeline` object
    # that add_pipeline() uses, so look that object up explicitly.
    pipeline_module = globals()["pipeline"]
    new_pipeline = []
    for x in pipeline:
        # NOTE(security): eval() evaluates the given string as a Python
        # expression in this module's namespace. Only pass trusted names.
        if isinstance(x, tuple):
            # NOTE(review): assumes the `pipeline` object provides
            # `pipe(func, **kwargs)`; the previous code called `.pipe` on
            # the local list, which always raised AttributeError - confirm.
            new_pipeline.append(pipeline_module.pipe(eval(x[0]), **x[1]))
        else:
            new_pipeline.append(eval(x))
    self.pipeline = pipeline_module.make_pipeline(new_pipeline)