Skip to content

Cluster¤

Cluster is a module that performs various types of cluster analysis. Currently, the only implementation is hierarchical agglomerative clustering, which is implemented through the Dendrogram class.

lexos.cluster.dendrogram.Dendrogram ¤

Dendrogram.

Typical usage:

from lexos.cluster.dendrogram import Dendrogram

dendrogram = Dendrogram(dtm, show=True)

or

dendrogram = Dendrogram(dtm, show=False)
dendrogram.fig
Source code in lexos\cluster\dendrogram.py
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
class Dendrogram:
    """Hierarchical agglomerative clustering dendrogram built from a DTM.

    Typical usage:

    ```python
    from lexos.cluster.dendrogram import Dendrogram

    dendrogram = Dendrogram(dtm, show=True)

    or

    dendrogram = Dendrogram(dtm, show=False)
    dendrogram.fig
    ```
    """

    def __init__(
        self,
        dtm: Any,
        labels: List[str] = None,
        metric: str = "euclidean",
        method: str = "average",
        truncate_mode: str = None,
        color_threshold: Union[float, str] = None,
        get_leaves: bool = True,
        orientation: str = "top",
        count_sort: Union[bool, str] = None,
        distance_sort: Union[bool, str] = None,
        show_leaf_counts: bool = False,
        no_plot: bool = False,
        no_labels: bool = False,
        leaf_rotation: int = 90,
        leaf_font_size: int = None,
        leaf_label_func: Callable = None,
        show_contracted: bool = False,
        link_color_func: Callable = None,
        ax=None,
        above_threshold_color: str = "C0",
        title: str = None,
        figsize: tuple = (10, 10),
        show: bool = False,
    ) -> None:
        """Initialise the Dendrogram and build the figure.

        Args:
            dtm (Any): An object whose `get_table()` method returns a table
                with a "terms" column and one further column per document
                (presumably a pandas DataFrame; confirm against the DTM class).
            labels (List[str]): Leaf labels. Defaults to the document column
                names of the DTM table.
            metric (str): Distance metric passed to `pdist`.
            method (str): Linkage method passed to `sch.linkage`.
            title (str): Optional title for the plot.
            figsize (tuple): Size of the figure created when `ax` is None.
            show (bool): If False, a figure created here is closed after
                building; it remains available as `self.fig`.
            ax: Optional existing matplotlib axes to draw on.

        All other arguments are stored and passed unchanged to
        `sch.dendrogram`.
        """
        # Keep every option on the instance so that `build()` can be re-run
        self.dtm = dtm
        self.labels = labels
        self.metric = metric
        self.method = method
        self.truncate_mode = truncate_mode
        self.color_threshold = color_threshold
        self.get_leaves = get_leaves
        self.orientation = orientation
        self.count_sort = count_sort
        self.distance_sort = distance_sort
        self.show_leaf_counts = show_leaf_counts
        self.no_plot = no_plot
        self.no_labels = no_labels
        self.leaf_rotation = leaf_rotation
        self.leaf_font_size = leaf_font_size
        self.leaf_label_func = leaf_label_func
        self.show_contracted = show_contracted
        self.link_color_func = link_color_func
        self.ax = ax
        self.above_threshold_color = above_threshold_color
        self.title = title
        self.figsize = figsize
        self.show = show

        # Set "terms" as the index and transpose the table so that each row
        # is one document
        self.dtm_table = dtm.get_table().set_index("terms").T

        # Use default labels from the DTM table. Taking them from the row
        # index (rather than slicing off the first column name) does not
        # depend on "terms" being the first column.
        if self.labels is None:
            self.labels = self.dtm_table.index.tolist()

        # Build the dendrogram
        self.build()

    def build(self) -> None:
        """Build a dendrogram and store its figure in `self.fig`.

        If `ax` was supplied, the dendrogram is drawn on it and `self.fig`
        is that axes' figure; no new figure is created and the caller's
        figure is never closed. Otherwise a new figure of size `figsize` is
        created and, unless `show` is True, closed after plotting.
        """
        # Create the distance and linkage matrixes for matplotlib
        X = pdist(self.dtm_table, metric=self.metric)
        Z = sch.linkage(X, self.method)

        created_here = self.ax is None
        if created_here:
            fig, ax = plt.subplots(figsize=self.figsize)
        else:
            ax = self.ax
            fig = ax.get_figure()

        if self.title:
            ax.set_title(self.title)

        # `ax=self.ax` is passed as given: when it is None SciPy draws on the
        # current axes, which is the one created just above.
        sch.dendrogram(
            Z,
            labels=self.labels,
            truncate_mode=self.truncate_mode,
            color_threshold=self.color_threshold,
            get_leaves=self.get_leaves,
            orientation=self.orientation,
            count_sort=self.count_sort,
            distance_sort=self.distance_sort,
            show_leaf_counts=self.show_leaf_counts,
            no_plot=self.no_plot,
            no_labels=self.no_labels,
            leaf_rotation=self.leaf_rotation,
            leaf_font_size=self.leaf_font_size,
            leaf_label_func=self.leaf_label_func,
            show_contracted=self.show_contracted,
            link_color_func=self.link_color_func,
            ax=self.ax,
            above_threshold_color=self.above_threshold_color,
        )
        self.fig = fig

        # Only close a figure that this method created
        if created_here and not self.show:
            plt.close(fig)

    def savefig(self, filename: str) -> None:
        """Save the figure to a file.

        Args:
            filename (str): The name of the file to save.
        """
        self.fig.savefig(filename)

    def showfig(self):
        """Return the figure so that it can be displayed.

        This is a helper method. You can also reference the figure
        using `Dendrogram.fig`. This will generally display in a
        Jupyter notebook.
        """
        return self.fig

__init__(dtm, labels=None, metric='euclidean', method='average', truncate_mode=None, color_threshold=None, get_leaves=True, orientation='top', count_sort=None, distance_sort=None, show_leaf_counts=False, no_plot=False, no_labels=False, leaf_rotation=90, leaf_font_size=None, leaf_label_func=None, show_contracted=False, link_color_func=None, ax=None, above_threshold_color='C0', title=None, figsize=(10, 10), show=False) ¤

Initialise the Dendrogram.

Source code in lexos\cluster\dendrogram.py
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
def __init__(
    self,
    dtm: Any,
    labels: List[str] = None,
    metric: str = "euclidean",
    method: str = "average",
    truncate_mode: str = None,
    color_threshold: Union[float, str] = None,
    get_leaves: bool = True,
    orientation: str = "top",
    count_sort: Union[bool, str] = None,
    distance_sort: Union[bool, str] = None,
    show_leaf_counts: bool = False,
    no_plot: bool = False,
    no_labels: bool = False,
    leaf_rotation: int = 90,
    leaf_font_size: int = None,
    leaf_label_func: Callable = None,
    show_contracted: bool = False,
    link_color_func: Callable = None,
    ax=None,
    above_threshold_color: str = "C0",
    title: str = None,
    figsize: tuple = (10, 10),
    show: bool = False,
) -> None:
    """Initialise the Dendrogram and build the figure.

    Args:
        dtm (Any): An object whose `get_table()` method returns a table
            with a "terms" column and one further column per document
            (presumably a pandas DataFrame; confirm against the DTM class).
        labels (List[str]): Leaf labels. Defaults to the document column
            names of the DTM table.

    Every argument is stored on the instance under the same name before
    `self.build()` is called.
    """
    # Keep every option on the instance for use by `build()`
    self.dtm = dtm
    self.labels = labels
    self.metric = metric
    self.method = method
    self.truncate_mode = truncate_mode
    self.color_threshold = color_threshold
    self.get_leaves = get_leaves
    self.orientation = orientation
    self.count_sort = count_sort
    self.distance_sort = distance_sort
    self.show_leaf_counts = show_leaf_counts
    self.no_plot = no_plot
    self.no_labels = no_labels
    self.leaf_rotation = leaf_rotation
    self.leaf_font_size = leaf_font_size
    self.leaf_label_func = leaf_label_func
    self.show_contracted = show_contracted
    self.link_color_func = link_color_func
    self.ax = ax
    self.above_threshold_color = above_threshold_color
    self.title = title
    self.figsize = figsize
    self.show = show

    # Set "terms" as the index and transpose the table so that each row
    # is one document
    self.dtm_table = dtm.get_table().set_index("terms").T

    # Use default labels from the DTM table. Taking them from the row
    # index (rather than slicing off the first column name) does not
    # depend on "terms" being the first column.
    if self.labels is None:
        self.labels = self.dtm_table.index.tolist()

    # Build the dendrogram
    self.build()

build() ¤

Build a dendrogram.

Source code in lexos\cluster\dendrogram.py
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
def build(self):
    """Build a dendrogram and store its figure in `self.fig`.

    If `self.ax` is set, the dendrogram is drawn on it and `self.fig` is
    that axes' figure; no new figure is created and the caller's figure is
    never closed. Otherwise a new figure of size `self.figsize` is created
    and, unless `self.show` is True, closed after plotting.
    """
    # Create the distance and linkage matrixes for matplotlib
    X = pdist(self.dtm_table, metric=self.metric)
    Z = sch.linkage(X, self.method)

    created_here = self.ax is None
    if created_here:
        fig, ax = plt.subplots(figsize=self.figsize)
    else:
        ax = self.ax
        fig = ax.get_figure()

    if self.title:
        ax.set_title(self.title)

    # `ax=self.ax` is passed as given: when it is None SciPy draws on the
    # current axes, which is the one created just above.
    sch.dendrogram(
        Z,
        labels=self.labels,
        truncate_mode=self.truncate_mode,
        color_threshold=self.color_threshold,
        get_leaves=self.get_leaves,
        orientation=self.orientation,
        count_sort=self.count_sort,
        distance_sort=self.distance_sort,
        show_leaf_counts=self.show_leaf_counts,
        no_plot=self.no_plot,
        no_labels=self.no_labels,
        leaf_rotation=self.leaf_rotation,
        leaf_font_size=self.leaf_font_size,
        leaf_label_func=self.leaf_label_func,
        show_contracted=self.show_contracted,
        link_color_func=self.link_color_func,
        ax=self.ax,
        above_threshold_color=self.above_threshold_color,
    )
    self.fig = fig

    # Only close a figure that this method created
    if created_here and not self.show:
        plt.close(fig)

savefig(filename) ¤

Save the figure to a file.

Parameters:

Name Type Description Default
filename str

The name of the file to save.

required
Source code in lexos\cluster\dendrogram.py
125
126
127
128
129
130
131
def savefig(self, filename: str):
    """Save the dendrogram figure to a file.

    Args:
        filename (str): The name of the file to save.
    """
    figure = self.fig
    figure.savefig(filename)

showfig() ¤

Show the figure if it is hidden.

This is a helper method. You can also reference the figure using Dendrogram.fig. This will generally display in a Jupyter notebook.

Source code in lexos\cluster\dendrogram.py
133
134
135
136
137
138
139
140
def showfig(self):
    """Return the stored figure so that it can be displayed.

    A convenience accessor: the same object is available directly as
    `Dendrogram.fig`. Returning it will generally cause it to be
    displayed in a Jupyter notebook.
    """
    figure = self.fig
    return figure