Skip to content

Merge¤

Merge is a set of standalone functions for merging strings, spaCy Docs, and files.

lexos.cutter.merge.merge(segments, sep=None) ¤

Merge a list of segments into a single string (for string segments) or a single spaCy Doc (for Doc segments).

Parameters:

Name Type Description Default
segments Union[List[str], List[spacy.tokens.doc.Doc]]

The list of segments to merge.

required
sep str

The separator to use when merging strings. Defaults to None.

None
Source code in lexos\cutter\merge.py
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
def merge(segments: Union[List[str], List[spacy.tokens.doc.Doc]], sep=None):
    """Combine a list of segments into one object.

    Args:
        segments (Union[List[str], List[spacy.tokens.doc.Doc]]): The segments
            to combine. Every item must be a string, or every item must be a
            spaCy Doc.
        sep (str, optional): The separator placed between string segments.
            When None, the strings are joined with no separator. It is not
            used when the segments are Docs.

    Returns:
        The joined string when all segments are strings, otherwise the result
        of ``Doc.from_docs`` when all segments are spaCy Docs.

    Raises:
        LexosException: If the segments are neither all strings nor all
            spaCy Docs.
    """
    # String case: join with the requested separator (empty when omitted).
    if all(isinstance(item, str) for item in segments):
        joiner = "" if sep is None else sep
        return joiner.join(segments)

    # Doc case: hand the whole list over to spaCy.
    if all(isinstance(item, spacy.tokens.doc.Doc) for item in segments):
        return Doc.from_docs(segments)

    raise LexosException(
        "All segments must be either strings or spacy.tokens.doc.Doc."
    )

lexos.cutter.merge.merge_files(segment_files, output_file='merged_files.txt', binary=False) ¤

Merge a list of files into a single output file.

Parameters:

Name Type Description Default
segment_files List[str]

List of files to be merged.

required
output_file str

The name of the output file.

'merged_files.txt'
binary bool

Whether to read and write files as binary. Defaults to False.

False
Source code in lexos\cutter\merge.py
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
def merge_files(
    segment_files: List[str],
    output_file: str = "merged_files.txt",
    binary: bool = False,
) -> None:
    """Concatenate the contents of several files into one output file.

    The files are copied in the order given, with nothing inserted between
    them. The output file is created, or truncated if it already exists.

    Args:
        segment_files (List[str]): Paths of the files to be merged, in order.
        output_file (str, optional): Path of the file to write the merged
            content to. Defaults to "merged_files.txt".
        binary (bool, optional): Whether to read and write files as binary.
            Defaults to False.

    Raises:
        LexosException: If a segment file cannot be opened or copied.
    """
    read_mode, write_mode = ("rb", "wb") if binary else ("r", "w")
    # Write to the caller-supplied path, not a hard-coded file name.
    with open(output_file, write_mode) as out_file:
        for file in segment_files:
            try:
                # Open the segment itself and copy from its file handle;
                # copyfileobj needs file objects, not path strings.
                with open(file, read_mode) as f:
                    shutil.copyfileobj(f, out_file, 1024 * 1024 * 10)
            except Exception as e:
                # Chain the cause so the original traceback is preserved.
                raise LexosException(f"Error merging files: {e}.") from e