Skip to content

Reference

Function references are listed here.

batch_compress(input_file)

Compresses a file using multiple compression configurations and returns performance metrics.

Parameters:

Name Type Description Default
input_file str

The path to the input file to be compressed.

required

Returns:

Type Description

List[Dict[str, Any]]: A list of dictionaries containing metrics for each compression configuration, including file sizes and compression times.

Source code in xcompress/batch_ops.py
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
def batch_compress(input_file):
    """
    Compresses a file with every available configuration and collects size and timing metrics.

    Args:
        input_file (str): The path to the input file to be compressed.

    Returns:
        List[Dict[str, Any]]: One dictionary per configuration whose compression produced an output file,
                              with the keys ``name``, ``file_size``, ``compressed_size`` and
                              ``compression_time_ns``. Configurations that failed are left out.
    """
    result_list = []
    for config_file in get_configs():
        name = config_file["name"]
        output_file, compression_time_ns = compress_with_config(config_file, input_file)
        # compress_with_config reports failure with an empty output path; calling
        # os.path.getsize("") would raise and throw away the metrics gathered so far.
        if not output_file:
            continue
        result_list.append(
            {
                "name": name,
                "file_size": os.path.getsize(input_file),
                "compressed_size": os.path.getsize(output_file),
                "compression_time_ns": compression_time_ns,
            }
        )
    return result_list

batch_compress_decompress(input_file, out_folder, skip_if_file_exists)

Compresses and then decompresses a file with every defined compression configuration and returns performance metrics for each one. This function is not reachable from the CLI or the menu; it exists to collect batch results across all defined config files, and those results are used to train the CSM module.

Parameters:

Name Type Description Default
input_file str

The path to the input file to be compressed and decompressed.

required
out_folder str

The directory where output files will be saved.

required
skip_if_file_exists bool

If True, skips compression if the output file already exists.

required

Returns:

Type Description

List[Dict[str, Any]]: A list of dictionaries containing metrics for each compression configuration, including file sizes and times for compression and decompression.

Source code in xcompress/batch_ops.py
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
def batch_compress_decompress(input_file, out_folder, skip_if_file_exists):
    """
    Compresses and then decompresses a file with every defined configuration and returns metrics for each one.
    Not reachable from cli or menu. For getting batch results for all defined config files. These results are used to train CSM module.

    Args:
        input_file (str): The path to the input file to be compressed and decompressed.
        out_folder (str): The directory where output files will be saved. A sub-folder named after each
                          configuration is created inside it.
        skip_if_file_exists (bool): If True, a configuration whose output file already exists is skipped
                                    entirely (no metrics are recorded for it).

    Returns:
        List[Dict[str, Any]]: One dictionary per configuration that compressed successfully, with the keys
                              ``filename``, ``name``, ``file_size``, ``compressed_size``,
                              ``compression_time_ns`` and ``decompression_time_ns``.
    """
    result_list = []
    for config_file in get_configs():
        name = config_file["name"]
        extension = config_file["extension"]

        method_folder = os.path.join(out_folder, name)
        # exist_ok avoids the check-then-create race of testing os.path.exists first.
        os.makedirs(method_folder, exist_ok=True)
        out_file_name = os.path.join(
            method_folder, f"{os.path.basename(input_file)}.{extension}"
        )
        if skip_if_file_exists and os.path.exists(out_file_name):
            print(f"Skipping... File exists for method: {name} for file: {input_file}")
            continue

        output_file, compression_time_ns = compress_with_config(
            config_file, input_file, out_file_name
        )
        # An empty output path is compress_with_config's failure signal.
        if output_file == "":
            continue

        _, decompression_time_ns = decompress_with_config(config_file, output_file)
        result_list.append(
            {
                "filename": input_file,
                "name": name,
                "file_size": os.path.getsize(input_file),
                "compressed_size": os.path.getsize(output_file),
                "compression_time_ns": compression_time_ns,
                "decompression_time_ns": decompression_time_ns,
            }
        )
    return result_list

get_configs()

Retrieves compression configuration data from JSON files located in the compression_configs directory.

Returns:

Type Description

List[Dict[str, Any]]: A list of dictionaries, each containing the configuration data loaded from a JSON file.

Source code in xcompress/batch_ops.py
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
def get_configs():
    """
    Loads every JSON compression configuration stored in the `compression_configs` directory next to this module.

    Returns:
        List[Dict[str, Any]]: The parsed content of each ``*.json`` file in that directory, ordered by file name.
    """
    directory = os.path.join(os.path.dirname(__file__), "compression_configs")
    config_data = []
    # os.listdir returns entries in arbitrary order; sort so the result is reproducible.
    for file_name in sorted(os.listdir(directory)):
        if not file_name.endswith(".json"):
            continue
        # Read as UTF-8 explicitly instead of relying on the platform's locale encoding.
        with open(os.path.join(directory, file_name), "r", encoding="utf-8") as f:
            config_data.append(json.load(f))
    return config_data

benchmark()

Main function for running benchmarks. Allows the user to select configurations, benchmark type, and output options.

Source code in xcompress/benchmark.py
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
def benchmark():
    """
    Interactive entry point for running benchmarks.

    Lets the user pick configurations and a benchmark type, asks for the input/output
    file names and output options, and then delegates to benchmark_param. Returns
    without benchmarking when the user backs out of either menu.
    """
    selected_configs = select_config()
    if selected_configs is None:
        return
    benchmark_type = select_benchmark_type()
    if benchmark_type is None:
        print("No benchmark selected.")
        return

    filename = input("\033[1mEnter input filename: \033[0m")
    output_filename = input("\033[1mEnter output filename (optional): \033[0m")
    output_to_file = read_boolean_input("Output to file? (y/n): ")
    output_plots = read_boolean_input("Output plots? (y/n): ")

    # select_config() returns menu indices (ints), not configuration dicts, and publishes
    # the list those indices refer to as the module-level `configs`. Indexing an int with
    # ["name"] raised a TypeError, so translate indices to names here. Indices outside
    # the list are ignored.
    selected_names = [
        configs[index]["name"]
        for index in sorted(selected_configs)
        if 0 <= index < len(configs)
    ]
    benchmark_param(
        selected_names,
        benchmark_type,
        filename,
        output_filename,
        output_to_file,
        output_plots,
    )

benchmark_param(selected_config_names, benchmark_type, filename, output_filename, output_to_file=False, output_plots=False)

Benchmarks the selected compression configurations and either outputs results to a file or displays them.

Parameters:

Name Type Description Default
selected_config_names List[str]

List of names of selected configurations (compression methods).

required
benchmark_type str

Type of benchmark ("compress" or "compress_decompress").

required
filename str

Path to the input file.

required
output_filename str

Path to the output file.

required
output_to_file bool

If True, results are saved to a file.

False
output_plots bool

If True, plots are generated and displayed.

False
Source code in xcompress/benchmark.py
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
def benchmark_param(
    selected_config_names,
    benchmark_type,
    filename,
    output_filename,
    output_to_file=False,
    output_plots=False,
):
    """
    Benchmarks the selected compression configurations and either saves the results or prints them.

    Args:
        selected_config_names (List[str]): List of names of selected configurations (compression methods).
        benchmark_type (str): Type of benchmark ("compress" or "compress_decompress"). Any other value
            produces no measurements.
        filename (str): Path to the input file.
        output_filename (str): Output path handed to compress_with_config for the compressed file.
        output_to_file (bool, optional): If True, results are written to "benchmark_results.json" in the
            current working directory instead of being printed.
        output_plots (bool, optional): If True, the results are passed to visualization_param.

    Any exception raised while benchmarking is caught and reported as "Error: ...".
    """
    try:
        configs = load_configs("compression_configs")
        selected_configs = [get_config(configs, x) for x in selected_config_names]
        with_decompression = benchmark_type == "compress_decompress"
        result_list = []
        for config in selected_configs:
            print("\033[1mSelected compression algorithm:\033[0m", config["name"])
            print("\033[1mInput filename:\033[0m", filename)
            print("\033[1mOutput filename:\033[0m", output_filename)
            if benchmark_type != "compress" and not with_decompression:
                # Unknown benchmark type: nothing to measure for this configuration.
                continue

            output_file, compression_time_ns = compress_with_config(
                config, filename, output_filename
            )
            # An empty path is compress_with_config's failure signal. Skip this configuration
            # instead of letting os.path.getsize("") abort the whole benchmark run.
            if output_file == "":
                print(f"Skipping {config['name']}: compression failed.")
                continue

            # Sizes are read before decompressing, in case decompression touches the compressed file.
            result = {
                "filename": filename,
                "name": config["name"],
                "file_size": os.path.getsize(filename),
                "compressed_size": os.path.getsize(output_file),
                "compression_time_ns": compression_time_ns,
            }
            if with_decompression:
                _, decompression_time_ns = decompress_with_config(config, output_file)
                result["decompression_time_ns"] = decompression_time_ns
            result_list.append(result)

        if output_to_file:
            with open("benchmark_results.json", "w") as file:
                json.dump(result_list, file, indent=4)
        else:
            print("Results:")
            for result in result_list:
                print("Filename:", result["filename"])
                print("Name:", result["name"])
                print("File Size:", result["file_size"])
                print("Compressed Size:", result["compressed_size"])
                print("Compression Time (ns):", result["compression_time_ns"])
                print(
                    "Decompression Time (ns):", result.get("decompression_time_ns", "N/A")
                )
                print()

        if output_plots:
            visualization_param(result_list)
    except Exception as e:
        print(f"Error: {e}")

display_select_benchmark_menu(menu_options, selected_index)

Displays the benchmark type selection menu.

Parameters:

Name Type Description Default
menu_options List[str]

List of benchmark type options.

required
selected_index int

Index of the currently selected option.

required
Source code in xcompress/benchmark.py
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
def display_select_benchmark_menu(menu_options, selected_index):
    """
    Clears the screen and renders the benchmark type menu, highlighting the active entry.

    Args:
        menu_options (List[str]): Labels of the benchmark type options, in display order.
        selected_index (int): Position of the option the cursor is currently on.
    """
    clear_screen()
    print("Select benchmark type:")
    for position, label in enumerate(menu_options):
        # The active row gets a bold green arrow; every other row is indented to line up.
        template = "\033[1;32m-> {}\033[0m" if position == selected_index else "   {}"
        print(template.format(label))

print_menu(options, selected_rows, config_count, current_row)

Prints the menu for selecting compression algorithms.

Parameters:

Name Type Description Default
options List[Dict[str, str]]

List of defined compression algorithms with configuration files.

required
selected_rows Set[int]

Set of indices of selected options.

required
config_count int

Total number of configuration options.

required
current_row int

The currently selected row in the menu.

required
Source code in xcompress/benchmark.py
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
def print_menu(options, selected_rows, config_count, current_row):
    """
    Clears the screen and draws the compression algorithm selection menu.

    The menu has a "Back to main menu" row (row 0), one row per configuration
    (rows 1..len(options)) and a final "Accept" row (row len(options) + 1).

    Args:
        options (List[Dict[str, str]]): Defined compression algorithms; each entry's "name" is shown.
        selected_rows (Set[int]): Indices into `options` that are currently selected.
        config_count (int): Number of configurations, shown in the header line.
        current_row (int): Menu row the cursor is on.
    """
    arrow = "\033[1;32m->\033[0m"
    green = "\033[1;32m"
    reset = "\033[0m"

    def show_row(label, is_current, is_marked):
        # Marked rows are printed in green; the cursor row is prefixed with an arrow.
        if is_marked and is_current:
            print(arrow, green, label, reset)
        elif is_marked:
            print(green, label, reset)
        elif is_current:
            print(arrow, label)
        else:
            print("   ", label)

    clear_screen()
    print(f"{config_count} configuration file(s) found.\n")
    print("Use spacebar to select algorithms; selected ones will be shown in green")
    print("Select Compression Algorithm(s):")

    # "Back to main menu" is green only while the cursor is on it.
    on_back_row = current_row == 0
    show_row("Back to main menu", on_back_row, on_back_row)

    # Configuration rows start at 1 because row 0 is "Back to main menu".
    for row, option in enumerate(options, start=1):
        show_row(option["name"], current_row == row, (row - 1) in selected_rows)

    # "Accept" is the last row; it is drawn as marked when its row number is in selected_rows.
    accept_row = len(options) + 1
    show_row("Accept", current_row == accept_row, accept_row in selected_rows)

read_boolean_input(prompt)

Reads a boolean input from the user.

Parameters:

Name Type Description Default
prompt str

The prompt message to display.

required

Returns:

Name Type Description
bool

True if 'y' is entered, False if 'n' is entered.

Source code in xcompress/benchmark.py
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
def read_boolean_input(prompt):
    """
    Prints a prompt and waits for a single y/n key press.

    Args:
        prompt (str): The prompt message to display.

    Returns:
        bool: True for 'y', False for 'n' (case-insensitive). Any other key is ignored
              and the function keeps waiting.
    """
    print(prompt)
    answers = {"y": True, "n": False}
    while True:
        pressed = readchar.readkey().lower()
        if pressed in answers:
            return answers[pressed]

select_benchmark_type()

Allows the user to select a benchmark type from a menu.

Returns:

Name Type Description
str

The selected benchmark type ("compress" or "compress_decompress"), or None if canceled.

Source code in xcompress/benchmark.py
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
def select_benchmark_type():
    """
    Shows the benchmark type menu and lets the user choose with the arrow keys and Enter.

    Returns:
        str: "compress" or "compress_decompress" for the matching menu entry,
             or None when "Back to menu" is chosen.
    """
    menu_options = ["Back to menu", "Compress", "Compress-decompress"]
    # Return value for each menu row, in the same order as menu_options.
    outcomes = (None, "compress", "compress_decompress")
    cursor = 0

    display_select_benchmark_menu(menu_options, cursor)

    while True:
        pressed = readchar.readkey()
        if pressed == readchar.key.ENTER:
            return outcomes[cursor]
        if pressed == readchar.key.UP:
            step = -1
        elif pressed == readchar.key.DOWN:
            step = 1
        else:
            # Any other key leaves the menu untouched.
            continue
        # Wrap around at both ends of the menu.
        cursor = (cursor + step) % len(menu_options)
        display_select_benchmark_menu(menu_options, cursor)

select_config(prev_configs=None)

Allows the user to select compression configurations from a menu.

Parameters:

Name Type Description Default
prev_configs Set[int]

Previously selected configurations.

None

Returns:

Type Description

Set[int]: Set of indices of selected configurations.

Source code in xcompress/benchmark.py
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
def select_config(prev_configs=None):
    """
    Allows the user to select compression configurations from a menu.

    Up/Down move the cursor, Space toggles the configuration under the cursor, and Enter on
    "Accept" confirms. Enter or Space on "Back to main menu" cancels. The loaded
    configurations are also stored in the module-level `configs`.

    Args:
        prev_configs (Set[int], optional): Previously selected configuration indices. The
            collection is copied, so the caller's object is not modified.

    Returns:
        Set[int]: Set of indices (into the loaded configurations) of the selected
                  configurations, or None if the user cancelled.
    """
    configs_folder = "compression_configs"
    global configs
    configs = load_configs(configs_folder)
    # Work on a copy so that cancelling does not leave the caller's set half-edited.
    selected_configs = set() if prev_configs is None else set(prev_configs)

    accept_row = len(configs) + 1  # row 0 is "Back to main menu", rows 1..len(configs) are configs
    current_row = 0
    while True:
        print_menu(configs, selected_configs, len(configs), current_row)
        key = readchar.readkey()
        if key == readchar.key.UP and current_row > 0:
            current_row -= 1
        elif key == readchar.key.DOWN and current_row < accept_row:
            current_row += 1
        elif key in ("\r", "\n"):
            if current_row == 0:
                return None
            if current_row == accept_row:
                return selected_configs
        elif key == " ":
            if current_row == 0:
                return None
            if current_row == accept_row:
                # "Accept" is not a configuration. The old guard compared against
                # len(configs) + 2, a row the cursor can never reach, so pressing Space here
                # added the out-of-range index len(configs) to the selection.
                continue
            index = current_row - 1
            if index in selected_configs:
                selected_configs.remove(index)
            else:
                selected_configs.add(index)

brute_force_compression()

Prompts the user for the input and output file details, then performs brute-force compression: the file is compressed with every available configuration, and the configuration that yields the smallest compressed size is selected.

Source code in xcompress/brute_force.py
87
88
89
90
91
92
93
94
95
96
97
98
99
def brute_force_compression():
    """
    Prompts the user for the input file and options, then runs brute-force compression.

    The file is compressed with every available configuration by brute_force_param,
    using this module's directory as the output folder.

    Returns:
        Whatever brute_force_param returns (the selected configuration).
    """
    filename = input("\033[1mEnter input filename: \033[0m")
    # The prompt is kept so the interactive flow is unchanged, but
    # brute_force_param(filename, out_folder, delete_except_minimum) has no parameter for an
    # output filename. Passing it as a fourth positional argument raised a TypeError.
    output_filename = input("\033[1mEnter output filename (optional): \033[0m")
    if output_filename.strip():
        print("Note: a custom output filename is not used in brute-force mode.")
    answer = input("\033[1mKeep only minimum sized file([Y]/n) \033[0m")

    # Empty input selects the default ("Y").
    delete_except_minimum = answer.strip().lower() in ["y", ""]

    return brute_force_param(
        filename,
        os.path.dirname(__file__),
        delete_except_minimum=delete_except_minimum,
    )

brute_force_param(filename, out_folder, delete_except_minimum=False)

Performs brute-force compression using all available configurations and selects the one with the minimum compressed size.

Parameters:

Name Type Description Default
filename str

Path to the input file.

required
out_folder str

Directory where output files are stored.

required
output_filename str

Path for the output file.

required
delete_except_minimum bool

If True, deletes all files except the one with the minimum compressed size.

False
Source code in xcompress/brute_force.py
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
def brute_force_param(filename, out_folder, delete_except_minimum=False):
    """
    Performs brute-force compression using all available configurations and selects the one with the minimum compressed size.

    Every configuration compresses `filename` into `out_folder`. The configuration with the
    smallest output is then run once more with compress_with_config's default output path.

    Args:
        filename (str): Path to the input file.
        out_folder (str): Directory where the per-configuration output files are stored.
        delete_except_minimum (bool, optional): If True, deletes the trial output files of all
            configurations except the one with the minimum compressed size.

    Returns:
        dict: The configuration that produced the smallest output, or None when no
              configuration produced an output file.
    """
    result_list = []
    configs_folder = "compression_configs"
    configs = load_configs(configs_folder)

    for config_file in configs:
        name = config_file["name"]
        try:
            extension = config_file["extension"]
            print(f"Trying {name}...")
            out_file_name = os.path.join(
                out_folder, f"{os.path.basename(filename)}.{extension}"
            )
            output_file, compression_time_ns = compress_with_config(
                config_file, filename, out_file_name
            )
            # Raises for a failed compression (empty path), which is reported below.
            compressed_size = os.path.getsize(output_file)
            result_list.append(
                {
                    "name": name,
                    "compressed_size": compressed_size,
                    "output_file": output_file,
                }
            )
        except Exception as e:
            print(f"Error getting results for {name}. Error message:", e)

    # min() raises ValueError on an empty list; report the situation instead.
    if not result_list:
        print("No configuration produced a compressed file.")
        return None

    # Determine the compression configuration with the minimum size
    best = min(result_list, key=lambda x: x["compressed_size"])
    min_size_name = best["name"]

    if delete_except_minimum:
        for result in result_list:
            if result["name"] == min_size_name:
                continue
            # Configurations sharing an extension write to the same path; never delete the
            # winner's file through another configuration's entry.
            if result["output_file"] == best["output_file"]:
                continue
            try:
                os.remove(result["output_file"])
            except FileNotFoundError:
                # Already removed through an earlier entry that pointed at the same path.
                pass

    selected_config = get_config(configs, min_size_name)

    output, _ = compress_with_config(selected_config, filename, "")

    print("\033[1mSelected compression algorithm:\033[0m", min_size_name)
    print("\033[1mInput filename:\033[0m", filename)
    print("\033[1mOutput filename:\033[0m", output)

    print(f"Compression completed successfully. Filename is \033[1m{output}\033[0m")
    input("Press any key to exit")
    return selected_config

print_menu(selected_row, options)

Prints a menu for selecting compression modes.

Parameters:

Name Type Description Default
selected_row int

The index of the currently selected row.

required
options List[str]

List of compression mode options.

required
Source code in xcompress/brute_force.py
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
def print_menu(selected_row, options):
    """
    Clears the screen and draws the compression mode menu.

    The first row is always labelled "Back to main menu", whatever options[0] contains.

    Args:
        selected_row (int): The index of the row the cursor is on.
        options (List[str]): List of compression mode options.
    """
    clear_screen()
    print("Select Compression Mode:")
    for idx, option in enumerate(options):
        label = option if idx else "Back to main menu"
        if idx == selected_row:
            # Cursor row: bold green arrow followed by a bold green label.
            print("\033[1;32m->\033[0m", "\033[1;32m", label, "\033[0m")
        else:
            print("\033[32m   ", label, "\033[0m")

compress_with_config(config_data, input_file, output_file='')

Compresses a file using specified compression configuration.

Parameters:

Name Type Description Default
config_data dict

Dictionary containing compression parameters, executable path, and file parameter placeholders.

required
input_file str

Path to the input file to be compressed.

required
output_file str

Path to the output file. If not provided, defaults to generating a file based on config.

''

Returns:

Name Type Description
tuple

A tuple containing the output file path and the compression execution time in nanoseconds.

Source code in xcompress/compress.py
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
def compress_with_config(config_data, input_file, output_file=""):
    """
    Compresses a file by running the external executable described by a configuration.

    Args:
        config_data (dict): Configuration with the keys "executable_path", "compression_params",
            "input_file_param", "output_file_param" and "extension". Entries of
            "compression_params" equal to "@input_file_param" are replaced by the input file
            parameter; occurrences of "@output_file_param" are replaced by the output file.
            An "output_file_param" of "stdout" means the executable's standard output is
            captured into the output file.
        input_file (str): Path to the input file to be compressed.
        output_file (str, optional): Path to the output file. If empty, it is derived from the
            input path and the configured extension.

    Returns:
        tuple: (output file path, execution time in nanoseconds as a float). On any error a
               message is printed and ("", 0) is returned.
    """
    try:
        # Check if the executable is available
        executable_path = config_data["executable_path"]
        if not shutil.which(executable_path):
            raise FileNotFoundError(
                f"Executable {executable_path} not found on the system"
            )

        compression_params = config_data["compression_params"]

        input_file_param = config_data["input_file_param"].replace(
            "{input_file}", input_file
        )
        compression_params = [
            input_file_param if x == "@input_file_param" else x
            for x in compression_params
        ]

        # NOTE(review): when no output file is given, the returned path is the expanded
        # output_file_param (including any prefix text), while an explicit output file is
        # substituted bare. Kept as-is; confirm against the shipped configs.
        output_file_param = config_data["output_file_param"]
        substitute_output = True
        if output_file:
            output_file_param = output_file_param.replace("{output_file}", output_file)
        elif output_file_param != "stdout":
            output_file = output_file_param.replace(
                "{output_file}", f"{input_file}.{config_data['extension']}"
            )
        else:
            # stdout mode without an explicit target: only the capture file name is derived.
            output_file = f"{input_file}.{config_data['extension']}"
            substitute_output = False
        if substitute_output:
            compression_params = [
                x.replace("@output_file_param", output_file)
                for x in compression_params
            ]

        # Construct the command for compression
        command = [executable_path, *compression_params]

        # Start the timer
        start_time = timeit.default_timer()

        # Execute the compression command. subprocess.run drains stderr while waiting:
        # Popen.wait() with stderr=PIPE can deadlock once the child fills the pipe buffer.
        if output_file_param == "stdout":
            with open(output_file, "wb") as outfile:
                completed = subprocess.run(
                    command, stdout=outfile, stderr=subprocess.PIPE
                )
        else:
            completed = subprocess.run(command, stderr=subprocess.PIPE)

        # End the timer
        end_time = timeit.default_timer()
        execution_time_ns = (end_time - start_time) * 1e9

        # Surface a failing exit status instead of silently discarding stderr. The result is
        # still returned, as before.
        if completed.returncode != 0:
            details = completed.stderr.decode(errors="replace").strip()
            print(
                f"Warning: {executable_path} exited with code {completed.returncode}: {details}"
            )

    except Exception as e:
        print(f"Error during compression: {e}")
        return "", 0

    return output_file, execution_time_ns

create_config()

Collects configuration inputs from the user, creates a configuration dictionary, and saves it to a file in the 'compression_configs' folder.

Source code in xcompress/create_config.py
106
107
108
109
110
111
112
113
114
115
def create_config():
    """
    Interactively builds a configuration and stores it in the 'compression_configs' folder.

    The configuration is collected with get_config_input and written with
    save_config_to_file into the folder next to this module. The function then waits
    for the user before returning.
    """
    user_config = get_config_input()
    target_folder = os.path.join(os.path.dirname(__file__), "compression_configs")
    save_config_to_file(user_config, target_folder)
    input("Press any key to continue...")

create_config_param(name, executable_path, input_file_param, output_file_param, compression_params, decompression_params, extension)

Creates a configuration dictionary from given parameters.

Parameters:

Name Type Description Default
name str

The name of the configuration.

required
executable_path str

Path to the executable.

required
input_file_param str

Input file parameter.

required
output_file_param str

Output file parameter.

required
compression_params list

List of compression parameters.

required
decompression_params list

List of decompression parameters.

required
extension str

File extension.

required

Returns:

Name Type Description
dict

A configuration dictionary.

Source code in xcompress/create_config.py
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
def create_config_param(
    name,
    executable_path,
    input_file_param,
    output_file_param,
    compression_params,
    decompression_params,
    extension,
):
    """
    Builds a configuration dictionary from explicit parameters.

    The "{input_file}" placeholder is appended to the input file parameter, and the
    "{output_file}" placeholder to the output file parameter unless that parameter is the
    keyword "stdout", which is stored unchanged.

    Args:
        name (str): The name of the configuration.
        executable_path (str): Path to the executable.
        input_file_param (str): Text placed directly before the input file placeholder.
        output_file_param (str): Text placed directly before the output file placeholder, or "stdout".
        compression_params (list): List of compression parameters.
        decompression_params (list): List of decompression parameters.
        extension (str): File extension.

    Returns:
        dict: A configuration dictionary.
    """
    input_template = input_file_param + "{input_file}"
    if output_file_param == "stdout":
        output_template = "stdout"
    else:
        output_template = output_file_param + "{output_file}"

    return dict(
        name=name,
        executable_path=executable_path,
        input_file_param=input_template,
        output_file_param=output_template,
        compression_params=compression_params,
        decompression_params=decompression_params,
        extension=extension,
    )

get_config_input()

Prompts the user for configuration parameters and returns a dictionary containing the inputs.

Returns:

Name Type Description
dict

A dictionary with configuration parameters.

Source code in xcompress/create_config.py
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
def _parse_param_list(raw):
    """Splits a comma-separated answer into stripped, non-empty parameters."""
    return [piece.strip() for piece in raw.split(",") if piece.strip()]


def get_config_input():
    """
    Prompts the user for configuration parameters and returns a dictionary containing the inputs.

    Blank entries in the comma-separated parameter prompts are dropped, so
    pressing Enter yields an empty list rather than a list holding one empty
    string.

    Returns:
        dict: A dictionary with the keys ``name``, ``executable_path``,
        ``input_file_param``, ``output_file_param``, ``compression_params``,
        ``decompression_params`` and ``extension``.
    """
    config = {}

    print("Enter configuration parameters:")

    config["name"] = input("Name: ").strip()
    config["executable_path"] = input(
        "Executable Path \n Enter just the name if it exists on path, full path otherwise: "
    ).strip()

    # The answer is a prefix; the placeholder is always appended after it.
    config["input_file_param"] = input(
        "Input File Parameter\n (If input file needs special prefix or suffix)\n [Press Enter for default]: "
    ).strip()
    config["input_file_param"] += " {input_file}"

    config["output_file_param"] = input(
        "Output File Parameter\n (If output file needs special prefix or suffix\n if algorithm gives output to console, use stdout keyword)\n [Press Enter for default]: "
    ).strip()
    # "stdout" is a keyword and must be stored without the placeholder.
    if config["output_file_param"] != "stdout":
        config["output_file_param"] += " {output_file}"

    config["compression_params"] = _parse_param_list(
        input(
            "Compression Parameters (comma-separated)\n For example, if algorithm uses -c for compression and -k for keeping files -c,-k can be entered: "
        )
    )
    config["decompression_params"] = _parse_param_list(
        input(
            "Decompression Parameters (comma-separated) \n For example, if algorithm uses -d for decompression and -k for keeping files -d,-k can be entered: "
        )
    )
    config["extension"] = input(
        "Extension \n If stdout is used, extension parameter will be used as compressed file extension: "
    ).strip()

    return config

save_config_to_file(config, folder_path)

Saves the configuration dictionary to a JSON file in the specified folder.

Parameters:

Name Type Description Default
config dict

The configuration dictionary.

required
folder_path str

The path to the folder where the config file will be saved (compression_configs preferred).

required
Source code in xcompress/create_config.py
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
def save_config_to_file(config, folder_path):
    """
    Saves the configuration dictionary to a JSON file in the specified folder.

    The file is named ``<config["name"]>.json``. The folder is created when it
    does not exist yet, and the resulting path is printed.

    Args:
        config (dict): The configuration dictionary; must contain a "name" key.
        folder_path (str): The path to the folder where the config file will be saved (compression_configs preferred).

    Raises:
        KeyError: If ``config`` has no "name" entry.
        OSError: If the folder or the file cannot be created.
    """
    name = config["name"]

    # An empty folder_path means "current directory"; makedirs("") would fail.
    if folder_path:
        os.makedirs(folder_path, exist_ok=True)
    file_path = os.path.join(folder_path, f"{name}.json")

    with open(file_path, "w", encoding="utf-8") as file:
        json.dump(config, file, indent=4)

    print(f"Configurations saved to {file_path}")

model_compression()

Displays the model compression menu and handles user selection.

Source code in xcompress/csm.py
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
def model_compression():
    """
    Displays the model compression menu and handles user selection.

    The UP/DOWN keys move the highlighted row and Enter confirms it. Row 0
    returns to the caller; any other row asks for an input and an optional
    output filename and then runs ``model_compression_param`` with the label of
    the chosen row as the mode.
    """
    mode_list = [
        "Back to main menu",
        "Fast Compression",
        "Fast Decompression",
        "Best Compression",
    ]
    current_row = 0  # Start from the first option

    while True:
        print_menu(current_row, mode_list)
        key = readchar.readkey()

        if key == readchar.key.UP and current_row > 0:
            current_row -= 1
        elif key == readchar.key.DOWN and current_row < len(mode_list) - 1:
            current_row += 1
        elif key in {"\r", "\n"}:
            if current_row == 0:
                return

            filename = input("\033[1mEnter input filename: \033[0m").strip()
            output_filename = input(
                "\033[1mEnter output filename (optional): \033[0m"
            ).strip()

            if not filename:
                print("Error: Input filename cannot be empty.")
                continue

            # Signature is (mode, filename, output_filename): the mode label
            # goes first, the file to compress second.
            model_compression_param(mode_list[current_row], filename, output_filename)

model_compression_param(mode, filename, output_filename)

Executes compression based on the selected algorithm and mode.

Parameters:

Name Type Description Default
mode str

The compression mode (fast-compression, fast-decompression or best-compression).

required
filename str

The input file to compress.

required
output_filename str

The name of the output file.

required
Source code in xcompress/csm.py
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
def model_compression_param(mode, filename, output_filename):
    """
    Executes compression based on the selected algorithm and mode.

    The mode is lower-cased and its spaces are replaced by dashes before it is
    handed to ``detect_algorithm`` (so the menu label "Fast Compression" and
    the CLI value "fast-compression" are equivalent). The configuration named
    after the detected algorithm is then used to compress the file.

    Args:
        mode (str): The compression mode (fast-compression, fast-decompression or best-compression).
        filename (str): The input file to compress.
        output_filename (str | None): The name of the output file. ``None`` and
            ``""`` both mean "not given".
    """
    configs_folder = "compression_configs"
    configs = load_configs(configs_folder)

    # The CLI passes None when --output_filename is omitted, while the menu
    # passes "". Normalise to "" so both paths hand the same value downstream.
    # NOTE(review): assumes compress_with_config treats "" as "derive a
    # default name", like decompress_with_config does -- confirm.
    output_filename = output_filename or ""

    # Detect the algorithm based on filename and mode
    selected_algorithm = detect_algorithm(filename, mode.lower().replace(" ", "-"))
    selected_config = get_config(configs, selected_algorithm)

    if selected_config is None:
        print(f"Error: No configuration found for algorithm {selected_algorithm}.")
        input("Press any key to return to menu")
        return

    try:
        print("\033[1mSelected compression algorithm:\033[0m", selected_algorithm)
        print("\033[1mInput filename:\033[0m", filename)
        print("\033[1mOutput filename:\033[0m", output_filename)
        output, _ = compress_with_config(selected_config, filename, output_filename)
        print(f"Compression completed successfully. Filename is \033[1m{output}\033[0m")
    except Exception as e:
        # Best-effort boundary: report the failure and fall through to the prompt.
        print(f"Error during compression: {e}")

    input("Press any key to return to menu")

print_menu(selected_row, options)

Prints the menu options, highlighting the selected option.

Parameters:

Name Type Description Default
selected_row int

Index of the currently selected option.

required
options list of str

List of menu options to display.

required
Source code in xcompress/csm.py
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
def print_menu(selected_row, options):
    """
    Clears the screen and renders the compression-mode menu.

    The entry at ``selected_row`` is drawn in bold green behind an arrow; all
    other entries are drawn in white.

    Args:
        selected_row (int): Index of the currently selected option.
        options (list of str): List of menu options to display.
    """
    clear_screen()
    print("Select Compression Mode:")
    for position, label in enumerate(options):
        if position != selected_row:
            # Regular entry, rendered in white.
            print("\033[37m   ", label, "\033[0m")
            continue
        # Highlighted entry: green arrow followed by the bold green label.
        print("\033[1;32m->\033[0m", "\033[1;32m", label, "\033[0m")

decompress_with_config(config_data, input_file, output_file='')

Decompresses a file using the specified configuration parameters.

Parameters:

Name Type Description Default
config_data dict

A dictionary containing configuration details such as executable path, decompression parameters, and file parameters.

required
input_file str

Path to the file that needs to be decompressed.

required
output_file str

Path to the file where the decompressed output should be saved. If not specified, a default name based on the input file is used.

''

Returns:

Name Type Description
tuple

A tuple containing: - output_file (str): Path to the decompressed file. - execution_time_ns (float): Time taken for decompression in nanoseconds.

Source code in xcompress/decompress.py
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
def decompress_with_config(config_data, input_file, output_file=""):
    """
    Decompresses a file using the specified configuration parameters.

    The command line is ``[executable_path, *decompression_params]`` where a
    parameter equal to ``"@input_file_param"`` is replaced by the configured
    input parameter and every ``"@output_file_param"`` occurrence is replaced
    by the output path. When the configuration's output parameter is
    ``"stdout"``, the child's standard output is written to the output file.
    The function always waits for the child to finish before it stops the
    timer and returns.

    Args:
        config_data (dict): A dictionary containing configuration details such as
                            executable path, decompression parameters, and file parameters.
        input_file (str): Path to the file that needs to be decompressed.
        output_file (str): Path to the file where the decompressed output should be saved.
                           If not specified, the last extension of the input is dropped and
                           ".o" is inserted before the remaining one ("a.txt.gz" -> "a.o.txt").

    Returns:
        tuple: A tuple containing:
            - output_file (str): Path to the decompressed file.
            - execution_time_ns (float): Time taken for decompression in nanoseconds.
        ``("", 0)`` is returned, after printing the error, when anything fails.
    """
    try:
        executable_path = config_data["executable_path"]
        if not shutil.which(executable_path):
            raise FileNotFoundError(
                f"Executable {executable_path} not found on the system"
            )

        input_file_param = config_data["input_file_param"].replace(
            "{input_file}", input_file
        )
        decompression_params = [
            input_file_param if x == "@input_file_param" else x
            for x in config_data["decompression_params"]
        ]

        output_file_param = config_data["output_file_param"]
        explicit_output = output_file != ""
        if explicit_output:
            output_file_param = output_file_param.replace("{output_file}", output_file)
        else:
            base_filename, _ = os.path.splitext(input_file)
            file_name_without_extension, extension = os.path.splitext(base_filename)
            output_file = f"{file_name_without_extension}.o{extension}"

        # With a derived output name the placeholder is only substituted for
        # tools that take an output argument; an explicit name is always
        # substituted.
        if explicit_output or output_file_param != "stdout":
            decompression_params = [
                x.replace("@output_file_param", output_file)
                for x in decompression_params
            ]

        command = [executable_path, *decompression_params]

        start_time = timeit.default_timer()

        # communicate() drains stderr and waits for the child, so a chatty
        # tool cannot block on a full pipe and the timing covers the whole run.
        if output_file_param == "stdout":
            with open(output_file, "wb") as outfile:
                process = subprocess.Popen(
                    command, stdout=outfile, stderr=subprocess.PIPE
                )
                process.communicate()
        else:
            process = subprocess.Popen(command, stderr=subprocess.PIPE)
            process.communicate()

        end_time = timeit.default_timer()
        execution_time_ns = (end_time - start_time) * 1e9

    except Exception as e:
        # Callers rely on the ("", 0) sentinel instead of an exception.
        print(f"An error occurred: {e}")
        return "", 0

    return output_file, execution_time_ns

detect_algorithm(filename, mode)

Detects the likely compression algorithm for a given file based on its characteristics and the specified mode.

Parameters:

Name Type Description Default
filename str

Path to the file for which the algorithm needs to be detected.

required
mode str

Compression mode to be used for detection ("fast-compression", "best-compression", "fast-decompression").

required

Returns:

Name Type Description
str

The name of the detected compression algorithm.

Source code in xcompress/llm_model.py
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
def detect_algorithm(filename, mode):
    """
    Detects the likely compression algorithm for a given file based on its characteristics and the specified mode.

    The "emirozturk/CSM" causal language model is loaded, prompted with the
    binned unique-symbol count, the rounded file-size class and the mode, and
    the text following "The algorithm is: " in its reply is returned.

    Args:
        filename (str): Path to the file for which the algorithm needs to be detected.
        mode (str): Compression mode to be used for detection ("fast-compression", "best-compression", "fast-decompression").

    Returns:
        str: The name of the detected compression algorithm.

    Raises:
        ValueError: If the model reply does not contain "The algorithm is: ".
    """
    # Prefer CUDA, then Apple MPS, and fall back to the CPU so the function
    # also works on machines without any accelerator.
    if torch.cuda.is_available():
        device = "cuda"
    elif torch.backends.mps.is_available():
        device = "mps"
    else:
        device = "cpu"

    base_model_name = "emirozturk/CSM"
    model = AutoModelForCausalLM.from_pretrained(base_model_name).to(device)
    tokenizer = AutoTokenizer.from_pretrained(base_model_name, padding_side="right")
    tokenizer.pad_token = tokenizer.eos_token

    usc = bin_usc(count_unique_symbols(filename))
    file_size = round_to_class(get_file_size(filename))
    eval_prompt = f"### Instruction: We need to find algorithm from given input params: (usc:{usc}, file_size:{file_size}, compression_type: {mode})."
    model_input = tokenizer(eval_prompt, return_tensors="pt").to(device)

    model.eval()
    with torch.no_grad():
        generated = model.generate(
            **model_input, max_new_tokens=150, repetition_penalty=1.15
        )
        result = tokenizer.decode(generated[0], skip_special_tokens=True)

    marker = "The algorithm is: "
    segments = result.split(marker)
    if len(segments) < 2:
        raise ValueError(
            f"Model output does not contain {marker!r}; cannot determine the algorithm."
        )
    # Take the text right after the first marker and drop dots from it.
    return segments[1].strip().replace(".", "")

CustomHelpAction

Bases: Action

A custom argparse action that displays help information for both the main parser and its subparsers.

This action overrides the default help behavior to include detailed help messages for all available subcommands.

Methods:

Name Description
__init__

Initializes the CustomHelpAction instance.

__call__

Displays help information for the parser and its subparsers.

Source code in xcompress/main.py
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
class CustomHelpAction(argparse.Action):
    """
    Help action that prints the help of the main parser followed by the help of every sub-command.

    It replaces argparse's default help behaviour so that a single flag shows
    the detailed usage of all available subcommands.

    Methods:
        __init__: Registers the action as a flag that consumes no value.
        __call__: Prints the parser help, each sub-parser help, then exits.
    """

    def __init__(
        self,
        option_strings,
        dest=argparse.SUPPRESS,
        default=argparse.SUPPRESS,
        help=None,
    ):
        """
        Initializes the CustomHelpAction.

        Args:
            option_strings (list): The option strings for this action.
            dest (str): The destination for the parsed value (default: argparse.SUPPRESS).
            default (any): The default value (default: argparse.SUPPRESS).
            help (str): The help message (default: None).
        """
        action_settings = {
            "option_strings": option_strings,
            "dest": dest,
            "default": default,
            "nargs": 0,  # the flag takes no argument
            "help": help,
        }
        super().__init__(**action_settings)

    def __call__(self, parser, namespace, values, option_string=None):
        """
        Displays help information for the parser and its subparsers.

        Args:
            parser (argparse.ArgumentParser): The parser to display help for.
            namespace (argparse.Namespace): The namespace for the parsed values.
            values (any): The values for the action (default: None).
            option_string (str): The option string (default: None).
        """
        parser.print_help()
        for registered in parser._actions:
            # Only sub-parser containers carry per-command parsers.
            if not isinstance(registered, argparse._SubParsersAction):
                continue
            for command_name, command_parser in registered.choices.items():
                print(f"{command_name}:")
                print(command_parser.format_help())
        parser.exit()

__call__(parser, namespace, values, option_string=None)

Displays help information for the parser and its subparsers.

Parameters:

Name Type Description Default
parser ArgumentParser

The parser to display help for.

required
namespace Namespace

The namespace for the parsed values.

required
values any

The values for the action (default: None).

required
option_string str

The option string (default: None).

None
Source code in xcompress/main.py
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
def __call__(self, parser, namespace, values, option_string=None):
    """
    Prints the help of the parser and of each of its sub-parsers, then exits.

    Args:
        parser (argparse.ArgumentParser): The parser to display help for.
        namespace (argparse.Namespace): The namespace for the parsed values.
        values (any): The values for the action (default: None).
        option_string (str): The option string (default: None).
    """
    parser.print_help()
    for registered in parser._actions:
        # Only sub-parser containers carry per-command parsers.
        if not isinstance(registered, argparse._SubParsersAction):
            continue
        for command_name, command_parser in registered.choices.items():
            print(f"{command_name}:")
            print(command_parser.format_help())
    parser.exit()

__init__(option_strings, dest=argparse.SUPPRESS, default=argparse.SUPPRESS, help=None)

Initializes the CustomHelpAction.

Parameters:

Name Type Description Default
option_strings list

The option strings for this action.

required
dest str

The destination for the parsed value (default: argparse.SUPPRESS).

SUPPRESS
default any

The default value (default: argparse.SUPPRESS).

SUPPRESS
help str

The help message (default: None).

None
Source code in xcompress/main.py
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
def __init__(
    self,
    option_strings,
    dest=argparse.SUPPRESS,
    default=argparse.SUPPRESS,
    help=None,
):
    """
    Sets up the CustomHelpAction as a flag that consumes no value.

    Args:
        option_strings (list): The option strings for this action.
        dest (str): The destination for the parsed value (default: argparse.SUPPRESS).
        default (any): The default value (default: argparse.SUPPRESS).
        help (str): The help message (default: None).
    """
    action_settings = {
        "option_strings": option_strings,
        "dest": dest,
        "default": default,
        "nargs": 0,  # the flag takes no argument
        "help": help,
    }
    super().__init__(**action_settings)

main()

The main entry point for the XCompress tool.

This function sets up the command-line argument parser and handles user input through either command-line arguments or an interactive menu. - For command-line arguments, it parses the arguments and calls the appropriate function based on the specified command. - For interactive mode, it displays a menu for the user to select various functionalities such as selecting a compression algorithm, benchmarking, creating configurations, and visualizing results.

Source code in xcompress/main.py
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
def _build_parser():
    """Builds the XCompress parser; returns it with the list of command sub-parsers."""
    parser = argparse.ArgumentParser(
        description="XCompress Compression Tool", add_help=False
    )
    parser.add_argument(
        "-h", "--help", action=CustomHelpAction, help="show this help message and exit"
    )

    subparsers = parser.add_subparsers(dest="command", required=False)
    output_filename_help = "The path to save the compressed file (defaults to input filename with .<algorithm> extension)."
    input_filename_help = "The path to the file to compress."

    # Select Compression
    parser_select_compression = subparsers.add_parser(
        "manual", help="Selects a specific compression algorithm for the input file."
    )
    parser_select_compression.add_argument(
        "algorithm_name", help="The name of the compression algorithm to use."
    )
    parser_select_compression.add_argument("input_filename", help=input_filename_help)
    parser_select_compression.add_argument(
        "--output_filename", default=None, help=output_filename_help
    )

    # Model Compression
    parser_model_compression = subparsers.add_parser(
        "csm",
        help="Uses CSM (LLM) to automatically select the best compression algorithm based on your preferences (Fast compression, Fast decompression or Best compression).",
    )
    parser_model_compression.add_argument(
        "mode",
        choices=["fast-compression", "fast-decompression", "best-compression"],
        help="The optimization mode for CSM (fast compression, fast decompression or best compression).",
    )
    parser_model_compression.add_argument("input_filename", help=input_filename_help)
    parser_model_compression.add_argument(
        "--output_filename", default=None, help=output_filename_help
    )

    # Brute Force Compression
    parser_brute_force = subparsers.add_parser(
        "brute_force",
        help="Finds the best compression algorithm for the input file by trying all available algorithms and keeping the one with the smallest size.",
    )
    parser_brute_force.add_argument("input_filename", help=input_filename_help)
    parser_brute_force.add_argument(
        "out_folder", help="Directory where output files are stored."
    )
    parser_brute_force.add_argument(
        "--delete_except_minimum",
        action="store_true",
        help="Whether to delete all compressed files except the one with the smallest size (default: False).",
    )

    # Benchmark
    parser_benchmark = subparsers.add_parser(
        "benchmark",
        help="Runs benchmarks on specified compression configurations and outputs results to file or plots.",
    )
    parser_benchmark.add_argument(
        "benchmark_type",
        choices=["compress", "compress_decompress"],
        help="The type of benchmark to run (compression or compression-decompression).",
    )
    parser_benchmark.add_argument("input_filename", help=input_filename_help)
    parser_benchmark.add_argument(
        "--output_to_file",
        action="store_true",
        help="Whether to output results to a file",
    )
    parser_benchmark.add_argument(
        "--output_plots",
        action="store_true",
        help="Whether to generate plots from the benchmark results.",
    )
    parser_benchmark.add_argument(
        "algorithm_names",
        nargs="+",
        help="Space-separated list of config names to benchmark.",
    )
    parser_benchmark.add_argument(
        "--output_filename", default=None, help=output_filename_help
    )

    # Create Config
    parser_create_config = subparsers.add_parser(
        "config_creation",
        help="Creates a configuration file for a custom compression algorithm.",
    )
    parser_create_config.add_argument("name", help="The name of the new configuration.")
    parser_create_config.add_argument(
        "executable_path",
        help="The path to the executable for the compression algorithm.",
    )
    # Optional values default to "" / [] (not None) because
    # create_config_param concatenates the prefixes with placeholder strings.
    parser_create_config.add_argument(
        "--input_file_param",
        default="",
        help="The parameter for the input file in the executable command (optional).",
    )
    parser_create_config.add_argument(
        "--output_file_param",
        default="",
        help="The parameter for the output file in the executable command (optional).",
    )
    parser_create_config.add_argument(
        "--compression_params",
        nargs="*",
        default=[],
        help="List of parameters for the compression process.",
    )
    parser_create_config.add_argument(
        "--decompression_params",
        nargs="*",
        default=[],
        help="List of parameters for the decompression process (if applicable).",
    )
    parser_create_config.add_argument(
        "extension",
        help="The file extension to use for compressed files generated by this configuration.",
    )

    # Visualization
    parser_visualization = subparsers.add_parser(
        "visualization", help="Visualizes benchmark results from specified JSON files."
    )
    parser_visualization.add_argument(
        "file_path", help="The path to the JSON file to visualize."
    )

    # Registered so the "help" branch of the dispatcher is actually reachable.
    subparsers.add_parser("help", help="Shows the help text of every command.")

    command_parsers = [
        parser_select_compression,
        parser_model_compression,
        parser_brute_force,
        parser_benchmark,
        parser_create_config,
        parser_visualization,
    ]
    return parser, command_parsers


def _run_interactive_menu():
    """Runs the arrow-key driven main menu until the user presses q."""
    # One handler per menu row, in display order.
    handlers = (
        select_compression,
        model_compression,
        brute_force_compression,
        benchmark,
        create_config,
        visualization,
    )
    current_row = 0
    print_menu(current_row)
    while True:
        key = readchar.readkey()
        if key == readchar.key.UP and current_row > 0:
            current_row -= 1
        elif key == readchar.key.DOWN and current_row < len(handlers) - 1:
            current_row += 1
        elif key == "\r" or key == "\n":
            # Move the cursor up one line and erase it before the sub-menu draws.
            sys.stdout.write("\033[F")
            sys.stdout.write("\033[K")
            handlers[current_row]()
        elif key.lower() == "q":
            break

        # Move the cursor back up before redrawing the menu.
        sys.stdout.write("\033[{}A".format(8))
        print_menu(current_row)


def _run_command(args, command_parsers):
    """Dispatches a parsed command line to the matching *_param function."""
    if args.command == "manual":
        select_compression_param(
            args.algorithm_name, args.input_filename, args.output_filename
        )
    elif args.command == "csm":
        model_compression_param(args.mode, args.input_filename, args.output_filename)
    elif args.command == "brute_force":
        brute_force_param(
            args.input_filename, args.out_folder, args.delete_except_minimum
        )
    elif args.command == "benchmark":
        benchmark_param(
            args.algorithm_names,
            args.benchmark_type,
            args.input_filename,
            args.output_filename,
            args.output_to_file,
            args.output_plots,
        )
    elif args.command == "config_creation":
        config = create_config_param(
            args.name,
            args.executable_path,
            args.input_file_param,
            args.output_file_param,
            args.compression_params,
            args.decompression_params,
            args.extension,
        )
        # The created dictionary used to be discarded; persist it the same way
        # save_config_to_file does, into the folder the loaders read from.
        import json
        import os

        configs_folder = "compression_configs"
        os.makedirs(configs_folder, exist_ok=True)
        config_path = os.path.join(configs_folder, f"{config['name']}.json")
        with open(config_path, "w", encoding="utf-8") as config_file:
            json.dump(config, config_file, indent=4)
        print(f"Configurations saved to {config_path}")
    elif args.command == "visualization":
        visualization_param(args.file_path)
    elif args.command == "help":
        for subparser in command_parsers:
            print("\n" + subparser.prog)
            subparser.print_help()


def main():
    """
    The main entry point for the XCompress tool.

    This function sets up the command-line argument parser and handles user input through either command-line arguments or an interactive menu.
    - For command-line arguments, it parses the arguments and calls the appropriate function based on the specified command.
      The ``config_creation`` command writes the created configuration to ``compression_configs/<name>.json``,
      and the ``help`` command prints the help text of every other command.
    - For interactive mode (no command given), it displays a menu for the user to select various functionalities such as selecting a compression algorithm, benchmarking, creating configurations, and visualizing results.
    """
    parser, command_parsers = _build_parser()
    args = parser.parse_args()

    if args.command is None:
        _run_interactive_menu()
    else:
        _run_command(args, command_parsers)

print_menu(selected_row)

Prints the main menu for the XCompress tool.

Parameters:

Name Type Description Default
selected_row int

The index of the currently selected menu item to highlight.

required
Source code in xcompress/main.py
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
def print_menu(selected_row):
    """
    Clears the screen and renders the XCompress main menu.

    The introduction text is printed first, then one line per entry of the
    module-level ``menu``; the entry at ``selected_row`` is drawn in bold green
    behind an arrow. A quit hint closes the menu.

    Args:
        selected_row (int): The index of the currently selected menu item to highlight.
    """
    clear_screen()

    intro_lines = (
        "\n\033[1mXCompress\033[0m is a tool for selecting the best text compression algorithm for a given input.",
        "You can choose any algorithm you want, determine the best compression or fastest compression algorithm with CSM,",
        "and select the best compression algorithm with brute force.\n",
        "Please make a selection:\n",
    )
    for line in intro_lines:
        print(line)

    for position, entry in enumerate(menu):
        if position != selected_row:
            print("   ", entry)
            continue
        print("\033[1;32m->\033[0m", "\033[1;32m", entry, "\033[0m")

    print("Press q to quit")

print_menu(options, selected_row, config_count)

Displays a menu for selecting a compression algorithm from a list of available configurations.

Parameters:

Name Type Description Default
options list of dict

List of available compression configurations. Each configuration is a dictionary with a "name" key.

required
selected_row int

The index of the currently selected menu item to highlight.

required
config_count int

The total number of configuration files found.

required
Source code in xcompress/select_compression.py
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
def print_menu(options, selected_row, config_count):
    """
    Clears the screen and renders the menu used to pick a compression algorithm.

    The first row is always labelled "Back to the menu"; every other row shows
    the "name" of the corresponding configuration. The reported number of
    configuration files is ``config_count - 1``.

    Args:
        options (list of dict): List of available compression configurations. Each configuration is a dictionary with a "name" key.
        selected_row (int): The index of the currently selected menu item to highlight.
        config_count (int): The total number of entries in ``options``.
    """
    back_label = "Back to the menu"

    clear_screen()
    print(f"{config_count - 1} configuration file(s) found.\n")
    print("Select Compression Algorithm:")

    for position, entry in enumerate(options):
        is_selected = position == selected_row

        if position == 0:
            # Row 0 is the "back" entry; its dictionary is never read.
            if is_selected:
                print("\033[1;32m->\033[0m", "\033[1;32m", back_label, "\033[0m")
            else:
                print("\033[32m   ", back_label, "\033[0m")
            continue

        if is_selected:
            print("\033[1;32m->\033[0m", "\033[1m", entry["name"], "\033[0m")
        else:
            print("   ", entry["name"])

select_compression()

Allows the user to select a compression algorithm from a menu and apply it to a specified file.

The function loads available compression configurations from a folder, displays them in a menu, and allows the user to navigate and select one. After selection, it prompts the user for input and output filenames and performs the compression.

Source code in xcompress/select_compression.py
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
def select_compression():
    """
    Runs the interactive menu for choosing a compression configuration and compressing a file.

    On every iteration the configurations are reloaded from the "compression_configs" folder
    and the menu is redrawn. UP/DOWN move the highlighted row within the list bounds.
    ENTER on row 0 returns to the caller; ENTER on any other row asks for an input and an
    (optional) output filename, hands them to ``select_compression_param`` together with the
    chosen configuration name, waits for the user, and returns. Pressing "q" (any case) also
    leaves the menu.
    """
    cursor = 0
    while True:
        available = load_configs("compression_configs")
        print_menu(available, cursor, len(available))
        pressed = readchar.readkey()

        if pressed == readchar.key.UP and cursor > 0:
            cursor -= 1
        elif pressed == readchar.key.DOWN and cursor < len(available) - 1:
            cursor += 1
        elif pressed in ("\r", "\n"):
            # Row 0 is the "back" entry: nothing to compress, just leave.
            if cursor != 0:
                source_name = input("\033[1mEnter input filename: \033[0m")
                target_name = input(
                    "\033[1mEnter output filename (optional): \033[0m"
                )
                chosen_name = available[cursor]["name"]
                select_compression_param(chosen_name, source_name, target_name)
                input("Press any key to return to menu")
            return
        elif pressed.lower() == "q":
            return

select_compression_param(selected_config_name, filename, output_filename)

Selects a specific compression configuration and applies it to the input file.

Parameters:

Name Type Description Default
selected_config_name str

The name of the selected compression configuration.

required
filename str

The path to the file to compress.

required
output_filename str

The path to save the compressed file (optional; defaults to input filename with an appropriate extension if not provided).

required
Source code in xcompress/select_compression.py
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
def select_compression_param(selected_config_name, filename, output_filename):
    """
    Selects a specific compression configuration and applies it to the input file.

    Any failure (missing input file, unknown configuration name, error raised while
    compressing) is reported on stdout instead of being raised.

    Args:
        selected_config_name (str): The name of the selected compression configuration.
        filename (str): The path to the file to compress.
        output_filename (str): The path to save the compressed file (optional; passed through
            unchanged to ``compress_with_config``).

    Returns:
        None on success; the tuple ``("", 0)`` when an error was caught and printed.
    """
    try:
        if not os.path.exists(filename):
            raise FileNotFoundError(f"File not found: {filename}")

        configs = load_configs("compression_configs")
        selected_config = next(
            (config for config in configs if config["name"] == selected_config_name),
            None,
        )
        if selected_config is None:
            # Report the real cause instead of a bare "list index out of range".
            raise ValueError(
                f"Unknown compression configuration: {selected_config_name}"
            )

        # Clear BEFORE printing the summary; clearing afterwards wiped it immediately.
        clear_screen()
        print("\033[1mSelected compression algorithm:\033[0m", selected_config)
        print("\033[1mInput filename:\033[0m", filename)
        print("\033[1mOutput filename:\033[0m", output_filename)

        result = compress_with_config(selected_config, filename, output_filename)
        # batch_compress unpacks this call as (output_file, compression_time_ns);
        # show only the path, but tolerate a bare path as well.
        output = result[0] if isinstance(result, tuple) else result
        print(f"Compression completed successfully. Filename is \033[1m{output}\033[0m")
    except Exception as e:
        print(f"Error during compression: {e}")
        return "", 0

bin_usc(usc_value)

Bins the unique symbol count (USC) value down to a multiple of 50. For USC to be usable as a feature when training the AI model, it must take a limited number of distinct values; if the raw USC is used, accuracy decreases. To prevent this, USC values are divided into groups of 50.

Parameters:

Name Type Description Default
usc_value int

The unique symbol count value.

required

Returns:

Name Type Description
int

The binned unique symbol count.

Source code in xcompress/util.py
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
def bin_usc(usc_value):
    """
    Rounds a unique symbol count (USC) down to the nearest multiple of 50.

    The raw USC takes too many distinct values to work well as a model feature,
    so it is collapsed into buckets that are 50 wide.

    Args:
        usc_value (int): The unique symbol count value.

    Returns:
        int: The lower edge of the 50-wide bucket that contains ``usc_value``.
    """
    width = 50
    bucket_index = usc_value // width
    return int(bucket_index * width)

clear_screen()

Clears the terminal screen.

The method used to clear the screen depends on the operating system: - Windows: Uses 'cls' command. - Other systems (e.g., Unix-based): Uses 'clear' command.

Source code in xcompress/util.py
 6
 7
 8
 9
10
11
12
13
14
15
16
17
def clear_screen():
    """
    Clears the terminal screen.

    The shell command is chosen from the operating system name:
    - Windows: 'cls'
    - anything else (e.g., Unix-based): 'clear'
    """
    command = "cls" if platform.system() == "Windows" else "clear"
    os.system(command)

count_unique_symbols(file_path)

Counts the number of unique symbols in a text file.

Parameters:

Name Type Description Default
file_path str

The path to the text file.

required

Returns:

Name Type Description
int

The number of unique symbols in the file.

Source code in xcompress/util.py
57
58
59
60
61
62
63
64
65
66
67
68
69
70
def count_unique_symbols(file_path):
    """
    Counts how many distinct characters occur in a text file.

    The file is opened in text mode with the default encoding, so newline
    characters count as symbols too.

    Args:
        file_path (str): The path to the text file.

    Returns:
        int: The number of distinct characters found in the file.
    """
    seen = set()
    with open(file_path, "r") as handle:
        # Accumulate characters line by line; each line keeps its newline.
        for line in handle:
            seen.update(line)
    return len(seen)

get_config(configs, selected_algorithm)

Retrieves a configuration dictionary based on the selected algorithm name.

Parameters:

Name Type Description Default
configs list of dict

List of configuration dictionaries.

required
selected_algorithm str

The name of the desired algorithm to find.

required

Returns:

Type Description

dict or None: The configuration dictionary for the selected algorithm, or None if not found.

Source code in xcompress/util.py
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
def get_config(configs, selected_algorithm):
    """
    Looks up the first configuration whose "name" equals the requested algorithm.

    Args:
        configs (list of dict): Configuration dictionaries, each with a "name" key.
        selected_algorithm (str): The name of the desired algorithm to find.

    Returns:
        dict or None: The first matching configuration, or None when no name matches.
    """
    matches = (entry for entry in configs if entry["name"] == selected_algorithm)
    return next(matches, None)

get_file_size(file_path)

Gets the size of a file in bytes.

Parameters:

Name Type Description Default
file_path str

The path to the file.

required

Returns:

Name Type Description
int

The size of the file in bytes.

Source code in xcompress/util.py
113
114
115
116
117
118
119
120
121
122
123
124
def get_file_size(file_path):
    """
    Reports how many bytes a file occupies.

    Args:
        file_path (str): The path to the file.

    Returns:
        int: The file size in bytes, as reported by ``os.path.getsize``.
    """
    return os.path.getsize(file_path)

load_configs(folder_path)

Loads configuration files from a specified folder.

Parameters:

Name Type Description Default
folder_path str

The path to the folder containing configuration files.

required

Returns:

Type Description

list of dict: A list of configuration dictionaries loaded from JSON files.

Source code in xcompress/util.py
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
def load_configs(folder_path):
    """
    Loads configuration files from a specified folder.

    Only files whose name ends with ".json" are read; everything else in the
    folder is ignored.

    Args:
        folder_path (str): The folder containing configuration files. It is joined onto the
            directory of this module, so a relative path is resolved next to the module
            and an absolute path is used as given.

    Returns:
        list of dict: The parsed content of each JSON file, in the order returned by
        ``os.listdir`` (which is not guaranteed to be sorted).
    """
    base_dir = os.path.join(os.path.dirname(__file__), folder_path)
    configs = []
    for filename in os.listdir(base_dir):
        if not filename.endswith(".json"):
            continue
        # JSON is UTF-8 by specification; do not depend on the locale's default encoding.
        with open(os.path.join(base_dir, filename), "r", encoding="utf-8") as file:
            configs.append(json.load(file))
    return configs

round_to_class(file_size_bytes)

Rounds the file size to a specific class based on byte ranges.

Parameters:

Name Type Description Default
file_size_bytes int

The size of the file in bytes.

required

Returns:

Name Type Description
str

A string representing the rounded size class ("1kb", "10kb", "100kb", "1mb", "10mb", or "100mb").

Source code in xcompress/util.py
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
def round_to_class(file_size_bytes):
    """
    Maps a file size in bytes onto one of six named size classes.

    Args:
        file_size_bytes (int): The size of the file in bytes.

    Returns:
        str: The label of the first class whose upper limit exceeds the size
        ("1kb", "10kb", "100kb", "1mb" or "10mb"), or "100mb" for anything larger.
    """
    # Exclusive upper limits, checked in ascending order. The original code tagged the
    # first two limits "For 1025 issue" and "For 10241 issue".
    size_classes = (
        (1050, "1kb"),
        (10500, "10kb"),
        (105000, "100kb"),
        (1050000, "1mb"),
        (10500000, "10mb"),
    )
    for upper_limit, label in size_classes:
        if file_size_bytes < upper_limit:
            return label
    return "100mb"

read_results_from_file(file_path)

Reads the results in a JSON file.

Parameters:

Name Type Description Default
file_path str

Path to JSON file.

required

Returns:

Type Description

list of dict: The list of results loaded from the JSON file, or an empty list if the file does not exist.

Source code in xcompress/visualization.py
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
def read_results_from_file(file_path):
    """
    Loads benchmark results from a single JSON file.

    Args:
        file_path (str): Path to JSON file.

    Returns:
        list of dict: The entries stored in the file, copied into a new list. When the
        file does not exist, a "File not found" message is printed and an empty list
        is returned.
    """
    collected = []
    if not os.path.exists(file_path):
        print(f"File not found: {file_path}")
        return collected

    with open(file_path, "r") as handle:
        collected.extend(json.load(handle))
    return collected

visualization()

Main function to handle the visualization process: it prompts the user for a JSON file path, reads the results from that file, and generates and displays a bar chart visualizing the benchmark results.

Source code in xcompress/visualization.py
118
119
120
121
122
123
124
125
126
127
128
129
130
131
def visualization():
    """
    Interactive entry point for charting benchmark results:
    - Clears the screen and asks the user for a JSON file path.
    - Passes a non-empty answer on to ``visualization_param``.
    - Prints a notice and does nothing else when the answer is empty.
    """
    clear_screen()
    chosen_path = input("Enter the JSON file path")

    if not chosen_path:
        print("No file paths provided.")
        return

    visualization_param(chosen_path)

visualization_param(file_path)

Generates and displays a bar chart visualizing compression benchmark results.

Parameters:

Name Type Description Default
file_path str

Path to JSON file containing metrics like compressed size, compression time, and optionally decompression time.

required
Source code in xcompress/visualization.py
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
def visualization_param(file_path):
    """
    Generates and displays a grouped bar chart visualizing compression benchmark results.

    One bar is drawn per result record for each metric. The x labels are taken from the
    records in file order so every bar stays aligned with its own values. A decompression
    trace (and its dropdown entry) is only added when at least one record carries
    "decompression_time_ns"; records without it leave a gap in that trace.

    Args:
        file_path (str): Path to JSON file containing metrics like compressed size,
                                compression time, and optionally decompression time.

    Raises:
        KeyError: If a record lacks "name", "compressed_size" or "compression_time_ns".
    """

    results = read_results_from_file(file_path)

    # Keep record order: deduplicating through a set scrambled the labels and could make
    # x shorter than y, so bars ended up under the wrong algorithm name.
    algorithms = [result["name"] for result in results]

    # (dropdown label, trace) pairs in the order they are added to the figure.
    labelled_traces = [
        (
            "Compressed Size",
            go.Bar(
                x=algorithms,
                y=[result["compressed_size"] for result in results],
                name="Compressed Size",
                marker_color="skyblue",
            ),
        ),
        (
            "Compression Time",
            go.Bar(
                x=algorithms,
                y=[result["compression_time_ns"] for result in results],
                name="Compression Time",
                marker_color="lightgreen",
            ),
        ),
    ]

    # Decompression time is optional; use None placeholders so y stays aligned with x.
    if any("decompression_time_ns" in result for result in results):
        labelled_traces.append(
            (
                "Decompression Time",
                go.Bar(
                    x=algorithms,
                    y=[result.get("decompression_time_ns") for result in results],
                    name="Decompression Time",
                    marker_color="salmon",
                ),
            )
        )

    fig = go.Figure()
    for _, trace in labelled_traces:
        fig.add_trace(trace)

    # Build the dropdown from the traces that actually exist, so each "visible" list has
    # exactly one flag per trace and no button refers to a missing trace.
    trace_count = len(labelled_traces)
    buttons = [
        {
            "label": label,
            "method": "update",
            "args": [
                {"visible": [position == shown for position in range(trace_count)]}
            ],
        }
        for shown, (label, _) in enumerate(labelled_traces)
    ]
    buttons.append(
        {
            "label": "All",
            "method": "update",
            "args": [{"visible": [True] * trace_count}],
        }
    )

    fig.update_layout(
        title="Compression Benchmark Results",
        barmode="group",
        xaxis_title="Algorithms",
        yaxis_title="Values",
        legend_title="Metrics",
        updatemenus=[
            {
                "buttons": buttons,
                "direction": "down",
                "showactive": True,
            }
        ],
    )

    fig.show()