From ee298a70c94e52240c209dc844ccaf504a6ec516 Mon Sep 17 00:00:00 2001 From: Andrew Dinh Date: Tue, 25 Jun 2024 18:24:27 +0700 Subject: [PATCH] First commit --- README.md | 27 +++++++ main.py | 196 ++++++++++++++++++++++++++++++++++++++++++++++ requirements.txt | 1 + static/index.html | 81 +++++++++++++++++++ 4 files changed, 305 insertions(+) create mode 100644 README.md create mode 100644 main.py create mode 100644 requirements.txt create mode 100644 static/index.html diff --git a/README.md b/README.md new file mode 100644 index 0000000..895214c --- /dev/null +++ b/README.md @@ -0,0 +1,27 @@ +# Compress ECG byte + +## Instructions + +After downloading the source code, ensure you have Python and Python Flask installed. +Then, run `python main.py` and navigate to `localhost:8888`. + +## Design Considerations and Key Decisions + +- + +## Compression Algorithm + +I chose to go with a simple Huffman Coding algorithm + +## + +After doing some initial research, I learned there's no 1 standardized format for ECG files. +So there's nothing special we can do in terms of optimizing our compression algorithm for specific headers, special data structures, etc. +So I figured I should treat the entire file with characters being 24 bits (3 bytes) each. + +I chose to go with Huffman coding since it was the most common file format. +After compressing, we need information about which code is assigned to which 3 bytes in order to reconstruct the original file. +This requires storing some extra information in the header/front of the compressed file so that we can reliably reconstruct the original file. + +Although it isn't explicity stated as part of the requirements, we should ensure that our compressed file can be decompressed into the original file. +This functionality doesn't necessarily need to be available in the Web UI for users to decompress their file. 
# --- patch metadata carried over from the surrounding diff ---
# \ No newline at end of file
# diff --git a/main.py b/main.py
# new file mode 100644
# index 0000000..de31f7f
# --- /dev/null
# +++ b/main.py
#!/usr/bin/env python3
"""Flask web service that compresses uploaded files with Huffman coding."""

import heapq

import uuid
import os
import shutil
from collections import Counter

from flask import Flask, current_app, request, Response, send_file

app = Flask(__name__)

INPUT_DIR = "./input/"
OUTPUT_DIR = "./output/"

# Files are read in blocks of this many bytes rather than one byte at a time.
_CHUNK_SIZE = 64 * 1024


def init():
    """Recreate INPUT_DIR and OUTPUT_DIR as empty directories."""
    for directory in (INPUT_DIR, OUTPUT_DIR):
        if os.path.exists(directory):
            shutil.rmtree(directory)
    for directory in (INPUT_DIR, OUTPUT_DIR):
        os.mkdir(directory)


def main():
    """Reset the working directories and start the web server on port 8888."""
    init()
    app.run(host="0.0.0.0", debug=False, port=8888)


# ---------------------------------------------------------------------------
# COMPRESSION
# ---------------------------------------------------------------------------

class CompressionInfo():
    """Sizes, in bytes, of the uploaded file and of its compressed output."""

    def __init__(self, file_id: str, input_size: int, output_size: int):
        self.file_id = file_id
        self.input_size = input_size
        self.output_size = output_size


def save_file(file) -> str:
    """Save FILE under a freshly generated ID and return that ID as a string.

    FILE only needs a ``save(path)`` method (as the objects in
    ``request.files`` have).
    """
    # Convert to str so the return value matches the annotation and the ID
    # embedded in file paths and JSON responses is always the same text.
    file_id = str(uuid.uuid4())
    file.save(get_input_path(file_id))
    return file_id


def compress(file_id: str):
    """Compress file corresponding to FILE_ID"""
    compress_file(get_input_path(file_id), get_output_path(file_id))


def get_compression_info(file_id: str) -> CompressionInfo:
    """Retrieve information about size of input and output file for FILE_ID"""
    input_size = os.path.getsize(get_input_path(file_id))
    output_size = os.path.getsize(get_output_path(file_id))
    return CompressionInfo(file_id, input_size, output_size)


def cleanup(file_id: str):
    """Clean up any files created relating to FILE_ID.

    A file that is already gone is skipped, so one missing file does not
    prevent the other from being removed.
    """
    for path in (get_input_path(file_id), get_output_path(file_id)):
        try:
            os.remove(path)
        except FileNotFoundError:
            pass


def get_input_path(file_id) -> str:
    """Return the path of the uploaded file stored for FILE_ID."""
    return os.path.join(INPUT_DIR, str(file_id))


def get_output_path(file_id) -> str:
    """Return the path of the compressed file stored for FILE_ID."""
    return os.path.join(OUTPUT_DIR, str(file_id))


# ---------------------------------------------------------------------------
# FLASK REST API
# ---------------------------------------------------------------------------

@app.route("/", methods=["GET"])
def index():
    """Serve the single-page web UI."""
    return current_app.send_static_file('index.html')


@app.route("/api/accept_file", methods=["POST"])
def accept_file():
    """Store the uploaded 'file' form field, compress it and report sizes."""
    if 'file' not in request.files:
        return "File required", 400

    file_id = save_file(request.files['file'])
    compress(file_id)
    compression_info = get_compression_info(file_id)

    response = {
        "file_id": compression_info.file_id,
        "original_size": compression_info.input_size,
        "compressed_size": compression_info.output_size
    }
    return response, 200


# The view takes FILE_ID, so the rule must declare it as a URL variable.
# The uuid converter additionally rejects anything that is not a UUID, which
# keeps client-supplied text such as ".." out of the file path.
@app.route("/api/download_file/<uuid:file_id>", methods=["GET"])
def download_file(file_id):
    """Send the compressed file for FILE_ID, or 404 if there is none."""
    # Use an absolute path so the file served is the one compress() wrote
    # relative to the working directory, however send_file treats relative
    # paths.
    output_path = os.path.abspath(get_output_path(file_id))
    if not os.path.isfile(output_path):
        return "File not found", 404
    return send_file(output_path)


# ---------------------------------------------------------------------------
# HUFFMAN CODING
# ---------------------------------------------------------------------------

class TreeNode:
    """Node of the Huffman tree; ordered by COUNT so heapq can sort nodes.

    Leaves carry a one-byte ``bytes`` value in BYTE; internal nodes carry
    ``None`` and the combined count of their two children.
    """

    def __init__(self, byte, count, left = None, right = None):
        self.byte = byte
        self.count = count
        self.left = left
        self.right = right

    def __lt__(self, nxt):
        return self.count < nxt.count

    def __repr__(self):
        return f"{self.byte} [{self.left}_{self.right}]"


def get_codes(input_file: str) -> dict[bytes, str]:
    """
    Given INPUT_FILE, read its contents and return a dictionary containing a code for each byte
    Uses Huffman coding

    Keys are one-byte ``bytes`` objects, values are strings of '0'/'1'.
    """
    # Get frequency of every byte in the file, one block at a time
    byte_count = Counter()
    with open(input_file, "rb") as file:
        while chunk := file.read(_CHUNK_SIZE):
            byte_count.update(chunk)  # iterating bytes yields ints 0..255

    # Create the initial heap. All 256 byte values get a leaf, even with a
    # count of 0, so the tree always has more than one leaf and no symbol
    # can end up with an empty code.
    char_queue = []
    for value in range(256):
        heapq.heappush(char_queue, TreeNode(bytes([value]), byte_count[value]))

    # Create the tree by repeatedly merging the two least frequent nodes
    while len(char_queue) > 1:
        left = heapq.heappop(char_queue)
        right = heapq.heappop(char_queue)
        heapq.heappush(
            char_queue,
            TreeNode(None, left.count + right.count, left, right),
        )

    # Walk the tree: going left appends '0', going right appends '1'
    codes = {}  # {byte: code}
    pending = [(char_queue[0], '')]  # (TreeNode, code)
    while pending:
        node, code = pending.pop()

        if node.left is None and node.right is None:
            codes[node.byte] = code
            continue

        if node.left is not None:
            pending.append((node.left, code + '0'))
        if node.right is not None:
            pending.append((node.right, code + '1'))
    return codes


def to_bytes(data):
    """
    Helper function to convert DATA to valid bytes

    DATA is a string of '0'/'1' characters. If its length is not a multiple
    of 8 the last byte is padded with zero bits on the right, so the real
    bits keep their position in the stream.
    """
    if not data:
        return b""
    padded = data + "0" * (-len(data) % 8)
    # An explicit byte length keeps leading zero bits from being dropped.
    return int(padded, 2).to_bytes(len(padded) // 8, "big")


def write_compressed_file(input_file: str, output_file: str, codes: dict[bytes, str]):
    """
    Compress contents of INPUT_FILE to OUTPUT_FILE using CODES

    Raises KeyError if INPUT_FILE contains a byte that has no entry in CODES.

    NOTE: only the encoded bits are written; neither CODES nor the number of
    padding bits is stored in OUTPUT_FILE.
    """
    # Iterating a bytes object yields ints, so index the codes by int once.
    code_for = {symbol[0]: code for symbol, code in codes.items()}

    leftover = ""  # bits that do not yet fill a whole byte
    with open(input_file, "rb") as source, open(output_file, "wb") as target:
        while chunk := source.read(_CHUNK_SIZE):
            bits = leftover + "".join(code_for[value] for value in chunk)
            whole = len(bits) - len(bits) % 8
            target.write(to_bytes(bits[:whole]))
            leftover = bits[whole:]
        # Flush the final partial byte (to_bytes pads it with zero bits).
        target.write(to_bytes(leftover))


def compress_file(input_file: str, output_file: str):
    """
    Compress contents of INPUT_FILE and save to OUTPUT_FILE
    """
    codes = get_codes(input_file)
    write_compressed_file(input_file, output_file, codes)


if __name__ == "__main__":
    main()

# --- patch metadata carried over from the surrounding diff ---
# \ No newline at end of file
# diff --git a/requirements.txt b/requirements.txt
# new file mode 100644
# index 0000000..8e2bba2
# --- /dev/null
# +++ b/requirements.txt
# @@ -0,0 +1 @@
# +Flask~=2.3.3
# \ No newline at end of file
# diff --git a/static/index.html b/static/index.html
# new file mode 100644
# index 0000000..0db9837
# --- /dev/null
# +++ b/static/index.html
# @@ -0,0 +1,81 @@
# + Compress Files

Compress Files

+

Compress files using Huffman Coding

+ +
+
+ + +
+
+ +
+
+ +
+

Original File Size

+

+

Compressed File Size

+

+

Compression Ratio

+

+ + + +
+ + + + + \ No newline at end of file