diff --git a/README.md b/README.md index 895214c..5547206 100644 --- a/README.md +++ b/README.md @@ -7,21 +7,17 @@ Then, run `python main.py` and navigate to `localhost:8888`. ## Design Considerations and Key Decisions -- +After doing some initial research, I learned there's no standardized format for ECG files. +Therefore, there's nothing special we can do in terms of optimizing our compression algorithm for specific headers, special data structures, etc. + +The project would require both a frontend and backend component. The frontend only required a simple web interface to upload the file and display information about the resulting compressed file. + +For the backend, I went with Python Flask because it is the Python framework I have had the most experience with. +When a user uploads a file, the backend will save the uploaded file to a specific input folder and immediately compress it into a separate output folder. +Each file would be associated with a unique id (UUIDv4 for its randomness) to prevent filename clashes. + +This application could benefit from multithreading if there was enough load and big enough files as each file can be independently processed, but that is out of scope for this project. ## Compression Algorithm -I chose to go with a simple Huffman Coding algorithm - -## - -After doing some initial research, I learned there's no 1 standardized format for ECG files. -So there's nothing special we can do in terms of optimizing our compression algorithm for specific headers, special data structures, etc. -So I figured I should treat the entire file with characters being 24 bits (3 bytes) each. - -I chose to go with Huffman coding since it was the most common file format. -After compressing, we need information about which code is assigned to which 3 bytes in order to reconstruct the original file. -This requires storing some extra information in the header/front of the compressed file so that we can reliably reconstruct the original file. 
- -Although it isn't explicity stated as part of the requirements, we should ensure that our compressed file can be decompressed into the original file. -This functionality doesn't necessarily need to be available in the Web UI for users to decompress their file. \ No newline at end of file +I chose to go with a simple Huffman Coding algorithm because it is a tried and tested algorithm. The only difficult part was deciding whether to make each node 3-bytes or 1-byte, but it was much better to go with 1-byte because it only has 256 possible combinations. \ No newline at end of file diff --git a/main.py b/main.py index de31f7f..7651899 100644 --- a/main.py +++ b/main.py @@ -6,7 +6,7 @@ import uuid import os import shutil -from flask import Flask, current_app, request, Response, send_file +from flask import Flask, current_app, request, send_file app = Flask(__name__) INPUT_DIR = "./input/" @@ -25,12 +25,38 @@ def main(): app.run(host="0.0.0.0", debug=False, port=8888) """"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""" -COMPRESSION +FLASK REST API +""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""" + +@app.route("/", methods=["GET"]) +def index(): + return current_app.send_static_file('index.html') + +@app.route("/api/upload_file", methods=["POST"]) +def upload_file(): + if 'file' not in request.files: + return "File required", 400 + + file_id = save_file(request.files['file']) + compression_info = compress(file_id) + + response = { + "file_id": file_id, + "original_size": compression_info.input_size, + "compressed_size": compression_info.output_size + } + return response, 200 + +@app.route("/api/download_file/", methods=["GET"]) +def download_file(file_id): + return send_file(get_output_path(file_id)) + +""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""" +FILE I/O """"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""" class CompressionInfo(): - def __init__(self, 
file_id: str, input_size: int, output_size: int): - self.file_id = file_id + def __init__(self, input_size: int, output_size: int): self.input_size = input_size self.output_size = output_size @@ -40,22 +66,17 @@ def save_file(file) -> str: file.save(get_input_path(file_id)) return file_id -def compress(file_id: str): - """Compress file corresponding to FILE_ID""" +def compress(file_id: str) -> CompressionInfo: + """Compress file corresponding to FILE_ID and return related information""" input_path = get_input_path(file_id) output_path = get_output_path(file_id) compress_file(input_path, output_path) -def get_compression_info(file_id: str) -> CompressionInfo: - """Retrieve information about size of input and output file for FILE_ID""" - input_path = get_input_path(file_id) - output_path = get_output_path(file_id) - input_size = os.path.getsize(input_path) output_size = os.path.getsize(output_path) - return CompressionInfo(file_id, input_size, output_size) + return CompressionInfo(input_size, output_size) def cleanup(file_id: str): """Clean up any files created relating to FILE_ID""" @@ -65,41 +86,12 @@ def cleanup(file_id: str): os.remove(input_path) os.remove(output_path) - def get_input_path(file_id) -> str: return f"{INPUT_DIR}/{file_id}" def get_output_path(file_id) -> str: return f"{OUTPUT_DIR}/{file_id}" -""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""" -FLASK REST API -""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""" - -@app.route("/", methods=["GET"]) -def index(): - return current_app.send_static_file('index.html') - -@app.route("/api/accept_file", methods=["POST"]) -def accept_file(): - if 'file' not in request.files: - return "File required", 400 - - file_id = save_file(request.files['file']) - compress(file_id) - compression_info = get_compression_info(file_id) - - response = { - "file_id": compression_info.file_id, - "original_size": compression_info.input_size, - "compressed_size": 
compression_info.output_size - } - return response, 200 - -@app.route("/api/download_file/", methods=["GET"]) -def download_file(file_id): - return send_file(get_output_path(file_id)) - """"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""" HUFFMAN CODING """"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""" @@ -119,8 +111,8 @@ class TreeNode: def get_codes(input_file: str) -> dict[str, str]: """ - Given INPUT_FILE, read its contents and return a dictionary containing a code for each byte - Uses Huffman coding + Given INPUT_FILE, read its contents and return a dictionary containing a + code for each byte. Uses Huffman coding """ # Get frequency of every byte in the file char_count = {} diff --git a/static/index.html b/static/index.html index 0db9837..cd5d641 100644 --- a/static/index.html +++ b/static/index.html @@ -18,7 +18,7 @@

Compress Files

Compress files using Huffman Coding

-
+