This commit is contained in:
Andrew Dinh 2024-06-25 18:49:28 +07:00
parent ee298a70c9
commit 8fdf28033f
3 changed files with 47 additions and 59 deletions

View File

@ -7,21 +7,17 @@ Then, run `python main.py` and navigate to `localhost:8888`.
## Design Considerations and Key Decisions
-
After doing some initial research, I learned there's no standardized format for ECG files.
Therefore, there's nothing special we can do in terms of optimizing our compression algorithm for specific headers, special data structures, etc.
The project would require both a frontend and backend component. The frontend only required a simple web interface to upload the file and display information about the resulting compressed file.
For the backend, I went with Python Flask because it is the Python framework I have had the most experience with.
When a user uploads a file, the backend will save the uploaded file to a specific input folder and immediately compress it into a separate output folder.
Each file would be associated with a unique id (UUIDv4 for its randomness) to prevent filename clashes.
This application could benefit from multithreading if there were enough load and large enough files, since each file can be processed independently, but that is out of scope for this project.
## Compression Algorithm
I chose to go with a simple Huffman Coding algorithm
##
After doing some initial research, I learned there's no single standardized format for ECG files.
So there's nothing special we can do in terms of optimizing our compression algorithm for specific headers, special data structures, etc.
So I figured I should treat the entire file as a sequence of characters that are 24 bits (3 bytes) each.
I chose to go with Huffman coding since it is one of the most common compression techniques.
After compressing, we need information about which code is assigned to which 3 bytes in order to reconstruct the original file.
This requires storing some extra information in the header/front of the compressed file so that we can reliably reconstruct the original file.
Although it isn't explicitly stated as part of the requirements, we should ensure that our compressed file can be decompressed into the original file.
This functionality doesn't necessarily need to be available in the Web UI for users to decompress their file.
I chose to go with a simple Huffman Coding algorithm because it is a tried and tested algorithm. The only difficult part was deciding whether to make each node 3 bytes or 1 byte, but it was much better to go with 1 byte because a single byte has only 256 possible values.

78
main.py
View File

@ -6,7 +6,7 @@ import uuid
import os
import shutil
from flask import Flask, current_app, request, Response, send_file
from flask import Flask, current_app, request, send_file
app = Flask(__name__)
INPUT_DIR = "./input/"
@ -25,12 +25,38 @@ def main():
app.run(host="0.0.0.0", debug=False, port=8888)
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
COMPRESSION
FLASK REST API
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
@app.route("/", methods=["GET"])
def index():
    """Serve the app's static ``index.html`` for GET requests to the site root."""
    landing_page = 'index.html'
    return current_app.send_static_file(landing_page)
@app.route("/api/upload_file", methods=["POST"])
def upload_file():
    """Accept an uploaded file, compress it, and report both sizes.

    Expects a multipart form carrying a ``file`` part.

    Returns:
        A dict with ``file_id``, ``original_size`` and ``compressed_size``
        together with status 200, or ``("File required", 400)`` when no file
        was actually supplied.
    """
    uploaded = request.files.get('file')
    # Browsers submit an empty, nameless part when the form is sent without
    # choosing a file, so a missing key is not the only "no file" case.
    if uploaded is None or not uploaded.filename:
        return "File required", 400

    file_id = save_file(uploaded)
    compression_info = compress(file_id)
    response = {
        "file_id": file_id,
        "original_size": compression_info.input_size,
        "compressed_size": compression_info.output_size
    }
    return response, 200
@app.route("/api/download_file/<file_id>", methods=["GET"])
def download_file(file_id):
    """Send the compressed output stored for FILE_ID.

    Returns:
        The file response, or ``("File not found", 404)`` when FILE_ID does
        not name an existing regular file in the output folder.
    """
    # Resolve against the working directory so the lookup and the send agree
    # on one absolute location instead of depending on how send_file treats
    # relative paths.
    output_path = os.path.abspath(get_output_path(file_id))
    # Unknown ids (or ids such as ".." that resolve to a directory) should be
    # a clean 404 rather than an unhandled exception inside send_file.
    if not os.path.isfile(output_path):
        return "File not found", 404
    return send_file(output_path)
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
FILE I/O
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
# Plain holder for the byte sizes of a file before and after compression.
# NOTE(review): two __init__ signatures are listed here; this looks like the
# removed (with file_id) and added (without file_id) lines of the commit shown
# without +/- markers — confirm against main.py before relying on either.
class CompressionInfo():
def __init__(self, file_id: str, input_size: int, output_size: int):
self.file_id = file_id
def __init__(self, input_size: int, output_size: int):
self.input_size = input_size
self.output_size = output_size
@ -40,22 +66,17 @@ def save_file(file) -> str:
file.save(get_input_path(file_id))
return file_id
# NOTE(review): this span appears to interleave the removed and the added
# lines of the commit without +/- markers. Read that way, the added version of
# compress() also measures both files and returns a CompressionInfo, taking
# over what get_compression_info() did — confirm against main.py.
def compress(file_id: str):
"""Compress file corresponding to FILE_ID"""
def compress(file_id: str) -> CompressionInfo:
"""Compress file corresponding to FILE_ID and return related information"""
input_path = get_input_path(file_id)
output_path = get_output_path(file_id)
# compress_file is defined outside this view; presumably it reads input_path
# and writes the compressed result to output_path.
compress_file(input_path, output_path)
def get_compression_info(file_id: str) -> CompressionInfo:
"""Retrieve information about size of input and output file for FILE_ID"""
input_path = get_input_path(file_id)
output_path = get_output_path(file_id)
# Sizes are taken from the files on disk, in bytes.
input_size = os.path.getsize(input_path)
output_size = os.path.getsize(output_path)
return CompressionInfo(file_id, input_size, output_size)
return CompressionInfo(input_size, output_size)
def cleanup(file_id: str):
"""Clean up any files created relating to FILE_ID"""
@ -65,41 +86,12 @@ def cleanup(file_id: str):
os.remove(input_path)
os.remove(output_path)
def get_input_path(file_id) -> str:
    """Return the location inside INPUT_DIR where FILE_ID's upload is stored."""
    # Plain string formatting (not os.path.join) keeps the id below INPUT_DIR
    # even if it starts with a path separator.
    return "{}/{}".format(INPUT_DIR, file_id)
def get_output_path(file_id) -> str:
    """Return the location inside OUTPUT_DIR where FILE_ID's compressed file is stored."""
    # Plain string formatting (not os.path.join) keeps the id below OUTPUT_DIR
    # even if it starts with a path separator.
    return "{}/{}".format(OUTPUT_DIR, file_id)
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
FLASK REST API
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
# Serves the static index.html for GET requests to the site root.
# NOTE(review): an identical index() route appears earlier on this page; this
# copy looks like the removed (pre-move) lines of the commit — confirm.
@app.route("/", methods=["GET"])
def index():
return current_app.send_static_file('index.html')
# Upload endpoint: saves the posted file, compresses it, then reports the
# original and compressed sizes together with the generated file id.
# NOTE(review): the HTML form on this page switches its action from
# /api/accept_file to /api/upload_file, so this looks like the removed
# (pre-rename) handler — confirm before treating it as live code.
@app.route("/api/accept_file", methods=["POST"])
def accept_file():
# Reject requests whose multipart body has no "file" part at all.
if 'file' not in request.files:
return "File required", 400
file_id = save_file(request.files['file'])
compress(file_id)
compression_info = get_compression_info(file_id)
response = {
"file_id": compression_info.file_id,
"original_size": compression_info.input_size,
"compressed_size": compression_info.output_size
}
return response, 200
# Download endpoint: sends the file found at the output path for file_id.
# NOTE(review): an identical download_file() route appears earlier on this
# page; this copy looks like the removed (pre-move) lines of the commit —
# confirm.
@app.route("/api/download_file/<file_id>", methods=["GET"])
def download_file(file_id):
return send_file(get_output_path(file_id))
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
HUFFMAN CODING
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
@ -119,8 +111,8 @@ class TreeNode:
def get_codes(input_file: str) -> dict[str, str]:
"""
Given INPUT_FILE, read its contents and return a dictionary containing a code for each byte
Uses Huffman coding
Given INPUT_FILE, read its contents and return a dictionary containing a
code for each byte. Uses Huffman coding
"""
# Get frequency of every byte in the file
char_count = {}

View File

@ -18,7 +18,7 @@
<h1>Compress Files</h1>
<p>Compress files using Huffman Coding</p>
<form action="/api/accept_file" method="POST" enctype="multipart/form-data">
<form action="/api/upload_file" method="POST" enctype="multipart/form-data">
<div>
<label for="file">Choose file to upload</label>
<input type="file" id="file" name="file" />