From ee298a70c94e52240c209dc844ccaf504a6ec516 Mon Sep 17 00:00:00 2001
From: Andrew Dinh <andrewkdinh@protonmail.com>
Date: Tue, 25 Jun 2024 18:24:27 +0700
Subject: [PATCH] First commit

---
 README.md         |  27 +++++++
 main.py           | 196 ++++++++++++++++++++++++++++++++++++++++++++++
 requirements.txt  |   1 +
 static/index.html |  81 +++++++++++++++++++
 4 files changed, 305 insertions(+)
 create mode 100644 README.md
 create mode 100644 main.py
 create mode 100644 requirements.txt
 create mode 100644 static/index.html

diff --git a/README.md b/README.md
new file mode 100644
index 0000000..895214c
--- /dev/null
+++ b/README.md
@@ -0,0 +1,27 @@
+# Compress ECG byte
+
+## Instructions
+
+After downloading the source code, ensure you have Python 3 and Flask installed (e.g. `pip install -r requirements.txt`).
+Then, run `python main.py` and navigate to `localhost:8888`.
+
+## Design Considerations and Key Decisions
+
+- 
+
+## Compression Algorithm 
+
+I chose to go with a simple Huffman coding algorithm.
+
+## 
+
+After doing some initial research, I learned that there is no single standardized format for ECG files.
+So there's nothing special we can do in terms of optimizing our compression algorithm for specific headers, special data structures, etc.
+So I figured I should treat the entire file with characters being 24 bits (3 bytes) each.
+
+I chose to go with Huffman coding since it is one of the most widely used lossless compression techniques.
+After compressing, we need information about which code is assigned to which 3 bytes in order to reconstruct the original file.
+This requires storing some extra information in the header/front of the compressed file so that we can reliably reconstruct the original file.
+
+Although it isn't explicitly stated as part of the requirements, we should ensure that our compressed file can be decompressed into the original file.
+This functionality doesn't necessarily need to be available in the Web UI for users to decompress their file.
\ No newline at end of file
diff --git a/main.py b/main.py
new file mode 100644
index 0000000..de31f7f
--- /dev/null
+++ b/main.py
@@ -0,0 +1,196 @@
+#!/usr/bin/env python3
+
+import heapq
+
+import uuid
+import os
+import shutil
+
+from flask import Flask, current_app, request, Response, send_file
+app = Flask(__name__)
+
+INPUT_DIR = "./input/"  # uploaded originals are stored here, one file per file ID
+OUTPUT_DIR = "./output/"  # compressed results are stored here, one file per file ID
+
+def init():
+    """Reset the working directories so every run starts with empty input/output folders"""
+    work_dirs = (INPUT_DIR, OUTPUT_DIR)
+    for stale in filter(os.path.exists, work_dirs):
+        shutil.rmtree(stale)
+    for fresh in work_dirs:
+        os.mkdir(fresh)
+
+def main():  # Entry point: reset the working directories, then start serving the app
+    init()
+    app.run(host="0.0.0.0", debug=False, port=8888)  # 0.0.0.0 listens on every interface
+
+"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
+COMPRESSION
+"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
+
+class CompressionInfo():
+    def __init__(self, file_id: str, input_size: int, output_size: int):
+        # Plain record: the file's ID plus the sizes of its input and output files
+        self.file_id, self.input_size = file_id, input_size
+        self.output_size = output_size
+
+def save_file(file) -> str:
+    """Save FILE under a freshly generated UUID4 and return that ID as a string"""
+    file_id = str(uuid.uuid4())  # str() so the value matches the declared return type
+    file.save(get_input_path(file_id))
+    return file_id
+
+def compress(file_id: str):
+    """Run compression for FILE_ID, reading its input file and writing its output file"""
+    # Source and destination locations are both derived from the same ID
+    source = get_input_path(file_id)
+    destination = get_output_path(file_id)
+    compress_file(source, destination)
+
+def get_compression_info(file_id: str) -> CompressionInfo:
+    """Build a CompressionInfo holding the on-disk sizes of FILE_ID's input and output files"""
+    # The input file is measured first, then the output file
+    sizes = [
+        os.path.getsize(path)
+        for path in (get_input_path(file_id), get_output_path(file_id))
+    ]
+    original_size, compressed_size = sizes
+    return CompressionInfo(file_id, original_size, compressed_size)
+
+def cleanup(file_id: str):
+    """Delete both the input and the output file that belong to FILE_ID"""
+    # Both paths are resolved before anything is deleted;
+    # the input file is removed first, then the output file
+    doomed = (get_input_path(file_id), get_output_path(file_id))
+    for path in doomed:
+        os.remove(path)
+
+
+def get_input_path(file_id) -> str:  # Location of the uploaded file for FILE_ID
+    return os.path.join(INPUT_DIR, str(file_id))  # join avoids the doubled "/" after INPUT_DIR
+
+def get_output_path(file_id) -> str:  # Location of the compressed file for FILE_ID
+    return os.path.join(OUTPUT_DIR, str(file_id))  # join avoids the doubled "/" after OUTPUT_DIR
+
+"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
+FLASK REST API
+"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
+
+@app.route("/", methods=["GET"])
+def index():  # Serve the upload page (index.html) from the app's static files
+    return current_app.send_static_file('index.html')
+
+@app.route("/api/accept_file", methods=["POST"])
+def accept_file():
+    """Store the uploaded file, compress it, and return the file ID plus both sizes as JSON"""
+    upload = request.files.get('file')
+    # Browsers send an empty-named part when no file was picked; reject it like a missing part
+    if upload is None or not upload.filename:
+        return "File required", 400
+
+    file_id = save_file(upload)
+    compress(file_id)
+    info = get_compression_info(file_id)
+    # str() guarantees the client receives a plain string ID regardless of what save_file returns
+    response = {"file_id": str(info.file_id), "original_size": info.input_size,
+                "compressed_size": info.output_size}
+    return response, 200
+
+@app.route("/api/download_file/<uuid:file_id>", methods=["GET"])
+def download_file(file_id):  # the uuid converter rejects anything but a well-formed UUID
+    return send_file(get_output_path(file_id))
+
+"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
+HUFFMAN CODING
+"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
+
+class TreeNode:
+    """Huffman tree node: leaves carry a byte, merged nodes carry byte=None and two children"""
+    def __init__(self, byte, count, left = None, right = None):
+        self.byte, self.count = byte, count
+        self.left, self.right = left, right
+
+    def __lt__(self, nxt):
+        # heapq orders nodes through "<", so the lowest count is popped first
+        return nxt.count > self.count
+
+    def __repr__(self):
+        return "{} [{}_{}]".format(self.byte, self.left, self.right)
+
+def get_codes(input_file: str) -> dict[bytes, str]:
+    """
+    Given INPUT_FILE, read its contents and return a dictionary mapping every possible
+    single byte (as a length-1 bytes object) to its Huffman code, a string of '0'/'1'
+    """
+    # Count how often each byte value occurs, reading in chunks instead of byte by byte
+    counts = [0] * 256
+    with open(input_file, "rb") as file:
+        chunk = file.read(65536)
+        while chunk:
+            for value in chunk:
+                counts[value] += 1
+            chunk = file.read(65536)
+
+    # Create the initial heap. All 256 byte values get a leaf, even those with a zero
+    # count, and they are pushed in ascending byte order so that ties between equal
+    # counts are always broken the same way
+    char_queue = []
+    for value, count in enumerate(counts):
+        heapq.heappush(char_queue, TreeNode(value.to_bytes(1, 'big'), count))
+
+    # Create the tree by repeatedly merging the two least frequent nodes
+    while len(char_queue) > 1:
+        left = heapq.heappop(char_queue)
+        right = heapq.heappop(char_queue)
+        heapq.heappush(char_queue, TreeNode(None, left.count + right.count, left, right))
+
+    # Walk the tree from the root: a left edge appends '0', a right edge appends '1'
+    codes = {} # {byte: code}
+    stack = [(char_queue[0], '')] # (TreeNode, code)
+    while stack:
+        node, code = stack.pop()
+
+        # Leaves have no children (merged nodes always have both) and hold a real byte
+        if node.left is None and node.right is None:
+            codes[node.byte] = code
+            continue
+
+        if node.left is not None:
+            stack.append((node.left, code + '0'))
+        if node.right is not None:
+            stack.append((node.right, code + '1'))
+    return codes
+
+def to_bytes(data):
+    """
+    Pack bit string DATA into bytes, zero-padding a final partial byte on the right
+    """
+    b = bytearray()
+    for i in range(0, len(data), 8):
+        b.append(int(data[i:i+8].ljust(8, '0'), 2))  # pad so code bits stay at the front
+    return bytes(b)
+
+def write_compressed_file(input_file: str, output_file: str, codes: dict[bytes, str]):
+    """
+    Compress contents of INPUT_FILE to OUTPUT_FILE using CODES, which maps each
+    length-1 bytes object to its bit string. Raises KeyError for a byte missing from CODES
+    """
+    with open(input_file, "rb") as source:
+        raw = source.read()
+
+    # Iterating bytes yields ints, so each one is re-wrapped as a length-1 bytes key;
+    # join avoids the repeated string copies of building the bit string with +=
+    data_str = "".join(codes[bytes((value,))] for value in raw)
+
+    with open(output_file, "wb") as sink:
+        sink.write(to_bytes(data_str))
+
+def compress_file(input_file: str, output_file: str):
+    """
+    Huffman-compress INPUT_FILE into OUTPUT_FILE. The input is read twice:
+    once to derive the code table and once more to encode it
+    """
+    write_compressed_file(input_file, output_file, get_codes(input_file))
+
+if __name__ == "__main__":  # start the server only when run as a script, not on import
+    main()
\ No newline at end of file
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..8e2bba2
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1 @@
+Flask~=2.3.3
\ No newline at end of file
diff --git a/static/index.html b/static/index.html
new file mode 100644
index 0000000..0db9837
--- /dev/null
+++ b/static/index.html
@@ -0,0 +1,81 @@
+<!DOCTYPE html>
+<html lang="en">
+<head>
+    <meta charset="UTF-8">
+    <meta name="viewport" content="width=device-width, initial-scale=1.0">
+    <title>Compress Files</title>
+
+    <style>
+        /* water.css: https://watercss.kognise.dev/ */
+        :root{--background-body:#202b38;--background:#161f27;--background-alt:#1a242f;--selection:#1c76c5;--text-main:#dbdbdb;--text-bright:#fff;--text-muted:#a9b1ba;--links:#41adff;--focus:rgba(0,150,191,0.67);--border:#526980;--code:#ffbe85;--animation-duration:0.1s;--button-base:#0c151c;--button-hover:#040a0f;--scrollbar-thumb:var(--button-hover);--scrollbar-thumb-hover:#000;--form-placeholder:#a9a9a9;--form-text:#fff;--variable:#d941e2;--highlight:#efdb43;--select-arrow:url("data:image/svg+xml;charset=utf-8,%3Csvg xmlns='http://www.w3.org/2000/svg' height='63' width='117' fill='%23efefef'%3E%3Cpath d='M115 2c-1-2-4-2-5 0L59 53 7 2a4 4 0 00-5 5l54 54 2 2 3-2 54-54c2-1 2-4 0-5z'/%3E%3C/svg%3E")}html{scrollbar-color:#040a0f #202b38;scrollbar-color:var(--scrollbar-thumb) var(--background-body);scrollbar-width:thin}body{font-family:system-ui,-apple-system,BlinkMacSystemFont,Segoe UI,Roboto,Oxygen,Ubuntu,Cantarell,Fira Sans,Droid Sans,Helvetica Neue,Segoe UI Emoji,Apple Color Emoji,Noto Color Emoji,sans-serif;line-height:1.4;max-width:800px;margin:20px auto;padding:0 10px;word-wrap:break-word;color:#dbdbdb;color:var(--text-main);background:#202b38;background:var(--background-body);text-rendering:optimizeLegibility}button,input,textarea{transition:background-color .1s linear,border-color .1s linear,color .1s linear,box-shadow .1s linear,transform .1s ease;transition:background-color var(--animation-duration) linear,border-color var(--animation-duration) linear,color var(--animation-duration) linear,box-shadow var(--animation-duration) linear,transform var(--animation-duration) ease}h1{font-size:2.2em;margin-top:0}h1,h2,h3,h4,h5,h6{margin-bottom:12px;margin-top:24px}h1,h2,h3,h4,h5,h6,strong{color:#fff;color:var(--text-bright)}b,h1,h2,h3,h4,h5,h6,strong,th{font-weight:600}q:after,q:before{content:none}blockquote,q{border-left:4px solid rgba(0,150,191,.67);border-left:4px solid var(--focus);margin:1.5em 0;padding:.5em 
1em;font-style:italic}blockquote>footer{font-style:normal;border:0}address,blockquote cite{font-style:normal}a[href^=mailto\:]:before{content:"📧 "}a[href^=tel\:]:before{content:"📞 "}a[href^=sms\:]:before{content:"💬 "}mark{background-color:#efdb43;background-color:var(--highlight);border-radius:2px;padding:0 2px;color:#000}a>code,a>strong{color:inherit}button,input[type=button],input[type=checkbox],input[type=radio],input[type=range],input[type=reset],input[type=submit],select{cursor:pointer}input,select{display:block}[type=checkbox],[type=radio]{display:initial}button,input,select,textarea{color:#fff;color:var(--form-text);background-color:#161f27;background-color:var(--background);font-family:inherit;font-size:inherit;margin-right:6px;margin-bottom:6px;padding:10px;border:none;border-radius:6px;outline:none}button,input[type=button],input[type=reset],input[type=submit]{background-color:#0c151c;background-color:var(--button-base);padding-right:30px;padding-left:30px}button:hover,input[type=button]:hover,input[type=reset]:hover,input[type=submit]:hover{background:#040a0f;background:var(--button-hover)}input[type=color]{min-height:2rem;padding:8px;cursor:pointer}input[type=checkbox],input[type=radio]{height:1em;width:1em}input[type=radio]{border-radius:100%}input{vertical-align:top}label{vertical-align:middle;margin-bottom:4px;display:inline-block}button,input:not([type=checkbox]):not([type=radio]),input[type=range],select,textarea{-webkit-appearance:none}textarea{display:block;margin-right:0;box-sizing:border-box;resize:vertical}textarea:not([cols]){width:100%}textarea:not([rows]){min-height:40px;height:140px}select{background:#161f27 url("data:image/svg+xml;charset=utf-8,%3Csvg xmlns='http://www.w3.org/2000/svg' height='63' width='117' fill='%23efefef'%3E%3Cpath d='M115 2c-1-2-4-2-5 0L59 53 7 2a4 4 0 00-5 5l54 54 2 2 3-2 54-54c2-1 2-4 0-5z'/%3E%3C/svg%3E") calc(100% - 12px) 50%/12px no-repeat;background:var(--background) var(--select-arrow) calc(100% - 12px) 
50%/12px no-repeat;padding-right:35px}select::-ms-expand{display:none}select[multiple]{padding-right:10px;background-image:none;overflow-y:auto}button:focus,input:focus,select:focus,textarea:focus{box-shadow:0 0 0 2px rgba(0,150,191,.67);box-shadow:0 0 0 2px var(--focus)}button:active,input[type=button]:active,input[type=checkbox]:active,input[type=radio]:active,input[type=range]:active,input[type=reset]:active,input[type=submit]:active{transform:translateY(2px)}button:disabled,input:disabled,select:disabled,textarea:disabled{cursor:not-allowed;opacity:.5}::-moz-placeholder{color:#a9a9a9;color:var(--form-placeholder)}:-ms-input-placeholder{color:#a9a9a9;color:var(--form-placeholder)}::-ms-input-placeholder{color:#a9a9a9;color:var(--form-placeholder)}::placeholder{color:#a9a9a9;color:var(--form-placeholder)}fieldset{border:1px solid rgba(0,150,191,.67);border:1px solid var(--focus);border-radius:6px;margin:0 0 12px;padding:10px}legend{font-size:.9em;font-weight:600}input[type=range]{margin:10px 0;padding:10px 0;background:transparent}input[type=range]:focus{outline:none}input[type=range]::-webkit-slider-runnable-track{width:100%;height:9.5px;-webkit-transition:.2s;transition:.2s;background:#161f27;background:var(--background);border-radius:3px}input[type=range]::-webkit-slider-thumb{box-shadow:0 1px 1px #000,0 0 1px #0d0d0d;height:20px;width:20px;border-radius:50%;background:#526980;background:var(--border);-webkit-appearance:none;margin-top:-7px}input[type=range]:focus::-webkit-slider-runnable-track{background:#161f27;background:var(--background)}input[type=range]::-moz-range-track{width:100%;height:9.5px;-moz-transition:.2s;transition:.2s;background:#161f27;background:var(--background);border-radius:3px}input[type=range]::-moz-range-thumb{box-shadow:1px 1px 1px #000,0 0 1px 
#0d0d0d;height:20px;width:20px;border-radius:50%;background:#526980;background:var(--border)}input[type=range]::-ms-track{width:100%;height:9.5px;background:transparent;border-color:transparent;border-width:16px 0;color:transparent}input[type=range]::-ms-fill-lower,input[type=range]::-ms-fill-upper{background:#161f27;background:var(--background);border:.2px solid #010101;border-radius:3px;box-shadow:1px 1px 1px #000,0 0 1px #0d0d0d}input[type=range]::-ms-thumb{box-shadow:1px 1px 1px #000,0 0 1px #0d0d0d;border:1px solid #000;height:20px;width:20px;border-radius:50%;background:#526980;background:var(--border)}input[type=range]:focus::-ms-fill-lower,input[type=range]:focus::-ms-fill-upper{background:#161f27;background:var(--background)}a{text-decoration:none;color:#41adff;color:var(--links)}a:hover{text-decoration:underline}code,samp,time{background:#161f27;background:var(--background);color:#ffbe85;color:var(--code);padding:2.5px 5px;border-radius:6px;font-size:1em}pre>code{padding:10px;display:block;overflow-x:auto}var{color:#d941e2;color:var(--variable);font-style:normal;font-family:monospace}kbd{background:#161f27;background:var(--background);border:1px solid #526980;border:1px solid var(--border);border-radius:2px;color:#dbdbdb;color:var(--text-main);padding:2px 4px}img,video{max-width:100%;height:auto}hr{border:none;border-top:1px solid #526980;border-top:1px solid var(--border)}table{border-collapse:collapse;margin-bottom:10px;width:100%;table-layout:fixed}table caption,td,th{text-align:left}td,th{padding:6px;vertical-align:top;word-wrap:break-word}thead{border-bottom:1px solid #526980;border-bottom:1px solid var(--border)}tfoot{border-top:1px solid #526980;border-top:1px solid var(--border)}tbody tr:nth-child(2n){background-color:#161f27;background-color:var(--background)}tbody tr:nth-child(2n) button{background-color:#1a242f;background-color:var(--background-alt)}tbody tr:nth-child(2n) 
button:hover{background-color:#202b38;background-color:var(--background-body)}::-webkit-scrollbar{height:10px;width:10px}::-webkit-scrollbar-track{background:#161f27;background:var(--background);border-radius:6px}::-webkit-scrollbar-thumb{background:#040a0f;background:var(--scrollbar-thumb);border-radius:6px}::-webkit-scrollbar-thumb:hover{background:#000;background:var(--scrollbar-thumb-hover)}::-moz-selection{background-color:#1c76c5;background-color:var(--selection);color:#fff;color:var(--text-bright)}::selection{background-color:#1c76c5;background-color:var(--selection);color:#fff;color:var(--text-bright)}details{display:flex;flex-direction:column;align-items:flex-start;background-color:#1a242f;background-color:var(--background-alt);padding:10px 10px 0;margin:1em 0;border-radius:6px;overflow:hidden}details[open]{padding:10px}details>:last-child{margin-bottom:0}details[open] summary{margin-bottom:10px}summary{display:list-item;background-color:#161f27;background-color:var(--background);padding:10px;margin:-10px -10px 0;cursor:pointer;outline:none}summary:focus,summary:hover{text-decoration:underline}details>:not(summary){margin-top:0}summary::-webkit-details-marker{color:#dbdbdb;color:var(--text-main)}dialog{background-color:#1a242f;background-color:var(--background-alt);color:#dbdbdb;color:var(--text-main);border-radius:6px;border:#526980;border-color:var(--border);padding:10px 30px}dialog>header:first-child{background-color:#161f27;background-color:var(--background);border-radius:6px 6px 0 0;margin:-10px -30px 10px;padding:10px;text-align:center}dialog::-webkit-backdrop{background:rgba(0,0,0,.61);-webkit-backdrop-filter:blur(4px);backdrop-filter:blur(4px)}dialog::backdrop{background:rgba(0,0,0,.61);-webkit-backdrop-filter:blur(4px);backdrop-filter:blur(4px)}footer{border-top:1px solid #526980;border-top:1px solid var(--border);padding-top:10px;color:#a9b1ba;color:var(--text-muted)}body>footer{margin-top:40px}@media 
print{body,button,code,details,input,pre,summary,textarea{background-color:#fff}button,input,textarea{border:1px solid #000}body,button,code,footer,h1,h2,h3,h4,h5,h6,input,pre,strong,summary,textarea{color:#000}summary::marker{color:#000}summary::-webkit-details-marker{color:#000}tbody tr:nth-child(2n){background-color:#f2f2f2}a{color:#00f;text-decoration:underline}}
+
+        #download-section {
+            display: none;
+        }
+    </style>
+</head>
+<body>
+    <h1>Compress Files</h1>
+    <p>Compress files using Huffman Coding</p>
+
+    <form action="/api/accept_file" method="POST" enctype="multipart/form-data">
+        <div>
+            <label for="file">Choose file to upload</label>
+            <input type="file" id="file" name="file" />
+        </div>
+        <div>
+            <button>Compress</button>
+        </div>
+    </form>
+
+    <div id="download-section">
+        <h3>Original File Size</h3>
+        <p id="original-size"></p>
+        <h3>Compressed File Size</h3>
+        <p id="compressed-size"></p>
+        <h3>Compression Ratio</h3>
+        <p id="ratio"></p>
+        <a id="download" href="#" download>
+            <button>Download compressed file</button>
+        </a>
+    </div>
+
+    <script>
+        "use strict";
+        const form = document.querySelector('form');
+        form.addEventListener('submit', handleSubmit);
+
+        function handleSubmit(event) {
+            // Stop the native form submission first so an early error cannot trigger a page reload
+            event.preventDefault();
+
+            const url = new URL(form.action);
+            /** @type {Parameters<fetch>[1]} */
+            const fetchOptions = {
+                method: form.method,
+                body: new FormData(form),
+            };
+
+            fetch(url, fetchOptions)
+            .then(response => {
+                if (!response.ok) {
+                    throw new Error("HTTP error " + response.status);
+                }
+                return response.json();
+            })
+            .then(json => {
+                // An empty upload has size 0; show "n/a" instead of dividing by zero
+                const ratio = json.original_size ? (json.compressed_size / json.original_size).toFixed(3) : "n/a";
+                document.getElementById("original-size").innerText = json.original_size + " bytes";
+                document.getElementById("compressed-size").innerText = json.compressed_size + " bytes";
+                document.getElementById("ratio").innerText = ratio;
+                document.getElementById("download").href = "/api/download_file/" + json.file_id;
+                document.getElementById("download-section").style.display = "unset";
+            })
+            .catch(error => alert("Compression failed: " + error.message));
+        }
+
+    </script>
+
+</body>
+</html>
\ No newline at end of file