# File upload tutorial (Streamlit)

import os

import docx2txt  # pip install docx2txt
import fitz  # pip install PyMuPDF
import pandas as pd  # pip install pandas
import streamlit as st
from PIL import Image  # pip install pillow
from streamlit.runtime.uploaded_file_manager import UploadedFile


def get_file_details(file: UploadedFile) -> dict[str, str | int]:
    """Summarize an uploaded file as a dictionary.

    Args:
        file: Object exposing ``name``, ``type`` and ``size`` attributes.

    Returns:
        A dict with the keys ``"FileName"``, ``"FileType"`` and
        ``"FileSize"`` holding the corresponding attribute values.
    """
    details: dict[str, str | int] = {}
    details["FileName"] = file.name
    details["FileType"] = file.type
    details["FileSize"] = file.size
    return details


@st.cache_data
def load_image(img_file: UploadedFile) -> Image.Image:
    """Open an uploaded image file with Pillow.

    The function is wrapped in ``st.cache_data``.

    Args:
        img_file: Uploaded image, passed straight to ``Image.open``.

    Returns:
        The image object produced by ``Image.open``.
    """
    opened_image = Image.open(img_file)
    return opened_image


def read_pdf(pdf_file: UploadedFile, page_number: int | None = None) -> list[str]:
    """Extract the text of the leading pages of an uploaded PDF.

    Args:
        pdf_file: Uploaded PDF; its bytes are consumed with ``read()``.
        page_number: Number of pages to read, counted from the first page.
            Despite the name this is a page *count*, not a page index.
            ``None`` (the default) reads every page. Values larger than the
            document are clamped to the document length; zero or negative
            values yield an empty list.

    Returns:
        One string per page, in page order.
    """
    with fitz.open(stream=pdf_file.read(), filetype="pdf") as pdf_document:
        total_pages = len(pdf_document)
        # Clamp the request so that asking for more pages than exist returns
        # the whole document instead of failing inside load_page().
        pages = total_pages if page_number is None else min(page_number, total_pages)
        return [pdf_document.load_page(p).get_text() for p in range(pages)]


def save_uploaded_file(uploaded_file, filename: str):
    """Write an uploaded file into ``./temp_dir`` and report success in the UI.

    Args:
        uploaded_file: Object exposing ``getbuffer()``, whose contents are
            written to disk.
        filename: Name to store the file under. Only its final path
            component is used, so directory parts are discarded.

    Returns:
        Whatever ``st.success`` returns for the confirmation message.
    """
    target_dir = "./temp_dir"
    # Create the destination on first use; without this the very first save
    # fails with FileNotFoundError.
    os.makedirs(target_dir, exist_ok=True)
    # The name originates from the uploading client, so strip any directory
    # components to keep the write inside target_dir.
    path = os.path.join(target_dir, os.path.basename(filename))
    with open(path, "wb") as f:
        f.write(uploaded_file.getbuffer())
    return st.success(f"Saved file in: {path}")


st.title("File Upload Tutorial")

# Each sidebar entry selects one of the demo pages below.
menu = ["Home", "Dataset", "DocumentFiles", "Audio"]
choice = st.sidebar.selectbox("Menu", menu)

if choice == "Home":
    # Image page: show details and a preview of every uploaded image, then
    # store a copy on disk.
    st.subheader("Home")
    image_files = st.file_uploader(
        "Upload Image", type=["png", "jpg", "jpeg"], accept_multiple_files=True
    )
    # With accept_multiple_files=True an empty selection is an empty list,
    # so a truthiness check covers the "nothing uploaded" case.
    if image_files:
        for image_file in image_files:
            st.write(get_file_details(image_file))
            st.image(load_image(image_file))
            save_uploaded_file(image_file, image_file.name)
elif choice == "Dataset":
    # CSV page: show the data read-only and in an editor, and offer both the
    # original and the edited table for download.
    st.subheader("Dataset")
    data_file = st.file_uploader("Upload CSV", type=["csv"])
    if data_file is not None:
        st.write(get_file_details(data_file))
        df = pd.read_csv(data_file)
        st.dataframe(df)
        edited_df = st.data_editor(df)

        st.download_button(
            label="Download data as CSV",
            data=df.to_csv().encode("utf-8"),
            file_name="large_df.csv",
            mime="text/csv",
        )

        st.download_button(
            label="Download edit_data as CSV",
            data=edited_df.to_csv().encode("utf-8"),
            # Distinct name so the edited export is not confused with (or
            # saved over) the original export.
            file_name="edited_df.csv",
            mime="text/csv",
        )
elif choice == "DocumentFiles":
    # Document page: render the text content of txt, pdf and docx uploads.
    st.subheader("DocumentFiles")
    doc_file = st.file_uploader("Upload Document", type=["pdf", "docx", "txt"])
    # MIME types used to decide how the uploaded document is read.
    file_tpe = {
        "txt": "text/plain",
        "pdf": "application/pdf",
        "docx": "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
    }
    if doc_file is not None:
        st.write(get_file_details(doc_file))
        if doc_file.type == file_tpe["txt"]:
            # Replace undecodable bytes instead of crashing on text files
            # that are not valid UTF-8.
            text = doc_file.read().decode("utf-8", errors="replace")
            st.text(text)  # use st.write(text) instead to render it as markdown
        elif doc_file.type == file_tpe["pdf"]:
            texts = read_pdf(doc_file)
            for i, text in enumerate(texts):
                st.subheader(f"Page {i + 1}")
                st.text(text)
                st.divider()
        elif doc_file.type == file_tpe["docx"]:
            text = docx2txt.process(doc_file)
            st.text(text)
        else:
            st.warning("File type not supported")
elif choice == "Audio":
    # Audio page: play the upload back and offer it for download.
    st.subheader("Audio")
    audio_file = st.file_uploader("Upload Audio", type=["mp3", "wav", "m4a", "flac"])
    if audio_file is not None:
        st.write(get_file_details(audio_file))
        st.audio(audio_file)

        # Reuse the upload's own name and MIME type; the uploader also
        # accepts wav/m4a/flac, so a fixed "audio.mp3" would mislabel them.
        st.download_button(
            "Download Audio",
            audio_file,
            audio_file.name,
            audio_file.type or "application/octet-stream",
        )