diff --git a/.github/workflows/build-and-publish-hkt-day-lists.yml b/.github/workflows/build-and-publish-hkt-day-lists.yml new file mode 100644 index 0000000..e7343eb --- /dev/null +++ b/.github/workflows/build-and-publish-hkt-day-lists.yml @@ -0,0 +1,38 @@ +on: + schedule: + # Explanation of the Cron Expression: + # `*/5`: Every 5 minutes. + # `6-8`: From 06:00 AM to 08:59 AM. + # `*`: Every day of the month. + # `*`: Every month. + # `1-5`: Monday to Friday. + - cron: '*/5 6-8 * * 1-5' + - cron: '*/5 0-23 16-18 11 1-7' + pull_request: + branches: [main] + push: + branches: [main] +jobs: + build-and-publish-hkt-day-lists: + name: Setup, build, and publish HKT day lists + runs-on: ubuntu-latest + steps: + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: "3.12" + - name: Checkout code + uses: actions/checkout@v4 + - name: Install Python packages + run: pip install -r requirements.txt + - name: Install wkhtmltopdf + run: | + sudo apt-get update + sudo apt-get install wkhtmltopdf + - name: Build HKT day lists and upload them to Google Drive + env: + SERVICE_ACCOUNT_JSON_BASE64: ${{ secrets.SERVICE_ACCOUNT_JSON_BASE64 }} + id: build-hkt-day-lists + run: | + echo "$SERVICE_ACCOUNT_JSON_BASE64" | base64 --decode > /tmp/elternvertretung-b7713037bac6.json + python build-hkt-day-lists.py diff --git a/README.md b/README.md new file mode 100644 index 0000000..332a559 --- /dev/null +++ b/README.md @@ -0,0 +1,43 @@ +# Home Kita Tage (HKT) + +## Preconditions + +### Google account + +There is a Google account for the parent representatives. The login user name +is the email address of the parent representatives and the password shall be +the same as for the Microsoft (email) account given by the leadership of the +kindergarten. + +### Input Excel file stored on Google Drive + +Home Kita days are to be named by parents and need to be stored in an Excel +file on Google Drive at . + +The Excel file is named `HomeKitaTage.xlsx` and must be located at top level +in the Google Drive. + +The first sheet in the Excel file must come with the following columns: + +- `Name` +- `Group` +- `Monday\nmorning` +- `Monday\nafternoon` +- `Tuesday\nmorning` +- `Tuesday\nafternoon` +- `Wednesday\nmorning` +- `Wednesday\nafternoon` +- `Thursday\nmorning` +- `Thursday\nafternoon` +- `Friday\nmorning` +- `Friday\nafternoon` +- `Comments` + +Above, the characters `\n` are to be understood as a newline character. + +The column `Name` contains the name of the child, `Group` the group to which +the child belongs to in the kindergarten. The entries in the day columns shall +be empty or filled with a number `1` meaning that the child is absent (at home) +at this day and time. The digit `1` has been chosen to be able to sum up the +number of children that are absent at a given day and time to see, if the +absence times are equally distributed. diff --git a/build-hkt-day-lists.py b/build-hkt-day-lists.py new file mode 100644 index 0000000..e71dd13 --- /dev/null +++ b/build-hkt-day-lists.py @@ -0,0 +1,221 @@ +import io +import pathlib +import typing as t + +import google.oauth2.credentials +import google.oauth2.service_account +import googleapiclient.errors # type: ignore +import googleapiclient.http # type: ignore +import pandas as pd +import pdfkit # type: ignore +from docx import Document +from docx.oxml import parse_xml +from docx.oxml.ns import nsdecls +from docx.shared import Inches +from googleapiclient.discovery import build # type: ignore + +HKT_FILE_NAME = "HomeKitaTage.xlsx" +HKT_FILE_PATH = pathlib.Path(f"/tmp/{HKT_FILE_NAME}") +SCOPES = ["https://www.googleapis.com/auth/drive"] +SERVICE_ACCOUNT_FILE = pathlib.Path("/tmp/elternvertretung-b7713037bac6.json") +if not SERVICE_ACCOUNT_FILE.is_file(): + SERVICE_ACCOUNT_FILE = pathlib.Path( + "/data/elternvertretung-b7713037bac6.json" + ) +CREDENTIALS = ( + google.oauth2.service_account.Credentials.from_service_account_file( + SERVICE_ACCOUNT_FILE, scopes=SCOPES + ) +) + + +def dataframe_to_word(df, docx_file_path): + document = Document() + + # Set custom margins (e.g., 0.5 inches for top and bottom) + sections = document.sections + for section in sections: + section.top_margin = Inches(0.5) + section.bottom_margin = Inches(0.5) + + # Add a table with borders + table = document.add_table(rows=1, cols=len(df.columns)) + table.style = "Table Grid" # Use a built-in style with borders + + # Add header row + hdr_cells = table.rows[0].cells + for i, column in enumerate(df.columns): + hdr_cells[i].text = str(column) + + # Add data rows + for index, row in df.iterrows(): + row_cells = table.add_row().cells + for i, value in enumerate(row): + row_cells[i].text = str(value) + + # Apply borders to each cell (if needed) + for row in table.rows: + for cell in row.cells: + cell._element.get_or_add_tcPr().append( + parse_xml(r''.format(nsdecls("w"))) + ) + cell._element.get_or_add_tcPr().append( + parse_xml( + ( + r'' + r'' + r'' + r'' + r"" + ) + % nsdecls("w") + ) + ) + + document.save(docx_file_path) + + +def export_excel_file_from_google_drive(filename) -> None: + HKT_FILE_PATH.unlink(missing_ok=True) + file: t.Optional[io.BytesIO] = None + try: + service = build("drive", "v3", credentials=CREDENTIALS) + results = ( + service.files() + .list(fields="nextPageToken, files(id, name)") + .execute() + ) + items = results.get("files", []) + + if not items: + print("No files found.") + for item in items: + if item["name"] != filename: + continue + file_id = item["id"] + request = service.files().get_media(fileId=file_id) + file = io.BytesIO() + downloader = googleapiclient.http.MediaIoBaseDownload( + file, request + ) + done = False + while done is False: + status, done = downloader.next_chunk() + HKT_FILE_PATH.write_bytes(file.getvalue()) + if not HKT_FILE_PATH.is_file(): + print(f"File {filename} not found.") + except googleapiclient.errors.HttpError as error: + print(f"An error occurred: {error}") + + +def upload_overview_files_to_google_drive( + daily_overview_file_paths: list[pathlib.Path], +) -> None: + try: + service = build("drive", "v3", credentials=CREDENTIALS) + existing_files = ( + service.files() + .list(fields="nextPageToken, files(id, name)") + .execute() + ).get("files", []) + for file_path in daily_overview_file_paths: + if existing_files: + for existing_file in existing_files: + if existing_file["name"] == file_path.stem: + service.files().delete( + fileId=existing_file["id"] + ).execute() + existing_files.remove(existing_file) + break + file_metadata = { + "name": file_path.stem, + "parents": ["19PV7rVVVA1uPS-LIDU5bdHGYvz3lODXB"], + } + media = googleapiclient.http.MediaFileUpload( + file_path, chunksize=-1 + ) + file = ( + service.files() + .create( + body=file_metadata, + media_body=media, + fields="id,name,webViewLink", + ) + .execute() + ) + print(f"Uploaded {file.get('name')} to {file.get('webViewLink')}.") + # print(f'Link: {file.get("webViewLink")}') + # permission = { + # "type": "user", + # "role": "writer", + # "emailAddress": "elternvertretung@bluetezeit-berlin.de" + # } + # # https://developers.google.com/drive/api/reference/rest/v3/permissions/create + # service.permissions().create( + # fileId=file.get("id"), + # body=permission, + # transferOwnership=False, + # sendNotificationEmail=False, + # supportsAllDrives=True, + # # moveToNewOwnersRoot=True, + # ).execute() + except googleapiclient.errors.HttpError as error: + print(f"An error occurred: {error}") + + +def generate_daily_overview_files() -> list[pathlib.Path]: + daily_overview_file_paths: list[pathlib.Path] = [] + df = pd.read_excel(HKT_FILE_PATH, sheet_name="HKT Erfassung") + for group_name, group_df in df.groupby("Group"): + for no, day in enumerate( + ( + "Monday", + "Tuesday", + "Wednesday", + "Thursday", + "Friday", + ), + start=1, + ): + day_df = group_df[ + (group_df[f"{day}\nmorning"] == 1.0) + | (group_df[f"{day}\nafternoon"] == 1.0) + ] + day_df = day_df.replace(1.0, "Stay at home") + html_file_path = pathlib.Path(f"/tmp/{group_name}_{no}_{day}.html") + df = day_df[ + [ + "Name", + "Group", + f"{day}\nmorning", + f"{day}\nafternoon", + ] + ].fillna("") + df.to_html(html_file_path, index=False) + pdf_file_path = html_file_path.with_suffix(".pdf") + options = {"encoding": "UTF-8", "user-style-sheet": "style.css"} + pdfkit.from_file( + input=str(html_file_path), + output_path=str(pdf_file_path), + options=options, + verbose=False, + ) + docx_file_path = html_file_path.with_suffix(".docx") + dataframe_to_word(df, docx_file_path) + # html_file_path.unlink(missing_ok=True) + for file_path in ( + pdf_file_path, + docx_file_path, + ): + if file_path.is_file(): + daily_overview_file_paths.append(file_path) + return daily_overview_file_paths + + +if __name__ == "__main__": + export_excel_file_from_google_drive(filename=HKT_FILE_NAME) + if not HKT_FILE_PATH.is_file(): + print(f"File {HKT_FILE_NAME} not found.") + raise SystemExit(1) + daily_overview_file_paths = generate_daily_overview_files() + upload_overview_files_to_google_drive(daily_overview_file_paths) diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..ffcc36e --- /dev/null +++ b/requirements.txt @@ -0,0 +1,8 @@ +google-api-python-client +google-auth-httplib2 +google-auth-oauthlib +openpyxl +pandas +pandas-stubs +pdfkit # Needs system pkg wkhtmltopdf +python-docx diff --git a/style.css b/style.css new file mode 100644 index 0000000..e7da2f0 --- /dev/null +++ b/style.css @@ -0,0 +1,8 @@ +body { + font-family: 'Noto Emoji', sans-serif; +} +table { + border-collapse: collapse; + width: 100%; +} +