Skip to content
This repository has been archived by the owner on Nov 19, 2024. It is now read-only.

Commit

Permalink
feat: initial commit
Browse files Browse the repository at this point in the history
  • Loading branch information
jamilraichouni committed Nov 16, 2024
0 parents commit 09fda4d
Show file tree
Hide file tree
Showing 5 changed files with 318 additions and 0 deletions.
38 changes: 38 additions & 0 deletions .github/workflows/build-and-publish-hkt-day-lists.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
on:
schedule:
# Explanation of the first cron expression below (GitHub Actions evaluates
# schedules in UTC):
# `*/5`: Every 5 minutes.
# `6-8`: From 06:00 to 08:59.
# `*`: Every day of the month.
# `*`: Every month.
# `1-5`: Monday to Friday.
- cron: '*/5 6-8 * * 1-5'
- cron: '*/5 0-23 16-18 11 1-7'
pull_request:
branches: [main]
push:
branches: [main]
jobs:
build-and-publish-hkt-day-lists:
name: Setup, build, and publish HKT day lists
runs-on: ubuntu-latest
steps:
- name: Set up Python
uses: actions/setup-python@v5
with:
python-version: "3.12"
- name: Checkout code
uses: actions/checkout@v4
- name: Install Python packages
run: pip install -r requirements.txt
- name: Install wkhtmltopdf
run: |
sudo apt-get update
sudo apt-get install wkhtmltopdf
- name: Build HKT day lists and upload them to Google Drive
env:
SERVICE_ACCOUNT_JSON_BASE64: ${{ secrets.SERVICE_ACCOUNT_JSON_BASE64 }}
id: build-hkt-day-lists
run: |
echo "$SERVICE_ACCOUNT_JSON_BASE64" | base64 --decode > /tmp/elternvertretung-b7713037bac6.json
python build-hkt-day-lists.py
43 changes: 43 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
# Home Kita Tage (HKT)

## Preconditions

### Google account

There is a Google account for the parent representatives. The login user name
is the email address of the parent representatives and the password shall be
the same as for the Microsoft (email) account given by the leadership of the
kindergarten.

### Input Excel file stored on Google Drive

Home Kita days are specified by the parents and must be recorded in an Excel
file on Google Drive at <https://drive.google.com/drive/u/3/my-drive>.

The Excel file is named `HomeKitaTage.xlsx` and must be located at top level
in the Google Drive.

The first sheet in the Excel file must be named `HKT Erfassung` (the script
reads it by that name) and must contain the following columns:

- `Name`
- `Group`
- `Monday\nmorning`
- `Monday\nafternoon`
- `Tuesday\nmorning`
- `Tuesday\nafternoon`
- `Wednesday\nmorning`
- `Wednesday\nafternoon`
- `Thursday\nmorning`
- `Thursday\nafternoon`
- `Friday\nmorning`
- `Friday\nafternoon`
- `Comments`

Above, the characters `\n` are to be understood as a newline character.

The column `Name` contains the name of the child, and `Group` the kindergarten
group to which the child belongs. The entries in the day columns shall be
empty or contain the number `1`, meaning that the child is absent (at home)
on that day and at that time. The digit `1` was chosen so that the number of
absent children can be summed up per day and time to see whether the absence
times are evenly distributed.
221 changes: 221 additions & 0 deletions build-hkt-day-lists.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,221 @@
import io
import pathlib
import typing as t

import google.oauth2.credentials
import google.oauth2.service_account
import googleapiclient.errors # type: ignore
import googleapiclient.http # type: ignore
import pandas as pd
import pdfkit # type: ignore
from docx import Document
from docx.oxml import parse_xml
from docx.oxml.ns import nsdecls
from docx.shared import Inches
from googleapiclient.discovery import build # type: ignore

# Name of the input workbook on Google Drive and the local path it is
# downloaded to.
HKT_FILE_NAME = "HomeKitaTage.xlsx"
HKT_FILE_PATH = pathlib.Path("/tmp") / HKT_FILE_NAME

# OAuth scope requested for the Drive API client.
SCOPES = ["https://www.googleapis.com/auth/drive"]

# Service-account key file: use the copy in /tmp when it exists, otherwise
# fall back to the file of the same name under /data.
SERVICE_ACCOUNT_FILE = pathlib.Path("/tmp/elternvertretung-b7713037bac6.json")
if not SERVICE_ACCOUNT_FILE.is_file():
    SERVICE_ACCOUNT_FILE = pathlib.Path("/data") / SERVICE_ACCOUNT_FILE.name

# The credentials are loaded once, at import time.
CREDENTIALS = google.oauth2.service_account.Credentials.from_service_account_file(
    SERVICE_ACCOUNT_FILE,
    scopes=SCOPES,
)


def dataframe_to_word(df, docx_file_path):
    """Write *df* as a bordered table into a new Word document.

    The document gets 0.5 inch top and bottom margins and a single table in
    the built-in "Table Grid" style: one header row with the column labels,
    followed by one row per DataFrame row. Every value is converted with
    ``str``. The document is saved to *docx_file_path*.
    """
    # The XML snippets are identical for every cell, so build the strings
    # once; each cell still needs its own freshly parsed element.
    namespaces = nsdecls("w")
    shading_xml = r'<w:shd {} w:fill="FFFFFF"/>'.format(namespaces)
    borders_xml = (
        r'<w:tcBorders %s><w:top w:val="single" w:sz="4"/>'
        r'<w:left w:val="single" w:sz="4"/>'
        r'<w:bottom w:val="single" w:sz="4"/>'
        r'<w:right w:val="single" w:sz="4"/>'
        r"</w:tcBorders>"
    ) % namespaces

    doc = Document()

    # Narrow top and bottom margins on every section.
    for page_section in doc.sections:
        page_section.top_margin = Inches(0.5)
        page_section.bottom_margin = Inches(0.5)

    grid = doc.add_table(rows=1, cols=len(df.columns))
    grid.style = "Table Grid"  # Built-in style that draws borders

    # Header row: one cell per column label.
    for header_cell, label in zip(grid.rows[0].cells, df.columns):
        header_cell.text = str(label)

    # One table row per DataFrame row.
    for _, record in df.iterrows():
        new_cells = grid.add_row().cells
        for position, value in enumerate(record):
            new_cells[position].text = str(value)

    # Give every cell a white fill and explicit single-line borders.
    for table_row in grid.rows:
        for cell in table_row.cells:
            cell_properties = cell._element.get_or_add_tcPr()
            cell_properties.append(parse_xml(shading_xml))
            cell_properties.append(parse_xml(borders_xml))

    doc.save(docx_file_path)


def export_excel_file_from_google_drive(filename) -> None:
    """Download the Drive file called *filename* to ``HKT_FILE_PATH``.

    Any previous local copy is removed first, so after the call the local
    file exists only if the download succeeded. Drive is queried by name and
    all result pages are followed until a file whose name equals *filename*
    exactly is found; the first such file is downloaded. Files in the trash
    are ignored.

    Nothing is raised for Drive API failures: an ``HttpError`` is printed and
    the function returns, leaving no local file. Callers detect failure by
    checking whether ``HKT_FILE_PATH`` exists.
    """
    HKT_FILE_PATH.unlink(missing_ok=True)

    # Drive query values are single-quoted; backslashes and single quotes
    # inside the value must be escaped with a backslash.
    escaped_name = filename.replace("\\", "\\\\").replace("'", "\\'")
    query = f"name = '{escaped_name}' and trashed = false"

    try:
        service = build("drive", "v3", credentials=CREDENTIALS)

        # Follow nextPageToken until a match is found or the listing ends;
        # a single list() call only returns the first page of results.
        file_id = None
        page_token = None
        while file_id is None:
            response = (
                service.files()
                .list(
                    q=query,
                    fields="nextPageToken, files(id, name)",
                    pageToken=page_token,
                )
                .execute()
            )
            for item in response.get("files", []):
                # Re-check the name locally so only an exact match counts.
                if item["name"] == filename:
                    file_id = item["id"]
                    break
            page_token = response.get("nextPageToken")
            if page_token is None:
                break

        if file_id is None:
            print(f"File {filename} not found.")
            return

        # Download into memory first so a failed transfer never leaves a
        # truncated workbook on disk.
        request = service.files().get_media(fileId=file_id)
        buffer = io.BytesIO()
        downloader = googleapiclient.http.MediaIoBaseDownload(buffer, request)
        done = False
        while not done:
            _, done = downloader.next_chunk()
        HKT_FILE_PATH.write_bytes(buffer.getvalue())
    except googleapiclient.errors.HttpError as error:
        print(f"An error occurred: {error}")


def upload_overview_files_to_google_drive(
    daily_overview_file_paths: list[pathlib.Path],
) -> None:
    """Upload the overview files to the target Drive folder.

    Each file is uploaded under its stem (file name without suffix). Before
    an upload, one already existing Drive file with that same name is
    deleted, so repeated runs replace earlier uploads instead of piling up
    duplicates. Files that differ only in suffix share a stem and therefore
    the same Drive name; each of them consumes one existing file.

    Args:
        daily_overview_file_paths: Local files to upload.

    Drive API failures are not raised: the first ``HttpError`` is printed and
    the remaining uploads are skipped.
    """
    target_folder_id = "19PV7rVVVA1uPS-LIDU5bdHGYvz3lODXB"
    try:
        service = build("drive", "v3", credentials=CREDENTIALS)

        # Collect every visible file, following nextPageToken; a single
        # list() call only returns the first page, which would leave stale
        # copies on later pages undeleted.
        existing_files: list[dict] = []
        page_token = None
        while True:
            response = (
                service.files()
                .list(
                    fields="nextPageToken, files(id, name)",
                    pageToken=page_token,
                )
                .execute()
            )
            existing_files.extend(response.get("files", []))
            page_token = response.get("nextPageToken")
            if page_token is None:
                break

        for file_path in daily_overview_file_paths:
            # Delete at most one previous upload with the same name and
            # forget it, so the next file sharing this stem removes a
            # different stale copy.
            stale_file = next(
                (
                    candidate
                    for candidate in existing_files
                    if candidate["name"] == file_path.stem
                ),
                None,
            )
            if stale_file is not None:
                service.files().delete(fileId=stale_file["id"]).execute()
                existing_files.remove(stale_file)

            file_metadata = {
                "name": file_path.stem,
                "parents": [target_folder_id],
            }
            # chunksize=-1 sends the file in a single request.
            media = googleapiclient.http.MediaFileUpload(
                file_path, chunksize=-1
            )
            file = (
                service.files()
                .create(
                    body=file_metadata,
                    media_body=media,
                    fields="id,name,webViewLink",
                )
                .execute()
            )
            print(f"Uploaded {file.get('name')} to {file.get('webViewLink')}.")
    except googleapiclient.errors.HttpError as error:
        print(f"An error occurred: {error}")


def generate_daily_overview_files() -> list[pathlib.Path]:
    """Create per-group, per-weekday absence lists as PDF and Word files.

    Reads the sheet "HKT Erfassung" from ``HKT_FILE_PATH``. For every value
    of the ``Group`` column and every weekday (Monday to Friday) it selects
    the children marked with ``1`` in that day's morning or afternoon column,
    writes the selection to an HTML file under /tmp, converts it to PDF with
    pdfkit (styled by ``style.css``, resolved relative to the working
    directory) and writes the same table to a .docx file. The intermediate
    HTML file is not removed.

    Returns:
        The paths of all PDF and .docx files that exist after generation,
        in generation order.
    """
    weekdays = ("Monday", "Tuesday", "Wednesday", "Thursday", "Friday")
    daily_overview_file_paths: list[pathlib.Path] = []
    sheet = pd.read_excel(HKT_FILE_PATH, sheet_name="HKT Erfassung")

    for group_name, group_df in sheet.groupby("Group"):
        for no, day in enumerate(weekdays, start=1):
            slot_columns = [f"{day}\nmorning", f"{day}\nafternoon"]

            # Keep children absent in at least one of the day's two slots.
            stays_home = (group_df[slot_columns] == 1.0).any(axis=1)
            day_df = group_df.loc[
                stays_home, ["Name", "Group", *slot_columns]
            ].copy()

            # Translate the marker only in the slot columns. Replacing over
            # the whole frame would also rewrite a numeric Group or Name
            # that happens to equal 1.
            for column in slot_columns:
                day_df[column] = day_df[column].replace(1.0, "Stay at home")
            day_df = day_df.fillna("")

            html_file_path = pathlib.Path(f"/tmp/{group_name}_{no}_{day}.html")
            day_df.to_html(html_file_path, index=False)

            pdf_file_path = html_file_path.with_suffix(".pdf")
            options = {"encoding": "UTF-8", "user-style-sheet": "style.css"}
            pdfkit.from_file(
                input=str(html_file_path),
                output_path=str(pdf_file_path),
                options=options,
                verbose=False,
            )

            docx_file_path = html_file_path.with_suffix(".docx")
            dataframe_to_word(day_df, docx_file_path)

            # Only report files that were actually produced.
            daily_overview_file_paths.extend(
                file_path
                for file_path in (pdf_file_path, docx_file_path)
                if file_path.is_file()
            )
    return daily_overview_file_paths


if __name__ == "__main__":
    # Step 1: fetch the input workbook from Google Drive.
    export_excel_file_from_google_drive(filename=HKT_FILE_NAME)

    # Without the workbook there is nothing to build; exit with an error code.
    if HKT_FILE_PATH.is_file():
        # Steps 2 and 3: build the overview files, then upload them.
        upload_overview_files_to_google_drive(generate_daily_overview_files())
    else:
        print(f"File {HKT_FILE_NAME} not found.")
        raise SystemExit(1)
8 changes: 8 additions & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
google-api-python-client
google-auth-httplib2
google-auth-oauthlib
openpyxl
pandas
pandas-stubs
pdfkit # Needs system pkg wkhtmltopdf
python-docx
8 changes: 8 additions & 0 deletions style.css
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
body {
font-family: 'Noto Emoji', sans-serif;
}
table {
border-collapse: collapse;
width: 100%;
}

0 comments on commit 09fda4d

Please sign in to comment.