# sim-manajement-book/scripts/generate-marp-slides.py

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Generate Marp slides (Gaia theme, invert) for all 18 chapters of:
  "Sistem Informasi Manajemen di Era AI: Perspektif Strategis dan Pengambilan Keputusan"
  Helmi Bahar Alim, S.Kom., M.Kom.

Output: slides/marp/slide-bab-NN.md (one file per chapter)
Run   : python scripts/generate-marp-slides.py
"""

import re
import os
from pathlib import Path

# ─── Paths ────────────────────────────────────────────────────────────────────
# Resolve the script location first: when __file__ is a bare relative name
# (e.g. the script is launched from inside scripts/ on interpreters that keep
# __file__ relative), ``.parent.parent`` would otherwise collapse to "." and
# point ROOT at the wrong directory.
ROOT       = Path(__file__).resolve().parent.parent
CHAPTERS   = ROOT / "chapters"          # source chapters: bab-NN.md
OUT_DIR    = ROOT / "slides" / "marp"   # generated decks: slide-bab-NN.md
# Created at import time so the writer never has to check for the directory.
OUT_DIR.mkdir(parents=True, exist_ok=True)

# Values rendered into the footer of every deck.
AUTHOR      = "Helmi Bahar Alim, S.Kom., M.Kom."
INSTITUTION = "Universitas Putra Bangsa — Kebumen"

# ─── Extractors ──────────────────────────────────────────────────────────────

def get_text(content: str) -> str:
    """Return *content* with emphasis asterisks and code backticks removed.

    Every run of ``*`` is deleted, each backtick-delimited span is replaced
    by the same span without its surrounding backticks, and the result is
    stripped of leading/trailing whitespace.
    """
    def _unwrap(span):
        # Keep the inner text of the span, drop the backticks around it.
        return span.group(0).strip('`')

    without_stars = re.compile(r'\*+').sub('', content)
    unwrapped = re.compile(r'`+[^`]*`+').sub(_unwrap, without_stars)
    return unwrapped.strip()


def extract_title_bagian(content: str):
    """Parse the chapter heading and its metadata fields.

    Returns a 5-tuple ``(bab_num, title, bagian, reader_outcome, level)``.
    Fallbacks when a field is not found: ``0`` for the number, ``"?"`` for
    the title, ``""`` for bagian and reader outcome, ``"Menengah"`` for level.
    """
    # H1 of the form "# BAB <n> — <title>" (an optional BOM may precede it).
    heading = re.search(
        r'^[\ufeff]*# BAB (\d+)\s+[—\-]{1,3}\s+(.+)', content, re.MULTILINE
    )
    if heading:
        bab_num, title = int(heading.group(1)), heading.group(2).strip()
    else:
        bab_num, title = 0, "?"

    part = re.search(r'Bagian\s*:\s*(.+)', content)
    bagian = part.group(1).strip() if part else ""

    # The outcome text can wrap over several lines; it ends at the next
    # "Level:" field or at a closing code fence, whichever comes first.
    outcome = re.search(
        r'Reader Outcome\s*:\s*(.+?)(?=Level\s*:|\`\`\`)',
        content, re.DOTALL
    )
    ro = ""
    if outcome:
        single_line = re.sub(r'\s+', ' ', outcome.group(1))
        ro = single_line.strip().rstrip(',.')

    difficulty = re.search(r'Level\s*:\s*(.+)', content)
    level = difficulty.group(1).strip() if difficulty else "Menengah"

    return bab_num, title, bagian, ro, level


def extract_opening(content: str, bab_num: int):
    """Pull the hook paragraph and the trigger question from section N.1.

    The section runs from the "## N.1 Pembuka" heading up to the first
    occurrence of "## N.2". Returns ``(hook, pemantik)``; both are ``""``
    when the section is not found.
    """
    start = re.escape(f'## {bab_num}.1 Pembuka')
    stop = re.escape(f'## {bab_num}.2')
    section = re.search(start + r'\n(.+?)(?=' + stop + ')', content, re.DOTALL)
    if section is None:
        return ("", "")

    # Paragraphs are separated by one or more blank lines.
    chunks = [
        chunk.strip()
        for chunk in re.split(r'\n{2,}', section.group(1).strip())
        if chunk.strip()
    ]

    # Hook: first paragraph that is not a rule, not a heading, and longer
    # than 30 characters.
    hook = next(
        (c for c in chunks
         if c != '---' and not c.startswith('#') and len(c) > 30),
        "",
    )
    # Question: searching from the end, the first paragraph containing "?".
    pemantik = next(
        (c for c in reversed(chunks)
         if '?' in c and len(c) > 20 and not c.startswith('---')),
        "",
    )

    # Cap the hook at 350 characters, then back up to the last sentence end
    # provided that still leaves more than 150 characters.
    if len(hook) > 350:
        hook = hook[:350]
        cut = max(hook.rfind(mark) for mark in ('. ', '! ', '? '))
        if cut > 150:
            hook = hook[:cut + 1]

    return hook.rstrip(), pemantik[:300].rstrip()


def extract_mermaid(content: str, bab_num: int):
    """Return the body of the first ```mermaid fenced block, or ``""``.

    ``bab_num`` is accepted for signature parity with the other extractors
    and is not used.
    """
    fence = re.compile(r'```mermaid\n(.+?)```', re.DOTALL).search(content)
    return fence.group(1).strip() if fence else ""


def extract_gambar_caption(content: str, gambar_label: str):
    """Return the caption text of a bold "**<label> — <caption>**" line.

    ``gambar_label`` is matched literally (e.g. "Gambar 2.1"). Returns ``""``
    when no such bold caption exists in ``content``.
    """
    caption_re = re.escape(f'**{gambar_label}') + r' — (.+?)\*\*'
    hit = re.search(caption_re, content)
    if hit is None:
        return ""
    return hit.group(1).strip()


def extract_definisi(content: str, bab_num: int):
    """Collect up to three key definitions from section "N.3 Definisi Kunci".

    Each result is ``(term, definition, relevance)``: the bold term with any
    parenthesised part removed, the first line of the definition text (at
    most 150 characters) and the "Relevansi manajerial" line (at most 160
    characters). Returns ``[]`` when the section is not found.
    """
    section = re.search(
        rf'## {bab_num}\.3 Definisi Kunci\n(.+?)(?=\n## {bab_num}\.\d)',
        content, re.DOTALL
    )
    if section is None:
        return []

    definitions = []
    # Entries are separated by a horizontal rule on its own line.
    for block in re.split(r'\n---\n', section.group(1)):
        heading = re.match(r'\*\*(.+?)\*\*', block.strip())
        if heading is None:
            # Not a definition entry (does not open with a bold term).
            continue
        term = re.sub(r'\(.+?\)', '', heading.group(1)).strip()

        # Preferred: the text following the bold term line, up to the
        # relevance marker; only its first line is kept.
        body_match = re.search(
            r'\*\*\n(.+?)(?=\*\*Relevansi|\Z)', block, re.DOTALL
        )
        if body_match is not None:
            def_text = body_match.group(1).strip().split('\n')[0]
        else:
            # Fallback: join the non-bold lines among lines 2-4 of the entry.
            candidates = block.strip().split('\n')[1:4]
            def_text = ' '.join(
                line.strip()
                for line in candidates
                if line.strip() and not line.startswith('**')
            )

        relevance_match = re.search(
            r'\*\*Relevansi manajerial:\*\*\s*(.+)', block
        )
        if relevance_match is None:
            relevance = ""
        else:
            relevance = relevance_match.group(1).strip()[:160]

        definitions.append((
            term.strip('* \n'),
            re.sub(r'\s+', ' ', def_text)[:150],
            re.sub(r'\s+', ' ', relevance),
        ))
        if len(definitions) >= 3:
            break
    return definitions


def extract_konsep_subsections(content: str, bab_num: int):
    """List the "### N.4.x <title>" subsection headings of chapter N.

    Returns ``(number, title)`` pairs in document order, where ``number`` is
    the subsection index as a string of digits and ``title`` is stripped.
    """
    heading_re = re.compile(rf'### {bab_num}\.4\.(\d+)\s+(.+)')
    subsections = []
    for hit in heading_re.finditer(content):
        subsections.append((hit.group(1), hit.group(2).strip()))
    return subsections


def extract_salah_kaprah(content: str, bab_num: int):
    """Collect up to four (myth, correction) pairs from "N.7 Salah Kaprah".

    Myths are the ``***...***`` spans of the section and corrections are the
    texts following ``**Koreksi:**``; the two lists are paired by position.
    A myth is cut to 80 characters, a correction to its first sentence and
    160 characters. Returns ``[]`` when the section is not found.
    """
    section = re.search(
        rf'## {bab_num}\.7 Salah Kaprah\n(.+?)(?=\n## {bab_num}\.\d)',
        content, re.DOTALL
    )
    if section is None:
        return []

    text = section.group(1)
    myths = re.findall(r'\*{3}(.+?)\*{3}', text)
    corrections = re.findall(
        r'\*\*Koreksi:\*\*\s*(.+?)(?=\n\*{3}|\Z)', text, re.DOTALL
    )
    # Pad so that a myth without a matching correction pairs with "".
    padded = corrections + [""] * len(myths)

    pairs = []
    for myth, raw_correction in zip(myths[:4], padded):
        # Only the first sentence of the correction goes on the slide.
        sentence = re.split(r'(?<=[.!?])\s', raw_correction.strip())[0]
        pairs.append((myth.strip('"').strip()[:80], sentence.strip()[:160]))
    return pairs


def extract_studi_kasus_titles(content: str, bab_num: int):
    """Return the titles of case study A (Dasar) and B (Lanjutan).

    Each title is the rest of the heading line after the colon; a missing
    heading yields ``""``. ``bab_num`` is not used by the lookup.
    """
    heading_patterns = (
        r'### Studi Kasus A \(Dasar\): (.+?)(?:\n|$)',
        r'### Studi Kasus B \(Lanjutan\): (.+?)(?:\n|$)',
    )
    titles = []
    for pattern in heading_patterns:
        hit = re.search(pattern, content)
        titles.append(hit.group(1).strip() if hit else "")
    return tuple(titles)


def extract_studi_kasus_body(content: str, label: str, bab_num: int):
    """Return a short excerpt (at most 280 characters) of a case study.

    ``label`` is the heading suffix, e.g. ``"A (Dasar)"``. The excerpt starts
    after the first blank line following the heading and stops before the
    next ``###``/``## `` heading. Bold-only label lines are removed and
    whitespace is collapsed. Returns ``""`` when nothing matches.
    """
    pattern = (
        re.escape(f"### Studi Kasus {label}")
        + r'.+?\n\n(.+?)(?=\n###|\n## )'
    )
    found = re.search(pattern, content, re.DOTALL)
    if not found:
        return ""
    # Drop lines that consist solely of a bold label such as "**Konteks**".
    text = re.sub(r'\*\*[^*]+\*\*\n', '', found.group(1)).strip()
    return re.sub(r'\s+', ' ', text)[:280]


def extract_template(content: str, bab_num: int):
    """Return ``(name, body)`` for the chapter's "Template A.N".

    ``name`` is the text after "### Template A.N — " and falls back to
    ``"Template A.N"``. ``body`` is the stripped text between that heading
    line and the next "## N.x" heading, limited to its first 30 lines, or
    ``""`` when no such span exists.
    """
    heading = re.search(rf'### Template A\.{bab_num} — (.+)', content)
    if heading:
        name = heading.group(1).strip()
    else:
        name = f"Template A.{bab_num}"

    block = re.search(
        rf'Template A\.{bab_num} — .+?\n(.+?)(?=\n## {bab_num}\.\d)',
        content, re.DOTALL
    )
    if block is None:
        return name, ""

    kept_lines = block.group(1).strip().split('\n')[:30]
    return name, '\n'.join(kept_lines)


def extract_rangkuman(content: str, bab_num: int):
    """Return up to five summary points from section "N.11 Rangkuman".

    Each numbered item is collapsed to one line and reduced to its first
    sentence, capped at 160 characters. Returns ``[]`` when the section is
    not found.
    """
    section = re.search(
        rf'## {bab_num}\.11 Rangkuman\n(.+?)(?=\n## {bab_num}\.\d|\n## Referensi)',
        content, re.DOTALL
    )
    if section is None:
        return []

    # A numbered item runs until the next number, a rule, or the section end.
    numbered = re.findall(
        r'\d+\.\s+(.+?)(?=\n\d+\.|\n\n---|\Z)', section.group(1), re.DOTALL
    )
    return [
        re.split(r'(?<=[.!?])\s', re.sub(r'\s+', ' ', entry).strip())[0][:160]
        for entry in numbered[:5]
    ]


def extract_final_statement(content: str):
    """Return the chapter's closing quote, or ``""`` if none is found.

    Preference order: the last line-initial ``*"..."*`` quote; otherwise the
    first blockquote of the form ``> "..."`` with whitespace collapsed.
    """
    starred = re.findall(r'^\*"(.+?)"\*', content, re.MULTILINE)
    if starred:
        return starred[-1].strip()

    quoted = re.search(r'> "(.+?)"', content, re.DOTALL)
    if quoted is None:
        return ""
    return re.sub(r'\s+', ' ', quoted.group(1)).strip()


def extract_latihan(content: str, bab_num: int):
    """Return ``(title, instruction)`` for exercise "Latihan N.1".

    ``title`` is the text after "**Latihan N.1 — " up to the closing ``**``.
    ``instruction`` is the remainder of the paragraph that starts with
    "Gunakan Template A.N " (that prefix itself is not included), collapsed
    to a single line and capped at 200 characters. Either value is ``""``
    when its pattern is not found.
    """
    title_match = re.search(rf'\*\*Latihan {bab_num}\.1 — (.+?)\*\*', content)
    title = title_match.group(1).strip() if title_match else ""

    # DOTALL lets the paragraph span hard-wrapped lines. Without it "." cannot
    # cross a newline, so a wrapped paragraph never reaches the blank-line
    # lookahead and the instruction is silently lost.
    instr_match = re.search(
        rf'Gunakan Template A\.{bab_num} (.+?)(?=\n\n|\Z)',
        content, re.DOTALL
    )
    if instr_match:
        # Collapse the line wraps so the slide gets one flowing sentence.
        instr = re.sub(r'\s+', ' ', instr_match.group(1)).strip()[:200]
    else:
        instr = ""
    return title, instr


def extract_menuju_bab(content: str):
    """Return ``(next_bab_num, transition_text)`` from "**Menuju Bab N:**".

    The transition text is what follows the marker line, up to the next
    ``---`` rule or line starting with ``*"``; whitespace is collapsed and
    the text is capped at 280 characters. Returns ``(None, "")`` when the
    marker (or its terminator) is not found.
    """
    transition = re.search(
        r'\*\*Menuju Bab (\d+):\*\*\n(.+?)(?=\n---|\n\*")',
        content, re.DOTALL
    )
    if transition is None:
        return None, ""

    target, raw_text = transition.groups()
    summary = re.sub(r'\s+', ' ', raw_text).strip()
    return int(target), summary[:280]

# ─── Slide builder ────────────────────────────────────────────────────────────

# Front matter written at the top of every generated deck. It is a
# str.format template: build_slides fills {header}, {author} and
# {institution}; the doubled braces ({{ }}) are literal CSS braces that must
# survive .format(). The leading backslash keeps the string from starting
# with an empty line.
MARP_HEADER = '''\
---
marp: true
theme: gaia
class: invert
paginate: true
header: "{header}"
footer: "{author} &nbsp;|&nbsp; {institution}"
style: |
  section {{
    font-family: 'Segoe UI', Helvetica, sans-serif;
    font-size: 21px;
  }}
  h1 {{ color: #ffd166; }}
  h2 {{
    color: #06d6a0;
    border-bottom: 2px solid #06d6a060;
    padding-bottom: 4px;
  }}
  h3 {{ color: #8ecae6; }}
  blockquote {{
    border-left: 4px solid #ffd166;
    background: #ffffff15;
    padding: 0.5em 1em;
    font-style: italic;
  }}
  table {{ font-size: 18px; width: 100%; }}
  th {{ background: #06d6a040; }}
  code {{ background: #ffffff20; }}
  .lead h1 {{ font-size: 2em; color: #ffd166; }}
  .lead h2 {{ font-size: 1.3em; border: none; color: #e0e0e0; }}
  .bagian {{ font-size: 0.8em; color: #8ecae6; letter-spacing: 1px; }}
  .lead p {{ font-size: 0.9em; color: #c0c0c0; }}
---
'''

# A horizontal rule on its own line, which Marp treats as a slide break.
# build_slides uses it to delimit slides; main() counts occurrences of the
# same '\n---\n' text to report a slide total.
SLIDE_SEP = '\n---\n'

def build_slides(bab_num: int, title: str, bagian: str, reader_outcome: str,
                 level: str, content: str) -> str:
    """Render the complete Marp markdown deck for one chapter.

    Args:
        bab_num: Chapter number, used in headings and section lookups.
        title: Chapter title (shown on the cover and in the page header).
        bagian: Book part the chapter belongs to.
        reader_outcome: Reader outcome statement; may be empty.
        level: Difficulty label shown on the cover.
        content: Full markdown source of the chapter; every other slide is
            derived from it through the ``extract_*`` helpers.

    Returns:
        The deck as one string: the ``MARP_HEADER`` front matter followed by
        the slides, separated by ``SLIDE_SEP``. Slides are joined rather than
        each being terminated by a separator, so the deck does not end with
        an empty trailing slide.
    """
    # Extract all parts
    hook, pemantik = extract_opening(content, bab_num)
    mermaid = extract_mermaid(content, bab_num)
    fig1_caption = extract_gambar_caption(content, f'Gambar {bab_num}.1')
    defs = extract_definisi(content, bab_num)
    konsep = extract_konsep_subsections(content, bab_num)
    salah_kaprah = extract_salah_kaprah(content, bab_num)
    case_a_title, case_b_title = extract_studi_kasus_titles(content, bab_num)
    case_a_body = extract_studi_kasus_body(content, 'A (Dasar)', bab_num)
    case_b_body = extract_studi_kasus_body(content, 'B (Lanjutan)', bab_num)
    tmpl_name, tmpl_body = extract_template(content, bab_num)
    rangkuman = extract_rangkuman(content, bab_num)
    final = extract_final_statement(content)
    lat_title, lat_instr = extract_latihan(content, bab_num)
    next_bab, menuju_text = extract_menuju_bab(content)

    # The header value sits inside a YAML double-quoted scalar, so a raw
    # backslash or double quote in the title would corrupt the front matter.
    yaml_title = title.replace('\\', '\\\\').replace('"', '\\"')
    front_matter = MARP_HEADER.format(
        header=f'BAB {bab_num} — {yaml_title}',
        author=AUTHOR,
        institution=INSTITUTION
    )

    slides = []

    # ── Slide 1: Cover ──────────────────────────────────────────────────────
    slides.append(f'''\
<!-- _class: lead invert -->

# BAB {bab_num}
## {title}

<p class="bagian">{bagian}</p>

**Level:** {level}''')

    # ── Slide 2: Reader Outcome ─────────────────────────────────────────────
    # Skip the blockquote when no outcome was parsed instead of emitting an
    # empty "> " line.
    outcome_quote = f'> {reader_outcome}\n\n' if reader_outcome else ''
    slides.append(f'''\
## Reader Outcome

{outcome_quote}| Info | Detail |
|------|--------|
| **Bagian** | {bagian} |
| **Level** | {level} |
| **Sub-topik** | {len(konsep)} konsep inti |''')

    # ── Slide 3: Trigger question ───────────────────────────────────────────
    # Only non-empty pieces are rendered: wrapping an empty string in "_"
    # would print a literal "__". The "---" between hook and question is the
    # same rule Marp uses as a slide break, so the question is shown on a
    # slide of its own.
    opening_parts = []
    if hook:
        opening_parts.append(hook)
    if pemantik:
        opening_parts.append(f'_{pemantik}_')
    if not opening_parts:
        opening_parts.append(f'_(Lihat Seksi {bab_num}.1 di buku teks)_')
    opening_md = '\n\n---\n\n'.join(opening_parts)
    slides.append(f'''\
<!-- _class: invert -->

## Pertanyaan Pemantik

{opening_md}''')

    # ── Slide 4: Signature Model ────────────────────────────────────────────
    if mermaid:
        caption = fig1_caption or f"Signature Model — Bab {bab_num}"
        slides.append(f'''\
## Model Utama — Gambar {bab_num}.1

```mermaid
{mermaid}
```

**{caption}**''')
    else:
        slides.append(f'''\
## Model Utama

_(Lihat Gambar {bab_num}.1 di buku teks)_''')

    # ── Slide 5: Key definitions ────────────────────────────────────────────
    if defs:
        entries = []
        for term, defi, relevance in defs:
            entry = f'**{term}**\n{defi}'
            if relevance:
                # Quote line only when a relevance note exists (no "> __").
                entry += f'\n\n> _{relevance}_'
            entries.append(entry)
        defs_md = '\n\n'.join(entries).strip()
        slides.append(f'## Definisi Kunci\n\n{defs_md}')
    else:
        slides.append(f'''\
## Definisi Kunci

_(Lihat Seksi {bab_num}.3 di buku teks)_''')

    # ── Slide 6 & 7: Core concepts, split over two slides ───────────────────
    if konsep:
        half = (len(konsep) + 1) // 2
        halves = (konsep[:half], konsep[half:])
        for part_no, chunk in enumerate(halves, start=1):
            if not chunk:
                continue  # a single concept leaves the second half empty
            items = '\n'.join(f'- **{int(n)}.** {t}' for n, t in chunk)
            slides.append(f'## Konsep Inti — Bagian {part_no}\n\n{items}')

    # ── Slide 8: Common misconceptions ──────────────────────────────────────
    if salah_kaprah:
        traps_md = '\n\n'.join(
            f'> ⚠️ _"{myth}"_\n\n↳ {correction}'
            for myth, correction in salah_kaprah
        ).strip()
        slides.append(f'## ⚠️ Salah Kaprah\n\n{traps_md}')

    # ── Slide 9: Case studies ───────────────────────────────────────────────
    cases = []
    if case_a_title:
        cases.append(f'📊 **Dasar:** {case_a_title}\n\n{case_a_body[:250]}')
    if case_b_title:
        cases.append(f'📊 **Lanjutan:** {case_b_title}\n\n{case_b_body[:250]}')
    if cases:
        cases_md = '\n\n'.join(cases).strip()
        slides.append(f'## Studi Kasus\n\n{cases_md}')

    # ── Slide 10: Template ──────────────────────────────────────────────────
    # Preview = first 18 non-blank lines of the template body.
    preview_lines = [ln for ln in tmpl_body.split('\n') if ln.strip()][:18]
    if preview_lines:
        preview = '\n'.join(preview_lines)
        # The preview may itself contain a code fence; the wrapping fence must
        # be longer than any backtick run inside it or it would close early.
        longest_run = max(
            (len(run) for run in re.findall(r'`+', preview)), default=0
        )
        fence = '`' * max(3, longest_run + 1)
        template_md = f'{fence}\n{preview}\n{fence}'
    else:
        # No template body found: point to the book rather than render an
        # empty code block.
        template_md = f'_(Lihat Template A.{bab_num} di buku teks)_'
    slides.append(f'''\
## 🔧 Template A.{bab_num}
### {tmpl_name}

{template_md}''')

    # ── Slide 11: Summary ───────────────────────────────────────────────────
    if rangkuman:
        rang_md = '\n'.join(
            f'{i}. {item}' for i, item in enumerate(rangkuman, start=1)
        )
        slides.append(f'## Rangkuman\n\n{rang_md}')

    # ── Slide 12: Final statement ───────────────────────────────────────────
    if final:
        slides.append(f'''\
<!-- _class: lead invert -->

## 🔥 Final Statement

> "{final}"''')

    # ── Slide 13: Exercise + pointer to the next chapter ────────────────────
    closing = ['## Latihan & Refleksi']
    if lat_title:
        closing.append(f'### 📝 Latihan {bab_num}.1 — {lat_title}\n\n{lat_instr}')
    if next_bab and menuju_text:
        closing.append(f'### ➡️ Menuju Bab {next_bab}\n\n_{menuju_text[:200]}_')
    slides.append('\n\n'.join(closing))

    # Two blank lines before each separator match the spacing between slides
    # that the per-slide templates used to produce.
    body = ('\n\n' + SLIDE_SEP).join(slides).rstrip()
    return front_matter + body + '\n'


# ─── Main ─────────────────────────────────────────────────────────────────────

def main():
    """Generate one slide deck per chapter (1-18) and print a report.

    For each ``bab-NN.md`` under ``CHAPTERS`` the chapter is parsed, rendered
    with ``build_slides`` and written to ``OUT_DIR/slide-bab-NN.md``. Missing
    chapter files are reported and skipped.
    """
    print("Generating Marp slides for 18 chapters...")
    print(f"Output directory: {OUT_DIR}")
    print()

    generated = 0
    problems = []

    for chapter_no in range(1, 19):
        suffix = f'{chapter_no:02d}'
        source = CHAPTERS / f'bab-{suffix}.md'

        if source.exists():
            # utf-8-sig drops a leading BOM if the file has one.
            content = source.read_text(encoding='utf-8-sig')
            bab_n, title, bagian, ro, level = extract_title_bagian(content)

            if bab_n == 0:
                # Heading not recognised: fall back to the number taken from
                # the file name.
                print(f"  [WARN] Could not parse title in bab-{suffix}.md")
                bab_n = chapter_no

            deck = build_slides(bab_n, title, bagian, ro, level, content)
            target = OUT_DIR / f'slide-bab-{suffix}.md'
            target.write_text(deck, encoding='utf-8')

            # Count separator lines in the output as the slide total.
            slide_count = deck.count('\n---\n')
            print(f"  [OK] bab-{suffix:>2} — {title[:50]:<50}  ({slide_count} slides)")
            generated += 1
        else:
            print(f"  [SKIP] bab-{suffix}.md not found")
            problems.append(f"bab-{suffix}.md not found")

    print()
    print(f"Done: {generated}/18 chapters generated.")
    if problems:
        print(f"Errors: {problems}")
    print("\nOpen any file in VS Code with Marp extension to preview.")
    print("CLI export: marp slides/marp/slide-bab-NN.md --pdf")


if __name__ == '__main__':
    main()