feat: build PDF buku lengkap 356 hal (fpdf2 + markdown-it-py, tanpa instalasi tambahan)

This commit is contained in:
hb_alim 2026-04-25 15:07:27 +07:00
parent 661c4bdcb8
commit e52a9ca0b5

373
scripts/build-pdf-python.py Normal file
View file

@@ -0,0 +1,373 @@
#!/usr/bin/env python3
"""
build-pdf-python.py
-----------------------------------------------------------
Membangun 1 file PDF buku lengkap dari semua chapter .md
Menggunakan: fpdf2 2.8.x + markdown-it-py (sudah terinstall)
Tidak membutuhkan instalasi tambahan.
Output: build/SIM-Era-AI-Book.pdf
-----------------------------------------------------------
"""
import os
import re
import sys
import warnings
# Suppress fpdf2 deprecation warnings (uni=True, font substitution)
warnings.filterwarnings('ignore', category=UserWarning, module='fpdf')
from fpdf import FPDF
from fpdf.html import TextStyle
from markdown_it import MarkdownIt
# ==============================================================================
# KONFIGURASI PATH
# ==============================================================================
# Project layout: this script sits one directory below the repository root.
_THIS_FILE = os.path.abspath(__file__)
ROOT = os.path.dirname(os.path.dirname(_THIS_FILE))
CHAP = os.path.join(ROOT, 'chapters')
BACK = os.path.join(ROOT, 'backmatter')
BUILD = os.path.join(ROOT, 'build')
OUT = os.path.join(BUILD, 'SIM-Era-AI-Book.pdf')

# The output directory is created at import time so OUT is always writable.
os.makedirs(BUILD, exist_ok=True)

# Input files in book order: preface, chapters 01-18, then the back matter.
FILES = [os.path.join(BACK, 'kata-pengantar.md')]
FILES.extend(os.path.join(CHAP, f'bab-{number:02d}.md') for number in range(1, 19))
FILES.extend(
    os.path.join(BACK, name)
    for name in ('references.md', 'glosarium.md', 'indeks.md', 'tentang-penulis.md')
)

# B5 page size in mm
PW = 182
PH = 257

# Margins in mm: left, top, right, bottom
ML = 25
MT = 30
MR = 20
MB = 25

# Width of the text area = 137 mm
UW = PW - ML - MR

# Windows system TrueType fonts as (family alias, style, path). The aliases
# are unique so they do not clash with the fpdf2 core font names.
WIN_FONTS = [
    ('TNR', '',   'C:/Windows/Fonts/times.ttf'),
    ('TNR', 'B',  'C:/Windows/Fonts/timesbd.ttf'),
    ('TNR', 'I',  'C:/Windows/Fonts/timesi.ttf'),
    ('TNR', 'BI', 'C:/Windows/Fonts/timesbi.ttf'),

    ('TNS', '',   'C:/Windows/Fonts/arial.ttf'),
    ('TNS', 'B',  'C:/Windows/Fonts/arialbd.ttf'),
    ('TNS', 'I',  'C:/Windows/Fonts/ariali.ttf'),
    ('TNS', 'BI', 'C:/Windows/Fonts/arialbi.ttf'),

    ('CNR', '',   'C:/Windows/Fonts/cour.ttf'),
    ('CNR', 'B',  'C:/Windows/Fonts/courbd.ttf'),
]
# ==============================================================================
# PRE-PROCESSOR MARKDOWN
# ==============================================================================
# Symbols replaced with ASCII stand-ins (or dropped) before rendering.
# Built once; str.translate applies the whole table in a single pass.
_SYMBOL_TABLE = str.maketrans({
    '\u2713': '[v]',           # check mark
    '\u2714': '[v]',           # heavy check mark
    '\u2718': '[x]',           # heavy ballot x
    '\u25c9': '[o]',           # fisheye bullet
    '\u25ad': '[ ]',           # white rectangle (box)
    '\u25c7': '<>',            # white diamond
    '\U0001f4ca': '[Grafik]',  # bar chart emoji
    '\U0001f4cc': '[Pin]',     # pushpin emoji
    '\U0001f4a1': '[Ide]',     # light bulb emoji
    '\U0001f6ab': '[No]',      # no-entry emoji
    '\ufe0f': '',              # variation selector (removed)
    '\u200b': '',              # zero-width space (removed)
})


def clean_md(text: str) -> str:
    """Strip book-specific constructs from Markdown before HTML conversion.

    Steps, in order:
      1. Remove the fenced chapter-metadata block (a bare fence whose first
         line starts with ``Bagian``).
      2. Replace every ``mermaid`` fence with an HTML blockquote placeholder
         that names the diagram.
      3. Replace or drop the symbols listed in ``_SYMBOL_TABLE``.

    Args:
        text: Raw Markdown source of one file.

    Returns:
        The cleaned Markdown text.
    """
    import html  # local import: only needed to escape the placeholder title

    # 1. Drop the chapter metadata block (```\nBagian: ...\n```).
    text = re.sub(r'``` *\n(Bagian[\s\S]*?)```', '', text)

    # 2. Swap mermaid blocks for an HTML placeholder.
    def mermaid_ph(m):
        code = m.group(1)
        # Title candidates, in priority order: a %% comment line, a subgraph
        # label, then the first quoted node label. The comment pattern is
        # anchored to its own line and rejects "%%{...}%%" directives, so
        # neither a directive nor the line after a bare "%%" becomes the title.
        title_m = (re.search(r'^[ \t]*%%(?!\{)[ \t]*(\S.*)$', code, re.MULTILINE) or
                   re.search(r'subgraph\s+"?([^"\n]{4,50})"?', code) or
                   re.search(r'\["([^"]{4,50})"\]', code))
        name = title_m.group(1).strip()[:60] if title_m else 'Diagram Alur'
        # The title lands inside raw HTML, so markup characters must be escaped.
        name = html.escape(name, quote=False)
        return (
            f'\n<blockquote><em>[Diagram: {name}'
            f' -- tersedia di versi digital buku]</em></blockquote>\n\n'
        )

    text = re.sub(r'```mermaid\s*\n(.*?)```', mermaid_ph, text, flags=re.DOTALL)

    # 3. Replace symbols that the body font cannot render.
    return text.translate(_SYMBOL_TABLE)
# ==============================================================================
# KELAS PDF BUKU
# ==============================================================================
class BookPDF(FPDF):
    """FPDF subclass that lays out the book.

    It produces a title page, a copyright page, and Markdown files rendered
    through ``write_html()``, with a running header and a page-number footer.

    ``BF``/``HF``/``MF`` name the body, heading and monospace font families
    and ``BSZ`` is the base font size in points. ``_load_fonts()`` may rebind
    the family names on the instance when TrueType fonts are unavailable.
    """

    BF = 'TNR'   # body font (Times New Roman TTF)
    HF = 'TNS'   # heading font (Arial TTF)
    MF = 'CNR'   # monospace font (Courier New TTF)
    BSZ = 11     # base font size

    # Set to True by _load_fonts() when the Latin-1 core fonts are in use.
    _core_fallback = False

    # Typographic characters with an obvious Latin-1 stand-in; used only in
    # core-font fallback mode.
    _LATIN1_SUBSTITUTES = str.maketrans({
        '\u2014': '-',    # em dash
        '\u2013': '-',    # en dash
        '\u2026': '...',  # ellipsis
        '\u2018': "'",
        '\u2019': "'",
        '\u201c': '"',
        '\u201d': '"',
        '\u2022': '-',    # bullet
    })

    def __init__(self):
        """Create a portrait PW x PH mm document with the book margins and fonts."""
        super().__init__(orientation='P', unit='mm', format=(PW, PH))
        self.set_margins(ML, MT, MR)
        self.set_auto_page_break(True, MB)
        self.chapter_title = ''
        self._load_fonts()

    # -- Fonts ---------------------------------------------------------------

    def _load_fonts(self):
        """Register the Windows TrueType fonts, or fall back to core fonts.

        TrueType mode is kept only when every listed style of the body and
        heading families was registered; a bare count could pass while one
        of those families is missing entirely. Fonts that fail to load are
        reported instead of being skipped silently.
        """
        loaded = set()
        for family, style, path in WIN_FONTS:
            if not os.path.exists(path):
                continue
            try:
                self.add_font(family, style, path)  # uni=True deprecated since fpdf2 2.5
            except Exception as exc:
                # Best effort: a broken font file must not abort the build.
                print(f" ⚠ Could not load font {path}: {exc}")
                continue
            loaded.add((family, style))

        required = [(family, style) for family, style, _ in WIN_FONTS
                    if family in (self.BF, self.HF)]
        if required and all(key in loaded for key in required):
            if (self.MF, '') not in loaded:
                # Keep MF pointing at a usable family.
                self.MF = 'Courier'
            print(f" ✓ TrueType fonts: {len(loaded)}/{len(WIN_FONTS)} loaded (TNR/TNS/CNR)")
        else:
            # Fall back to the Latin-1 core fonts.
            self.BF = 'Helvetica'
            self.HF = 'Helvetica'
            self.MF = 'Courier'
            self._core_fallback = True
            print(f" ⚠ Only {len(loaded)} TTF fonts loaded — using Helvetica fallback")

    def _safe_text(self, text: str) -> str:
        """Return ``text`` unchanged in TrueType mode, Latin-1-safe otherwise.

        In fallback mode common typographic characters are mapped to ASCII
        and anything else outside Latin-1 becomes ``?``, so that the core
        fonts are never handed a character they cannot encode.
        """
        if not self._core_fallback:
            return text
        text = text.translate(self._LATIN1_SUBSTITUTES)
        return text.encode('latin-1', 'replace').decode('latin-1')

    def _tag_styles(self):
        """Return the TextStyle dict passed to write_html() (h1-h6 and blockquote)."""
        HF, BF = self.HF, self.BF
        return {
            'h1': TextStyle(font_family=HF, font_style='B', font_size_pt=20,
                            color=(25, 45, 110), t_margin=6, b_margin=4),
            'h2': TextStyle(font_family=HF, font_style='B', font_size_pt=15,
                            color=(25, 45, 110), t_margin=5, b_margin=3),
            'h3': TextStyle(font_family=HF, font_style='B', font_size_pt=12,
                            color=(50, 60, 100), t_margin=4, b_margin=2),
            'h4': TextStyle(font_family=HF, font_style='B', font_size_pt=11,
                            t_margin=3, b_margin=2),
            'h5': TextStyle(font_family=BF, font_style='BI', font_size_pt=11),
            'h6': TextStyle(font_family=BF, font_style='I', font_size_pt=10),
            'blockquote': TextStyle(font_family=BF, font_style='I',
                                    font_size_pt=10, color=(60, 60, 80),
                                    fill_color=(245, 247, 252), l_margin=8),
        }

    # -- Header & Footer -----------------------------------------------------

    def header(self):
        """Draw the running header: book title on even pages, chapter on odd.

        Pages 1 and 2 (title and copyright) get no header.
        """
        if self.page_no() < 3:
            return
        self.set_font(self.BF, 'I', 8)
        self.set_text_color(130, 130, 130)
        if self.page_no() % 2 == 0:
            lbl, align = 'Sistem Informasi Manajemen di Era AI', 'L'
        else:
            lbl = self.chapter_title
            if len(lbl) > 55:
                lbl = lbl[:53] + '\u2026'
            align = 'R'
        self.cell(UW, 6, self._safe_text(lbl), align=align)
        # Rule under the header text, then restore default colours.
        self.set_draw_color(190, 190, 190)
        self.line(ML, self.get_y() + 7, PW - MR, self.get_y() + 7)
        self.set_draw_color(0, 0, 0)
        self.set_text_color(0, 0, 0)
        self.ln(8)

    def footer(self):
        """Draw the centred page number 18 mm above the bottom edge."""
        self.set_y(-18)
        self.set_font(self.BF, 'I', 9)
        self.set_text_color(100, 100, 100)
        self.cell(0, 10, str(self.page_no()), align='C')
        self.set_text_color(0, 0, 0)

    # -- Special pages -------------------------------------------------------

    def add_title_page(self):
        """Add the title page: title, subtitle, rule, author, affiliation, year."""
        self.add_page()
        # Title
        self.set_y(50)
        self.set_font(self.HF, 'B', 26)
        self.set_text_color(25, 45, 110)
        self.multi_cell(UW, 13,
                        'Sistem Informasi Manajemen\ndi Era AI', align='C')
        self.ln(6)
        # Subtitle, in the italic body font
        self.set_font(self.BF, 'I', 13)
        self.set_text_color(80, 80, 80)
        self.multi_cell(UW, 7,
                        'Perspektif Strategis dan Pengambilan Keputusan',
                        align='C')
        self.ln(14)
        # Decorative rule, centred on the text area
        cx = ML + UW / 2
        self.set_draw_color(25, 45, 110)
        self.set_line_width(0.8)
        self.line(cx - 42, self.get_y(), cx + 42, self.get_y())
        self.set_line_width(0.2)
        self.set_draw_color(0, 0, 0)
        self.ln(18)
        # Author
        self.set_font(self.BF, 'B', 13)
        self.set_text_color(0, 0, 0)
        self.cell(0, 7, 'Helmi Bahar Alim, S.Kom., M.Kom.', align='C')
        self.ln(8)
        self.set_font(self.BF, '', 11)
        self.set_text_color(60, 60, 60)
        self.cell(0, 6, 'Program Studi Manajemen', align='C')
        self.ln(6)
        # The em dash is outside Latin-1, hence _safe_text for fallback mode.
        self.cell(0, 6, self._safe_text('Universitas Putra Bangsa \u2014 Kebumen'),
                  align='C')
        self.ln(38)
        self.set_font(self.BF, 'I', 11)
        self.cell(0, 7, '2026', align='C')
        self.set_text_color(0, 0, 0)

    def add_copyright_page(self):
        """Add the copyright page with the imprint block at the page bottom.

        The block is 8 text rows of 6.5 mm plus 3 gaps of 4 mm = 64 mm.
        Starting it at ``PH - 82`` would put the last row at 232.5 mm, past
        the page-break trigger at ``PH - MB`` = 232 mm, so the start is
        raised until the block ends at the bottom margin.
        """
        self.add_page()
        rows = [
            ('Sistem Informasi Manajemen di Era AI', 'B', 11),
            ('Perspektif Strategis dan Pengambilan Keputusan', 'I', 10),
            None,
            ('Penulis: Helmi Bahar Alim, S.Kom., M.Kom.', '', 10),
            ('Afiliasi: Universitas Putra Bangsa, Kebumen', '', 10),
            ('Penerbit: UPB Press', '', 10),
            None,
            ('Hak Cipta \u00a9 2026 pada penulis. Dilindungi Undang-Undang.', '', 10),
            ('Dilarang memperbanyak tanpa izin tertulis penulis dan penerbit.', '', 10),
            None,
            ('Cetakan Pertama, 2026', 'I', 10),
        ]
        line_h, gap_h = 6.5, 4
        block_h = sum(gap_h if row is None else line_h for row in rows)
        self.set_xy(ML, min(PH - 82, PH - MB - block_h))
        # Guard against an unexpected wrapped row starting a new page.
        self.set_auto_page_break(False)
        try:
            for row in rows:
                if row is None:
                    self.ln(gap_h)
                    continue
                text, style, sz = row
                self.set_font(self.BF, style, sz)
                self.set_x(ML)
                self.multi_cell(UW, line_h, self._safe_text(text))
        finally:
            # Restore the setting made in __init__.
            self.set_auto_page_break(True, MB)

    # -- Rendering one Markdown file -----------------------------------------

    @staticmethod
    def _flatten_table_cells(html: str) -> str:
        """Replace inline tags inside <td>/<th> cells with plain text.

        Cells with attributes (for example a column-alignment ``style``) are
        matched too, and their attributes are kept on the opening tag.
        """
        def strip_inner(m):
            tag = m.group(1)
            attrs = m.group(2) or ''
            inner = re.sub(r'<[^>]+>', ' ', m.group(3))
            inner = re.sub(r'\s{2,}', ' ', inner).strip()
            return f'<{tag}{attrs}>{inner}</{tag}>'
        return re.sub(r'<(th|td)(\s[^>]*)?>([\s\S]*?)</\1>', strip_inner, html)

    @staticmethod
    def _replace_code_blocks(html: str) -> str:
        """Turn <pre><code> into <blockquote> and inline <code> into <b>.

        This keeps code in the body font rather than the core Courier font.
        """
        # pre > code blocks: keep line breaks as <br/>
        html = re.sub(
            r'<pre><code[^>]*>([\s\S]*?)</code></pre>',
            lambda m: '<blockquote>' + m.group(1).strip().replace('\n', '<br/>') + '</blockquote>',
            html
        )
        # remaining inline <code> -> <b>
        html = re.sub(r'<code>(.*?)</code>', r'<b>\1</b>', html)
        return html

    def render_file(self, filepath: str):
        """Read a Markdown file, clean it, convert it to HTML and render it.

        Also updates ``chapter_title`` from the file's first H1, if any.

        Args:
            filepath: Path to a UTF-8 Markdown file (a BOM is tolerated).
        """
        with open(filepath, 'r', encoding='utf-8-sig') as f:
            raw = f.read()
        cleaned = clean_md(raw)
        # Update chapter_title from the first H1, minus emphasis/code markers.
        h1 = re.search(r'^#\s+(.+)', cleaned, re.MULTILINE)
        if h1:
            self.chapter_title = re.sub(r'[*_`]', '', h1.group(1)).strip()[:60]
        # Markdown -> HTML
        md = MarkdownIt('commonmark').enable('table').enable('strikethrough')
        html = md.render(cleaned)
        # Simplify table cells, then swap out code blocks.
        html = self._flatten_table_cells(html)
        html = self._replace_code_blocks(html)
        # No-op in TrueType mode; avoids encoding errors in fallback mode.
        html = self._safe_text(html)
        # Default font and colour before write_html
        self.set_font(self.BF, '', self.BSZ)
        self.set_text_color(0, 0, 0)
        self.write_html(html, tag_styles=self._tag_styles())
# ==============================================================================
# MAIN
# ==============================================================================
def _peek_chapter_title(filepath):
    """Return the first H1 of a Markdown file, or None if it has none.

    Uses the same cleaning, marker stripping and 60-character limit that
    BookPDF.render_file applies when it sets ``chapter_title``.
    """
    with open(filepath, 'r', encoding='utf-8-sig') as f:
        h1 = re.search(r'^#\s+(.+)', clean_md(f.read()), re.MULTILINE)
    if h1 is None:
        return None
    return re.sub(r'[*_`]', '', h1.group(1)).strip()[:60]


def main():
    """Build the complete book PDF and exit with a status code.

    Renders the title page, the copyright page and every file in FILES
    (missing files are skipped), then writes OUT. Exits 0 on a clean build
    and 1 when saving fails or when any file failed to render, so that a
    partially rendered book is not reported as success.
    """
    print('=' * 58)
    print(' BUILD PDF: Sistem Informasi Manajemen di Era AI')
    print(f' Output: {OUT}')
    print('=' * 58)
    pdf = BookPDF()
    pdf.add_title_page()
    pdf.add_copyright_page()
    total = len(FILES)
    errors = []
    for idx, fp in enumerate(FILES):
        name = os.path.basename(fp)
        if not os.path.exists(fp):
            print(f' [{idx+1:02d}/{total}] SKIP (tidak ditemukan): {name}')
            continue
        print(f' [{idx+1:02d}/{total}] {name}...', end=' ', flush=True)
        try:
            # add_page() draws the header immediately, so the running title
            # must be set first; otherwise the first page of a chapter shows
            # the previous chapter's title.
            title = _peek_chapter_title(fp)
            if title is not None:
                pdf.chapter_title = title
            pdf.add_page()
            pdf.render_file(fp)
            print('OK')
        except Exception as e:
            # Best effort: record the failure and continue with the next file.
            print(f'ERROR: {e}')
            errors.append((name, str(e)))
    print('\nMenyimpan PDF...', end=' ', flush=True)
    try:
        pdf.output(OUT)
        sz = os.path.getsize(OUT) / 1_048_576
    except Exception as e:
        print(f'GAGAL: {e}')
        sys.exit(1)
    pgs = pdf.page_no()
    print('OK')
    print(f'\n{"=" * 58}')
    print(' SELESAI!')
    print(f' File : {OUT}')
    print(f' Ukuran : {sz:.1f} MB')
    print(f' Halaman : {pgs}')
    if errors:
        print(f' Errors : {len(errors)} file gagal:')
        for n, e in errors:
            print(f' - {n}: {e}')
    print(f'{"=" * 58}')
    sys.exit(1 if errors else 0)


if __name__ == '__main__':
    main()