diff --git a/logs/hf_summary.log b/logs/hf_summary.log new file mode 100644 index 0000000..36aa330 --- /dev/null +++ b/logs/hf_summary.log @@ -0,0 +1,10 @@ + +[2025-10-27T04:49:15.827939] ▶ HF Request to Qwen2.5-7B-Instruct +Payload preview: 다음 텍스트를 자세하고 명확하게 한국어로 요약해주세요. 결과는 Markdown 형식으로 작성하고, '## 요약', '## 핵심 요점', '## 슬라이드 요약', '## 상세 설명' 섹션을 반드시 포함하세요. + +# 슬라이드1 + +학생 여러분! 안녕하세요 건국대학교 컴퓨터공학과 이철원 교수입니다 자, 이번 시간에는 사 다시 일주차 파이썬 프로그래밍 리스트의 이해와 활용이라는 주제로 여러분을 만나뵙게 되었습니다 지금까지 우리는 변수와 기본적인 자료형, 각종 연산자들에 대해 배웠죠? 이제 파이썬의 꽃이라고 할 수 있는 '자료 구조' 중 하나... +Response status: 404 +Response text preview: Not Found +-------------------------------------------------------------------------------- diff --git a/routers/note.py b/routers/note.py index 446e3a6..41ff3e8 100644 --- a/routers/note.py +++ b/routers/note.py @@ -1,40 +1,47 @@ import os +import re +import json +import difflib +from datetime import datetime +from typing import List, Optional + from dotenv import load_dotenv from fastapi import APIRouter, Depends, HTTPException, BackgroundTasks, Query, Request from fastapi.responses import StreamingResponse from sqlalchemy.orm import Session -from typing import List -from datetime import datetime -import traceback -import re -import json -from db import get_db, SessionLocal +from db import get_db from models.note import Note from models.file import File as FileModel from schemas.note import NoteCreate, NoteUpdate, NoteResponse, FavoriteUpdate, NoteFile from utils.jwt_utils import get_current_user -from utils.llm import stream_summary_with_langchain, _strip_top_level_h1_outside_code, _hf_generate_once, _system_prompt -from utils.llm import _hf_generate_once, _system_prompt +from utils.llm import ( + stream_summary_with_langchain, + _strip_top_level_h1_outside_code, + _hf_generate_once, + _system_prompt, + count_slides, + normalize_and_renumber_slides, +) load_dotenv() -HF_TOKEN = os.getenv("HF_API_TOKEN") router = APIRouter(prefix="/api/v1", tags=["Notes"]) - -# 환경변수에서 BASE_API_URL 가져와 파일 다운로드 URL 구성 BASE_API_URL = os.getenv("BASE_API_URL", "http://localhost:8000") +HF_MAX_NEW_TOKENS_LONG = int(os.getenv("HF_MAX_NEW_TOKENS_LONG", "32000")) +HF_MAP_MAX_NEW_TOKENS = int(os.getenv("HF_MAP_MAX_NEW_TOKENS", "12000")) +ENSURE_COMPLETION_PASSES = int(os.getenv("ENSURE_COMPLETION_PASSES", "3")) +SLIDES_MIN = int(os.getenv("SUMMARY_SLIDES_MIN", "8")) +SLIDES_MAX = int(os.getenv("SUMMARY_SLIDES_MAX", "40")) +SUMMARY_CHUNK_CHARS = int(os.getenv("SUMMARY_CHUNK_CHARS", "12000")) +SUMMARY_CHUNK_OVERLAP = int(os.getenv("SUMMARY_CHUNK_OVERLAP", "1200")) + # ───────────────────────────────────────────── -# 공통: Note → NoteResponse 직렬화 + files 채우기 +# 직렬화 # ───────────────────────────────────────────── def serialize_note(db: Session, note: Note, base_url: str) -> NoteResponse: - """ - Note ORM → NoteResponse 수동 매핑. - 관계(note.files)로 인해 Pydantic가 ORM 객체를 바로 검증하려다 실패하는 문제를 피하기 위해 - 기본 스칼라 필드만 직접 채우고, files는 별도 쿼리로 구성한다. - """ files = ( db.query(FileModel) .filter(FileModel.note_id == note.id, FileModel.user_id == note.user_id) @@ -66,8 +73,10 @@ def serialize_note(db: Session, note: Note, base_url: str) -> NoteResponse: ) +# ───────────────────────────────────────────── +# 간단 추출 요약 (백업용) +# ───────────────────────────────────────────── def _fallback_extractive_summary(text: str) -> str: - """Simple extractive fallback: pick leading sentences and format as TL;DR + bullets.""" if not text: return "## TL;DR\n요약할 내용이 없습니다." 
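The fallback defined here (its body continues just below) boils down to: split the text into sentences, promote the first to a TL;DR, and bullet the next few. A minimal standalone sketch of that logic, with the function name invented for illustration:

```python
import re

# Minimal sketch of the extractive fallback (hypothetical standalone version):
# the lead sentence becomes the TL;DR, the next few become bullets.
def extractive_fallback_sketch(text: str) -> str:
    # Split on sentence-final punctuation (including CJK '。') or newlines.
    sents = [s.strip() for s in re.split(r"(?<=[.!?。])\s+|\n+", text or "") if s.strip()]
    if not sents:
        return "## TL;DR\n요약할 내용이 없습니다."
    bullets = "\n".join(f"- {s}" for s in sents[1:5])
    return f"## TL;DR\n{sents[0]}\n\n## 핵심 요점\n{bullets}"
```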
sents = re.split(r"(?<=[.!?。])\s+|\n+", text) @@ -84,36 +93,40 @@ def _fallback_extractive_summary(text: str) -> str: def _is_summary_complete(s: str) -> bool: - """Heuristic: check presence of key sections and reasonable length.""" if not s or not s.strip(): return False low = s.lower() - # require TL;DR or 핵심 요점 and some detail if ('## tl;dr' in low or '## 핵심' in low or '## 핵심 요점' in low) and len(s) > 300: return True - # if contains multiple section headers, consider complete headers = len(re.findall(r"^##\s+", s, flags=re.M)) if headers >= 2 and len(s) > 200: return True - # otherwise likely incomplete return False +def _similarity_ratio(a: str, b: str) -> float: + a_norm = re.sub(r"\s+", " ", (a or "")).strip() + b_norm = re.sub(r"\s+", " ", (b or "")).strip() + if not a_norm or not b_norm: + return 0.0 + return difflib.SequenceMatcher(None, a_norm, b_norm).ratio() + + async def _ensure_completion(full: str, domain: str | None = None, length: str = 'long') -> str: - """If `full` looks truncated, attempt up to 3 continuation passes to complete it.""" try: - for i in range(3): + for _ in range(ENSURE_COMPLETION_PASSES): if _is_summary_complete(full) and re.search(r"[\.\!\?]\s*$", full.strip()): return full - # build continuation prompt sys_prompt = _system_prompt(domain or 'general', phase='final', output_format='md', length=length) - cont_prompt = "The following summary appears incomplete. Continue and finish the summary without repeating previous text:\n\n" + full + "\n\nContinue:" + cont_prompt = ( + "The following summary appears incomplete. Continue and finish the summary **without repeating previous text**.\n\n" + + full + "\n\nContinue:" + ) try: - cont = await _hf_generate_once(sys_prompt, cont_prompt, max_new_tokens=int(os.getenv('HF_MAX_NEW_TOKENS_LONG', '32000'))) + cont = await _hf_generate_once(sys_prompt, cont_prompt, max_new_tokens=HF_MAX_NEW_TOKENS_LONG) except Exception: cont = '' if cont and cont.strip(): - # append continuation full = (full + "\n\n" + cont.strip()).strip() else: break @@ -122,28 +135,80 @@ async def _ensure_completion(full: str, domain: str | None = None, length: str = return full -# 1) 모든 노트 조회 +async def _ensure_slide_coverage(full: str, target_slides: int, source_text: str, domain: str | None = None) -> str: + try: + for _ in range(ENSURE_COMPLETION_PASSES): + cur = count_slides(full) + if cur >= target_slides: + return full + + next_idx = cur + 1 + sys_prompt = _system_prompt(domain or 'general', phase='final', output_format='md', length='long') + cont_user = ( + "아래는 기존 요약입니다. '## 슬라이드' 섹션의 슬라이드 수가 목표보다 적습니다.\n" + f"목표 슬라이드 수: {target_slides}\n" + f"현재 슬라이드 수: {cur}\n\n" + "요청: 이전 내용을 반복하지 말고, **'## 슬라이드' 섹션만** 이어서 작성하세요. " + f"번호는 '### 슬라이드 {next_idx}'부터 연속으로 증가시키세요. 
" + "각 슬라이드는 제목 + 3–6개 불릿로 작성하고, 아직 다루지 않은 원문 토픽을 중심으로 추가하세요.\n\n" + "=== 기존 요약(참고) ===\n" + full[-12000:] + "\n\n" + "=== 원문(발췌; 필요시) ===\n" + (source_text[:12000] if source_text else "") + ) + try: + extra = await _hf_generate_once(sys_prompt, cont_user, max_new_tokens=HF_MAX_NEW_TOKENS_LONG) + except Exception: + extra = "" + + if extra and extra.strip(): + full = (full.rstrip() + "\n\n" + extra.strip()).strip() + else: + break + except Exception: + pass + return full + + +async def _force_compress_if_similar(full: str, source: str, domain: str | None = None) -> str: + try: + ratio = _similarity_ratio(full, source) + if ratio >= 0.85 or len(full.strip()) >= max(300, int(len(source.strip()) * 0.95)): + sys_prompt = _system_prompt(domain or 'general', phase='final', output_format='md', length='medium') + user = ( + "다음 원문을 20–40% 길이로 정확하게 요약해. 절대 원문을 그대로 복사하지 말고, " + "출력은 반드시 '## TL;DR', '## 핵심 요점', '## 상세 설명', '## 슬라이드' 섹션을 포함하라.\n\n" + + (source[:80000] if source else "") + ) + try: + compressed = await _hf_generate_once(sys_prompt, user, max_new_tokens=HF_MAX_NEW_TOKENS_LONG) + except Exception: + compressed = _fallback_extractive_summary(source) + if compressed and compressed.strip(): + return compressed + except Exception: + pass + return full + + +# ───────────────────────────────────────────── +# 목록/CRUD +# ───────────────────────────────────────────── @router.get("/notes", response_model=List[NoteResponse]) def list_notes( request: Request, - q: str | None = Query(default=None, description="Optional search query (title or content)"), + q: Optional[str] = Query(default=None, description="Optional search query (title or content)"), db: Session = Depends(get_db), user = Depends(get_current_user) ): - """List notes for the current user. If `q` is provided, filter by title or content (case-insensitive). 
- """ query = db.query(Note).filter(Note.user_id == user.u_id) if q and q.strip(): like = f"%{q.strip()}%" query = query.filter((Note.title.ilike(like)) | (Note.content.ilike(like))) notes = query.order_by(Note.created_at.desc()).all() - # 각 노트의 files도 채워 반환 base_url = os.getenv("BASE_API_URL") or str(request.base_url).rstrip('/') return [serialize_note(db, n, base_url) for n in notes] -# 2) 최근 접근한 노트 조회 (상위 10개) @router.get("/notes/recent", response_model=List[NoteResponse]) def recent_notes( request: Request, @@ -161,7 +226,6 @@ def recent_notes( return [serialize_note(db, n, base_url) for n in notes] -# 3) 노트 생성 @router.post("/notes", response_model=NoteResponse) def create_note( request: Request, @@ -182,7 +246,6 @@ def create_note( return serialize_note(db, note, base_url) -# 4) 노트 수정 (제목/내용/폴더) @router.patch("/notes/{note_id}", response_model=NoteResponse) def update_note( request: Request, @@ -210,8 +273,6 @@ def update_note( base_url = os.getenv("BASE_API_URL") or str(request.base_url).rstrip('/') return serialize_note(db, note, base_url) - -# 5) 노트 단일 조회 (마지막 접근 시간 업데이트 포함) @router.get("/notes/{note_id}", response_model=NoteResponse) def get_note( request: Request, @@ -230,7 +291,6 @@ def get_note( return serialize_note(db, note, base_url) -# 6) 노트 삭제 @router.delete("/notes/{note_id}") def delete_note( note_id: int, @@ -246,7 +306,6 @@ def delete_note( return {"message": "Note deleted successfully"} -# 7) 즐겨찾기 토글 @router.patch("/notes/{note_id}/favorite", response_model=NoteResponse) def toggle_favorite( request: Request, @@ -266,162 +325,145 @@ def toggle_favorite( base_url = os.getenv("BASE_API_URL") or str(request.base_url).rstrip('/') return serialize_note(db, note, base_url) - # ───────────────────────────────────────────── -# (참고) 요약 스트리밍 API - 완료 후에도 serialize_note 사용 안 함 -# (요약은 새 노트를 생성하고 SSE로 알림만 보냄) +# 요약 (로컬 Qwen 모델 기반, ChatGPT 스타일 자연요약) # ───────────────────────────────────────────── -@router.post("/notes/{note_id}/summarize") -async def summarize_stream_langchain( +@router.post("/notes/{note_id}/summarize_sync", response_model=NoteResponse) +async def summarize_sync( note_id: int, - background_tasks: BackgroundTasks, - domain: str | None = Query(default=None, description="meeting | code | paper | general | auto(None)"), - longdoc: bool = Query(default=True, description="Enable long-document map→reduce"), db: Session = Depends(get_db), - user = Depends(get_current_user) + user=Depends(get_current_user) ): + """ + ✅ ChatGPT 스타일 요약 + 요약 완료 후 메모리 해제 + """ + import torch + import numpy as np + import gc + from transformers import AutoTokenizer, AutoModelForCausalLM + from sklearn.feature_extraction.text import TfidfVectorizer + from sklearn.metrics.pairwise import cosine_similarity + note = db.query(Note).filter(Note.id == note_id, Note.user_id == user.u_id).first() if not note or not (note.content or "").strip(): raise HTTPException(status_code=404, detail="요약 대상 없음") - + source = note.content.strip() + if len(source) < 50: + raise HTTPException(status_code=400, detail="본문이 너무 짧습니다.") - async def event_gen(): - parts = [] - # Default to a comprehensive (long) summary when called without explicit options - async for sse in stream_summary_with_langchain(note.content, domain=domain, longdoc=longdoc, length='long', tone='neutral', output_format='md'): - parts.append(sse.removeprefix("data: ").strip()) - yield sse.encode() - full = "".join(parts).strip() - # attempt to complete if truncated - try: - full = await _ensure_completion(full, domain=domain, length='long') - except 
Exception: - pass - # If streamed output looks incomplete, attempt a single-shot completion pass - try: - if not _is_summary_complete(full): - try: - print('[summarize] partial output detected, performing completion pass') - sys_prompt = _system_prompt(domain or 'general', phase='final', output_format='md', length='long') - cont = await _hf_generate_once(sys_prompt, "Existing partial summary:\n\n" + full + "\n\nPlease expand and complete the summary, preserving facts and following the output format.", max_new_tokens=int(os.getenv('HF_MAX_NEW_TOKENS_LONG', '20000'))) - if cont and cont.strip(): - full = (full + "\n\n" + cont.strip()).strip() - print('[summarize] completion pass appended, new length=', len(full)) - except Exception as e: - print('[summarize] completion pass failed:', e) - except Exception: - pass - # If model produced empty output, fall back to a simple extractive summary - if not (full or "").strip(): - try: - sents = re.split(r"(?<=[.!?。])\s+|\n+", note.content or "") - sents = [p.strip() for p in sents if p.strip()] - head = sents[:6] - tl = head[0] if head else (note.content or "")[:200] - bullets = [f"- {p}" for p in head[1:5]] - fb = "## TL;DR\n" + tl + "\n\n## 핵심 요점\n" + "\n".join(bullets) - full = fb - except Exception: - full = (note.content or "")[:800] - try: - print(f"[summarize-sync] generated full length={len(full)} preview={repr(full[:200])}") - except Exception: - pass - # Remove local temp file paths (e.g. macOS /var/... or file://...) which shouldn't be persisted + full_summary = "" + failed = False + + try: + print("[summarize_sync] 🚀 Qwen2.5-7B-Instruct 로드 중...") + model_name = "Qwen/Qwen2.5-7B-Instruct" + tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True) + model = AutoModelForCausalLM.from_pretrained( + model_name, + torch_dtype=torch.float16, + device_map="auto", + trust_remote_code=True + ) + + messages = [ + { + "role": "system", + "content": ( + "당신은 전문적인 과학기술 문서 요약가입니다. " + "텍스트를 자연스럽고 명확하게 요약하세요. " + "결과는 Markdown 형식으로 작성하고, 다음 구조를 유지하세요:\n\n" + "## 요약\n\n" + "## 핵심 요점\n\n" + "## 상세 설명\n" + ), + }, + { + "role": "user", + "content": f"아래 내용을 ChatGPT처럼 깔끔하고 자연스럽게 요약해줘:\n\n{source}", + }, + ] + + inputs = tokenizer.apply_chat_template( + messages, + tokenize=True, + add_generation_prompt=True, + return_tensors="pt", + return_dict=True, + ).to(model.device) + + print("[summarize_sync] 🧠 요약 생성 중...") + with torch.no_grad(): + outputs = model.generate(**inputs, max_new_tokens=1500, temperature=0.4, top_p=0.9) + generated = tokenizer.decode(outputs[0][inputs["input_ids"].shape[-1]:], skip_special_tokens=True) + full_summary = generated.strip() + + print("[summarize_sync] ✅ 요약 완료") + + except Exception as e: + print(f"[summarize_sync] ❌ 모델 요약 실패: {e}") + failed = True + + finally: + # ✅ 메모리 해제 try: - # remove explicit file://... patterns - full = re.sub(r"file://\S+", "", full) - # remove absolute tmp paths like /var/... 
(up to whitespace or closing paren) - full = re.sub(r"/var/[^\s)]+", "", full) - # remove parenthesis-wrapped local paths in markdown images: ![alt](/path/to/file.png) - full = re.sub(r"!\[([^\]]*)\]\([^)]*(/var/[^)\s]+)[)]", r"![\1]()", full) - except Exception: - pass - # Strip any top-level H1 headings that the model may have added (outside code fences) + del model + del tokenizer + gc.collect() + if torch.cuda.is_available(): + torch.cuda.empty_cache() + print("[summarize_sync] 🧹 모델 메모리 해제 완료") + except Exception as e: + print(f"[summarize_sync] ⚠️ 메모리 해제 실패: {e}") + + # ─────────────── + # Fallback (TextRank) + # ─────────────── + if failed or not full_summary: + print("[summarize_sync] ⚠️ TextRank 백업 사용") try: - full = _strip_top_level_h1_outside_code(full) + sents = re.split(r"(?<=[.!?。])\s+|\n+", source) + sents = [s.strip() for s in sents if len(s.strip()) > 10] + if len(sents) < 3: + full_summary = _fallback_extractive_summary(source) + else: + vec = TfidfVectorizer() + tfidf = vec.fit_transform(sents) + sim = cosine_similarity(tfidf) + scores = np.sum(sim, axis=1) + top_n = max(3, int(len(sents) * 0.2)) + top_idx = np.argsort(scores)[-top_n:] + key_sents = [sents[i] for i in sorted(top_idx)] + bullets = "\n".join(f"- {s}" for s in key_sents[:5]) + full_summary = f"## 요약\n{' '.join(key_sents[:2])}\n\n## 핵심 요점\n{bullets}\n\n## 상세 설명\n이 요약은 TextRank 기반 로컬 요약입니다." except Exception: - # fallback: naive removal of a single leading H1 - full = re.sub(r"^\s*#\s.*?\n+", "", full, count=1) - # Ensure non-empty summary; if model produced nothing, use extractive fallback - if not (full or "").strip(): - try: - full = _fallback_extractive_summary(note.content) - print(f"[summarize] fallback summary used length={len(full)}") - except Exception: - full = (note.content or '')[:800] + full_summary = _fallback_extractive_summary(source) - # Ensure non-empty summary; if model produced nothing, use extractive fallback - if not (full or "").strip(): - try: - full = _fallback_extractive_summary(note.content) - print(f"[summarize-sync] fallback summary used length={len(full)}") - except Exception: - full = (note.content or '')[:800] - - if full: - # Create a new summary note in the same folder with title ' — 요약' - title = (note.title or "").strip() + " — 요약" - if len(title) > 255: - title = title[:255] - new_note = Note( - user_id=user.u_id, - folder_id=note.folder_id, - title=title, - content=full, - ) - db.add(new_note) - db.commit() - db.refresh(new_note) - try: - # log created summary id and content preview for debugging - print(f"[summarize] created summary note id={new_note.id} for note_id={note_id}") - try: - print("[summarize] saved content length=", len(new_note.content or "")) - print("[summarize] saved content preview=", repr((new_note.content or "")[:400])) - except Exception: - pass - except Exception: - pass - # normal streaming path: notify created note via SSE - try: - # notify created note: include serialized note JSON so client can render immediately - base_url = os.getenv("BASE_API_URL") or BASE_API_URL - note_obj = serialize_note(db, new_note, base_url) - payload = {"summary_note": note_obj.dict()} - yield f"data: {json.dumps(payload, default=str)}\n\n".encode() - except Exception: - # fallback to ID-only message - try: - yield f"data: SUMMARY_NOTE_ID:{new_note.id}\n\n".encode() - except Exception: - pass - else: - # As an extra fallback, aggregate streamed parts (if any) to ensure coverage - try: - agg = "\n\n".join(parts) if parts else (note.content or '')[:4000] - fallback_full 
= "## Aggregated streamed parts\n\n" + agg - title = (note.title or "").strip() + " — 요약" - new_note2 = Note(user_id=user.u_id, folder_id=note.folder_id, title=title, content=fallback_full) - db.add(new_note2) - db.commit() - db.refresh(new_note2) - try: - yield f"data: SUMMARY_NOTE_ID:{new_note2.id}\n\n".encode() - except Exception: - pass - except Exception: - pass + # ─────────────── + # DB 저장 + # ─────────────── + title = (note.title or "").strip() + " — 요약" + if len(title) > 255: + title = title[:255] - return StreamingResponse( - event_gen(), - media_type="text/event-stream", - headers={"Cache-Control": "no-cache"} + new_note = Note( + user_id=user.u_id, + folder_id=note.folder_id, + title=title, + content=full_summary, ) + db.add(new_note) + db.commit() + db.refresh(new_note) + + base_url = os.getenv("BASE_API_URL") or "http://localhost:8000" + return serialize_note(db, new_note, base_url) +# ───────────────────────────────────────────── +# 퀴즈 생성 +# ───────────────────────────────────────────── @router.post("/notes/{note_id}/generate-quiz") def generate_quiz( note_id: int, @@ -429,30 +471,27 @@ def generate_quiz( db: Session = Depends(get_db), user = Depends(get_current_user) ): - """간단한 규칙 기반 퀴즈 생성(대형 모델 없이 동작).""" note = db.query(Note).filter(Note.id == note_id, Note.user_id == user.u_id).first() if not note or not (note.content or "").strip(): raise HTTPException(status_code=404, detail="퀴즈를 생성할 노트가 없습니다") text = (note.content or "").strip() - # 문장 단위 분할 - import re, random - sents = re.split(r"(?<=[.!?。])\s+|\n+", text) + import re as _re, random as _random + sents = _re.split(r"(?<=[.!?。])\s+|\n+", text) sents = [s.strip() for s in sents if len(s.strip()) >= 8] - random.seed(note_id) - random.shuffle(sents) + _random.seed(note_id) + _random.shuffle(sents) quizzes = [] for s in sents: if len(quizzes) >= count: break - # 공백 기준 토큰화 후, 길이 4 이상인 토큰을 빈칸으로 toks = s.split() - cand = [i for i, t in enumerate(toks) if len(re.sub(r"\W+", "", t)) >= 4] + cand = [i for i, t in enumerate(toks) if len(_re.sub(r"\W+", "", t)) >= 4] if not cand: continue idx = cand[0] - answer = re.sub(r"^[\W_]+|[\W_]+$", "", toks[idx]) + answer = _re.sub(r"^[\W_]+|[\W_]+$", "", toks[idx]) toks[idx] = "_____" q = " ".join(toks) quizzes.append({ @@ -462,7 +501,6 @@ def generate_quiz( "source": s, }) - # 보강: 부족하면 참/거짓 생성 i = 0 while len(quizzes) < count and i < len(sents): stmt = sents[i] @@ -484,62 +522,3 @@ def generate_quiz( }) return {"note_id": note.id, "count": len(quizzes), "items": quizzes} - - -# Convenience synchronous summarization endpoint (returns created note JSON). 
-@router.post("/notes/{note_id}/summarize_sync", response_model=NoteResponse) -async def summarize_sync( - note_id: int, - domain: str | None = Query(default=None, description="meeting | code | paper | general | auto(None)"), - longdoc: bool = Query(default=True, description="Enable long-document map→reduce"), - db: Session = Depends(get_db), - user = Depends(get_current_user) -): - note = db.query(Note).filter(Note.id == note_id, Note.user_id == user.u_id).first() - if not note or not (note.content or "").strip(): - raise HTTPException(status_code=404, detail="요약 대상 없음") - - parts = [] - async for sse in stream_summary_with_langchain(note.content, domain=domain, longdoc=longdoc, length='long', tone='neutral', output_format='md'): - parts.append(sse.removeprefix("data: ").strip()) - full = "".join(parts).strip() - - # sanitize local paths and strip top-level H1 - try: - full = re.sub(r"file://\S+", "", full) - full = re.sub(r"/var/[^\s)]+", "", full) - full = _strip_top_level_h1_outside_code(full) - except Exception: - try: - full = re.sub(r"^\s*#\s.*?\n+", "", full, count=1) - except Exception: - pass - - # If model produced empty output, use extractive fallback - if not (full or "").strip(): - try: - full = _fallback_extractive_summary(note.content) - print(f"[summarize_sync] fallback used length={len(full)}") - except Exception: - full = (note.content or '')[:800] - - title = (note.title or "").strip() + " — 요약" - if len(title) > 255: - title = title[:255] - new_note = Note( - user_id=user.u_id, - folder_id=note.folder_id, - title=title, - content=full, - ) - db.add(new_note) - db.commit() - db.refresh(new_note) - try: - print(f"[summarize_sync] created summary note id={new_note.id} for note_id={note_id}") - print("[summarize_sync] saved content length=", len(new_note.content or "")) - print("[summarize_sync] saved content preview=", repr((new_note.content or "")[:400])) - except Exception: - pass - base_url = os.getenv("BASE_API_URL") or "http://localhost:8000" - return serialize_note(db, new_note, base_url) diff --git a/utils/llm.py b/utils/llm.py index 5406ebc..f7b2cac 100644 --- a/utils/llm.py +++ b/utils/llm.py @@ -1,205 +1,109 @@ -# [CHANGED] 마크다운 보정 + Markdown 섹션 포맷 + 선택적 웹 보강(위키) + 동일 언어 요약을 포함한 전체 코드 +from __future__ import annotations +import re, asyncio, os, threading, json, time +from typing import Optional, List + +# LangChain / Ollama (옵션) from langchain.callbacks import AsyncIteratorCallbackHandler from langchain_ollama import ChatOllama from langchain.schema import HumanMessage, SystemMessage -import re, asyncio, os, threading, json, time + +# HF Transformers import torch from transformers import AutoTokenizer, AutoModelForCausalLM, TextIteratorStreamer -# [CHANGED] 웹 보강용 (옵션) +# 웹 보강(옵션) import requests from urllib.parse import quote as _urlquote -# =============== 필터: 사고과정 유사 문장 =============== + +# ========================================================= +# 설정값 (환경변수로 오버라이드 가능) +# ========================================================= +DEFAULT_SUMMARY_BACKEND = os.getenv("SUMMARY_BACKEND", "hf").lower() # "hf" | "ollama" +DEFAULT_LONGDOC_CHAR_LIMIT = int(os.getenv("SUMMARY_LONGDOC_CHAR_LIMIT", "3500")) + +# 롱독 청크 설정 +DEFAULT_CHUNK_CHARS = int(os.getenv("SUMMARY_CHUNK_CHARS", "12000")) +DEFAULT_CHUNK_OVERLAP = int(os.getenv("SUMMARY_CHUNK_OVERLAP", "1200")) + +# 토큰 예산 +HF_MAX_NEW_TOKENS_SHORT = int(os.getenv("HF_MAX_NEW_TOKENS_SHORT", "8000")) +HF_MAX_NEW_TOKENS_MEDIUM = int(os.getenv("HF_MAX_NEW_TOKENS_MEDIUM", "16000")) +HF_MAX_NEW_TOKENS_LONG = 
int(os.getenv("HF_MAX_NEW_TOKENS_LONG", "32000")) +HF_MAP_MAX_NEW_TOKENS = int(os.getenv("HF_MAP_MAX_NEW_TOKENS", "12000")) + +# 슬라이드 커버리지 목표 +SLIDES_MIN = int(os.getenv("SUMMARY_SLIDES_MIN", "8")) +SLIDES_MAX = int(os.getenv("SUMMARY_SLIDES_MAX", "40")) + +# 증분 보강 루프 횟수 +ENSURE_COMPLETION_PASSES = int(os.getenv("ENSURE_COMPLETION_PASSES", "3")) + + +# ========================================================= +# 사고과정 유사 문장 필터 (stream시 메타 프레이즈 억제) +# ========================================================= _THOUGHT_PAT = re.compile( - # only filter a few clear English meta-intro phrases to avoid removing valid Korean sentences r"^\s*(okay|let\s+me|i\s+need\s+to|in summary)\b", re.I, ) -# =============== 마크다운 보정 유틸 =============== -# [CHANGED] 스트리밍 중 붙어버린 헤더/불릿을 자동 교정 +# ========================================================= +# 마크다운 보정 유틸 +# ========================================================= _MD_CODE_FENCE = re.compile(r"(```.*?```|`[^`]*`)", re.S) def _format_md_stream(s: str) -> str: - """ - 스트리밍 중 합쳐진 마크다운을 사람 읽기 좋게 보정. - - 헤더(#~######) 앞뒤 빈 줄 보장 - - 헤더 해시 뒤 공백 보장 - - 불릿(- )/번호 목록은 줄 시작으로 강제 - - 코드펜스/인라인코드는 건드리지 않음 - """ if not s: return s - parts = [] last = 0 for m in _MD_CODE_FENCE.finditer(s): chunk = s[last:m.start()] parts.append(_format_md_plain(chunk)) - parts.append(m.group(0)) # 코드펜스 원형 유지 + parts.append(m.group(0)) last = m.end() parts.append(_format_md_plain(s[last:])) - out = "".join(parts) - out = re.sub(r"\n{3,}", "\n\n", out) # 과도한 빈 줄 축소 + out = re.sub(r"\n{3,}", "\n\n", out) return out def _format_md_plain(s: str) -> str: - # 헤더 해시 뒤 공백 보장: "##개요" -> "## 개요" s = re.sub(r"^(#{1,6})([^\s#])", r"\1 \2", s, flags=re.M) - - # 문장 중간 헤더 분리: "...개요## 핵심" -> "...개요\n\n## 핵심" s = re.sub(r"(? int(os.getenv("SUMMARY_LONGDOC_CHAR_LIMIT", "3500")) - - backend = os.getenv("SUMMARY_BACKEND", "hf").lower() - - # choose token budget by requested length - # Increase defaults to allow richer, more complete summaries. Can be overridden via env vars. - # Aggressively increase defaults to allow very long, comprehensive summaries. - # These can still be tuned via environment variables if needed. - # Aggressively increase defaults to allow very long, comprehensive summaries. - # These can still be tuned via environment variables if needed. 
- if length == 'short': - token_budget = int(os.getenv("HF_MAX_NEW_TOKENS_SHORT", "8000")) - elif length == 'medium': - token_budget = int(os.getenv("HF_MAX_NEW_TOKENS_MEDIUM", "16000")) - else: - token_budget = int(os.getenv("HF_MAX_NEW_TOKENS_LONG", "32000")) - - if not enable_long: - sys_txt = _system_prompt(dom, phase="final", output_format=output_format, length=length) - user_payload = _compose_user_payload(text, extra_context, output_format, length=length, tone=tone) # [CHANGED] - # Temporarily set HF token budget env so downstream generator respects it - old_budget = os.environ.get('HF_MAX_NEW_TOKENS') - os.environ['HF_MAX_NEW_TOKENS'] = str(token_budget) - try: - if backend == "ollama": - async for s in _stream_with_ollama(user_payload, system_text=sys_txt, output_format=output_format): - yield s - else: - async for s in _stream_with_hf(user_payload, system_text=sys_txt, output_format=output_format): - yield s - finally: - if old_budget is None: - os.environ.pop('HF_MAX_NEW_TOKENS', None) - else: - os.environ['HF_MAX_NEW_TOKENS'] = old_budget - return - - # Long-doc: Map (chunk summaries) → Reduce (final synthesis streamed) - chunks = _chunk_text( - text, - chunk_chars=int(os.getenv("SUMMARY_CHUNK_CHARS", "20000")), - overlap=int(os.getenv("SUMMARY_CHUNK_OVERLAP", "2000")), - ) - map_sys = _system_prompt(dom, phase="map", output_format=output_format, length=length) - partials: list[str] = [] - for idx, ch in enumerate(chunks, 1): - try: - map_input = _compose_user_payload(ch, "", output_format, length=length, tone=tone) # [CHANGED] - part = await _hf_generate_once(map_sys, map_input, max_new_tokens=int(os.getenv("HF_MAP_MAX_NEW_TOKENS", "12000"))) - except Exception: - part = ch[:500] - partials.append(f"[Chunk {idx}]\n{part.strip()}") - - reduce_text = "\n\n".join(partials) - reduce_sys = _system_prompt(dom, phase="reduce", output_format=output_format, length=length) - reduce_input = _compose_user_payload(reduce_text, extra_context, output_format, length=length, tone=tone) # [CHANGED] - - # For reduce/final stage, also apply token budget - old_budget = os.environ.get('HF_MAX_NEW_TOKENS') - os.environ['HF_MAX_NEW_TOKENS'] = str(token_budget) - try: - if backend == "ollama": - async for s in _stream_with_ollama(reduce_input, system_text=reduce_sys, output_format=output_format): - yield s - else: - async for s in _stream_with_hf(reduce_input, system_text=reduce_sys, output_format=output_format): - yield s - finally: - if old_budget is None: - os.environ.pop('HF_MAX_NEW_TOKENS', None) - else: - os.environ['HF_MAX_NEW_TOKENS'] = old_budget - - -# =============== 도메인/언어 감지 =============== +# ========================================================= +# 도메인/언어 감지 +# ========================================================= def _detect_domain(t: str) -> str: s = (t or "").lower() - # lecture / slides signals if re.search(r"\blecture\b|강의|슬라이드|ppt|slide|강의자료|강의록", s): return "lecture" - # code-like signals if re.search(r"\b(def |class |import |#include|public\s+class|function\s|=>|:=)", s) or re.search(r"```|\bdiff --git\b|\bcommit\b", s): return "code" - # paper-like signals if re.search(r"\babstract\b|\bintroduction\b|\bmethod(s)?\b|\bresult(s)?\b|\bconclusion(s)?\b|doi:|arxiv:\d", s): return "paper" - # meeting-like signals (KO/EN keywords) if re.search(r"회의|안건|결정|논의|액션 아이템|참석자|회의록|meeting|agenda|minutes|action items|attendees", s): return "meeting" return "general" def _detect_lang(t: str) -> str: - """아주 단순한 언어 감지(영문자/한글자 수 비교). 
ko/en만 구분.""" s = t or "" en = len(re.findall(r"[A-Za-z]", s)) ko = len(re.findall(r"[가-힣]", s)) return "en" if en > ko else "ko" -# =============== 시스템 프롬프트 =============== -# [CHANGED] 출력 포맷(MD/HTML) 지원 + 마크다운 간격 규칙 + 도메인별 포함 요소 힌트 +# ========================================================= +# 시스템 프롬프트 (슬라이드 섹션 강제) +# ========================================================= def _system_prompt(domain: str, phase: str = "final", output_format: str = "md", length: str = "medium") -> str: - # phase: map | reduce | final fmt = output_format.lower() base_rules = ( "역할: 너는 사실 보존에 강한 전문 요약가다. 입력 텍스트의 언어(Korean/English)를 감지하고, 반드시 동일한 언어로 작성한다. " @@ -220,14 +124,14 @@ def _system_prompt(domain: str, phase: str = "final", output_format: str = "md", if fmt == "md": format_rule = ( "출력 형식: Markdown. 반드시 다음 섹션으로 구성하라(필요시 일부 생략 가능): " - "## TL;DR, ## 핵심 요점(불릿 3–8개), ## 상세 설명(문단), ## 용어 정리(선택), ## 한계/주의, ## 할 일(액션), ## 참고(선택). " + "## TL;DR, ## 핵심 요점(불릿 3–8개), ## 상세 설명(문단), ## 슬라이드(필수), ## 용어 정리(선택), ## 한계/주의, ## 할 일(액션), ## 참고(선택). " "절대 H1('# ')로 시작하지 말고, 불필요한 전언/사고과정/추론 과정을 출력하지 마라." ) else: format_rule = ( "출력 형식: HTML fragment.