Skip to content

NervaPack

Parser

ramdhavepreetam/NervaPack

NervaPack

ramdhavepreetam/NervaPack

Home
Getting Started
Getting Started
Performance
Performance
- Verified Benchmarks
- Messy Code Performance
User Guide
User Guide
- Core Concepts
  Core Concepts
- Commands
  Commands
  - ingest
  - query
  - visualize
  - sync
  - status
  - explore
  - dependencies
  - serve
  - history
Integrations
Integrations
- MCP Server
- Python SDK
API Reference
API Reference
- Parser Parser
  Table of contents
  - ast_parser
  - md_chunker
    
    MarkdownChunker
    
    chunk_file
- Graph Builder
- LLM Providers
Contributing
Changelog

Parser API¶

`ast_parser` ¶

`md_chunker` ¶

`MarkdownChunker` ¶

Source code in src/nervapack/parser/md_chunker.py

class MarkdownChunker:
    """Splits a Markdown file into chunks, one per header section."""

    def __init__(self):
        # ATX header: one to six '#' characters, whitespace, then the title.
        self.header_regex = re.compile(r'^(#{1,6})\s+(.*)')

    @staticmethod
    def _fence_marker(line: str) -> str:
        """Return the leading code-fence run of *line* (e.g. "```"), or ""."""
        stripped = line.lstrip(' ')
        # A fence may be indented by at most three spaces.
        if len(line) - len(stripped) > 3:
            return ""
        if not stripped or stripped[0] not in '`~':
            return ""
        fence_char = stripped[0]
        run_length = len(stripped) - len(stripped.lstrip(fence_char))
        return fence_char * run_length if run_length >= 3 else ""

    def chunk_file(self, file_path: str) -> List[Dict[str, str]]:
        """
        Parses a Markdown file and returns chunks separated by headers.

        Each chunk is a dict with the keys ``"header"`` (the header title, or
        ``"Document Root"`` for text before the first header), ``"content"``
        (the stripped text of the section, including its header line) and
        ``"file_path"``. Sections whose content is empty after stripping are
        skipped.

        Lines inside fenced code blocks (delimited by three or more backticks
        or tildes) are never treated as headers, so a ``# comment`` in a code
        sample stays inside its surrounding chunk.

        Returns an empty list if the file cannot be opened or read.
        """
        try:
            with open(file_path, 'r', encoding='utf-8') as f:
                lines = f.readlines()
        except Exception:
            # Deliberate best-effort: any read failure yields no chunks.
            return []

        chunks: List[Dict[str, str]] = []
        current_chunk: List[str] = []
        current_header = "Document Root"
        open_fence = ""  # marker of the fence we are inside, "" when outside

        def flush() -> None:
            # Emit the accumulated section unless it is blank.
            content = "".join(current_chunk).strip()
            if content:
                chunks.append({
                    "header": current_header,
                    "content": content,
                    "file_path": file_path
                })

        for line in lines:
            marker = self._fence_marker(line)
            rest = line.lstrip(' ')[len(marker):]

            if open_fence:
                current_chunk.append(line)
                # A closing fence uses the same character, is at least as long
                # as the opening one, and has nothing but whitespace after it.
                if (marker[:1] == open_fence[0]
                        and len(marker) >= len(open_fence)
                        and not rest.strip()):
                    open_fence = ""
                continue

            # A backtick fence cannot have backticks in its info string;
            # such a line is inline code, not a fence opener.
            if marker and not (marker[0] == '`' and '`' in rest):
                open_fence = marker
                current_chunk.append(line)
                continue

            match = self.header_regex.match(line)
            if match:
                # A new header closes the previous section.
                flush()
                current_header = match.group(2).strip()
                current_chunk = [line]
            else:
                current_chunk.append(line)

        # Add the last chunk
        flush()

        return chunks

`chunk_file(file_path)` ¶

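Parses a Markdown file and splits it into chunks at each header line. Each chunk is a dict with `header`, `content`, and `file_path` keys; an empty list is returned if the file cannot be read.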

Source code in src/nervapack/parser/md_chunker.py

def chunk_file(self, file_path: str) -> List[Dict[str, str]]:
    """
    Parses a Markdown file and returns chunks separated by headers.

    Each chunk is a dict with the keys ``"header"`` (the header title, or
    ``"Document Root"`` for text before the first header), ``"content"``
    (the stripped text of the section, including its header line) and
    ``"file_path"``. Sections whose content is empty after stripping are
    skipped.

    Lines inside fenced code blocks (delimited by three or more backticks
    or tildes) are never treated as headers, so a ``# comment`` in a code
    sample stays inside its surrounding chunk.

    Returns an empty list if the file cannot be opened or read.
    """
    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            lines = f.readlines()
    except Exception:
        # Deliberate best-effort: any read failure yields no chunks.
        return []

    def fence_marker(text: str) -> str:
        # Leading run of three or more '`' or '~' (indented by at most three
        # spaces), or "" when the line does not start with a code fence.
        stripped = text.lstrip(' ')
        if len(text) - len(stripped) > 3:
            return ""
        if not stripped or stripped[0] not in '`~':
            return ""
        fence_char = stripped[0]
        run_length = len(stripped) - len(stripped.lstrip(fence_char))
        return fence_char * run_length if run_length >= 3 else ""

    chunks: List[Dict[str, str]] = []
    current_chunk: List[str] = []
    current_header = "Document Root"
    open_fence = ""  # marker of the fence we are inside, "" when outside

    def flush() -> None:
        # Emit the accumulated section unless it is blank.
        content = "".join(current_chunk).strip()
        if content:
            chunks.append({
                "header": current_header,
                "content": content,
                "file_path": file_path
            })

    for line in lines:
        marker = fence_marker(line)
        rest = line.lstrip(' ')[len(marker):]

        if open_fence:
            current_chunk.append(line)
            # A closing fence uses the same character, is at least as long
            # as the opening one, and has nothing but whitespace after it.
            if (marker[:1] == open_fence[0]
                    and len(marker) >= len(open_fence)
                    and not rest.strip()):
                open_fence = ""
            continue

        # A backtick fence cannot have backticks in its info string;
        # such a line is inline code, not a fence opener.
        if marker and not (marker[0] == '`' and '`' in rest):
            open_fence = marker
            current_chunk.append(line)
            continue

        match = self.header_regex.match(line)
        if match:
            # A new header closes the previous section.
            flush()
            current_header = match.group(2).strip()
            current_chunk = [line]
        else:
            current_chunk.append(line)

    # Add the last chunk
    flush()

    return chunks