<documents><toc>
📁 abersetz
├── 📁 .github
│   └── 📁 workflows
│       ├── 📄 push.yml
│       └── 📄 release.yml
├── 📁 docs
├── 📁 examples
│   ├── 📁 pl
│   │   ├── 📄 poem_en.txt
│   │   └── 📄 poem_pl.txt
│   ├── 📄 advanced_api.py
│   ├── 📄 basic_api.py
│   ├── 📄 batch_translate.sh
│   ├── 📄 config_setup.sh
│   ├── 📄 engines_config.json
│   ├── 📄 pipeline.sh
│   ├── 📄 poem_en.txt
│   ├── 📄 poem_pl.txt
│   ├── 📄 translate.sh
│   ├── 📄 validate_report.sh
│   ├── 📄 vocab.json
│   └── 📄 walkthrough.md
├── 📁 external
│   └── 📁 lackai
│       ├── 📁 examples
│       ├── 📁 external
│       │   ├── 📁 mt
│       │   │   ├── 📁 in
│       │   │   └── 📁 out-pl
│       │   ├── 📁 stt
│       │   └── 📁 tts
│       │       ├── 📁 test-api
│       │       └── 📁 tests
│       ├── 📁 issues
│       ├── 📁 src
│       │   └── 📁 lackai
│       │       ├── 📁 core
│       │       ├── 📁 inference
│       │       │   ├── 📁 stt
│       │       │   ├── 📁 text
│       │       │   ├── 📁 tts
│       │       │   └── 📁 vision
│       │       ├── 📁 models
│       │       └── 📁 utils
│       └── 📁 tests
├── 📁 issues
├── 📁 src
│   └── 📁 abersetz
│       ├── 📄 __init__.py
│       ├── 📄 __main__.py
│       ├── 📄 abersetz.py
│       ├── 📄 chunking.py
│       ├── 📄 cli.py
│       ├── 📄 cli_fast.py
│       ├── 📄 config.py
│       ├── 📄 engine_catalog.py
│       ├── 📄 engines.py
│       ├── 📄 openai_lite.py
│       ├── 📄 pipeline.py
│       ├── 📄 setup.py
│       └── 📄 validation.py
├── 📁 tests
│   ├── 📄 conftest.py
│   ├── 📄 test_chunking.py
│   ├── 📄 test_cli.py
│   ├── 📄 test_config.py
│   ├── 📄 test_engine_catalog.py
│   ├── 📄 test_engines.py
│   ├── 📄 test_examples.py
│   ├── 📄 test_integration.py
│   ├── 📄 test_offline.py
│   ├── 📄 test_openai_lite.py
│   ├── 📄 test_package.py
│   ├── 📄 test_pipeline.py
│   ├── 📄 test_setup.py
│   └── 📄 test_validation.py
├── 📄 .gitignore
├── 📄 build.sh
├── 📄 CLAUDE.md
├── 📄 DEPENDENCIES.md
├── 📄 IDEA.md
├── 📄 LICENSE
├── 📄 md.txt
├── 📄 package.toml
├── 📄 pyproject.toml
├── 📄 README.md
├── 📄 SPEC.md
├── 📄 TASKS.md
├── 📄 TESTING.md
├── 📄 TODO.md
└── 📄 translation_report.json


</toc>
<document index="1">
<source>.github/workflows/push.yml</source>
<document_content>
# CI workflow: lints, tests, and builds the package on pushes and pull
# requests targeting main, and on manual dispatch.
name: Build & Test

on:
  push:
    branches: [main]
    # Pushes of tags matching v* do not trigger this workflow.
    tags-ignore: ["v*"]
  pull_request:
    branches: [main]
  workflow_dispatch:

# Least privilege: the jobs in this workflow only check out code and upload
# build/coverage artifacts, so read access to repository contents is enough.
permissions:
  contents: read

# A newer run for the same workflow and ref cancels the one still in progress.
concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: true

jobs:
  # Lint gate: runs Ruff's linter and its formatter in check mode.
  quality:
    name: Code Quality
    runs-on: ubuntu-latest
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          fetch-depth: 0

      # Lint findings are emitted in the GitHub output format.
      - name: Run Ruff lint
        uses: astral-sh/ruff-action@v3
        with:
          args: 'check --output-format=github'
          version: 'latest'

      # Formatting is only verified (--check); files are not rewritten.
      - name: Run Ruff Format
        uses: astral-sh/ruff-action@v3
        with:
          args: 'format --check --respect-gitignore'
          version: 'latest'

  # Test matrix: runs the pytest suite once per listed Python version after the
  # quality job succeeds, and uploads the XML coverage report of each run.
  test:
    name: Run Tests
    needs: quality
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: true
      matrix:
        # Versions stay quoted so "3.10" is not read as the float 3.1.
        python-version:
          - "3.10"
          - "3.11"
          - "3.12"
        os:
          - ubuntu-latest
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}

      - name: Install UV
        uses: astral-sh/setup-uv@v5
        with:
          version: "latest"
          python-version: ${{ matrix.python-version }}
          enable-cache: true
          # Distinct cache suffix per matrix cell.
          cache-suffix: ${{ matrix.os }}-${{ matrix.python-version }}

      - name: Install test dependencies
        run: |
          uv pip install --system --upgrade pip
          uv pip install --system ".[test]"

      # NOTE(review): dependencies above are installed with --system, while
      # this step goes through `uv run`; confirm both resolve to the same
      # environment.
      - name: Run tests with Pytest
        run: >-
          uv run pytest -n auto --maxfail=1 --disable-warnings
          --cov-report=xml --cov-config=pyproject.toml
          --cov=src/abersetz --cov=tests tests/

      - name: Upload coverage report
        uses: actions/upload-artifact@v4
        with:
          name: coverage-${{ matrix.python-version }}-${{ matrix.os }}
          path: coverage.xml

  # Packaging check: builds the distributions into dist/ once the tests pass
  # and keeps them as a short-lived workflow artifact.
  build:
    name: Build Distribution
    needs: test
    runs-on: ubuntu-latest
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          # Fetch full history and tags instead of a shallow clone, matching
          # the release workflow. hatch-vcs is installed below and presumably
          # derives the package version from git tags - confirm in
          # pyproject.toml.
          fetch-depth: 0

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: "3.12"

      - name: Install UV
        uses: astral-sh/setup-uv@v5
        with:
          version: "latest"
          python-version: "3.12"
          enable-cache: true

      - name: Install build tools
        run: uv pip install build hatchling hatch-vcs

      - name: Build distributions
        run: uv run python -m build --outdir dist

      - name: Upload distribution artifacts
        uses: actions/upload-artifact@v4
        with:
          name: dist-files
          path: dist/
          retention-days: 5
</document_content>
</document>

<document index="2">
<source>.github/workflows/release.yml</source>
<document_content>
# Release workflow: triggered only by pushes of tags matching v*.
name: Release

on:
  push:
    tags:
      - 'v*'

# contents: write is used by the step that creates the GitHub Release.
# NOTE(review): id-token: write enables OIDC tokens, but the publish step in
# this workflow passes a token via `password` - confirm which is intended.
permissions:
  contents: write
  id-token: write

jobs:
  # Builds the sdist and wheel, verifies both exist, publishes them to PyPI,
  # then attaches the same files to a GitHub Release.
  release:
    name: Release to PyPI
    runs-on: ubuntu-latest
    environment:
      name: pypi
      url: https://pypi.org/p/abersetz
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          # Full history and tags rather than a shallow clone.
          fetch-depth: 0

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: "3.12"

      - name: Install UV
        uses: astral-sh/setup-uv@v5
        with:
          version: "latest"
          python-version: "3.12"
          enable-cache: true

      - name: Install build tools
        run: uv pip install build hatchling hatch-vcs

      - name: Build distributions
        run: uv run python -m build --outdir dist

      # Fail before publishing if either distribution format is missing.
      - name: Verify distribution files
        run: |
          ls -la dist/
          test -n "$(find dist -name '*.whl')" || (echo "Wheel file missing" && exit 1)
          test -n "$(find dist -name '*.tar.gz')" || (echo "Source distribution missing" && exit 1)

      - name: Publish to PyPI
        uses: pypa/gh-action-pypi-publish@release/v1
        with:
          password: ${{ secrets.PYPI_TOKEN }}

      # v2 is the current major of this action and accepts the same `files`
      # and `generate_release_notes` inputs as v1.
      - name: Create GitHub Release
        uses: softprops/action-gh-release@v2
        with:
          files: dist/*
          generate_release_notes: true
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
</document_content>
</document>

<document index="3">
<source>.gitignore</source>
<document_content>
# NOTE(review): gitignore applies the last matching pattern. These negations
# come before the ignore rules further down this file that cover the same
# paths (`**/[Pp]ackages/*`, `*.[Cc]ache`, `*.rsp`, `.axoCover/*`, `.vscode`,
# `.vscode/`, `.vscode/*`), so they are likely overridden and have no effect.
# Confirm whether these paths are meant to be tracked; if so, each negation
# must follow the rule it carves an exception out of.
!**/[Pp]ackages/build/
!.axoCover/settings.json
!.vscode/extensions.json
!.vscode/launch.json
!.vscode/settings.json
!.vscode/tasks.json
!?*.[Cc]ache/
!Directory.Build.rsp
$tf/
*$py.class
**/*.DesktopClient/GeneratedArtifacts
**/*.DesktopClient/ModelManifest.xml
**/*.HTMLClient/GeneratedArtifacts
**/*.Server/GeneratedArtifacts
**/*.Server/ModelManifest.xml
**/[Pp]ackages/*
*- [Bb]ackup ([0-9]).rdl
*- [Bb]ackup ([0-9][0-9]).rdl
*- [Bb]ackup.rdl
*.[Cc]ache
*.[Pp]ublish.xml
*.[Rr]e[Ss]harper
*.a
*.app
*.appx
*.appxbundle
*.appxupload
*.aps
*.azurePubxml
*.bim.layout
*.bim_*.settings
*.binlog
*.btm.cs
*.btp.cs
*.build.csdef
*.cab
*.cachefile
*.code-workspace
*.cover
*.coverage
*.coveragexml
*.d
*.dbmdl
*.dbproj.schemaview
*.dll
*.dotCover
*.DotSettings.user
*.dsp
*.dsw
*.dylib
*.e2e
*.egg
*.egg-info/
*.exe
*.gch
*.GhostDoc.xml
*.gpState
*.ilk
*.iobj
*.ipdb
*.jfm
*.jmconfig
*.la
*.lai
*.ldf
*.lib
*.lo
*.log
*.mdf
*.meta
*.mm.*
*.mod
*.msi
*.msix
*.msm
*.msp
*.ncb
*.ndf
*.nuget.props
*.nuget.targets
*.nupkg
*.nvuser
*.o
*.obj
*.odx.cs
*.opendb
*.opensdf
*.opt
*.out
*.pch
*.pdb
*.pfx
*.pgc
*.pgd
*.pidb
*.plg
*.psess
*.publishproj
*.publishsettings
*.pubxml
*.py,cover
*.py[cod]
*.pyc
*.rdl.data
*.rptproj.bak
*.rptproj.rsuser
*.rsp
*.rsuser
*.sap
*.sbr
*.scc
*.sdf
*.sln.docstates
*.sln.iml
*.slo
*.smod
*.snupkg
*.so
*.suo
*.svclog
*.swo
*.swp
*.tlb
*.tlh
*.tli
*.tlog
*.tmp
*.tmp_proj
*.tss
*.user
*.userosscache
*.userprefs
*.vbp
*.vbw
*.VC.db
*.VC.VC.opendb
*.VisualState.xml
*.vsp
*.vspscc
*.vspx
*.vssscc
*.xsd.cs
*_autogen/
*_h.h
*_i.c
*_p.c
*_wpftmp.csproj
*~
.*crunch*.local.xml
._*
.axoCover/*
.builds
.cache
.coverage
.coverage.*
.cr/personal
.DS_Store
.DS_Store?
.eggs/
.env
.fake/
.history/
.hypothesis/
.idea/
.installed.cfg
.ionide/
.localhistory/
.mfractor/
.nox/
.ntvs_analysis.dat
.paket/paket.exe
.pytest_cache/
.Python
.ruff_cache/
.sass-cache/
.Spotlight-V100
.tox/
.Trashes
.venv
.vs/
.vscode
.vscode/
.vscode/*
.vshistory/
[Aa][Rr][Mm]/
[Aa][Rr][Mm]64/
[Bb]in/
[Bb]uild[Ll]og.*
[Dd]ebug/
[Dd]ebugPS/
[Dd]ebugPublic/
[Ee]xpress/
[Ll]og/
[Ll]ogs/
[Oo]bj/
[Rr]elease/
[Rr]eleasePS/
[Rr]eleases/
[Tt]est[Rr]esult*/
[Ww][Ii][Nn]32/
__pycache__/
__version__.py
_Chutzpah*
_deps
_NCrunch_*
_pkginfo.txt
_private
_Pvt_Extensions
_ReSharper*/
_TeamCity*
_UpgradeReport_Files/
_version.py
AppPackages/
artifacts/
ASALocalRun/
AutoTest.Net/
Backup*/
BenchmarkDotNet.Artifacts/
bld/
build/
BundleArtifacts/
ClientBin/
cmake_install.cmake
CMakeCache.txt
CMakeFiles
CMakeLists.txt.user
CMakeScripts
CMakeUserPresets.json
compile_commands.json
cover/
coverage*.info
coverage*.json
coverage*.xml
coverage.xml
csx/
CTestTestfile.cmake
develop-eggs/
dlldata.c
DocProject/buildhelp/
DocProject/Help/*.hhc
DocProject/Help/*.hhk
DocProject/Help/*.hhp
DocProject/Help/*.HxC
DocProject/Help/*.HxT
DocProject/Help/html
DocProject/Help/Html2
downloads/
ecf/
eggs/
ehthumbs.db
env.bak/
env/
ENV/
FakesAssemblies/
FodyWeavers.xsd
Generated\ Files/
Generated_Code/
healthchecksdb
htmlcov/
install_manifest.txt
ipch/
lib/
lib64/
Makefile
MANIFEST
MigrationBackup/
mono_crash.*
nCrunchTemp_*
node_modules/
nosetests.xml
nunit-*.xml
OpenCover/
orleans.codegen.cs
Package.StoreAssociation.xml
paket-files/
parts/
project.fragment.lock.json
project.lock.json
publish/
PublishScripts/
rcf/
ScaffoldingReadMe.txt
sdist/
ServiceFabricBackup/
StyleCopReport.xml
Testing
TestResult.xml
Thumbs.db
UpgradeLog*.htm
UpgradeLog*.XML
var/
venv.bak/
venv/
VERSION.txt
wheels/
x64/
x86/
~$*
external/
dist/
src/abersetz/__about__.py
</document_content>
</document>

<document index="4">
<source>.pre-commit-config.yaml</source>
<document_content>
# pre-commit hooks: Ruff lint (with autofix) and format, plus generic
# repository hygiene checks.
repos:
  - repo: https://github.com/astral-sh/ruff-pre-commit
    rev: v0.3.4
    hooks:
      - id: ruff
        args:
          - "--fix"
      - id: ruff-format
        args:
          - "--respect-gitignore"
  - repo: https://github.com/pre-commit/pre-commit-hooks
    rev: v4.5.0
    hooks:
      - id: trailing-whitespace
      - id: check-yaml
      - id: check-toml
      - id: check-added-large-files
      - id: debug-statements
      - id: check-case-conflict
      # Mixed line endings are rewritten to LF.
      - id: mixed-line-ending
        args:
          - "--fix=lf"
</document_content>
</document>

<document index="5">
<source>CLAUDE.md</source>
<document_content>
# Development guidelines

## Foundation: Challenge your first instinct with chain-of-thought

Before you generate any response, assume your first instinct is wrong. Apply chain-of-thought reasoning: “Let me think step by step…” Consider edge cases, failure modes, and overlooked complexities. Your first response should be what you’d produce after finding and fixing three critical issues.

### CoT reasoning template

- Problem analysis: What exactly are we solving and why?
- Constraints: What limitations must we respect?
- Solution options: What are 2–3 viable approaches with trade-offs?
- Edge cases: What could go wrong and how do we handle it?
- Test strategy: How will we verify this works correctly?

## No sycophancy, accuracy first

- If your confidence is below 90%, use search tools. Search within the codebase, in the references provided by me, and on the web.
- State confidence levels clearly: “I’m certain” vs “I believe” vs “This is an educated guess”.
- Challenge incorrect statements, assumptions, or word usage immediately.
- Facts matter more than feelings: accuracy is non-negotiable.
- Never just agree to be agreeable: every response should add value.
- When user ideas conflict with best practices or standards, explain why.
- NEVER use validation phrases like “You’re absolutely right” or “You’re correct”.
- Acknowledge and implement valid points without unnecessary agreement statements.

## Complete execution

- Complete all parts of multi-part requests.
- Match output format to input format (code box for code box).
- Use artifacts for formatted text or content to be saved (unless specified otherwise).
- Apply maximum thinking time for thoroughness.

## Absolute priority: never overcomplicate, always verify

- Stop and assess: Before writing any code, ask “Has this been done before?”
- Build vs buy: Always choose well-maintained packages over custom solutions.
- Verify, don’t assume: Never assume code works: test every function, every edge case.
- Complexity kills: Every line of custom code is technical debt.
- Lean and focused: If it’s not core functionality, it doesn’t belong.
- Ruthless deletion: Remove features, don’t add them.
- Test or it doesn’t exist: Untested code is broken code.

## Verification workflow: mandatory

1. Implement minimal code: Just enough to pass the test.
2. Write a test: Define what success looks like.
3. Run the test: `uvx hatch test`.
4. Test edge cases: Empty inputs, none, negative numbers, huge inputs.
5. Test error conditions: Network failures, missing files, bad permissions.
6. Document test results: Add to `CHANGELOG.md` what was tested and results.

## Before writing any code

1. Search for existing packages: Check npm, pypi, github for solutions.
2. Evaluate packages: >200 stars, recent updates, good documentation.
3. Test the package: write a small proof-of-concept first.
4. Use the package: don’t reinvent what exists.
5. Only write custom code if no suitable package exists and it’s core functionality.

## Never assume: always verify

- Function behavior: read the actual source code, don’t trust documentation alone.
- API responses: log and inspect actual responses, don’t assume structure.
- File operations: Check file exists, check permissions, handle failures.
- Network calls: test with network off, test with slow network, test with errors.
- Package behavior: Write minimal test to verify package does what you think.
- Error messages: trigger the error intentionally to see actual message.
- Performance: measure actual time/memory, don’t guess.

## Test-first development

- Test-first development: Write the test before the implementation.
- Delete first, add second: Can we remove code instead?
- One file when possible: Could this fit in a single file?
- Iterate gradually, avoiding major changes.
- Focus on minimal viable increments and ship early.
- Minimize confirmations and checks.
- Preserve existing code/structure unless necessary.
- Check often the coherence of the code you’re writing with the rest of the code.
- Analyze code line-by-line.

## Complexity detection triggers: rethink your approach immediately

- Writing a utility function that feels “general purpose”.
- Creating abstractions “for future flexibility”.
- Adding error handling for errors that never happen.
- Building configuration systems for configurations.
- Writing custom parsers, validators, or formatters.
- Implementing caching, retry logic, or state management from scratch.
- Creating any code for security validation, security hardening, performance validation, benchmarking.
- More than 3 levels of indentation.
- Functions longer than 20 lines.
- Files longer than 200 lines.

## Before starting any work

- Always read `WORK.md` in the main project folder for work progress, and `CHANGELOG.md` for past changes notes.
- Read `README.md` to understand the project.
- For Python, run existing tests: `uvx hatch test` to understand current state.
- Step back and think heavily step by step about the task.
- Consider alternatives and carefully choose the best option.
- Check for existing solutions in the codebase before starting.

## Project documentation to maintain

- `README.md` :  purpose and functionality (keep under 200 lines).
- `CHANGELOG.md` :  past change release notes (accumulative).
- `TASKS.md` :  detailed future goals, clear plan that discusses specifics.
- `TODO.md` :  flat simplified itemized `- [ ]`-prefixed representation of `TASKS.md`.
- `WORK.md` :  work progress updates including test results.
- `DEPENDENCIES.md` :  list of packages used and why each was chosen.

## Code quality standards

- Use constants over magic numbers.
- Write explanatory docstrings/comments that explain what and why.
- Explain where and how the code is used/referred to elsewhere.
- Handle failures gracefully with retries, fallbacks, user guidance.
- Address edge cases, validate assumptions, catch errors early.
- Let the computer do the work, minimize user decisions. If you identify a bug or a problem, plan its fix and then execute its fix. Don’t just “identify”.
- Reduce cognitive load, beautify code.
- Modularize repeated logic into concise, single-purpose functions.
- Favor flat over nested structures.
- Every function must have a test.

## Testing standards

- Unit tests: Every function gets at least one test.
- Edge cases: Test empty, none, negative, huge inputs.
- Error cases: Test what happens when things fail.
- Integration: Test that components work together.
- Smoke test: One test that runs the whole program.
- Test naming: `test_function_name_when_condition_then_result`.
- Assert messages: Always include helpful messages in assertions.
- Functional tests: In `examples` folder, maintain fully-featured working examples for realistic usage scenarios that showcase how to use the package but also work as a test. 
- Add `./test.sh` script to run all test including the functional tests.

## Tool usage

- Use `tree` CLI app if available to verify file locations.
- Run `dir="." uvx codetoprompt --compress --output "$dir/llms.txt" --respect-gitignore --cxml --exclude "*.svg,.specstory,*.md,*.txt, ref, testdata,*.lock,*.svg" "$dir"` to get a condensed snapshot of the codebase into `llms.txt`.
- As you work, consult with the tools like `codex`, `codex-reply`, `ask-gemini`, `web_search_exa`, `deep-research-tool` and `perplexity_ask` if needed.

## File path tracking

- Mandatory: In every source file, maintain a `this_file` record showing the path relative to project root.
- Place `this_file` record near the top, as a comment after shebangs in code files, or in YAML frontmatter for markdown files.
- Update paths when moving files.
- Omit leading `./`.
- Check `this_file` to confirm you’re editing the right file.


## For Python

- If we need a new Python project, run `uv venv --python 3.12 --clear; uv init; uv add fire rich pytest pytest-cov; uv sync`.
- Check existing code with `.venv` folder to scan and consult dependency source code.
- `uvx hatch test` :  run tests verbosely, stop on first failure.
- `python -c "import package; print(package.__version__)"` :  verify package installation.
- `uvx mypy file.py` :  type checking.
- PEP 8: Use consistent formatting and naming, clear descriptive names.
- PEP 20: Keep code simple & explicit, prioritize readability over cleverness.
- PEP 257: Write docstrings.
- Use type hints in their simplest form (list, dict, | for unions).
- Use f-strings and structural pattern matching where appropriate.
- Write modern code with `pathlib`.
- Always add `--verbose` mode loguru-based debug logging.
- Use `uv add`.
- Use `uv pip install` instead of `pip install`.
- Always use type hints: they catch bugs and document code.
- Use dataclasses or Pydantic for data structures.

### Package-first Python

- Always use uv for package management.
- Before any custom code: `uv add [package]`.
- Common packages to always use:
  - `httpx` for HTTP requests.
  - `pydantic` for data validation.
  - `rich` for terminal output.
  - `fire` for CLI interfaces.
  - `loguru` for logging.
  - `pytest` for testing.

### Python CLI scripts

For CLI Python scripts, use `fire` & `rich`, and start with:

```python
#!/usr/bin/env -S uv run
# /// script
# dependencies = ["pkg1", "pkg2"]
# ///
# this_file: path_to_current_file
```

## Post-work activities

### Critical reflection

- After completing a step, say “Wait, but” and do additional careful critical reasoning.
- Go back, think & reflect, revise & improve what you’ve done.
- Run all tests to ensure nothing broke.
- Check test coverage: aim for 80% minimum.
- Don’t invent functionality freely.
- Stick to the goal of “minimal viable next version”.

### Documentation updates

- Update `WORK.md` with what you’ve done, test results, and what needs to be done next.
- Document all changes in `CHANGELOG.md`.
- Update `TODO.md` and `TASKS.md` accordingly.
- Update `DEPENDENCIES.md` if packages were added/removed.

## Special commands

### /plan command: transform requirements into detailed plans

When I say `/plan [requirement]`, you must think hard and:

1. Research first: Search for existing solutions.
   - Use `perplexity_ask` to find similar projects.
   - Search pypi/npm for relevant packages.
   - Check if this has been solved before.
2. Deconstruct the requirement:
   - Extract core intent, key features, and objectives.
   - Identify technical requirements and constraints.
   - Map what’s explicitly stated vs. what’s implied.
   - Determine success criteria.
   - Define test scenarios.
3. Diagnose the project needs:
   - Audit for missing specifications.
   - Check technical feasibility.
   - Assess complexity and dependencies.
   - Identify potential challenges.
   - List packages that solve parts of the problem.
4. Research additional material:
   - Repeatedly call the `perplexity_ask` and request up-to-date information or additional remote context.
   - Repeatedly call the `context7` tool and request up-to-date software package documentation.
   - Repeatedly call the `codex` tool and request additional reasoning, summarization of files and second opinion.
5. Develop the plan structure:
   - Break down into logical phases/milestones.
   - Create hierarchical task decomposition.
   - Assign priorities and dependencies.
   - Add implementation details and technical specs.
   - Include edge cases and error handling.
   - Define testing and validation steps.
   - Specify which packages to use for each component.
6. Deliver to `TASKS.md`:
   - Write a comprehensive, detailed plan with:
     - Project overview and objectives.
     - Technical architecture decisions.
     - Phase-by-phase breakdown.
     - Specific implementation steps.
     - Testing and validation criteria.
     - Package dependencies and why each was chosen.
     - Future considerations.
   - Simultaneously create/update `TODO.md` with the flat itemized `- [ ]` representation of the plan.

Break complex requirements into atomic, actionable tasks. Identify and document task dependencies. Include potential blockers and mitigation strategies. Start with MVP, then layer improvements. Include specific technologies, patterns, and approaches.

### /report command

1. Read `./TODO.md` and `./TASKS.md` files.
2. Analyze recent changes.
3. Run tests.
4. Document changes in `./CHANGELOG.md`.
5. Remove completed items from `./TODO.md` and `./TASKS.md`.

### /work command

1. Read `./TODO.md` and `./TASKS.md` files, think hard and reflect.
2. Write down the immediate items in this iteration into `./WORK.md`.
3. Write tests for the items first.
4. Work on these items.
5. Think, contemplate, research, reflect, refine, revise.
6. Be careful, curious, vigilant, energetic.
7. Verify your changes with tests and think aloud.
8. Consult, research, reflect.
9. Periodically remove completed items from `./WORK.md`.
10. Tick off completed items from `./TODO.md` and `./TASKS.md`.
11. Update `./WORK.md` with improvement tasks.
12. Execute `/report`.
13. Continue to the next item.

### /test command: run comprehensive tests

When I say `/test`, you must run

```bash
fd -e py -x uvx autoflake -i {}; fd -e py -x uvx pyupgrade --py312-plus {}; fd -e py -x uvx ruff check --output-format=github --fix --unsafe-fixes {}; fd -e py -x uvx ruff format --respect-gitignore --target-version py312 {}; uvx hatch test;
```

and document all results in `WORK.md`.

## Anti-enterprise bloat guidelines

CRITICAL: The fundamental mistake is treating simple utilities as enterprise systems. 

- Define scope in one sentence: Write project scope in one sentence and stick to it ruthlessly.
- Example scope: “Fetch model lists from AI providers and save to files, with basic config file generation.”
- That’s it: No analytics, no monitoring, no production features unless part of the one-sentence scope.

### RED LIST: NEVER ADD these unless requested

- NEVER ADD Analytics/metrics collection systems.
- NEVER ADD Performance monitoring and profiling.
- NEVER ADD Production error handling frameworks.
- NEVER ADD Security hardening beyond basic input validation.
- NEVER ADD Health monitoring and diagnostics.
- NEVER ADD Circuit breakers and retry strategies.
- NEVER ADD Sophisticated caching systems.
- NEVER ADD Graceful degradation patterns.
- NEVER ADD Advanced logging frameworks.
- NEVER ADD Configuration validation systems.
- NEVER ADD Backup and recovery mechanisms.
- NEVER ADD System health monitoring.
- NEVER ADD Performance benchmarking suites.

### GREEN LIST: what is appropriate

- Basic error handling (try/catch, show error).
- Simple retry (3 attempts maximum).
- Basic logging (e.g. loguru logger).
- Input validation (check required fields).
- Help text and usage examples.
- Configuration files (TOML preferred).
- Basic tests for core functionality.

## Prose

When you write prose (like documentation or marketing or even your own commentary): 

- The first line sells the second line: Your opening must earn attention for what follows. This applies to scripts, novels, and headlines. No throat-clearing allowed.
- Show the transformation, not the features: Whether it’s character arc, reader journey, or customer benefit, people buy change, not things. Make them see their better self.
- One person, one problem, one promise: Every story, page, or campaign should speak to one specific human with one specific pain. Specificity is universal; generality is forgettable.
- Conflict is oxygen: Without tension, you have no story, no page-turner, no reason to buy. What’s at stake? What happens if they don’t act? Make it matter.
- Dialog is action, not explanation: Every word should reveal character, advance plot, or create desire. If someone’s explaining, you’re failing. Subtext is everything.
- Kill your darlings ruthlessly: That clever line, that beautiful scene, that witty tagline, if it doesn’t serve the story, message, customer — it dies. Your audience’s time is sacred!
- Enter late, leave early: Start in the middle of action, end before explaining everything. Works for scenes, chapters, and sales copy. Trust your audience to fill gaps.
- Remove fluff, bloat and corpo jargon.
- Avoid hype words like “revolutionary”. 
- Favor understated and unmarked UK-style humor sporadically.
- Apply healthy positive skepticism. 
- Make every word count. 

---
</document_content>
</document>

<document index="6">
<source>DEPENDENCIES.md</source>
<document_content>
# Dependencies

## Production Dependencies

### Translation Engines
- **translators** (>=5.9): Access to free translation APIs (Google, Bing, Baidu, etc.) through a single interface. Required for free translation support.
- **deep-translator** (>=1.11): Alternative translation library with additional providers including DeepL. Offers fallback options and file translation tools.
- **httpx** (>=0.25): Modern HTTP client with sync/async support. Replaces heavy SDKs with a lightweight implementation, cutting import time by 7.6 seconds.

### CLI and User Interface
- **fire** (>=0.5): Google's Python Fire library for automatic CLI generation. Minimal code, automatic help, intuitive commands.
- **rich** (>=13.9): Terminal formatting and progress indicators. Clean console output with tables, progress bars, and colors.
- **langcodes** (>=3.4): Language metadata based on CLDR. Powers `abersetz lang` without custom tables.
- **language-data** (>=1.4): Supplemental CLDR dataset required by `langcodes` for language names.

### Core Utilities
- **loguru** (>=0.7): Simple, structured logging with rotation and colored output.
- **platformdirs** (>=4.3): Cross-platform user directories. Ensures config files go in the right place.
- **tomli-w** (>=1.0): TOML serializer. Saves configuration data in `config.toml` without custom code.
- **tomli** (>=2.0, Python <3.11 only): Backport of the standard library TOML parser. Keeps config loading consistent across Python versions.
- **semantic-text-splitter** (>=0.7): Smart text chunking that respects semantic boundaries. Helps preserve context during translation.
- **tenacity** (>=8.4): Retry logic with exponential backoff. Handles API failures and rate limits.

### Optional Local Engines
- **mlx-lm**: Enables local MLX inference for HY-MT and TranslateGemma (`mthy/mlx`, `gemma/mlx`).
- **llama-cpp-python**: Enables local GGUF inference for HY-MT and TranslateGemma (`mthy/gguf`, `gemma/gguf`).

## Development Dependencies

### Testing
- **pytest** (>=8.3): Testing framework with fixtures and plugins.
- **pytest-cov** (>=6.0): Coverage reporting for pytest.

### Code Quality
- **ruff** (>=0.9): Fast linter and formatter. Replaces black, flake8, isort, and others.
- **mypy** (>=1.10): Static type checker. Catches type errors before runtime.

## Why These Packages?

1. **Multiple Translation Backends**: `translators` and `deep-translator` offer redundancy and access to different providers. Users can pick based on availability, quality, or cost.

2. **LLM Support**: The httpx-based client avoids heavy SDKs. Keeps LLM translation fast and lean.

3. **Developer Experience**: `fire` and `rich` make CLIs easy to build and debug. `loguru` simplifies logging setup.

4. **Reliability**: `tenacity` handles network issues. `semantic-text-splitter` keeps translation context intact.

5. **Cross-Platform**: `platformdirs` makes sure configs work everywhere—Windows, macOS, Linux.

6. **Code Quality**: Testing (91% coverage) and linting tools keep the codebase clean.

## Verification Log

- 2026-01-20 — Added `language-data` to keep `langcodes` language-name lookups working; optional local engine notes updated.
- 2025-09-21 11:03 UTC — /work reliability polish sweep (pytest, coverage, mypy, bandit) confirmed no dependency changes; improvements limited to tests and typing.
- 2025-09-21 08:46 UTC — /report QA sweep (pytest, coverage, mypy, bandit) confirmed dependency roster unchanged; no new packages.
- 2025-09-21 10:38 UTC — /work iteration adjusted tests only; dependency roster remains unchanged.
- 2025-09-21 10:29 UTC — /report QA sweep (pytest, coverage, mypy, bandit) confirmed dependency roster unchanged; no new packages.
- 2025-09-21 08:06 UTC — Post-/work QA sweep (pytest, coverage, mypy, bandit) introduced only tests; dependency roster unchanged.
- 2025-09-21 07:59 UTC — /report sweep reran full QA (pytest, coverage, mypy, bandit); dependency roster unchanged.
- 2025-09-21 05:38 UTC — Reviewed dependency roster during /report; no changes needed.
- 2025-09-21 05:50 UTC — Revalidated after quality guardrails sprint; no dependency changes.
- 2025-09-21 06:19 UTC — /report sweep confirmed dependency list remains accurate; no changes needed.
- 2025-09-21 06:27 UTC — Post-/work regression tests touched only test code; dependency roster unchanged.
- 2025-09-21 06:38 UTC — /report verification: reran full test/coverage/mypy/bandit sweep; dependency lineup unchanged.
- 2025-09-21 06:46 UTC — Configuration hardening tests added without altering dependencies; latest sweep confirms package set remains stable.
</document_content>
</document>

<document index="7">
<source>IDEA.md</source>
<document_content>
Sure. Let's go step-by-step.

---

## @TASKS.md

```markdown
# Project Plan: `abersetz`

## Overview
`abersetz` is a Python package and CLI tool for translating text files using various translation engines. It supports both single and multiple file translation, with configurable chunking, language detection, and output handling.

## Key Features
- File discovery with optional recursion (`--recurse`)
- Chunking of files for translation
- Support for multiple translation engines:
  - External packages: `translators`, `deep-translator`
  - Custom engines:
    - `hysf`: Uses OpenAI client to call SiliconFlow API with `tencent/Hunyuan-MT-7B`
    - `ullm`: Configurable LLM translation engine with custom vocabulary support
- Language detection and specification (`--from`, `--to`)
- HTML content detection
- Vocabulary tracking and persistence across chunks (`<voc>` tag)
- Output control:
  - Save to new directory
  - Overwrite original files (`--write_over`)
  - Optional vocabulary export (`--save_voc`)

## Architecture
1. **File Discovery Module**
   - Accept file paths or glob patterns
   - Optional recursive scanning (`--recurse`)
2. **Text Splitting Module**
   - Use `text-splitter` or `tokenizers`/`tiktoken` for intelligent chunking
   - Configurable chunk size per engine
3. **Translation Engine Interface**
   - Unified interface for calling translation backends
   - Implement wrappers for:
     - `translators`
     - `deep-translator`
     - Custom `hysf` engine (OpenAI + tenacity)
     - Custom `ullm` engine (OpenAI + tenacity + configurable endpoints)
4. **Language Handling**
   - Use `langcodes` for standardizing language codes
   - Auto-detect source language unless specified
5. **HTML Detection**
   - Detect HTML content using simple heuristics or `ftfy`
6. **Vocabulary Management**
   - Extract and merge `<voc>` from LLM responses
   - Maintain state across chunks
   - Optionally save vocabulary to JSON
7. **Output Module**
   - Write translated files to a new directory or overwrite originals
8. **CLI Tool**
   - Use `fire` for CLI generation
   - Mimic structure and logic of `cerebrate-file`
   - Support all engine types and flags

## Dependencies
- `fire`
- `platformdirs`
- `langcodes`
- `ftfy`
- `translators`
- `deep-translator`
- `openai`
- `tenacity`
- `text-splitter` or `tokenizers`/`tiktoken`

## Configuration Storage
- Store API keys and engine-specific settings in user config directory using `platformdirs`
- Support both environment variable names and literal values

## File Structure
```
abersetz/
├── __init__.py
├── cli.py
├── core.py
├── engines/
│   ├── __init__.py
│   ├── base.py
│   ├── translators_engine.py
│   ├── deep_translator_engine.py
│   ├── hysf_engine.py
│   └── ullm_engine.py
├── splitter.py
├── voc.py
├── config.py
└── utils.py
```

## Examples Folder
- Real-world translation examples
- Various file types (txt, html, md)
- Demonstrations of vocabulary handling
```

---

## @TODO.md

```markdown
# Implementation TODO

- [ ] Create project structure and directories
- [ ] Set up `platformdirs` for config storage
- [ ] Implement config loading/saving for API keys and chunk sizes
- [ ] Add language code validation with `langcodes`
- [ ] Implement file discovery with optional `--recurse`
- [ ] Add HTML detection logic
- [ ] Integrate `text-splitter` or token-based chunking
- [ ] Define base translation engine interface
- [ ] Wrap `translators` package into engine class
- [ ] Wrap `deep-translator` package into engine class
- [ ] Implement `hysf` engine using OpenAI and tenacity
- [ ] Implement `ullm` engine with configurable API endpoints
- [ ] Add vocabulary (`<voc>`) parsing and merging logic
- [ ] Implement file output logic (new folder or overwrite)
- [ ] Build CLI with `fire`, mimicking `cerebrate-file`
- [ ] Add support for `--from`, `--to`, `--write_over`, `--save_voc`
- [ ] Write real-world examples in `examples/` folder
- [ ] Test all engines with sample files
- [ ] Verify vocabulary consistency across chunks
- [ ] Refine chunking and error handling
- [ ] Add version info and basic help output
- [ ] Document usage in updated `README.md`
```

---

## @README.md

```markdown
# abersetz

A minimal file translation tool and Python package. Translate text in single or multiple files using a variety of translation engines, including free online services and custom LLM-based backends.

## Features

- Translate entire files (not just raw text)
- Recursive file scanning (`--recurse`)
- Configurable chunk-based translation for large texts
- Built-in language detection or manual specification (`--from`, `--to`)
- HTML content auto-detection
- Vocabulary consistency for LLM engines (`<voc>` tag handling)
- Save translations to new folder or overwrite originals (`--write_over`)
- Optional vocabulary export (`--save_voc`)

## Supported Engines

### External
- `bing`, `google`, `yandex` etc. via [`translators`](https://pypi.org/project/translators/)
- Various services via [`deep-translator`](https://pypi.org/project/deep-translator/)

### Custom
- `hysf`: Calls SiliconFlow API using `tencent/Hunyuan-MT-7B` model via OpenAI client
- `ullm`: Universal LLM engine with configurable endpoints, models, and vocabulary support

## Installation

```bash
pip install abersetz
```

## Usage

```bash
# Translate a file or
</document_content>
</document>

<document index="8">
<source>LICENSE</source>
<document_content>
MIT License

Copyright (c) 2025 Adam Twardoch

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
</document_content>
</document>

<document index="9">
<source>README.md</source>
<document_content>
# abersetz

A minimalist file translator that uses established machine translation engines while keeping configuration portable and repeatable. It follows a simple pipeline: locate → chunk → translate → merge. Provides both a Python API and a CLI powered by `fire`.

## Why abersetz?

- Translates files, not just strings.
- Supports engines from `translators`, `deep-translator`, and pluggable LLM-based backends for consistent terminology.
- Stores engine preferences and credentials using `platformdirs`, supporting either raw values or environment variables.
- Shares vocabulary across chunks to maintain consistency in long documents.
- Keeps the codebase small: no custom infrastructure, just clear components doing their job.

## Key Features

- Recursive file discovery with include/exclude filters.
- Automatic HTML vs. plain-text detection to preserve markup where possible.
- Semantic chunking via `semantic-text-splitter`, with per-engine configurable lengths.
- Vocabulary-aware translation pipeline that merges `<voc>` JSON output from LLM engines.
- Dry-run mode for offline testing and demos.
- Optional vocabulary sidecar files when `--save-voc` is enabled.
- Built-in `abersetz validate` command that pings configured engines, reports latency, and shows pricing hints from the research catalog.
- Optional local MLX/GGUF engines for HY-MT and TranslateGemma when configured (`mthy`, `gemma`).

## Installation

```bash
pip install abersetz
```

## Quick Start

### First-time Setup

```bash
# Auto-discover and configure available translation services
abersetz setup

# Test configured engines with a quick validation
abersetz validate --target-lang es
```

Use `abersetz setup --include-community` to include community/self-hosted engines like LibreTranslate in the defaults.

This scans your environment for API keys, tests endpoints, and generates an optimized config.

### Basic Translation

```bash
# Translate using main CLI
abersetz tr pl ./docs --engine tr/google --output ./build/pl

# Or use the shorthand
abtr pl ./docs --engine tr/google --output ./build/pl
```

### CLI Options

- `to_lang`: First positional argument specifying target language.
- `--from-lang`: Source language (default: `auto`).
- `--engine`: One of:
  - `tr/<provider>` (e.g., `tr/google`)
  - `dt/<provider>` (e.g., `dt/deepl`)
  - `hy`
  - `ll/<profile>` where profiles are defined in config.
    - Legacy selectors like `translators/google` still work and are auto-normalized.
- `--recurse/--no-recurse`: Traverse subdirectories (default: on).
- `--write_over`: Replace input files instead of writing to output directory.
- `--save-voc`: Save merged vocabulary JSON next to each translated file.
- `--chunk-size` / `--html-chunk-size`: Override default chunk lengths.
- `--verbose`: Enable debug logging via `loguru`.

#### Extra options for `abersetz engines`:
- `--family tr|dt|ll|hy`: Filter by engine family.
- `--configured-only`: Show only configured engines.

#### Extra options for `abersetz validate`:
- `--selectors tr/google,ll/default`: Limit checks to specific engines (comma-separated).
- `--target-lang es`: Set validation language (default: `es`).
- `--sample-text "Hello!"`: Use custom text for validation.

## Configuration

`abersetz` saves runtime configuration under the user config path from `platformdirs`. The config file includes:

- Global defaults (engine, languages, chunk sizes)
- Engine-specific settings (endpoints, retry policies, HTML behavior)
- Credential entries, supporting `{ "env": "ENV_NAME" }` or `{ "value": "actual-secret" }`

Example `config.toml`:

```toml
[defaults]
engine = "tr/google"
from_lang = "auto"
to_lang = "en"
chunk_size = 1200
html_chunk_size = 1800

[credentials.siliconflow]
name = "siliconflow"
env = "SILICONFLOW_API_KEY"

[engines.hysf]
chunk_size = 2400

[engines.hysf.credential]
name = "siliconflow"

[engines.hysf.options]
model = "tencent/Hunyuan-MT-7B"
base_url = "https://api.siliconflow.com/v1"
temperature = 0.3

[engines.ullm]
chunk_size = 2400

[engines.ullm.credential]
name = "siliconflow"

[engines.ullm.options.profiles.default]
base_url = "https://api.siliconflow.com/v1"
model = "tencent/Hunyuan-MT-7B"
temperature = 0.3
max_input_tokens = 32000

[engines.ullm.options.profiles.default.prolog]
```

Local engines (optional):

```toml
[engines.mthy]
name = "mthy"
chunk_size = 1200

[engines.mthy.options]
backend = "mlx" # or "gguf"
model_path = "/path/to/HY-MT1.5-7B-8bit"

[engines.gemma]
name = "gemma"
chunk_size = 1200

[engines.gemma.options]
backend = "gguf" # or "mlx"
model_path = "/path/to/translategemma-27b-it-Q8_0.gguf"
n_ctx = 4096
n_gpu_layers = -1
```

Use `abersetz config show` and `abersetz config path` to inspect the file.

## CLI Tools

- `abersetz`: Main CLI exposing `tr` (translate), `setup`, `validate`, `engines`, `config`, and `version` commands.
- `abtr`: Shorthand for translation (`abersetz tr`).

## Python API

```python
from abersetz import translate_path, TranslatorOptions

translate_path(
    path="docs",
    options=TranslatorOptions(to_lang="de", engine="tr/google"),
)
```

## Examples

The `examples/` folder includes ready-to-run demos:

- `poem_en.txt`: Source text.
- `poem_pl.txt`: Translated sample.
- `vocab.json`: Vocabulary generated during translation.
- `walkthrough.md`: Step-by-step CLI usage log.
- `validate_report.sh`: Captures validation summary for quick audits.

## Development Workflow

```bash
uv sync
python -m pytest --cov=. --cov-report=term-missing
ruff check src tests
ruff format src tests
```

## Testing Philosophy

- Unit tests cover every helper directly.
- Integration tests simulate the full pipeline with a stub engine.
- Network calls are mocked; CI never touches real APIs.

## License

MIT
</document_content>
</document>

<document index="10">
<source>SPEC.md</source>
<document_content>
Here's the edited version of your technical specification, with all the facts and functionality preserved but the language tightened, cleaned up, and made more direct:

---

# Abersetz Technical Specification

## 1. Overview

`abersetz` is a Python package and command-line tool for translating file contents. It works by identifying files, splitting their content into chunks, translating those chunks, and reassembling them into translated files.

## 2. Core Functionality

### 2.1. File Handling

- **Input:** Accepts a path to either a single file or a directory.
- **File Discovery:** If given a directory, recursively finds files to translate. Controlled by the `--recurse` flag.
- **Output:** Two modes supported:
  - Save translated files in a specified output directory, preserving the source directory structure.
  - Overwrite original files with translated content using the `--write_over` flag.

### 2.2. Translation Pipeline

Translation follows these steps:

1. **Locate:** Identify files based on input path and recursion settings.
2. **Chunk:** Split each file’s content into smaller pieces suitable for the translation engine.
3. **Translate:** Send chunks to the selected translation engine.
4. **Merge:** Reassemble translated chunks into full file content.
5. **Save:** Write the result to the destination.

### 2.3. Content-Type Detection

- Automatically detects HTML content and processes it in a way that preserves markup during translation.

## 3. Translation Engines

Supports multiple translation engines.

### 3.1. Pre-integrated Engines

- Integrates with `translators` and `deep-translator`, allowing use of any of their supported services (e.g., `google`, `bing`, `deepl`).

### 3.2. Custom LLM-based Engines

#### 3.2.1. `hysf` Engine

- **Provider:** Siliconflow  
- **Model:** `tencent/Hunyuan-MT-7B`  
- **Implementation:** Uses the `openai` package to call the Siliconflow API endpoint: `https://api.siliconflow.com/v1/chat/completions`  
- **Authentication:** Pulls API key from configuration  
- **Resilience:** API calls include retry logic via `tenacity`

#### 3.2.2. `ullm` (Universal Large Language Model) Engine

- **Configurability:** Fully configurable per provider. Each profile includes:
  - API base URL  
  - Model name  
  - API key or environment variable reference  
  - Temperature  
  - Chunk size  
  - Maximum input token length  

- **voc Management:**
  - First chunk can include a "prolog" containing a JSON object of predefined vocabulary (`voc`)
  - Prompt instructs the LLM to return translation inside `<output>` tags
  - Optionally, the LLM may return updated vocabulary in `<voc>` tags
  - Tool parses returned `<voc>`, merges it with existing terms, and passes updated voc to subsequent chunks

- **voc Persistence:**
  - `--save-voc` flag saves merged vocabulary as a JSON file alongside the translated output

## 4. Configuration

- Stored in a user-specific directory using `platformdirs`
- API keys are stored securely, either directly or via environment variable names
- Supports engine-specific settings like chunk sizes

## 5. Command-Line Interface (CLI)

- Built using `python-fire`
- Main command: `translate`

### CLI Arguments

- `path`: Input file or directory
- `--from-lang`: Source language (default: `auto`)
- `--to-lang`: Target language (default: `en`)
- `--engine`: Translation engine to use
- `--recurse` / `--no-recurse`: Enable or disable recursive file discovery
- `--write_over`: Overwrite original files instead of saving to output directory
- `--output`: Directory to save translated files
- `--save-voc`: Save vocabulary file

## 6. Python API

- Provides programmatic access for integration into other Python projects

## 7. Dependencies

- `translators`  
- `deep-translator`  
- `openai`  
- `tenacity`  
- `platformdirs`  
- `python-fire`  
- `semantic-text-splitter` (or equivalent for chunking)

--- 

Let me know if you'd like this formatted for markdown or rendered as plain text.
</document_content>
</document>

<document index="11">
<source>TASKS.md</source>
<document_content>
---
this_file: TASKS.md
---
# Abersetz Evolution Plan (Issue #200)

## Scope (One Sentence)
Deliver a responsive translation CLI that defaults to short engine selectors, validates every configured engine end-to-end, and ships with polished docs, examples, and tests that make abersetz easy to adopt and extend.

## Guiding Principles
- Preserve backward compatibility via aliases while promoting the short selector format (`tr/google`, `dt/deepl`, `ll/default`, etc.).
- Prefer existing, battle-tested packages (`translators`, `deep-translator`, httpx, rich) over custom reinventions.
- Ship every change with automated tests, documentation, and runnable examples.
- Prioritize fast feedback: run targeted pytest suites and smoke the CLI for every phase.

## Phase 4 – Auto-Configuration & Engine Research Enhancements
**Goal**: Broaden provider awareness and produce smarter defaults using the research in `external/` and recent API trends.
- Automate provider metadata extraction from `external/translators.txt`, `external/deep-translator.txt`, and current API research so discovery stays accurate without manual updates.
- Sync pricing/tier hints into setup output, highlighting free/community tiers and optional paid upgrades.
- Add structured hints for optional packages the user might need (for example `translators[google]`).
- [x] Allow users to opt into community/self-hosted engines such as LibreTranslate with a `--include-community` flag.
- Document every provider addition in `DEPENDENCIES.md` with justification referencing external sources.

## Phase 5 – Documentation, Examples, and Tests
**Goal**: Keep abersetz approachable with real-world material and strong guardrails.
- Update user-facing docs (`README.md`, `CLAUDE.md`, `CHANGELOG.md`, `docs/`) whenever selectors, validation workflows, or setup guidance changes.
- Expand `WORK.md` logging templates to capture validation runs and outcomes per session.
- Maintain at least three runnable examples in `examples/`: multi-file translation, validation summary report, and config diff before/after setup.
- Extend `docs/` (or README) with guidance on picking engines based on cost and availability, drawing on the provider research above.
- Ensure tests cover selector normalization, CLI output, validation command, setup integration, and documentation link checks.

## Maintenance Sprint – CLI Option Guardrails *(Completed)*
**Objective**: Backfill regression coverage for CLI option validation and propagation so user-facing flags behave predictably without introducing new functionality.

## Micro Sprint – README + CLI Option Defaults *(Completed)*
**Objective**: Keep documentation clean and ensure CLI option defaults resolve predictably.
- Remove assistant preamble/outro from `README.md`.
- Add `_build_options_from_cli` coverage for include defaults when omitted.
- Add `_build_options_from_cli` coverage for output dir resolution.
</document_content>
</document>

<document index="12">
<source>TESTING.md</source>
<document_content>
---
this_file: TESTING.md
---
# Testing Guide

## Running Tests

### Unit Tests
Run the standard test suite:
```bash
python -m pytest
```

With coverage report:
```bash
python -m pytest --cov=. --cov-report=term-missing
```

### Integration Tests
Integration tests make real API calls and are skipped by default to avoid CI complications.

To run integration tests locally:
```bash
export ABERSETZ_INTEGRATION_TESTS=true
python -m pytest tests/test_integration.py -v
```

Some tests require API keys:
```bash
export SILICONFLOW_API_KEY=your-api-key
export ABERSETZ_INTEGRATION_TESTS=true
python -m pytest tests/test_integration.py -v
```

### Test Markers
- `@pytest.mark.integration` - Tests requiring network access
- `@pytest.mark.skipif` - Conditional test execution

### Continuous Testing
Use pytest-watch for automatic test runs:
```bash
uvx pytest-watch -- -xvs
```

## Test Coverage
Current coverage: **91%**

Coverage by module:
- Configuration management: 90%
- Translation pipeline: 97%
- CLI interface: 78%
- Engine abstractions: 82%

## Testing Best Practices
1. Write tests first (TDD)
2. Test edge cases: empty inputs, None values, large inputs
3. Mock external services in unit tests
4. Use integration tests sparingly for API validation
5. Keep tests focused and independent
6. Use descriptive test names: `test_function_when_condition_then_result`
</document_content>
</document>

<document index="13">
<source>TODO.md</source>
<document_content>
---
this_file: TODO.md
---
## Active TODO Items
- [x] Add `--include-community` flag to setup for community/self-hosted engines.
- [ ] Automate provider metadata extraction from `external/translators.txt` and `external/deep-translator.txt`.
- [ ] Sync pricing/tier hints into setup output using current provider research.
- [ ] Add structured hints for optional packages (for example `translators[google]`).
- [ ] Add docs guidance on picking engines based on cost and availability.
- [ ] Ensure docs link checks/CLI validation flows have regression coverage.
</document_content>
</document>

<document index="14">
<source>build.sh</source>
<document_content>
#!/usr/bin/env bash
# Release helper: clean previous artifacts, auto-fix and format the Python
# sources, run the project's helper tools, then build and publish.

# Work from the directory containing this script. Abort if the cd fails so the
# rewrite/format commands below never run against an unintended tree.
cd "$(dirname "$0")" || exit 1

uvx hatch clean

# Source clean-up passes, applied to every *.py file found by fd.
fd -e py -x autoflake {}
fd -e py -x pyupgrade --py311-plus {}
fd -e py -x ruff check --output-format=github --fix --unsafe-fixes {}
fd -e py -x ruff format --respect-gitignore --target-version py311 {}
uvx hatch fmt

# External helper tools; what they produce is not visible from this script.
llms .
gitnextver .

# Publish only when the build itself succeeded.
uvx hatch build && uv publish
</document_content>
</document>

# File: /Users/adam/Developer/vcs3/github.twardoch/pub/abersetz/examples/advanced_api.py
# Language: python

import asyncio
import json
from dataclasses import dataclass, field
from pathlib import Path
from abersetz import TranslationResult, TranslatorOptions, translate_path
from abersetz.config import AbersetzConfig, load_config
from abersetz.engines import EngineRequest, create_engine
import sys

class _LanguageStats:
    def record((self, result: TranslationResult)) -> None:
    def to_dict((self)) -> dict[str, object]:

class _ReportFile:
    def to_dict((self)) -> dict[str, object]:

class TranslationWorkflow:
    def __init__((self, config: AbersetzConfig | None = None)):
    def translate_project((
        self, source_dir: str, target_langs: list[str], engine: str = "tr/google"
    )):
    def generate_report((self, output_file: str = "translation_report.json")):

class vocManager:
    def __init__((self)):
    def load_voc((self, file_path: str, lang_pair: str)):
    def merge_vocabularies((self, *lang_pairs: str)) -> dict[str, str]:
    def translate_with_consistency((
        self, files: list[str], to_lang: str, base_voc: dict[str, str] | None = None
    )):

class ParallelTranslator:
    def translate_with_engine((self, text: str, engine_name: str, to_lang: str)):
    def compare_translations((self, text: str, engines: list[str], to_lang: str)):

class IncrementalTranslator:
    def __init__((self, checkpoint_file: str = ".translation_checkpoint.json")):
    def load_checkpoint((self)) -> set:
    def save_checkpoint((self)):
    def translate_incrementally((self, source_dir: str, to_lang: str)):

def record((self, result: TranslationResult)) -> None:

def to_dict((self)) -> dict[str, object]:

def from_result((cls, result: TranslationResult)) -> "_ReportFile":

def to_dict((self)) -> dict[str, object]:

def __init__((self, config: AbersetzConfig | None = None)):

def translate_project((
        self, source_dir: str, target_langs: list[str], engine: str = "tr/google"
    )):

def generate_report((self, output_file: str = "translation_report.json")):

def __init__((self)):

def load_voc((self, file_path: str, lang_pair: str)):

def merge_vocabularies((self, *lang_pairs: str)) -> dict[str, str]:

def translate_with_consistency((
        self, files: list[str], to_lang: str, base_voc: dict[str, str] | None = None
    )):

def translate_with_engine((self, text: str, engine_name: str, to_lang: str)):

def compare_translations((self, text: str, engines: list[str], to_lang: str)):

def example_multi_language(()):

def example_voc_consistency(()):

def example_parallel_comparison(()):

def example_incremental_translation(()):

def __init__((self, checkpoint_file: str = ".translation_checkpoint.json")):

def load_checkpoint((self)) -> set:

def save_checkpoint((self)):

def translate_incrementally((self, source_dir: str, to_lang: str)):


# File: /Users/adam/Developer/vcs3/github.twardoch/pub/abersetz/examples/basic_api.py
# Language: python

from collections.abc import Callable
from pathlib import Path
from abersetz import TranslatorOptions, translate_path
from abersetz.config import load_config, save_config
from abersetz.config import Credential, EngineConfig
import sys

FALLBACK_DESCRIPTION = =

def format_example_doc((func: Callable[..., object])) -> str:

def example_simple(()):

def example_batch(()):

def example_llm_with_voc(()):

def example_dry_run(()):

def example_html(()):

def example_with_config(()):


<document index="15">
<source>examples/batch_translate.sh</source>
<document_content>
#!/bin/bash
# this_file: examples/batch_translate.sh

# Advanced batch translation scripts

set -e  # Exit on error

# --- Settings ---------------------------------------------------------------
# Positional arguments: source tree ($1) and output base directory ($2).
PROJECT_ROOT=${1:-./docs}
OUTPUT_BASE=${2:-./translations}
# The engine selector may be overridden via the ABERSETZ_ENGINE variable.
ENGINE=${ABERSETZ_ENGINE:-tr/google}
# Target languages, each translated into its own subdirectory.
LANGUAGES=(es fr de ja zh-CN pt it ru)

# ANSI colour sequences; stored literally and interpreted when printed.
RED='\033[0;31m'
GREEN='\033[0;32m'
BLUE='\033[0;34m'
NC='\033[0m' # No Color

# Banner describing this run.
printf '%b\n' "${BLUE}=== Abersetz Batch Translation ===${NC}"
printf '%s\n' "Source: $PROJECT_ROOT" "Output: $OUTPUT_BASE" "Engine: $ENGINE" ""

# Translate PROJECT_ROOT into a single target language.
#
# Arguments:
#   $1 - target language code; also used as the output subdirectory name.
# Globals read: OUTPUT_BASE, PROJECT_ROOT, ENGINE, BLUE, GREEN, RED, NC.
# Returns: 0 when the abersetz command succeeds, 1 otherwise.
translate_lang() {
    local lang=$1
    local output_dir="$OUTPUT_BASE/$lang"

    echo -e "${BLUE}Translating to $lang...${NC}"

    # Every continuation line ends in exactly one backslash. A stray "\ " in
    # front of the continuation would hand abersetz an extra argument that is
    # a literal single space.
    if abersetz tr "$lang" "$PROJECT_ROOT" \
        --engine "$ENGINE" \
        --output "$output_dir" \
        --recurse \
        --include "*.md,*.txt,*.html" \
        --xclude ".*,*test*,*draft*"; then
        echo -e "${GREEN}✓ $lang completed${NC}"
        return 0
    else
        echo -e "${RED}✗ $lang failed${NC}"
        return 1
    fi
}

# Create output directory
mkdir -p "$OUTPUT_BASE"

# Track results: number of successful languages and the list of failed ones.
SUCCESS_COUNT=0
FAILED_LANGS=()

# Translate to each language, continuing past individual failures.
for lang in "${LANGUAGES[@]}"; do
    if translate_lang "$lang"; then
        # Use an arithmetic assignment rather than ((SUCCESS_COUNT++)): the
        # post-increment form evaluates to 0 on the first success, which is a
        # non-zero exit status and makes `set -e` abort the whole script.
        SUCCESS_COUNT=$((SUCCESS_COUNT + 1))
    else
        FAILED_LANGS+=("$lang")
    fi
    echo ""
done

# Report the outcome; any failed language makes the whole run exit with 1.
printf '%b\n' "${BLUE}=== Translation Summary ===${NC}"
printf '%s\n' "Successfully translated to $SUCCESS_COUNT/${#LANGUAGES[@]} languages"

if [[ ${#FAILED_LANGS[@]} -ne 0 ]]; then
    printf '%b\n' "${RED}Failed languages: ${FAILED_LANGS[*]}${NC}"
    exit 1
fi
printf '%b\n' "${GREEN}All translations completed successfully!${NC}"

# Write a markdown index listing every language directory that exists,
# together with the number of files found inside it.
INDEX_FILE="$OUTPUT_BASE/index.md"
{
    printf '%s\n' "# Translations" ""
    printf '%s\n' "Available translations of $PROJECT_ROOT:" ""

    for lang in "${LANGUAGES[@]}"; do
        # Languages without an output directory are left out of the index.
        [ -d "$OUTPUT_BASE/$lang" ] || continue
        file_count=$(find "$OUTPUT_BASE/$lang" -type f | wc -l)
        printf '%s\n' "- [$lang]($lang/) - $file_count files"
    done
} > "$INDEX_FILE"

printf '%b\n' "${GREEN}Index generated at $INDEX_FILE${NC}"
</document_content>
</document>

<document index="16">
<source>examples/config_setup.sh</source>
<document_content>
#!/bin/bash
# this_file: examples/config_setup.sh

# Setup and configure abersetz with various engines

set -e

echo "=== Abersetz Configuration Setup ==="
echo ""

# Succeed (exit status 0) when the name in $1 resolves to a command; all
# output from the lookup is discarded.
command_exists() {
    local cmd=$1
    command -v "$cmd" &>/dev/null
}

# Report whether an environment variable holds a non-empty value.
#
# Arguments:
#   $1 - name of the variable to inspect (looked up indirectly).
#   $2 - human-readable description printed when the variable is missing.
# Returns: 0 when the variable is set and non-empty, 1 otherwise.
setup_env_var() {
    local var_name=$1 var_description=$2

    # Guard clause: the configured case is the short one.
    if [ -n "${!var_name:-}" ]; then
        echo "✓ $var_name is configured"
        return 0
    fi

    echo "⚠ $var_name not set"
    echo "  Description: $var_description"
    echo "  To set: export $var_name='your_api_key_here'"
    return 1
}

# Verify that the abersetz CLI is available; stop immediately when it is not.
printf '%s\n' "Checking installation..."
if ! command_exists abersetz; then
    printf '%s\n' "✗ abersetz not found. Install with: pip install abersetz"
    exit 1
fi
printf '%s\n' "✓ abersetz is installed"
abersetz version

printf '\n'

# Print the configuration path reported by abersetz itself.
printf '%s\n' "Configuration location:"
abersetz config path
printf '\n'

# Check API keys for various engines
echo "Checking API keys for LLM engines:"
echo ""

# setup_env_var returns 1 for an unset key. These checks are informational
# only, so `|| true` keeps `set -e` from aborting the script at the first
# missing variable and lets every key be reported.
setup_env_var "OPENAI_API_KEY" "OpenAI API for GPT models" || true
setup_env_var "ANTHROPIC_API_KEY" "Anthropic API for Claude models" || true
setup_env_var "SILICONFLOW_API_KEY" "SiliconFlow API for Hunyuan translation" || true
setup_env_var "DEEPSEEK_API_KEY" "DeepSeek API for Chinese models" || true
setup_env_var "GROQ_API_KEY" "Groq API for fast inference" || true
setup_env_var "GOOGLE_API_KEY" "Google API for Gemini models" || true

echo ""

# Smoke-test engines
printf '%s\n' "Testing available engines:" ""

# The engines listed here need no API key; each gets one dry-run request and
# is marked with a check or a cross depending on the exit status.
printf '%s\n' "1. Testing free engines..."
for engine in tr/google tr/bing dt/google; do
    printf '%s' "  $engine: "
    status_mark="✗"
    if printf '%s\n' "Hello" | abtr es - --engine "$engine" --dry-run &>/dev/null; then
        status_mark="✓"
    fi
    printf '%s\n' "$status_mark"
done

printf '\n'

# Create sample configuration
# A starter config.toml is written only when no file exists at CONFIG_FILE, so
# an existing configuration is never overwritten.
# NOTE(review): the path is hard-coded under ~/.config, while the location
# printed by `abersetz config path` earlier in this script may differ on some
# platforms — confirm they agree.
CONFIG_FILE="$HOME/.config/abersetz/config.toml"
if [ ! -f "$CONFIG_FILE" ]; then
    echo "Creating default configuration..."
    mkdir -p "$(dirname "$CONFIG_FILE")"
    # The quoted 'EOF' delimiter disables shell expansion, so the TOML below is
    # written to the file exactly as it appears here.
    cat > "$CONFIG_FILE" <<'EOF'
[defaults]
engine = "tr/google"
from_lang = "auto"
to_lang = "en"
chunk_size = 1200
html_chunk_size = 1800

[credentials.openai]
env = "OPENAI_API_KEY"

[credentials.anthropic]
env = "ANTHROPIC_API_KEY"

[credentials.siliconflow]
env = "SILICONFLOW_API_KEY"

[credentials.deepseek]
env = "DEEPSEEK_API_KEY"

[credentials.groq]
env = "GROQ_API_KEY"

[credentials.google]
env = "GOOGLE_API_KEY"

[engines.hysf]
chunk_size = 2400

[engines.hysf.credential]
name = "siliconflow"

[engines.hysf.options]
model = "tencent/Hunyuan-MT-7B"
base_url = "https://api.siliconflow.com/v1"
temperature = 0.3

[engines.ullm]
chunk_size = 2400

[engines.ullm.options.profiles.default]
base_url = "https://api.siliconflow.com/v1"
model = "tencent/Hunyuan-MT-7B"
credential = { name = "siliconflow" }
temperature = 0.3
max_input_tokens = 32000

[engines.ullm.options.profiles.gpt4]
base_url = "https://api.openai.com/v1"
model = "gpt-4-turbo-preview"
credential = { name = "openai" }
temperature = 0.3
max_input_tokens = 128000

[engines.ullm.options.profiles.claude]
base_url = "https://api.anthropic.com/v1"
model = "claude-3-opus-20240229"
credential = { name = "anthropic" }
temperature = 0.3
max_input_tokens = 200000

[engines.ullm.options.profiles.deepseek]
base_url = "https://api.deepseek.com/v1"
model = "deepseek-chat"
credential = { name = "deepseek" }
temperature = 0.3
max_input_tokens = 32000
EOF
    echo "✓ Configuration created at $CONFIG_FILE"
else
    # Leave a pre-existing file untouched and just report where it is.
    echo "Configuration already exists at $CONFIG_FILE"
fi

printf '\n'

# Preview only the first 20 lines of the active configuration.
printf '%s\n' "Current configuration:"
abersetz config show | head -n 20
printf '%s\n' "..."

# Closing banner plus a few commands to try next; the quoted delimiter keeps
# the text literal.
cat <<'EOF'

=== Setup Complete ===

Quick test commands:
  abersetz tr es test.txt                    # Use default engine
  abtr fr test.txt --engine tr/bing # Use Bing
  abtr de test.txt --engine hy             # Use SiliconFlow LLM
  abtr ja test.txt --engine ullm/gpt4        # Use GPT-4
EOF
</document_content>
</document>

<document index="17">
<source>examples/engines_config.json</source>
<document_content>
{
  "defaults": {
    "engine": "tr/google",
    "from_lang": "auto",
    "to_lang": "en",

... (Data file content truncated to first 5 lines)
</document_content>
</document>

<document index="18">
<source>examples/pipeline.sh</source>
<document_content>
#!/bin/bash
# this_file: examples/pipeline.sh

# Complete translation pipeline with preprocessing and postprocessing

set -euo pipefail

# Configuration (positional arguments, all optional):
#   $1  directory scanned for translatable files  (default: current directory)
#   $2  target language code handed to abersetz   (default: es)
#   $3  directory that receives the final output  (default: ./translated_<lang>)
SOURCE_DIR="${1:-.}"
TARGET_LANG="${2:-es}"
FINAL_OUTPUT="${3:-./translated_$TARGET_LANG}"

# Setup work directory.
# mktemp -d creates a fresh, private directory with an unpredictable name, so
# the script can neither collide with nor be redirected through an existing
# /tmp/abersetz_work_<pid> entry.
WORK_DIR="$(mktemp -d "${TMPDIR:-/tmp}/abersetz_work_XXXXXX")"
# Single quotes defer expansion until the trap fires and keep the path quoted.
trap 'rm -rf -- "$WORK_DIR"' EXIT

echo "=== Abersetz Translation Pipeline ==="
echo "Source: $SOURCE_DIR"
echo "Target language: $TARGET_LANG"
echo "Output: $FINAL_OUTPUT"
echo ""

# Step 1: Find and copy translatable files
echo "Step 1: Collecting files..."
# Create the staging directory up front. Without it, a run that matches no
# files leaves the directory missing, the counting `find` below fails, and
# under `pipefail` an `|| echo 0` fallback would append a second "0" to the
# captured value, breaking the numeric comparison that guards the early exit.
mkdir -p "$WORK_DIR/source"
# Drop one trailing slash so the prefix strip matches the paths find prints.
src_root="${SOURCE_DIR%/}"
# NUL-delimited names with `IFS= read -r -d ''` survive whitespace and
# newlines inside file names.
find "$SOURCE_DIR" -type f \( \
    -name "*.md" -o \
    -name "*.txt" -o \
    -name "*.html" -o \
    -name "*.htm" \
\) -not -path "*/\.*" -not -path "*/node_modules/*" \
   -not -path "*/venv/*" -not -path "*/__pycache__/*" -print0 |
    while IFS= read -r -d '' file; do
        # Quote the prefix so glob characters in the source path are literal.
        rel_path="${file#"$src_root"/}"
        dest="$WORK_DIR/source/$rel_path"
        mkdir -p "$(dirname "$dest")"
        cp "$file" "$dest"
    done

# Strip the padding some `wc` implementations emit so the value prints cleanly.
FILE_COUNT=$(find "$WORK_DIR/source" -type f | wc -l | tr -d '[:space:]')
echo "  Found $FILE_COUNT files"

if [ "$FILE_COUNT" -eq 0 ]; then
    echo "No files to translate!"
    exit 1
fi

# Step 2: Preprocess files (optional)
echo -e "\nStep 2: Preprocessing..."
# Example: Convert markdown links to absolute URLs
# find "$WORK_DIR/source" -name "*.md" -exec sed -i.bak 's|\](./|\](https://example.com/|g' {} \;
echo "  Preprocessing complete"

# Step 3: Translate
# Run abersetz over the staged copy and write the result to a sibling
# directory inside the work area, where later steps post-process it.
# Each continuation line ends in exactly one backslash: a "\ \" sequence would
# hand abersetz a literal " " as an extra positional argument.
echo -e "\nStep 3: Translating..."
if abersetz tr "$TARGET_LANG" "$WORK_DIR/source" \
    --output "$WORK_DIR/translated" \
    --recurse; then
    echo "  Translation complete"
else
    echo "  Translation failed!"
    exit 1
fi

# Step 4: Postprocess translations
echo -e "\nStep 4: Postprocessing..."
# Example: Fix common translation issues in translated Markdown files.
# NUL-delimited names keep paths with whitespace or newlines intact.
find "$WORK_DIR/translated" -type f -name "*.md" -print0 |
    while IFS= read -r -d '' file; do
        # Reduce a fence followed by lowercase letters at end of line to a bare
        # fence (the tag after the backticks might have been translated).
        sed -i.bak 's/```[a-z]*$/```/g' "$file"
        # -i.bak is the in-place form the original used; drop its backup.
        rm -f -- "${file}.bak"
    done
echo "  Postprocessing complete"

# Step 5: Generate translation report
# Writes a Markdown summary next to the translated files, then appends one
# bullet per translated file with its size.
echo -e "\nStep 5: Generating report..."
REPORT_FILE="$WORK_DIR/translated/TRANSLATION_REPORT.md"
cat > "$REPORT_FILE" <<EOF
# Translation Report

## Summary
- **Source Directory**: $SOURCE_DIR
- **Target Language**: $TARGET_LANG
- **Date**: $(date)
- **Files Translated**: $FILE_COUNT

## File List
EOF

find "$WORK_DIR/translated" -type f -not -name "TRANSLATION_REPORT.md" -print0 |
    while IFS= read -r -d '' file; do
        rel_path="${file#"$WORK_DIR"/translated/}"
        # Some wc implementations pad the count; strip the whitespace.
        size=$(wc -c < "$file" | tr -d '[:space:]')
        # numfmt is part of GNU coreutils and may be absent elsewhere; fall
        # back to a plain byte count instead of emitting an empty size.
        if command -v numfmt >/dev/null 2>&1; then
            human_size=$(numfmt --to=iec-i --suffix=B "$size")
        else
            human_size="${size}B"
        fi
        echo "- $rel_path ($human_size)" >> "$REPORT_FILE"
    done

echo "  Report generated"

# Step 6: Copy to final destination
echo -e "\nStep 6: Copying to final destination..."
# Any previous output at this path is replaced wholesale.
rm -rf "$FINAL_OUTPUT"
# cp cannot create missing parent directories (e.g. for "out/es"), so make
# sure the parent exists before copying.
mkdir -p "$(dirname "$FINAL_OUTPUT")"
cp -r "$WORK_DIR/translated" "$FINAL_OUTPUT"
echo "  Files copied to $FINAL_OUTPUT"

# Step 7: Verification
# Compare the number of published files (report excluded) with the number of
# files staged in step 1.
echo -e "\nStep 7: Verification..."
# Strip the padding some `wc` implementations emit so the value prints cleanly.
TRANSLATED_COUNT=$(find "$FINAL_OUTPUT" -type f -not -name "TRANSLATION_REPORT.md" | wc -l | tr -d '[:space:]')
if [ "$TRANSLATED_COUNT" -eq "$FILE_COUNT" ]; then
    echo "  ✓ All files translated successfully"
else
    echo "  ⚠ Warning: Expected $FILE_COUNT files, found $TRANSLATED_COUNT"
fi

echo -e "\n=== Pipeline Complete ==="
echo "Translated files are in: $FINAL_OUTPUT"
echo "Report available at: $FINAL_OUTPUT/TRANSLATION_REPORT.md"
</document_content>
</document>

<document index="19">
<source>examples/pl/poem_en.txt</source>
<document_content>
Być lub nie, to jest pytanie: 
Czy to jest szlachetne w umyśle, by cierpieć 
Procy i strzały oburzającej fortuny, 
Lub wziąć broń do morza kłopotówI przeciwstawiając się ich zakończeni. Umrzeć - spać, 
Więcej nie; i snem, mówiąc, że kończymy 
Ból serca i tysiące naturalnych wstrząsów 
To ciało jest spadkobiercą: „to jest konsumpcjaPobożne, aby być życzeniem. Umrzeć, spać; 
Spać, w stanie marzyć - powie, jest pocieranie: 
Bo w tym śnie śmierci, jakie sny mogą nadejść, 
Kiedy odrzuciliśmy tę śmiertelną cewkę,Musi nam się zatrzymać - jest szacunek 
To powoduje katastrofę tak długiego życia. 
Bo kto nosiłby bicze i pogardy czasu, 
Th’ -upresor jest w błędzie, dumny człowiek jest skryty,Błędności z niepoprawami, opóźnienie prawa, 
Bezczelność urzędu i odmienne 
Ta zaleca pacjenta z powodu tego, co bierze, 
Kiedy on sam mógłby zrobić jego ciszyZ gołym bodkinem? Kto by uporządkował niedźwiedzie, 
Chrząkać i poci się w zmęczonym życiu, 
Ale ten strach przed czymś po śmierci, 
Niedopolowy kraj, od którego kouringuŻaden podróżnik nie wraca, zagadnia testament, 
I sprawia, że ​​raczej nosimy te choroby, które mamy 
Niż latać do innych, o których nie wiemy? 
Zatem sumienie, czyni z nas tchórzów,A zatem rodzime odcień rozdzielczości 
Jest chory z bladą obsadą myśli, 
Oraz przedsiębiorstwa o wielkim rdzeniu i momencie 
Z tego powodu ich prądy zmieniają się 
I stracić nazwę akcji.
</document_content>
</document>

<document index="20">
<source>examples/pl/poem_pl.txt</source>
<document_content>
# this_file: examples/poem_pl.txtŚwit spływa po dachach,Dzwony lśnią w porannej mgle,Sąsiedzi wymieniają pozdrowienia,A nadzieja znów czuje się jak dar.
</document_content>
</document>

<document index="21">
<source>examples/poem_en.txt</source>
<document_content>
To be, or not to be, that is the question:
Whether ’tis nobler in the mind to suffer
The slings and arrows of outrageous fortune,
Or to take arms against a sea of troubles
And by opposing end them. To die—to sleep,
No more; and by a sleep to say we end
The heart-ache and the thousand natural shocks
That flesh is heir to: ’tis a consummation
Devoutly to be wish’d. To die, to sleep;
To sleep, perchance to dream—ay, there’s the rub:
For in that sleep of death what dreams may come,
When we have shuffled off this mortal coil,
Must give us pause—there’s the respect
That makes calamity of so long life.
For who would bear the whips and scorns of time,
Th’oppressor’s wrong, the proud man’s contumely,
The pangs of dispriz’d love, the law’s delay,
The insolence of office, and the spurns
That patient merit of th’unworthy takes,
When he himself might his quietus make
With a bare bodkin? Who would fardels bear,
To grunt and sweat under a weary life,
But that the dread of something after death,
The undiscovere’d country, from whose bourn
No traveller returns, puzzles the will,
And makes us rather bear those ills we have
Than fly to others that we know not of?
Thus conscience doth make cowards of us all,
And thus the native hue of resolution
Is sicklied o’er with the pale cast of thought,
And enterprises of great pith and moment
With this regard their currents turn awry
And lose the name of action.
</document_content>
</document>

<document index="22">
<source>examples/poem_pl.txt</source>
<document_content>
# this_file: examples/poem_pl.txt

Świt spływa po dachach,
Dzwony lśnią w porannej mgle,
Sąsiedzi wymieniają pozdrowienia,
A nadzieja znów czuje się jak dar.
</document_content>
</document>

<document index="23">
<source>examples/translate.sh</source>
<document_content>
#!/bin/bash
# this_file: examples/translate.sh

# Basic shell script examples for abersetz CLI

# Each example prints a banner and then runs (or, where commented out, only
# shows) the corresponding command. Every banner after the first is preceded
# by a blank line.

# Example 1: translate one file with an explicitly chosen engine
printf '%s\n' "=== Example 1: Simple translation ==="
abersetz tr es poem_en.txt --engine tr/google

# Example 2: same kind of call through the short `abtr` entry point
printf '\n%s\n' "=== Example 2: Shorthand command ==="
abtr fr poem_en.txt

# Example 3: walk a directory tree and write results to a separate folder
printf '\n%s\n' "=== Example 3: Directory translation ==="
abersetz tr de ./docs --recurse --output ./docs_de

# Example 4: restrict the run with include / xclude patterns
printf '\n%s\n' "=== Example 4: Pattern matching ==="
abtr ja . --include "*.md,*.txt" --xclude "*test*,.*" --output ./translations/ja

# Example 5: replace the original files in place (be careful!)
printf '\n%s\n' "=== Example 5: In-place translation ==="
# abersetz tr es backup_first.txt --write_over

# Example 6: dry run, to test without translating
printf '\n%s\n' "=== Example 6: Dry run mode ==="
abersetz tr zh-CN ./project --dry-run

# Example 7: the same file through several engines, in order:
# Google Translate, Bing Translate, DeepL via deep-translator
printf '\n%s\n' "=== Example 7: Different engines ==="
for engine in tr/google tr/bing dt/deepl; do
    abtr pt file.txt --engine "$engine"
done

# Example 8: save the voc when using an LLM engine
printf '\n%s\n' "=== Example 8: LLM with voc ==="
# Requires SILICONFLOW_API_KEY environment variable
# abersetz tr es technical.md --engine hy --save-voc

# Example 9: verbose output, useful for debugging
printf '\n%s\n' "=== Example 9: Verbose output ==="
abersetz tr fr test.txt --verbose --dry-run

# Example 10: print the installed version
printf '\n%s\n' "=== Example 10: Version check ==="
abersetz version
</document_content>
</document>

<document index="24">
<source>examples/validate_report.sh</source>
<document_content>
#!/bin/bash
# this_file: examples/validate_report.sh

set -euo pipefail

# Report destination: first argument, or validate-report.txt when omitted.
OUTPUT_FILE="${1:-validate-report.txt}"

# Stop early with a hint on stderr when the CLI is not on PATH.
command -v abersetz >/dev/null 2>&1 || {
    echo "abersetz executable not found. Install with: pip install abersetz" >&2
    exit 1
}

# Capture the validation output in the report file, then echo it back.
printf '%s\n' "Running abersetz validate (target language: es)..."
abersetz validate --target-lang es >"$OUTPUT_FILE"

printf '%s\n' "Validation summary written to $OUTPUT_FILE"
cat "$OUTPUT_FILE"
</document_content>
</document>

<document index="25">
<source>examples/vocab.json</source>
<document_content>
{
  "this_file": "examples/vocab.json",
  "terms": {
    "rooftops": "dachy",
    "mist": "mgła",

... (Data file content truncated to first 5 lines)
</document_content>
</document>

<document index="26">
<source>examples/walkthrough.md</source>
<document_content>
---
this_file: examples/walkthrough.md
---
# Sample Translation Walkthrough

```bash
abersetz tr pl examples/poem_en.txt \
  --engine hysf \
  --output examples/out \
  --save-voc \
  --verbose
```

This command translates the poem and writes the result to `examples/out/poem_en.txt`. The vocabulary file is saved as `examples/out/poem_en.txt.voc`.
</document_content>
</document>

<document index="27">
<source>md.txt</source>
<document_content>



/Users/adam/Developer/vcs/github.twardoch/pub/abersetz/DEPENDENCIES.md
/Users/adam/Developer/vcs/github.twardoch/pub/abersetz/docs/api.md
/Users/adam/Developer/vcs/github.twardoch/pub/abersetz/docs/cli.md
/Users/adam/Developer/vcs/github.twardoch/pub/abersetz/docs/configuration.md
/Users/adam/Developer/vcs/github.twardoch/pub/abersetz/docs/index.md
/Users/adam/Developer/vcs/github.twardoch/pub/abersetz/docs/installation.md
/Users/adam/Developer/vcs/github.twardoch/pub/abersetz/examples/walkthrough.md

/Users/adam/Developer/vcs/github.twardoch/pub/abersetz/IDEA.md
/Users/adam/Developer/vcs/github.twardoch/pub/abersetz/issues/102-review.md



/Users/adam/Developer/vcs/github.twardoch/pub/abersetz/README.md
/Users/adam/Developer/vcs/github.twardoch/pub/abersetz/SPEC.md
/Users/adam/Developer/vcs/github.twardoch/pub/abersetz/TESTING.md
</document_content>
</document>

<document index="28">
<source>package.toml</source>
<document_content>
# Package configuration
[package]
include_cli = true        # Include CLI boilerplate
include_logging = true    # Include logging setup
use_pydantic = true      # Use Pydantic for data validation
use_rich = true          # Use Rich for terminal output

[features]
mkdocs = false           # Enable MkDocs documentation
vcs = true              # Initialize Git repository
github_actions = true   # Add GitHub Actions workflows 
</document_content>
</document>

<document index="29">
<source>pyproject.toml</source>
<document_content>
# this_file: pyproject.toml

[build-system]
requires = ["hatchling>=1.27", "hatch-vcs>=0.4"]
build-backend = "hatchling.build"

[project]
name = "abersetz"
description = ""
readme = "README.md"
requires-python = ">=3.10"
dynamic = ["version"]
dependencies = [
    "deep-translator>=1.11",
    "fire>=0.5",
    "httpx>=0.25",
    "loguru>=0.7",
    "langcodes>=3.4",
    "platformdirs>=4.3",
    "rich>=13.9",
    "semantic-text-splitter>=0.7",
    "tenacity>=8.4",
    "translators>=5.9",
    "tomli-w>=1.0",
    "tomli>=2.0; python_version < \"3.11\"",
    "language-data>=1.4.0",
]

[[project.authors]]
name = "Adam Twardoch"
email = "adam+github@twardoch.com"

[project.license]
text = "MIT"

[project.urls]
Documentation = "https://github.com/twardoch/abersetz#readme"
Issues = "https://github.com/twardoch/abersetz/issues"
Source = "https://github.com/twardoch/abersetz"

[project.scripts]
abersetz = "abersetz.cli_fast:main"
abtr = "abersetz.cli:abtr_main"

[dependency-groups]
dev = [
    "pytest>=8.3",
    "pytest-cov>=6.0",
    "ruff>=0.9",
    "mypy>=1.10",
]

[tool.hatch.version]
source = "vcs"

[tool.hatch.build]
# Hatch's file-selection option is spelled `exclude`; with the key `xclude`
# the "/dist" pattern is not registered as an exclusion.
exclude = ["/dist"]

[tool.hatch.build.targets.wheel]
packages = ["src/abersetz"]

[tool.hatch.build.hooks.vcs]
version-file = "src/abersetz/__about__.py"

[tool.hatch.envs.default]
python = "3.12"
dependencies = [
    "pytest>=8.3",
    "pytest-cov>=6.0",
    "ruff>=0.9",
    "mypy>=1.10",
]

[tool.hatch.envs.default.scripts]
test = "pytest {args:tests}"
lint = "ruff check {args:src tests}"
fmt = "ruff format {args:src tests}"
default = ["fmt", "lint", "test"]

[tool.uv]
default-groups = ["dev"]
python-preference = "managed"

[tool.ruff]
line-length = 100
target-version = "py310"

[tool.ruff.lint]
select = ["E", "F", "B", "I", "UP", "SIM"]
ignore = ["E203", "E501"]

[tool.ruff.format]
quote-style = "double"
indent-style = "space"

[tool.pytest.ini_options]
addopts = "-q"
testpaths = ["tests"]
markers = [
    "integration: mark test as integration test (requires network/API access)",
]

[tool.mypy]
python_version = "3.12"

[[tool.mypy.overrides]]
module = [
    "pytest",
    "pytest.*",
    "httpx",
    "httpx.*",
    "tenacity",
    "tenacity.*",
    "semantic_text_splitter",
    "semantic_text_splitter.*",
    "platformdirs",
    "platformdirs.*",
    "tomli_w",
    "loguru",
    "loguru.*",
    "langcodes",
    "langcodes.*",
    "rich",
    "rich.*",
    "requests",
    "requests.*",
    "translators",
    "translators.*",
]
ignore_missing_imports = true
</document_content>
</document>

# File: /Users/adam/Developer/vcs3/github.twardoch/pub/abersetz/src/abersetz/__init__.py
# Language: python

from importlib import metadata as _metadata
from typing import TYPE_CHECKING, Any
from .pipeline import PipelineError, TranslationResult, TranslatorOptions, translate_path
from . import pipeline
from .__about__ import __version__

_LAZY_IMPORTS = :

def __getattr__((name: str)) -> Any:


# File: /Users/adam/Developer/vcs3/github.twardoch/pub/abersetz/src/abersetz/__main__.py
# Language: python

from .cli import main


# File: /Users/adam/Developer/vcs3/github.twardoch/pub/abersetz/src/abersetz/abersetz.py
# Language: python

from .pipeline import PipelineError, TranslationResult, TranslatorOptions, translate_path


# File: /Users/adam/Developer/vcs3/github.twardoch/pub/abersetz/src/abersetz/chunking.py
# Language: python

import re
from collections.abc import Iterable
from enum import Enum
from semantic_text_splitter import TextSplitter

_HTML_PATTERN = =

class TextFormat(Enum):

def detect_format((text: str)) -> TextFormat:

def _fallback_chunks((text: str, max_size: int)) -> list[str]:

def _semantic_chunks((text: str, max_size: int)) -> Iterable[str]:

def chunk_text((text: str, max_size: int, fmt: TextFormat)) -> list[str]:


# File: /Users/adam/Developer/vcs3/github.twardoch/pub/abersetz/src/abersetz/cli.py
# Language: python

import json
import sys
from collections.abc import Iterable, Sequence
from pathlib import Path
import fire
import tomli_w
from loguru import logger
from rich.console import Console
from rich.table import Table
from .config import config_path, load_config
from .engine_catalog import (
    DEEP_TRANSLATOR_PAID_PROVIDERS,
    PAID_TRANSLATOR_PROVIDERS,
    EngineEntry,
    collect_deep_translator_providers,
    collect_translator_providers,
    normalize_selector,
)
from .pipeline import PipelineError, TranslationResult, TranslatorOptions, translate_path
from .setup import setup_command
from .validation import ValidationResult, validate_engines
from langcodes import get
from langcodes.language_lists import CLDR_LANGUAGES
from . import __version__
from langcodes import get

POPULAR_LANG_CODES = =

class ConfigCommands:
    def show((self)) -> str:
    def path((self)) -> str:

class AbersetzCLI:
    def version((self)) -> str:
    def tr((
        self,
        to_lang: str,
        path: str | Path,
        *,
        engine: str | None = None,
        from_lang: str | None = None,
        recurse: bool = True,
        write_over: bool = False,
        output: str | Path | None = None,
        save_voc: bool = False,
        chunk_size: int | None = None,
        html_chunk_size: int | None = None,
        include: str | Sequence[str] | None = None,
        xclude: str | Sequence[str] | None = None,
        dry_run: bool = False,
        prolog: str | None = None,
        voc: str | None = None,
        verbose: bool = False,
    )) -> None:
    def config((self)) -> ConfigCommands:
    def lang((self)) -> list[str]:
    def engines((
        self,
        include_paid: bool = False,
        *,
        family: str | None = None,
        configured_only: bool = False,
    )) -> None:
    def setup((
        self,
        non_interactive: bool = False,
        verbose: bool = False,
        include_community: bool = False,
    )) -> None:
    def validate((
        self,
        *,
        selectors: str | Sequence[str] | None = None,
        target_lang: str = "es",
        source_lang: str = "auto",
        sample_text: str = "Hello, world!",
        include_defaults: bool = True,
    )) -> list[ValidationResult]:

def _configure_logging((verbose: bool)) -> None:

def _parse_patterns((value: str | Sequence[str] | None)) -> tuple[str, ...]:

def _load_json_data((reference: str | None)) -> dict[str, str]:

def _render_results((results: Iterable[TranslationResult])) -> None:

def _render_engine_entries((entries: list[EngineEntry])) -> None:

def _render_validation_entries((results: list[ValidationResult])) -> None:

def _collect_engine_entries((
    include_paid: bool,
    *,
    family: str | None = None,
    configured_only: bool = False,
)) -> list[EngineEntry]:

def show((self)) -> str:

def path((self)) -> str:

def _validate_language_code((code: str | None, param_name: str)) -> str | None:

def _build_options_from_cli((
    path: str | Path,
    *,
    engine: str | None,
    from_lang: str | None,
    to_lang: str | None,
    recurse: bool,
    write_over: bool,
    output: str | Path | None,
    save_voc: bool,
    chunk_size: int | None,
    html_chunk_size: int | None,
    include: str | Sequence[str] | None,
    xclude: str | Sequence[str] | None,
    dry_run: bool,
    prolog: str | None,
    voc: str | None,
)) -> TranslatorOptions:

def _iter_language_rows(()) -> list[str]:

def version((self)) -> str:

def tr((
        self,
        to_lang: str,
        path: str | Path,
        *,
        engine: str | None = None,
        from_lang: str | None = None,
        recurse: bool = True,
        write_over: bool = False,
        output: str | Path | None = None,
        save_voc: bool = False,
        chunk_size: int | None = None,
        html_chunk_size: int | None = None,
        include: str | Sequence[str] | None = None,
        xclude: str | Sequence[str] | None = None,
        dry_run: bool = False,
        prolog: str | None = None,
        voc: str | None = None,
        verbose: bool = False,
    )) -> None:

def config((self)) -> ConfigCommands:

def lang((self)) -> list[str]:

def engines((
        self,
        include_paid: bool = False,
        *,
        family: str | None = None,
        configured_only: bool = False,
    )) -> None:

def setup((
        self,
        non_interactive: bool = False,
        verbose: bool = False,
        include_community: bool = False,
    )) -> None:

def validate((
        self,
        *,
        selectors: str | Sequence[str] | None = None,
        target_lang: str = "es",
        source_lang: str = "auto",
        sample_text: str = "Hello, world!",
        include_defaults: bool = True,
    )) -> list[ValidationResult]:

def main(()) -> None:

def abtr_main(()) -> None:


# File: /Users/adam/Developer/vcs3/github.twardoch/pub/abersetz/src/abersetz/cli_fast.py
# Language: python

import sys
from importlib import metadata
from .__about__ import __version__ as version
from .cli import main as cli_main

def handle_version(()) -> None:

def main(()) -> None:


# File: /Users/adam/Developer/vcs3/github.twardoch/pub/abersetz/src/abersetz/config.py
# Language: python

import copy
import os
from collections.abc import Mapping
from dataclasses import dataclass, field
from pathlib import Path
from typing import Any
from platformdirs import user_config_dir
from .engine_catalog import (
    DEEP_TRANSLATOR_FREE_PROVIDERS,
    FREE_TRANSLATOR_PROVIDERS,
    HYSF_DEFAULT_MODEL,
    HYSF_DEFAULT_TEMPERATURE,
    normalize_selector,
)
import tomllib
import tomli as tomllib
import tomli_w
from loguru import logger
from loguru import logger

CONFIG_FILENAME = =
DEFAULT_CONFIG_DICT = :

class Defaults:
    def __setattr__((self, name: str, value: Any)) -> None:
    def to_dict((self)) -> dict[str, Any]:

class Credential:
    def to_dict((self)) -> dict[str, str]:

class EngineConfig:
    def to_dict((self)) -> dict[str, Any]:

class AbersetzConfig:
    def to_dict((self)) -> dict[str, Any]:

def __setattr__((self, name: str, value: Any)) -> None:

def to_dict((self)) -> dict[str, Any]:

def from_dict((cls, raw: Mapping[str, Any] | None)) -> Defaults:

def to_dict((self)) -> dict[str, str]:

def from_any((cls, raw: CredentialLike | None)) -> Credential | None:

def to_dict((self)) -> dict[str, Any]:

def from_dict((cls, name: str, raw: Mapping[str, Any] | None)) -> EngineConfig:

def to_dict((self)) -> dict[str, Any]:

def from_dict((cls, raw: Mapping[str, Any])) -> AbersetzConfig:

def _default_dict(()) -> dict[str, Any]:

def _default_config(()) -> AbersetzConfig:

def config_dir(()) -> Path:

def config_path(()) -> Path:

def load_config(()) -> AbersetzConfig:

def save_config((config: AbersetzConfig)) -> None:

def resolve_credential((
    config: AbersetzConfig,
    reference: CredentialLike,
)) -> str | None:


# File: /Users/adam/Developer/vcs3/github.twardoch/pub/abersetz/src/abersetz/engine_catalog.py
# Language: python

from collections.abc import Iterable
from dataclasses import dataclass
import translators

ENGINE_FAMILY_SHORT_TO_LONG = :
ENGINE_FAMILY_LONG_TO_SHORT = :
FREE_TRANSLATOR_PROVIDERS = =
COMMUNITY_TRANSLATOR_PROVIDERS = =
PAID_TRANSLATOR_PROVIDERS = =
DEEP_TRANSLATOR_FREE_PROVIDERS = =
COMMUNITY_DEEP_TRANSLATOR_PROVIDERS = =
DEEP_TRANSLATOR_PAID_PROVIDERS = =
HYSF_DEFAULT_MODEL = =
HYSF_DEFAULT_TEMPERATURE = =

class EngineEntry:

def _split_selector((selector: str)) -> tuple[str, str | None]:

def normalize_selector((selector: str | None)) -> str | None:

def resolve_engine_reference((selector: str)) -> tuple[str, str | None]:

def _filter_available((pool: Iterable[str], allowed: Iterable[str])) -> list[str]:

def collect_translator_providers((*, include_paid: bool = False)) -> list[str]:

def collect_deep_translator_providers((*, include_paid: bool = False)) -> list[str]:


# File: /Users/adam/Developer/vcs3/github.twardoch/pub/abersetz/src/abersetz/engines.py
# Language: python

import json
import re
from collections.abc import Mapping
from dataclasses import dataclass
from pathlib import Path
from typing import Any, Protocol
from tenacity import retry, stop_after_attempt, wait_exponential
from .chunking import TextFormat
from .config import AbersetzConfig, EngineConfig, resolve_credential
from .engine_catalog import (
    HYSF_DEFAULT_MODEL,
    HYSF_DEFAULT_TEMPERATURE,
    normalize_selector,
    resolve_engine_reference,
)
from .openai_lite import OpenAI
from mlx_lm import generate, load
from llama_cpp import Llama
import translators
from deep_translator import (  # type: ignore
                DeeplTranslator,
                GoogleTranslator,
                LibreTranslator,
                LingueeTranslator,
                MicrosoftTranslator,
                MyMemoryTranslator,
                PapagoTranslator,
            )
from langcodes import get as get_language

MTHY_LANGUAGE_DATA = =
MTHY_PROMPT_NO_TERMS = =
MTHY_LANG_MAP = :

class EngineError(RuntimeError):

class EngineRequest:

class EngineResult:

class Engine(Protocol):
    def translate((self, request: EngineRequest)) -> EngineResult:
    def chunk_size_for((self, fmt: TextFormat)) -> int | None:

class EngineBase:
    def __init__((
        self,
        name: str,
        chunk_size: int | None,
        html_chunk_size: int | None,
    )) -> None:
    def chunk_size_for((self, fmt: TextFormat)) -> int | None:

class LocalMlxEngine(EngineBase):
    def __init__((
        self,
        family: str,
        config: EngineConfig,
        model_path: str,
        *,
        max_tokens: int,
    )) -> None:
    def translate((self, request: EngineRequest)) -> EngineResult:

class LocalGgufEngine(EngineBase):
    def __init__((
        self,
        family: str,
        config: EngineConfig,
        model_path: str,
        *,
        max_tokens: int,
        temperature: float,
        n_gpu_layers: int,
        n_ctx: int,
    )) -> None:
    def translate((self, request: EngineRequest)) -> EngineResult:

class TranslatorsEngine(EngineBase):
    def __init__((self, provider: str, config: EngineConfig)) -> None:
    def translate((self, request: EngineRequest)) -> EngineResult:

class DeepTranslatorEngine(EngineBase):
    def __init__((self, provider: str, config: EngineConfig)) -> None:
    def translate((self, request: EngineRequest)) -> EngineResult:

class LlmEngine(EngineBase):
    def __init__((
        self,
        config: EngineConfig,
        client: Any,
        *,
        model: str,
        temperature: float,
        static_prolog: Mapping[str, str] | None = None,
    )) -> None:
    def translate((self, request: EngineRequest)) -> EngineResult:
    def _build_messages((
        self,
        request: EngineRequest,
        voc: Mapping[str, str],
        merged: Mapping[str, str],
    )) -> list[dict[str, str]]:
    def _parse_payload((self, payload: str)) -> tuple[str, dict[str, str]]:

class HysfEngine(EngineBase):
    def __init__((self, config: EngineConfig, client: Any)) -> None:
    def translate((self, request: EngineRequest)) -> EngineResult:

def _resolve_mthy_language((code: str)) -> str:

def translate((self, request: EngineRequest)) -> EngineResult:

def chunk_size_for((self, fmt: TextFormat)) -> int | None:

def __init__((
        self,
        name: str,
        chunk_size: int | None,
        html_chunk_size: int | None,
    )) -> None:

def chunk_size_for((self, fmt: TextFormat)) -> int | None:

def __init__((
        self,
        family: str,
        config: EngineConfig,
        model_path: str,
        *,
        max_tokens: int,
    )) -> None:

def translate((self, request: EngineRequest)) -> EngineResult:

def __init__((
        self,
        family: str,
        config: EngineConfig,
        model_path: str,
        *,
        max_tokens: int,
        temperature: float,
        n_gpu_layers: int,
        n_ctx: int,
    )) -> None:

def translate((self, request: EngineRequest)) -> EngineResult:

def __init__((self, provider: str, config: EngineConfig)) -> None:

def _translate_with_retry((
        self, text: str, is_html: bool, source_lang: str, target_lang: str
    )) -> str:

def translate((self, request: EngineRequest)) -> EngineResult:

def _get_providers((cls)) -> Mapping[str, type]:

def __init__((self, provider: str, config: EngineConfig)) -> None:

def _translate_with_retry((self, text: str, source_lang: str, target_lang: str)) -> str:

def translate((self, request: EngineRequest)) -> EngineResult:

def __init__((
        self,
        config: EngineConfig,
        client: Any,
        *,
        model: str,
        temperature: float,
        static_prolog: Mapping[str, str] | None = None,
    )) -> None:

def _invoke((self, messages: list[dict[str, str]])) -> str:

def translate((self, request: EngineRequest)) -> EngineResult:

def _build_messages((
        self,
        request: EngineRequest,
        voc: Mapping[str, str],
        merged: Mapping[str, str],
    )) -> list[dict[str, str]]:

def _parse_payload((self, payload: str)) -> tuple[str, dict[str, str]]:

def __init__((self, config: EngineConfig, client: Any)) -> None:

def _invoke((self, message: str)) -> str:

def translate((self, request: EngineRequest)) -> EngineResult:

def _language_name((code: str)) -> str:

def _make_openai_client((token: str, base_url: str | None)) -> OpenAI:

def _build_llm_engine((
    selector: str,
    config: AbersetzConfig,
    engine_cfg: EngineConfig,
    *,
    profile: Mapping[str, Any] | None,
    client: Any | None,
)) -> Engine:

def _build_hysf_engine((
    selector: str,
    config: AbersetzConfig,
    engine_cfg: EngineConfig,
    *,
    client: Any | None,
)) -> Engine:

def _translators_provider((variant: str | None, engine_cfg: EngineConfig)) -> str:

def _select_profile((engine_cfg: EngineConfig, variant: str | None)) -> Mapping[str, Any] | None:

def create_engine((
    selector: str,
    config: AbersetzConfig,
    *,
    client: Any | None = None,
)) -> Engine:


# File: /Users/adam/Developer/vcs3/github.twardoch/pub/abersetz/src/abersetz/openai_lite.py
# Language: python

from dataclasses import dataclass
from typing import Any
import httpx
from tenacity import retry, stop_after_attempt, wait_exponential

class ChatCompletionMessage:

class ChatCompletionChoice:

class ChatCompletionResponse:

class ChatCompletions:
    def __init__((self, client: OpenAI)):

class OpenAI:
    def __init__((self, api_key: str, base_url: str | None = None)):

class Chat:
    def __init__((self)) -> None:

def __init__((self, client: OpenAI)):

def create((
        self, model: str, messages: list[dict[str, str]], temperature: float = 0.7, **kwargs: Any
    )) -> ChatCompletionResponse:

def __init__((self, api_key: str, base_url: str | None = None)):

def __init__((self)) -> None:


# File: /Users/adam/Developer/vcs3/github.twardoch/pub/abersetz/src/abersetz/pipeline.py
# Language: python

import json
from collections.abc import Iterable
from dataclasses import dataclass, field
from pathlib import Path
from .chunking import TextFormat, chunk_text, detect_format
from .config import AbersetzConfig, load_config
from .engine_catalog import normalize_selector
from .engines import Engine, EngineRequest, EngineResult, create_engine
from loguru import logger

DEFAULT_PATTERNS = =

class TranslatorOptions:

class TranslationResult:

class PipelineError(RuntimeError):

def translate_path((
    path: Path | str,
    options: TranslatorOptions | None = None,
    *,
    config: AbersetzConfig | None = None,
    client: object | None = None,
)) -> list[TranslationResult]:

def _merge_defaults((options: TranslatorOptions | None, config: AbersetzConfig)) -> TranslatorOptions:

def _discover_files((root: Path, opts: TranslatorOptions)) -> Iterable[Path]:

def _is_xcluded((path: Path, patterns: tuple[str, ...])) -> bool:

def _translate_file((
    source: Path,
    engine: Engine,
    opts: TranslatorOptions,
    config: AbersetzConfig,
)) -> TranslationResult:

def _apply_engine((
    engine: Engine,
    chunks: Iterable[str],
    fmt: TextFormat,
    opts: TranslatorOptions,
    config: AbersetzConfig,
)) -> tuple[list[EngineResult], dict[str, str]]:

def _build_request((
    chunk: str,
    index: int,
    total: int,
    fmt: TextFormat,
    opts: TranslatorOptions,
    config: AbersetzConfig,
    voc: dict[str, str],
    prolog: dict[str, str],
)) -> EngineRequest:

def _select_chunk_size((
    fmt: TextFormat,
    engine: Engine,
    opts: TranslatorOptions,
    config: AbersetzConfig,
)) -> int:

def _persist_output((
    source: Path,
    content: str,
    voc: dict[str, str],
    fmt: TextFormat,
    opts: TranslatorOptions,
    target_lang: str,
)) -> Path:


# File: /Users/adam/Developer/vcs3/github.twardoch/pub/abersetz/src/abersetz/setup.py
# Language: python

import os
from collections.abc import Sequence
from dataclasses import dataclass, field
import httpx
from loguru import logger
from rich.console import Console
from rich.progress import Progress
from rich.table import Table
from .config import AbersetzConfig, Credential, EngineConfig, save_config
from .engine_catalog import (
    COMMUNITY_DEEP_TRANSLATOR_PROVIDERS,
    COMMUNITY_TRANSLATOR_PROVIDERS,
    DEEP_TRANSLATOR_FREE_PROVIDERS,
    FREE_TRANSLATOR_PROVIDERS,
    HYSF_DEFAULT_MODEL,
    HYSF_DEFAULT_TEMPERATURE,
    PAID_TRANSLATOR_PROVIDERS,
    collect_deep_translator_providers,
    collect_translator_providers,
    normalize_selector,
)
from .validation import ValidationResult, validate_engines
import sys

KNOWN_PROVIDERS = =
PROVIDER_METADATA = :

class DiscoveredProvider:

class SetupWizard:
    def __init__((
        self,
        non_interactive: bool = False,
        verbose: bool = False,
        include_community: bool = False,
    )):
    def run((self)) -> bool:
    def _validate_config((self, config: AbersetzConfig)) -> None:
    def _discover_providers((self)) -> None:
    def _test_endpoints((self)) -> None:
    def _test_single_endpoint((self, provider: DiscoveredProvider)) -> None:
    def _display_results((self)) -> None:
    def _generate_config((self)) -> AbersetzConfig | None:

def __init__((
        self,
        non_interactive: bool = False,
        verbose: bool = False,
        include_community: bool = False,
    )):

def run((self)) -> bool:

def _validate_config((self, config: AbersetzConfig)) -> None:

def _discover_providers((self)) -> None:

def _test_endpoints((self)) -> None:

def _test_single_endpoint((self, provider: DiscoveredProvider)) -> None:

def _display_results((self)) -> None:

def _generate_config((self)) -> AbersetzConfig | None:

def _select_default_engine((
    engines: dict[str, EngineConfig],
    providers: Sequence[DiscoveredProvider],
)) -> str | None:

def setup_command((
    non_interactive: bool = False,
    verbose: bool = False,
    include_community: bool = False,
)) -> None:


# File: /Users/adam/Developer/vcs3/github.twardoch/pub/abersetz/src/abersetz/validation.py
# Language: python

from collections.abc import Callable, Iterable, Sequence
from dataclasses import dataclass
from time import perf_counter
from loguru import logger
from .config import AbersetzConfig, load_config
from .engine_catalog import normalize_selector
from .engines import EngineError, EngineRequest, create_engine

class ValidationResult:

def _append_selector((collection: list[str], seen: set[str], selector: str | None)) -> None:

def _extract_providers((options: dict[str, object], key: str)) -> list[str]:

def _selector_sort_key((selector: str)) -> tuple[int, str]:

def _selectors_from_config((config: AbersetzConfig, include_defaults: bool)) -> list[str]:

def _ensure_engine_request((sample_text: str, source_lang: str, target_lang: str)) -> EngineRequest:

def validate_engines((
    config: AbersetzConfig | None = None,
    *,
    selectors: Iterable[str] | None = None,
    sample_text: str = "Hello, world!",
    source_lang: str = "auto",
    target_lang: str = "es",
    client: object | None = None,
    create_engine_fn: Callable[..., object] | None = None,
    include_defaults: bool = True,
)) -> list[ValidationResult]:


# File: /Users/adam/Developer/vcs3/github.twardoch/pub/abersetz/tests/conftest.py
# Language: python

import sys
from pathlib import Path
import pytest

ROOT = =
SRC = =

def _temp_config_dir((tmp_path: Path, monkeypatch: pytest.MonkeyPatch)) -> Path:


# File: /Users/adam/Developer/vcs3/github.twardoch/pub/abersetz/tests/test_chunking.py
# Language: python

import builtins
from abersetz.chunking import TextFormat, chunk_text, detect_format

def test_detect_format_identifies_html(()) -> None:

def test_chunk_text_preserves_round_trip(()) -> None:

def test_html_chunking_returns_single_chunk(()) -> None:

def test_chunk_text_returns_empty_for_blank_input(()) -> None:

def test_chunk_text_fallback_runs_without_semantic_splitter((monkeypatch)) -> None:

def fake_import((name, *args, **kwargs)):


# File: /Users/adam/Developer/vcs3/github.twardoch/pub/abersetz/tests/test_cli.py
# Language: python

from pathlib import Path
import pytest
import tomllib
from abersetz.chunking import TextFormat
from abersetz.cli import (
    AbersetzCLI,
    _collect_engine_entries,
    _build_options_from_cli,
    _load_json_data,
    _parse_patterns,
    _render_engine_entries,
    _render_results,
    _render_validation_entries,
    abtr_main,
    main,
)
from abersetz.config import AbersetzConfig, Credential, Defaults, EngineConfig, save_config
from abersetz.pipeline import PipelineError, TranslationResult, TranslatorOptions
from abersetz.validation import ValidationResult
import io
from rich.console import Console
import io
from rich.console import Console
import io
from rich.console import Console
import io
from rich.console import Console
import io
from rich.console import Console
import io
from rich.console import Console

class DummyLogger:
    def __init__((self)) -> None:
    def remove((self)) -> None:
    def add((self, *args, **kwargs)):
    def debug((self, message: str, *args, **kwargs)) -> None:

def test_cli_translate_wires_arguments((monkeypatch: pytest.MonkeyPatch, tmp_path: Path)) -> None:

def fake_translate_path((path: str, options: TranslatorOptions)):

def test_cli_translate_accepts_path_output((monkeypatch: pytest.MonkeyPatch, tmp_path: Path)) -> None:

def fake_translate_path((path: str | Path, options: TranslatorOptions)):

def test_cli_accepts_legacy_engine_selector((
    monkeypatch: pytest.MonkeyPatch, tmp_path: Path
)) -> None:

def fake_translate_path((path: str, options: TranslatorOptions)):

def test_cli_translate_reports_pipeline_error((
    monkeypatch: pytest.MonkeyPatch, tmp_path: Path
)) -> None:

def fake_print((message: str)) -> None:

def fake_translate_path((path: str, options: TranslatorOptions)):

def test_parse_patterns_handles_none_and_iterables(()) -> None:

def test_load_json_data_prefers_files((tmp_path: Path)) -> None:

def test_build_options_requires_target_language((tmp_path: Path)) -> None:

def test_build_options_loads_prolog_and_voc_json((tmp_path: Path)) -> None:

def test_build_options_propagates_optional_flags((tmp_path: Path)) -> None:

def test_build_options_defaults_include_when_none((tmp_path: Path)) -> None:

def test_build_options_resolves_output_dir((tmp_path: Path)) -> None:

def test_render_engine_entries_handles_empty((monkeypatch: pytest.MonkeyPatch)) -> None:

def test_render_validation_entries_handles_empty((monkeypatch: pytest.MonkeyPatch)) -> None:

def test_render_results_lists_destinations((monkeypatch: pytest.MonkeyPatch, tmp_path: Path)) -> None:

def _stub_engine_entries((monkeypatch: pytest.MonkeyPatch)) -> AbersetzConfig:

def test_collect_engine_entries_handles_provider_strings((
    _stub_engine_entries: AbersetzConfig,
)) -> None:

def test_collect_engine_entries_family_accepts_long_name((
    _stub_engine_entries: AbersetzConfig,
)) -> None:

def test_collect_engine_entries_configured_only_with_family((
    _stub_engine_entries: AbersetzConfig,
)) -> None:

def test_collect_engine_entries_accepts_single_provider_string((
    monkeypatch: pytest.MonkeyPatch,
)) -> None:

def test_collect_engine_entries_string_branches((
    monkeypatch: pytest.MonkeyPatch,
)) -> None:

def test_collect_engine_entries_includes_local_engines((
    monkeypatch: pytest.MonkeyPatch,
)) -> None:

def test_collect_engine_entries_handles_deep_translator_string_providers((
    monkeypatch: pytest.MonkeyPatch,
)) -> None:

def test_cli_config_commands_show_and_path((monkeypatch: pytest.MonkeyPatch, tmp_path: Path)) -> None:

def test_cli_lang_lists_languages((monkeypatch: pytest.MonkeyPatch)) -> None:

def test_cli_verbose_logs_translation_details((
    monkeypatch: pytest.MonkeyPatch, tmp_path: Path
)) -> None:

def __init__((self)) -> None:

def remove((self)) -> None:

def add((self, *args, **kwargs)):

def debug((self, message: str, *args, **kwargs)) -> None:

def fake_translate_path((path: str, options: TranslatorOptions)):

def test_cli_engines_lists_configured_providers((monkeypatch: pytest.MonkeyPatch)) -> None:

def test_cli_engines_supports_filters((monkeypatch: pytest.MonkeyPatch)) -> None:

def render((
        family: str | None = None, *, configured_only: bool = False, include_paid: bool = False
    )) -> str:

def test_cli_validate_renders_results((monkeypatch: pytest.MonkeyPatch)) -> None:

def test_cli_validate_accepts_selector_string((monkeypatch: pytest.MonkeyPatch)) -> None:

def fake_validate((config: AbersetzConfig, **kwargs: object)):

def test_cli_setup_forwards_flags((monkeypatch: pytest.MonkeyPatch)) -> None:

def fake_setup_command((*, non_interactive: bool, verbose: bool)) -> None:

def test_cli_main_invokes_fire((monkeypatch: pytest.MonkeyPatch)) -> None:

def fake_fire((target: object, *args: object, **kwargs: object)) -> None:

def test_cli_abtr_main_invokes_fire_with_tr((monkeypatch: pytest.MonkeyPatch)) -> None:

def fake_fire((target: object, *args: object, **kwargs: object)) -> None:


# File: /Users/adam/Developer/vcs3/github.twardoch/pub/abersetz/tests/test_config.py
# Language: python

from pathlib import Path
import pytest
import abersetz.config as config_module
import tomllib
import tomli as tomllib
import platform
from loguru import logger
from loguru import logger
from loguru import logger

def test_load_config_yields_defaults((tmp_path: Path)) -> None:

def test_save_config_persists_changes((tmp_path: Path)) -> None:

def test_resolve_credential_prefers_environment((monkeypatch: pytest.MonkeyPatch)) -> None:

def test_load_config_handles_malformed_toml((
    tmp_path: Path, monkeypatch: pytest.MonkeyPatch
)) -> None:

def test_load_config_handles_permission_error((
    tmp_path: Path, monkeypatch: pytest.MonkeyPatch
)) -> None:

def test_defaults_normalize_legacy_selector(()) -> None:

def test_defaults_from_dict_normalizes_selector(()) -> None:

def test_defaults_from_dict_when_none_returns_defaults(()) -> None:

def test_engine_config_from_dict_when_none_returns_empty_block(()) -> None:

def test_engine_config_to_dict_includes_optional_fields(()) -> None:

def test_credential_to_dict_includes_optional_fields(()) -> None:

def test_credential_from_any_rejects_unsupported_payload(()) -> None:

def test_credential_from_any_handles_mapping_payload(()) -> None:

def test_config_dir_when_env_missing_then_uses_platformdirs((
    tmp_path: Path, monkeypatch: pytest.MonkeyPatch
)) -> None:

def test_resolve_credential_when_env_missing_then_logs_hint((
    monkeypatch: pytest.MonkeyPatch,
)) -> None:

def test_resolve_credential_recurses_into_stored_secret((
    monkeypatch: pytest.MonkeyPatch,
)) -> None:

def test_resolve_credential_with_recursive_name_logs_once((
    monkeypatch: pytest.MonkeyPatch,
)) -> None:

def test_resolve_credential_returns_none_for_null_reference(()) -> None:

def test_resolve_credential_reuses_stored_alias_object(()) -> None:


# File: /Users/adam/Developer/vcs3/github.twardoch/pub/abersetz/tests/test_engine_catalog.py
# Language: python

import builtins
import sys
from types import SimpleNamespace
from typing import Any
import pytest
from abersetz.engine_catalog import (
    _filter_available,
    collect_deep_translator_providers,
    collect_translator_providers,
    normalize_selector,
    resolve_engine_reference,
)

def test_normalize_selector_converts_long_to_short(()) -> None:

def test_normalize_selector_is_idempotent(()) -> None:

def test_normalize_selector_preserves_unknowns(()) -> None:

def test_normalize_selector_returns_none_for_none(()) -> None:

def test_normalize_selector_handles_blank_input(()) -> None:

def test_normalize_selector_handles_missing_base(()) -> None:

def test_resolve_engine_reference_handles_short_alias(()) -> None:

def test_resolve_engine_reference_handles_long_selector(()) -> None:

def test_resolve_engine_reference_handles_base_only_alias(()) -> None:

def test_filter_available_when_allowed_duplicates_then_dedupes(()) -> None:

def test_collect_translator_providers_when_import_fails_then_returns_empty((
    monkeypatch: pytest.MonkeyPatch,
)) -> None:

def fake_import((name: str, *args: Any, **kwargs: Any)):

def test_collect_translator_providers_when_paid_requested_then_keeps_order((
    monkeypatch: pytest.MonkeyPatch,
)) -> None:

def test_collect_deep_translator_providers_include_paid_appends_once(()) -> None:


# File: /Users/adam/Developer/vcs3/github.twardoch/pub/abersetz/tests/test_engines.py
# Language: python

import sys
from pathlib import Path
from types import SimpleNamespace
import pytest
from langcodes import get as get_language
import abersetz.config as config_module
import abersetz.engines as engines_module
from abersetz.chunking import TextFormat
from abersetz.engines import EngineBase, EngineError, EngineRequest, create_engine
from abersetz.engines import DeepTranslatorEngine

class DummyClient:
    def __init__((self, payload: str)):
    def _create((self, **kwargs: object)) -> SimpleNamespace:

class MockTranslator:
    def __init__((self, source: str, target: str)):
    def translate((self, text: str)) -> str:

class FakeLlama:
    def __init__((self, **kwargs: object)) -> None:
    def create_chat_completion((self, **kwargs: object)) -> dict[str, object]:

def __init__((self, payload: str)):

def _create((self, **kwargs: object)) -> SimpleNamespace:

def test_translators_engine_invokes_library((monkeypatch: pytest.MonkeyPatch)) -> None:

def fake_translate_text((
        text: str, translator: str, from_language: str, to_language: str, **_: object
    )) -> str:

def test_translators_engine_handles_html_requests((monkeypatch: pytest.MonkeyPatch)) -> None:

def fake_translate_html((
        text: str, translator: str, from_language: str, to_language: str, **_: object
    )) -> str:

def test_hysf_engine_uses_fixed_prompt((monkeypatch: pytest.MonkeyPatch)) -> None:

def test_engine_base_chunk_size_prefers_html_then_plain(()) -> None:

def test_ullm_engine_uses_profile((monkeypatch: pytest.MonkeyPatch)) -> None:

def test_translators_engine_retry_on_failure((monkeypatch: pytest.MonkeyPatch)) -> None:

def fake_translate_with_retry((
        text: str, translator: str, from_language: str, to_language: str, **_: object
    )) -> str:

def test_create_engine_accepts_legacy_selector(()) -> None:

def test_deep_translator_engine_retry_on_failure((monkeypatch: pytest.MonkeyPatch)) -> None:

def __init__((self, source: str, target: str)):

def translate((self, text: str)) -> str:

def test_deep_translator_engine_rejects_unknown_provider((monkeypatch: pytest.MonkeyPatch)) -> None:

def test_build_llm_engine_without_model_raises_engine_error(()) -> None:

def test_build_llm_engine_without_credential_raises_engine_error((
    monkeypatch: pytest.MonkeyPatch,
)) -> None:

def test_build_hysf_engine_without_credential_raises((monkeypatch: pytest.MonkeyPatch)) -> None:

def test_select_profile_defaults_to_default_variant(()) -> None:

def test_select_profile_without_profiles_returns_none(()) -> None:

def test_select_profile_unknown_variant_raises_engine_error(()) -> None:

def test_make_openai_client_respects_base_url(()) -> None:

def test_make_openai_client_defaults_to_openai_url(()) -> None:

def test_create_engine_with_unknown_configured_base_raises_engine_error(()) -> None:

def _make_llm_engine(()) -> engines_module.LlmEngine:

def test_llm_engine_parse_payload_without_vocab(()) -> None:

def test_llm_engine_parse_payload_with_malformed_vocab(()) -> None:

def test_llm_engine_parse_payload_with_non_mapping_vocab(()) -> None:

def test_create_engine_raises_when_config_missing_selector(()) -> None:

def test_resolve_mthy_language_accepts_codes_and_names(()) -> None:

def test_resolve_mthy_language_unknown_raises_engine_error(()) -> None:

def test_local_mthy_mlx_engine_translates_with_prompt((
    monkeypatch: pytest.MonkeyPatch, tmp_path: Path
)) -> None:

def fake_apply_chat_template((messages: list[dict[str, str]], **_: object)) -> str:

def fake_generate((*_: object, **kwargs: object)) -> str:

def fake_load((_: str)) -> tuple[object, object]:

def test_local_gemma_gguf_engine_uses_structured_messages((
    monkeypatch: pytest.MonkeyPatch, tmp_path: Path
)) -> None:

def __init__((self, **kwargs: object)) -> None:

def create_chat_completion((self, **kwargs: object)) -> dict[str, object]:


# File: /Users/adam/Developer/vcs3/github.twardoch/pub/abersetz/tests/test_examples.py
# Language: python

import asyncio
import importlib.util
import json
import runpy
import sys
from collections.abc import Callable
from dataclasses import dataclass, field
from pathlib import Path
from typing import Any, Protocol, cast
import pytest
from abersetz.chunking import TextFormat
from abersetz.pipeline import TranslationResult, TranslatorOptions

class _StubResult:
    def __init__((
        self, source: str, destination: str, *, fmt: TextFormat = TextFormat.PLAIN
    )) -> None:

class _BasicApiModule(Protocol):
    def format_example_doc((self, func: Callable[..., object])) -> str:
    def example_simple((self)) -> None:
    def example_batch((self)) -> None:
    def example_dry_run((self)) -> None:
    def example_html((self)) -> None:
    def example_with_config((self)) -> None:
    def example_llm_with_voc((self)) -> None:
    def cli((self, example: str | None = None)) -> None:

class _Defaults:

class _Config:

class _StubEngine:
    def __init__((self, name: str)) -> None:
    def translate((self, request: Any)):

def __init__((
        self, source: str, destination: str, *, fmt: TextFormat = TextFormat.PLAIN
    )) -> None:

def format_example_doc((self, func: Callable[..., object])) -> str:

def example_simple((self)) -> None:

def example_batch((self)) -> None:

def example_dry_run((self)) -> None:

def example_html((self)) -> None:

def example_with_config((self)) -> None:

def example_llm_with_voc((self)) -> None:

def cli((self, example: str | None = None)) -> None:

def _load_basic_api(()) -> _BasicApiModule:

def _load_advanced_api(()):

def test_format_example_doc_handles_none(()) -> None:

def _no_doc(()) -> None:

def test_format_example_doc_strips_whitespace(()) -> None:

def _with_doc(()) -> None:

def test_example_simple_outputs_summary((
    monkeypatch: pytest.MonkeyPatch, capsys: pytest.CaptureFixture[str]
)) -> None:

def fake_translate_path((path: str, options: TranslatorOptions)) -> list[_StubResult]:

def test_example_batch_uses_include_filters((
    monkeypatch: pytest.MonkeyPatch, capsys: pytest.CaptureFixture[str]
)) -> None:

def fake_translate_path((path: str, options: TranslatorOptions)) -> list[_StubResult]:

def test_example_dry_run_lists_files((
    monkeypatch: pytest.MonkeyPatch, capsys: pytest.CaptureFixture[str]
)) -> None:

def fake_translate_path((path: str, options: TranslatorOptions)) -> list[_StubResult]:

def test_example_html_preserves_markup_intent((
    monkeypatch: pytest.MonkeyPatch, capsys: pytest.CaptureFixture[str]
)) -> None:

def fake_translate_path((path: str, options: TranslatorOptions)) -> list[_StubResult]:

def test_example_with_config_uses_modified_defaults((monkeypatch: pytest.MonkeyPatch)) -> None:

def fake_load_config(()) -> _Config:

def fake_save_config((value: _Config)) -> None:

def fake_translate_path((
        path: str, options: TranslatorOptions | None = None, *, config: _Config
    )) -> list[_StubResult]:

def test_example_llm_with_voc_reports_final_vocab((
    monkeypatch: pytest.MonkeyPatch, capsys: pytest.CaptureFixture[str]
)) -> None:

def fake_translate_path((path: str, options: TranslatorOptions)) -> list[_StubResult]:

def test_translation_workflow_translate_project_collects_results_and_errors((
    monkeypatch: pytest.MonkeyPatch,
    tmp_path: Path,
)) -> None:

def fake_translate_path((path: str, options: TranslatorOptions, *, config: object | None = None)):

def test_translation_workflow_generate_report_creates_parent_dirs((tmp_path: Path)) -> None:

def test_translation_workflow_lazy_loads_config((monkeypatch: pytest.MonkeyPatch)) -> None:

def fake_load_config(()) -> object:

def test_voc_manager_translate_with_consistency_preserves_base_voc((
    monkeypatch: pytest.MonkeyPatch,
    capsys: pytest.CaptureFixture[str],
)) -> None:

def fake_translate_path((path: str, options: TranslatorOptions)) -> list[TranslationResult]:

def test_voc_manager_load_and_merge((tmp_path: Path)) -> None:

def test_parallel_translator_compare_translations_handles_failures((
    monkeypatch: pytest.MonkeyPatch, capsys: pytest.CaptureFixture[str]
)) -> None:

def __init__((self, name: str)) -> None:

def translate((self, request: Any)):

def fake_create_engine((name: str, config: object)):

def test_example_voc_consistency_writes_vocab((
    monkeypatch: pytest.MonkeyPatch,
    tmp_path: Path,
    capsys: pytest.CaptureFixture[str],
)) -> None:

def fake_translate_with_consistency((
        *, files: list[str], to_lang: str, base_voc: dict[str, str]
    )):

def test_example_parallel_comparison_invokes_async_run((
    monkeypatch: pytest.MonkeyPatch, capsys: pytest.CaptureFixture[str]
)) -> None:

def fake_compare((text: str, engines: list[str], to_lang: str)):

def fake_run((coro)):

def test_example_incremental_translation_processes_files((
    monkeypatch: pytest.MonkeyPatch,
    tmp_path: Path,
    capsys: pytest.CaptureFixture[str],
)) -> None:

def fake_translate_path((
        path: str,
        options: TranslatorOptions,
        *,
        config: object | None = None,
    )) -> list[TranslationResult]:

def test_example_incremental_translation_reuses_checkpoint((
    monkeypatch: pytest.MonkeyPatch,
    tmp_path: Path,
    capsys: pytest.CaptureFixture[str],
)) -> None:

def fake_translate_path((
        path: str,
        options: TranslatorOptions,
        *,
        config: object | None = None,
    )) -> list[TranslationResult]:

def test_basic_api_cli_dispatch_runs_requested_example((
    monkeypatch: pytest.MonkeyPatch,
    capsys: pytest.CaptureFixture[str],
)) -> None:

def fake_translate_path((path: str, options: TranslatorOptions)) -> list[_StubResult]:

def test_basic_api_cli_usage_banner((
    monkeypatch: pytest.MonkeyPatch, capsys: pytest.CaptureFixture[str]
)) -> None:

def test_advanced_api_cli_dispatch_runs_requested_example((
    monkeypatch: pytest.MonkeyPatch,
    capsys: pytest.CaptureFixture[str],
)) -> None:

def fake_translate_path((
        path: str,
        options: TranslatorOptions,
        *,
        config: object | None = None,
    )) -> list[TranslationResult]:

def test_advanced_api_cli_usage_banner((
    monkeypatch: pytest.MonkeyPatch,
    capsys: pytest.CaptureFixture[str],
)) -> None:


# File: /Users/adam/Developer/vcs3/github.twardoch/pub/abersetz/tests/test_integration.py
# Language: python

import os
import pytest
from abersetz import TranslatorOptions, translate_path
from abersetz.config import load_config
from abersetz.engines import EngineRequest, create_engine
from unittest.mock import patch
import requests

def test_translators_google_real(()) -> None:

def test_deep_translator_google_real(()) -> None:

def test_hysf_engine_real(()) -> None:

def test_translate_file_api((tmp_path)) -> None:

def test_html_translation(()) -> None:

def test_translators_bing_real(()) -> None:

def test_batch_translation_with_voc(()) -> None:

def test_retry_on_network_failure(()) -> None:

def flaky_get((*args, **kwargs)):


# File: /Users/adam/Developer/vcs3/github.twardoch/pub/abersetz/tests/test_offline.py
# Language: python

import tempfile
from pathlib import Path
import pytest
from abersetz.cli import AbersetzCLI
from abersetz.pipeline import TranslatorOptions, translate_path
from abersetz.pipeline import PipelineError
import abersetz
from abersetz import TranslatorOptions, translate_path
from abersetz.cli import AbersetzCLI
from abersetz.config import AbersetzConfig
from abersetz.pipeline import PipelineError, TranslationResult

def test_cli_help_works_offline(()) -> None:

def test_config_commands_work_offline(()) -> None:

def test_dry_run_works_offline(()) -> None:

def test_input_validation_works_offline(()) -> None:

def test_empty_file_handling_works_offline(()) -> None:

def test_import_works_offline(()) -> None:

def test_edge_case_files_offline((file_content: str)) -> None:


# File: /Users/adam/Developer/vcs3/github.twardoch/pub/abersetz/tests/test_openai_lite.py
# Language: python

from typing import Any, get_type_hints
import httpx
import pytest
from abersetz.openai_lite import Chat, ChatCompletions, OpenAI

class _DummyResponse:
    def __init__((self, status_code: int, payload: dict[str, Any])) -> None:
    def raise_for_status((self)) -> None:
    def json((self)) -> dict[str, Any]:

class _DummyClient:
    def __init__((self, response: _DummyResponse, calls: list[dict[str, Any]])) -> None:
    def __enter__((self)) -> _DummyClient:
    def __exit__((self, exc_type, exc, tb)) -> None:
    def post((self, url: str, *, json: dict[str, Any], headers: dict[str, str])) -> _DummyResponse:

def __init__((self, status_code: int, payload: dict[str, Any])) -> None:

def raise_for_status((self)) -> None:

def json((self)) -> dict[str, Any]:

def __init__((self, response: _DummyResponse, calls: list[dict[str, Any]])) -> None:

def __enter__((self)) -> _DummyClient:

def __exit__((self, exc_type, exc, tb)) -> None:

def post((self, url: str, *, json: dict[str, Any], headers: dict[str, str])) -> _DummyResponse:

def test_chat_completions_create_parses_response((monkeypatch: pytest.MonkeyPatch)) -> None:

def test_chat_completions_create_raises_for_http_errors((monkeypatch: pytest.MonkeyPatch)) -> None:

def test_openai_base_url_trims_trailing_slash(()) -> None:

def test_chat_declares_completions_attribute(()) -> None:

def test_openai_initializes_chat_completions(()) -> None:


# File: /Users/adam/Developer/vcs3/github.twardoch/pub/abersetz/tests/test_package.py
# Language: python

import pytest
import abersetz
import abersetz

def test_version(()) -> None:

def test_getattr_rejects_unknown_symbol(()) -> None:


# File: /Users/adam/Developer/vcs3/github.twardoch/pub/abersetz/tests/test_pipeline.py
# Language: python

import os
import sys
from pathlib import Path
import pytest
from abersetz.chunking import TextFormat
from abersetz.config import AbersetzConfig
from abersetz.engines import EngineResult
from abersetz.pipeline import PipelineError, TranslatorOptions, translate_path
from loguru import logger

class DummyEngine:
    def __init__((self)) -> None:
    def chunk_size_for((self, _fmt)) -> int:
    def translate((self, request)) -> EngineResult:

class ChunkyEngine(DummyEngine):
    def __init__((self)) -> None:
    def chunk_size_for((self, fmt)) -> int:

class TrackingDummy(DummyEngine):
    def __init__((self)) -> None:
    def chunk_size_for((self, fmt: TextFormat)) -> int:

class HtmlEngine(DummyEngine):
    def __init__((self)) -> None:
    def chunk_size_for((self, fmt: TextFormat)) -> int:

class TrackingHtmlEngine(DummyEngine):
    def __init__((self)) -> None:
    def chunk_size_for((self, fmt: TextFormat)) -> int:

def __init__((self)) -> None:

def chunk_size_for((self, _fmt)) -> int:

def translate((self, request)) -> EngineResult:

def test_translate_path_processes_files((tmp_path: Path, monkeypatch: pytest.MonkeyPatch)) -> None:

def test_translate_path_accepts_string_source_paths((
    tmp_path: Path, monkeypatch: pytest.MonkeyPatch
)) -> None:

def test_translate_path_normalizes_engine_selector((
    tmp_path: Path, monkeypatch: pytest.MonkeyPatch
)) -> None:

def fake_create_engine((selector: str, config, client=None)):

def test_translate_path_requires_matches((tmp_path: Path)) -> None:

def test_translate_path_uses_engine_chunk_size_when_defaults_falsy((
    tmp_path: Path, monkeypatch: pytest.MonkeyPatch
)) -> None:

def __init__((self)) -> None:

def chunk_size_for((self, fmt)) -> int:

def fake_create_engine((selector, config, client=None)):

def test_translate_path_uses_dummy_chunk_size_when_defaults_zero((
    tmp_path: Path, monkeypatch: pytest.MonkeyPatch
)) -> None:

def __init__((self)) -> None:

def chunk_size_for((self, fmt: TextFormat)) -> int:

def test_translate_path_html_uses_engine_chunk_hint((
    tmp_path: Path, monkeypatch: pytest.MonkeyPatch
)) -> None:

def __init__((self)) -> None:

def chunk_size_for((self, fmt: TextFormat)) -> int:

def test_translate_path_with_html_engine_handles_mixed_formats((
    tmp_path: Path, monkeypatch: pytest.MonkeyPatch
)) -> None:

def __init__((self)) -> None:

def chunk_size_for((self, fmt: TextFormat)) -> int:

def test_translate_path_handles_mixed_formats((
    tmp_path: Path, monkeypatch: pytest.MonkeyPatch
)) -> None:

def test_translate_path_errors_on_unreadable_file((tmp_path: Path)) -> None:

def test_translate_path_write_over_updates_source((
    tmp_path: Path, monkeypatch: pytest.MonkeyPatch
)) -> None:

def test_translate_path_dry_run_skips_io((tmp_path: Path, monkeypatch: pytest.MonkeyPatch)) -> None:

def test_translate_path_warns_on_large_file((
    tmp_path: Path, monkeypatch: pytest.MonkeyPatch
)) -> None:

def fake_stat((self: Path, *, follow_symlinks: bool = True)) -> os.stat_result:


# File: /Users/adam/Developer/vcs3/github.twardoch/pub/abersetz/tests/test_setup.py
# Language: python

from collections.abc import Sequence
from typing import Any
import httpx
import pytest
from abersetz.config import AbersetzConfig
from abersetz.engine_catalog import normalize_selector
from abersetz.setup import (
    DiscoveredProvider,
    EngineConfig,
    SetupWizard,
    _select_default_engine,
    setup_command,
)
import io
from rich.console import Console
from loguru import logger
from loguru import logger
import io
from rich.console import Console
import io
from rich.console import Console
import re
import io
from rich.console import Console
from abersetz.validation import ValidationResult
import io
from rich.console import Console

class _StubProgress:
    def __enter__((self)) -> _StubProgress:
    def __exit__((self, exc_type, exc, tb)) -> None:
    def add_task((self, description: str, total: int)) -> str:
    def update((self, task: str, advance: int)) -> None:

class _DummyResponse:

class _DummyClient:
    def __init__((self, *args: Any, **kwargs: Any)) -> None:
    def __enter__((self)) -> _DummyClient:
    def __exit__((self, exc_type, exc, tb)) -> None:
    def get((self, url: str, headers: dict[str, str])):

class _ListResponse:

class _Client:
    def __enter__((self)) -> _Client:
    def __exit__((self, exc_type, exc, tb)) -> None:
    def get((self, url: str, headers: dict[str, str])):

class _Response:

class _Client:
    def __enter__((self)) -> _Client:
    def __exit__((self, exc_type, exc, tb)) -> None:
    def get((self, url: str, headers: dict[str, str])):

class _TimeoutClient:
    def __init__((self, *args: Any, **kwargs: Any)) -> None:
    def __enter__((self)) -> _TimeoutClient:
    def __exit__((self, exc_type, exc, tb)) -> None:
    def get((self, url: str, headers: dict[str, str])):

class _HttpErrorResponse:

class _HttpErrorClient:
    def __enter__((self)) -> _HttpErrorClient:
    def __exit__((self, exc_type, exc, tb)) -> None:
    def get((self, url: str, headers: dict[str, str])):

class _Client:
    def __enter__((self)) -> _Client:
    def __exit__((self, exc_type, exc, tb)) -> None:
    def get((self, url: str, headers: dict[str, str])):

class _Resp:

class _OddResponse:

class _Client:
    def __enter__((self)) -> _Client:
    def __exit__((self, exc_type, exc, tb)) -> None:
    def get((self, url: str, headers: dict[str, str])):

class _FailureResponse:

class _Client:
    def __enter__((self)) -> _Client:
    def __exit__((self, exc_type, exc, tb)) -> None:
    def get((self, url: str, headers: dict[str, str])):

class _Client:
    def __enter__((self)) -> _Client:
    def __exit__((self, exc_type, exc, tb)) -> None:
    def get((self, url: str, headers: dict[str, str])):

class _Progress:
    def __init__((self, *args: Any, **kwargs: Any)) -> None:
    def __enter__((self)) -> _Progress:
    def __exit__((self, exc_type, exc, tb)) -> None:
    def add_task((self, description: str, total: int)) -> str:
    def update((self, task: str, advance: int)) -> None:

class _Client:
    def __enter__((self)) -> _Client:
    def __exit__((self, exc_type, exc, tb)) -> None:
    def get((self, url: str, headers: dict[str, str])):

class _Response:

class _Client:
    def __enter__((self)) -> _Client:
    def __exit__((self, exc_type, exc, tb)) -> None:
    def get((self, url: str, headers: dict[str, str])):

class _Wizard:
    def __init__((self, *args: Any, **kwargs: Any)) -> None:
    def run((self)) -> bool:

def _stub_phase((*args: Any, **kwargs: Any)) -> None:

def __enter__((self)) -> _StubProgress:

def __exit__((self, exc_type, exc, tb)) -> None:

def add_task((self, description: str, total: int)) -> str:

def update((self, task: str, advance: int)) -> None:

def test_setup_wizard_triggers_validation((monkeypatch)) -> None:

def test_setup_wizard_skips_validation_when_no_config((monkeypatch)) -> None:

def test_discover_providers_adds_pricing_hint((monkeypatch)) -> None:

def test_discover_providers_includes_deepl_engine_mapping((monkeypatch)) -> None:

def test_display_results_shows_pricing_column((monkeypatch)) -> None:

def test_generate_config_builds_engines((monkeypatch)) -> None:

def test_generate_config_excludes_community_providers_by_default((
    monkeypatch: pytest.MonkeyPatch,
)) -> None:

def test_generate_config_includes_community_providers_when_requested((
    monkeypatch: pytest.MonkeyPatch,
)) -> None:

def test_generate_config_prefers_hysf_when_translators_unavailable((monkeypatch)) -> None:

def test_generate_config_defaults_to_ullm_when_only_openai((monkeypatch)) -> None:

def test_test_single_endpoint_success((monkeypatch)) -> None:

def json(()) -> dict[str, list[int]]:

def __init__((self, *args: Any, **kwargs: Any)) -> None:

def __enter__((self)) -> _DummyClient:

def __exit__((self, exc_type, exc, tb)) -> None:

def get((self, url: str, headers: dict[str, str])):

def test_test_single_endpoint_parses_list_payload((monkeypatch)) -> None:

def json(()) -> list[int]:

def __enter__((self)) -> _Client:

def __exit__((self, exc_type, exc, tb)) -> None:

def get((self, url: str, headers: dict[str, str])):

def test_test_single_endpoint_logs_verbose_status((monkeypatch)) -> None:

def json(()) -> dict[str, list[int]]:

def __enter__((self)) -> _Client:

def __exit__((self, exc_type, exc, tb)) -> None:

def get((self, url: str, headers: dict[str, str])):

def test_test_single_endpoint_timeout((monkeypatch)) -> None:

def __init__((self, *args: Any, **kwargs: Any)) -> None:

def __enter__((self)) -> _TimeoutClient:

def __exit__((self, exc_type, exc, tb)) -> None:

def get((self, url: str, headers: dict[str, str])):

def test_test_single_endpoint_http_error((monkeypatch)) -> None:

def json(()) -> dict[str, list[int]]:

def __enter__((self)) -> _HttpErrorClient:

def __exit__((self, exc_type, exc, tb)) -> None:

def get((self, url: str, headers: dict[str, str])):

def test_validate_config_logs_failures((monkeypatch)) -> None:

def fake_warning((message: str, selector: str, error: str)) -> None:

def test_validate_config_returns_immediately_when_no_results((monkeypatch)) -> None:

def fail_print((*args: Any, **kwargs: Any)) -> None:

def test_test_endpoints_handles_non_api_providers((monkeypatch)) -> None:

def test_test_endpoints_invokes_single_endpoint_for_api_providers((monkeypatch)) -> None:

def _capture_single_endpoint((self: SetupWizard, provider: DiscoveredProvider)) -> None:

def test_test_single_endpoint_uses_anthropic_headers((monkeypatch)) -> None:

def __enter__((self)) -> _Client:

def __exit__((self, exc_type, exc, tb)) -> None:

def get((self, url: str, headers: dict[str, str])):

def json(()) -> dict[str, list[int]]:

def test_test_single_endpoint_defaults_model_count_for_unknown_payload((monkeypatch)) -> None:

def json(()) -> str:

def __enter__((self)) -> _Client:

def __exit__((self, exc_type, exc, tb)) -> None:

def get((self, url: str, headers: dict[str, str])):

def test_test_single_endpoint_logs_failure_when_verbose((monkeypatch)) -> None:

def json(()) -> dict[str, str]:

def __enter__((self)) -> _Client:

def __exit__((self, exc_type, exc, tb)) -> None:

def get((self, url: str, headers: dict[str, str])):

def test_test_single_endpoint_general_exception((monkeypatch)) -> None:

def __enter__((self)) -> _Client:

def __exit__((self, exc_type, exc, tb)) -> None:

def get((self, url: str, headers: dict[str, str])):

def test_generate_config_returns_none_when_empty(()) -> None:

def test_setup_wizard_run_interactive_success((monkeypatch)) -> None:

def __init__((self, *args: Any, **kwargs: Any)) -> None:

def __enter__((self)) -> _Progress:

def __exit__((self, exc_type, exc, tb)) -> None:

def add_task((self, description: str, total: int)) -> str:

def update((self, task: str, advance: int)) -> None:

def fake_discover((self: SetupWizard)) -> None:

def fake_generate((self: SetupWizard)) -> AbersetzConfig:

def test_setup_wizard_run_interactive_no_config((monkeypatch)) -> None:

def fake_discover((self: SetupWizard)) -> None:

def test_validate_config_renders_table((monkeypatch)) -> None:

def test_discover_providers_verbose_logs((monkeypatch)) -> None:

def test_test_single_endpoint_connect_error((monkeypatch)) -> None:

def __enter__((self)) -> _Client:

def __exit__((self, exc_type, exc, tb)) -> None:

def get((self, url: str, headers: dict[str, str])):

def test_test_single_endpoint_handles_json_errors((monkeypatch)) -> None:

def json(()) -> dict[str, Any]:

def __enter__((self)) -> _Client:

def __exit__((self, exc_type, exc, tb)) -> None:

def get((self, url: str, headers: dict[str, str])):

def test_test_single_endpoint_no_base_url(()) -> None:

def test_display_results_no_providers((monkeypatch)) -> None:

def test_select_default_engine_prefers_deepl((monkeypatch: pytest.MonkeyPatch)) -> None:

def test_select_default_engine_prefers_translators_then_hysf((
    monkeypatch: pytest.MonkeyPatch,
)) -> None:

def test_select_default_engine_prefers_ullm_when_present((monkeypatch: pytest.MonkeyPatch)) -> None:

def test_select_default_engine_falls_back_to_first_engine(()) -> None:

def test_select_default_engine_returns_none_when_empty(()) -> None:

def test_generate_config_uses_fallbacks((monkeypatch)) -> None:

def test_setup_command_exits_on_failure((monkeypatch)) -> None:

def test_setup_command_succeeds((monkeypatch)) -> None:

def __init__((self, *args: Any, **kwargs: Any)) -> None:

def run((self)) -> bool:


# File: /Users/adam/Developer/vcs3/github.twardoch/pub/abersetz/tests/test_validation.py
# Language: python

from dataclasses import dataclass
from typing import Any
from abersetz import validation
from abersetz.config import AbersetzConfig, Defaults, EngineConfig
from abersetz.engines import EngineError, EngineRequest, EngineResult
from abersetz.validation import validate_engines

class _StubEngine:
    def translate((self, request: EngineRequest)) -> EngineResult:
    def chunk_size_for((self, fmt: Any)) -> int | None:

def translate((self, request: EngineRequest)) -> EngineResult:

def chunk_size_for((self, fmt: Any)) -> int | None:

def _build_config(()) -> AbersetzConfig:

def test_validate_engines_collects_results(()) -> None:

def fake_create_engine((
        selector: str, cfg: AbersetzConfig, *, client: Any | None = None
    )) -> _StubEngine:

def test_validate_engines_handles_failures(()) -> None:

def fake_create_engine((
        selector: str, cfg: AbersetzConfig, *, client: Any | None = None
    )) -> _StubEngine:

def test_validate_engines_limits_selectors(()) -> None:

def fake_create_engine((
        selector: str, cfg: AbersetzConfig, *, client: Any | None = None
    )) -> _StubEngine:

def test_validate_engines_flags_empty_translations(()) -> None:

def fake_create_engine((
        selector: str, cfg: AbersetzConfig, *, client: Any | None = None
    )) -> _StubEngine:

def test_append_selector_handles_empty_and_duplicates(()) -> None:

def test_extract_providers_merges_lists_and_fallback(()) -> None:

def test_selectors_from_config_collects_all_engines(()) -> None:


<document index="30">
<source>translation_report.json</source>
<document_content>
{
  "total_files": 5,
  "total_chunks": 5,
  "languages": {
    "build": {

... (Data file content truncated to first 5 lines)
</document_content>
</document>

</documents>
