forked from cimeister/tokenizer-analysis-suite
-
Notifications
You must be signed in to change notification settings - Fork 4
Expand file tree
/
Copy pathpyproject.toml
More file actions
68 lines (62 loc) · 1.78 KB
/
pyproject.toml
File metadata and controls
68 lines (62 loc) · 1.78 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
# Core package metadata (PEP 621).
[project]
name = "tokenizer-analysis"
version = "0.1.0"
description = "Comprehensive tokenizer analysis framework"
readme = "README.md"
requires-python = ">=3.10"
authors = [
    { name = "Apertus tokenization team" },
]
# Runtime requirements, kept in alphabetical order.
dependencies = [
    "fastparquet>=2025.12.0",
    "matplotlib>=3.3.0",
    "numpy>=1.20.0",
    "pandas>=1.3.0",
    "pyarrow>=23.0.1",
    "scipy>=1.7.0",
    "seaborn>=0.11.0",
    "tiktoken>=0.5.0",
    "tokenizers>=0.13.0",
    "transformers>=4.20.0",
    "tree-sitter-analyzer>=1.10.4",
    "tree-sitter-language-pack>=0.13.0",
]
# Trove classifiers. The per-version Python entries must stay inside the
# requires-python range above, so 3.8 and 3.9 are intentionally not listed.
classifiers = [
    "Development Status :: 3 - Alpha",
    "Intended Audience :: Developers",
    "Intended Audience :: Science/Research",
    "License :: OSI Approved :: MIT License",
    "Operating System :: OS Independent",
    "Programming Language :: Python :: 3",
    "Programming Language :: Python :: 3.10",
    "Programming Language :: Python :: 3.11",
    "Topic :: Scientific/Engineering :: Artificial Intelligence",
    "Topic :: Software Development :: Libraries :: Python Modules",
]
# Project links, written as `label = "URL"`.
[project.urls]
Repository = "https://github.com/sanderland/tokenizer-intrinsic-evals"
# Console entry points, written as `command = "module.path:callable"`.
# Every command here resolves to a `main` function under tokenizer_analysis.cli.
[project.scripts]
tokenizer-analysis = "tokenizer_analysis.cli.run_analysis:main"
tokenizer-visualize = "tokenizer_analysis.cli.visualize_tokenization:main"
update-remote = "tokenizer_analysis.cli.update_remote:main"
# Extras. Both packages below are also hard requirements in
# [project].dependencies, so these extras currently install nothing extra;
# the names are kept so existing `pkg[code-ast]` / `pkg[parquet]` installs
# keep working. Lower bounds mirror the core ones so the two declarations
# cannot drift apart.
[project.optional-dependencies]
code-ast = [
    "tree-sitter-language-pack>=0.13.0",
]
parquet = [
    "pyarrow>=23.0.1",
]
# Development-only tooling, declared as a dependency group (PEP 735):
# formatter, linter, type checker, and test runner.
[dependency-groups]
dev = [
"black>=21.0",
"flake8>=3.8",
"mypy>=0.800",
"pytest>=6.0",
]
# Build backend declaration: the package is built with hatchling.
[build-system]
requires = ["hatchling"]
build-backend = "hatchling.build"
# Hatch wheel target: the import package directory differs from the
# distribution name ("tokenizer-analysis"), so it is listed explicitly.
[tool.hatch.build.targets.wheel]
packages = ["tokenizer_analysis"]