Skip to content

Commit fc92b3e

Browse files
author
webdev778
committed
Initial commit
0 parents  commit fc92b3e

File tree

9 files changed

+283
-0
lines changed

9 files changed

+283
-0
lines changed

.gitignore

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
src/interscript/maps/*.py
2+
__pycache__

LICENSE.adoc

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
= Licenses & Copyright
2+
3+
This license file adheres to the formatting guidelines of
4+
https://github.com/nevir/readable-licenses[readable-licenses].
5+
6+
7+
== Ribose BSD 2-Clause License
8+
9+
Copyright (c) 2019-, https://www.ribose.com[Ribose Inc].
10+
All rights reserved.
11+
12+
Redistribution and use in source and binary forms, with or without modification,
13+
are permitted provided that the following conditions are met:
14+
15+
1. Redistributions of source code must retain the above copyright notice,
16+
this list of conditions and the following disclaimer.
17+
18+
2. Redistributions in binary form must reproduce the above copyright notice,
19+
this list of conditions and the following disclaimer in the documentation
20+
and/or other materials provided with the distribution.
21+
22+
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
23+
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
24+
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
25+
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
26+
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27+
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
28+
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
29+
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30+
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
31+
THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

README.adoc

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
= Interscript: Interoperable Script Conversion Systems for Python
2+
3+
== Purpose
4+
5+
This repository contains code for the Interscript Python runtime ("Interscript-Python").
6+
7+
This software allows performing script conversions by using the
8+
https://github.com/interscript/maps[default set of Interscript maps]
9+
hosted at GitHub.
10+
11+
Interscript is a project for interoperable script conversion systems
12+
and provides executable runtimes for multiple platforms.
13+
Full documentation is available https://github.com/interscript/interscript/[here].
14+
15+
== Integration
16+
17+
This section provides instructions on how to utilize Interscript-Python
18+
with your application.
19+
20+
Interscript-Python can be used as a Python library.
21+
22+
=== Configuration
23+
24+
[source,shell]
25+
----
26+
$ pip install interscript
27+
----
28+
29+
== Usage
30+
31+
[source,python]
32+
-----
33+
import interscript
34+
interscript.load_map('bgnpcgn-ukr-Cyrl-Latn-2019')
35+
print(interscript.transliterate('bgnpcgn-ukr-Cyrl-Latn-2019', input()))
36+
-----
37+
38+
39+
== Copyright and license
40+
41+
This is a Ribose project. Copyright Ribose.

example.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
# Demo script: transliterate a sample word using the in-tree package.
import sys

# Make the in-tree package importable when run from the repository root.
sys.path.append('./src')

import interscript

MAP_NAME = "bgnpcgn-ukr-Cyrl-Latn-2019"

# The map has to be loaded before it can be used for transliteration.
interscript.load_map(MAP_NAME)
print(interscript.transliterate(MAP_NAME, "привет"))

pyproject.toml

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
[project]
2+
name = "interscript"
3+
version = "0.0.1"
4+
authors = [
5+
{ name="Ribose Inc.", email="open.source@ribose.com" },
6+
]
7+
description = "Interoperable script conversion systems"
8+
readme = "README.adoc"
9+
requires-python = ">=3.8"
10+
classifiers = [
11+
"Programming Language :: Python :: 3",
12+
"License :: OSI Approved :: BSD License",
13+
"Operating System :: OS Independent",
14+
"Intended Audience :: Science/Research",
15+
"Intended Audience :: Developers",
16+
"Intended Audience :: Education",
17+
"Topic :: Text Processing :: Linguistic",
18+
]
19+
dependencies = ["regex"]
20+
21+
[project.urls]
22+
Homepage = "https://www.interscript.org"
23+
Issues = "https://github.com/interscript/interscript-python/issues"
24+
25+
[build-system]
26+
requires = ["hatchling"]
27+
build-backend = "hatchling.build"

src/interscript/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
from .interscript import *

src/interscript/functions.py

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
import regex as re
2+
import unicodedata
3+
4+
def title_case(output, opts):
    """Upper-case the first character of every line and of every word.

    Args:
        output: Text to convert.
        opts: Options mapping. ``word_separator`` (default ``" "``) is the
            string that delimits words; an empty string disables the
            per-word pass so only line starts are capitalised.

    Returns:
        The converted text. ``opts`` is left unmodified.
    """
    # Read the option with a fallback rather than writing the default back
    # into the caller's dict, which would leak state between calls.
    word_separator = opts.get('word_separator', " ")

    # First character of the string and of every line after a newline.
    output = re.sub(r'(^|\n)(.)', lambda a: a.group(0).upper(), output)

    if word_separator != "":
        # The separator is literal text, so escape it before embedding it.
        sep = re.escape(word_separator)
        output = re.sub(sep + r'(.)', lambda a: a.group(0).upper(), output)
    return output
12+
13+
def downcase(output, opts):
    """Return ``output`` converted to lower case; ``opts`` is not consulted."""
    lowered = output.lower()
    return lowered
15+
16+
def compose(output, opts):
    """Return ``output`` normalised to Unicode NFC; ``opts`` is not consulted."""
    form = "NFC"
    return unicodedata.normalize(form, output)
18+
19+
def decompose(output, opts):
    """Return ``output`` normalised to Unicode NFD; ``opts`` is not consulted."""
    form = "NFD"
    return unicodedata.normalize(form, output)
21+
22+
def separate(output, opts):
    """Insert a separator between every pair of adjacent characters.

    Args:
        output: Text to split up.
        opts: Options mapping. ``separator`` (default ``" "``) is the string
            placed between characters.

    Returns:
        The characters of ``output`` joined by the separator. ``opts`` is
        left unmodified.
    """
    # Use a fallback lookup so the caller's dict is not mutated.
    separator = opts.get('separator', " ")
    # A string is already an iterable of characters; no intermediate list.
    return separator.join(output)

src/interscript/interscript.py

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
# Names exported by `from .interscript import *` (used by the package __init__).
__all__ = ["map_exist", "map_list", "functions", "stdlib", "load_map", "transliterate"]
2+
3+
import importlib.util
4+
import os
5+
6+
from . import functions as functions
7+
from . import stdlib as stdlib
8+
9+
# Alias of the registry dict owned by stdlib; both names refer to the same object.
maps = stdlib.maps
10+
11+
def map_exist(map):
    """Report whether ``map`` is a key of the module-level ``maps`` registry."""
    return map in maps
13+
14+
def map_list(map=None):
    """Return the names of all maps currently in the ``maps`` registry.

    Args:
        map: Unused. Kept, now optional, so existing callers that pass an
            argument keep working while new callers can omit it.

    Returns:
        A dynamic key view over the registry dict.
    """
    return maps.keys()
16+
17+
def load_map(map_name):
    """Execute the Python file for ``map_name`` unless it is already registered.

    The file is looked up as ``maps/<map_name>.py`` next to this module and
    executed as a module named ``map_name``.
    NOTE(review): presumably the executed file registers itself in the
    ``maps`` registry; confirm against the generated map files.

    Raises:
        FileNotFoundError: If no such map file exists.
    """
    if not map_exist(map_name):
        package_dir = os.path.dirname(__file__)
        source_path = os.path.join(package_dir, 'maps', f"{map_name}.py")

        if not os.path.exists(source_path):
            raise FileNotFoundError(f"No map file found for {map_name}")

        # Build a module object from the file and run its top-level code.
        module_spec = importlib.util.spec_from_file_location(map_name, source_path)
        module = importlib.util.module_from_spec(module_spec)
        module_spec.loader.exec_module(module)
33+
34+
def transliterate(map, str, stage="main"):
    """Run ``str`` through the ``stage`` callable registered for ``map``.

    Raises ``KeyError`` when the map or the stage is not in the registry.
    """
    stage_fn = maps[map]["stages"][stage]
    return stage_fn(str)
36+

src/interscript/stdlib.py

Lines changed: 115 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,115 @@
1+
import regex as re
2+
3+
# Built-in alias names and the regular-expression fragment each stands for.
aliases = {
    "any_character": '.',
    "none": "",
    "space": " ",
    # Inside a character class, \b is a backspace and \0 is NUL.
    "whitespace": "[\\b \\t\\0\\r\\n]",
    "boundary": "\\b",
    "non_word_boundary": "\\B",
    "word": "\\w",
    "not_word": "\\W",
    "alpha": "[a-zA-Z]",
    "not_alpha": "[^a-zA-Z]",
    "digit": "\\d",
    "not_digit": "\\D",
    "line_start": "^",
    "line_end": "$",
    "string_start": "\\A",
    # Python spells the absolute end-of-string anchor \Z. The lowercase \z
    # (Ruby's spelling) is not recognised by Python's `re` before 3.14.
    "string_end": "\\Z",
}
21+
22+
# Names of the text post-processing helpers (they match the functions module).
available_functions = ["title_case", "downcase", "compose", "decompose", "separate"]
29+
30+
# Registry of defined maps, keyed by map name; entries are created by define_map().
maps = {}
31+
32+
def define_map(map):
    """Create the registry entry for ``map``, replacing any existing one.

    The entry records the map name plus four initially empty tables:
    ``aliases``, ``aliases_re``, ``cache`` and ``stages``.
    """
    entry = {"name": map}
    for table in ("aliases", "aliases_re", "cache", "stages"):
        entry[table] = {}
    maps[map] = entry
40+
41+
def get_alias(map, alias):
    """Look up ``alias`` in the ``aliases`` table of ``map`` (``KeyError`` if absent)."""
    alias_table = maps[map]["aliases"]
    return alias_table[alias]
43+
44+
def get_alias_re(map, alias):
    """Look up ``alias`` in the ``aliases_re`` table of ``map`` (``KeyError`` if absent)."""
    alias_table = maps[map]["aliases_re"]
    return alias_table[alias]
46+
47+
def add_map_stage(map, stage, fun):
    """Register ``fun`` as the callable for ``stage`` of an already defined ``map``."""
    stages = maps[map]["stages"]
    stages[stage] = fun
49+
50+
def add_map_alias(map, alias, aliased):
    """Store ``aliased`` under ``alias`` in the ``aliases`` table of ``map``."""
    alias_table = maps[map]["aliases"]
    alias_table[alias] = aliased
52+
53+
def add_map_alias_re(map, alias, aliased):
    """Store ``aliased`` under ``alias`` in the ``aliases_re`` table of ``map``."""
    alias_table = maps[map]["aliases_re"]
    alias_table[alias] = aliased
55+
56+
57+
def parallel_replace_tree(str, tree):
    """Replace the longest trie match at each position in one left-to-right pass.

    Args:
        str: Input text.
        tree: Nested-dict trie. Each level maps a code point (``ord`` of the
            next character) to the next level; the ``None`` key of a level
            holds the replacement for the sequence ending at that level.

    Returns:
        The text with every matched sequence replaced. Characters that start
        no match are copied through unchanged.
    """
    # Collect output pieces and join once; repeated `+=` on a string is
    # quadratic in the worst case.
    pieces = []
    len_str = len(str)
    i = 0
    while i < len_str:
        branch = tree
        match_len = 0  # length of the longest terminal prefix found at i
        repl = None

        # Walk the trie as far as the input allows, remembering the last
        # terminal node passed so a failed longer walk can fall back to it.
        for j in range(i, len_str):
            code = ord(str[j])
            if code not in branch:
                break
            branch = branch[code]
            if None in branch:
                match_len = j - i + 1
                repl = branch[None]

        if match_len:
            pieces.append(repl)
            i += match_len
        else:
            pieces.append(str[i])
            i += 1

    return "".join(pieces)
89+
90+
91+
92+
def parallel_regexp_gsub(s, subs_regexp, subs_hash):
    """Substitute every match of ``subs_regexp`` in ``s`` according to its named group.

    Named groups are expected to look like ``_<n>``. For each match, the first
    named group that participated selects ``subs_hash[n]`` as the replacement.
    A match with no participating named group is left as it was.

    Args:
        s: Input text.
        subs_regexp: Pattern source, compiled with ``re.MULTILINE``.
        subs_hash: Replacements indexable by the integer ``n``.

    Returns:
        The text after substitution.
    """
    pattern = re.compile(subs_regexp, re.MULTILINE)

    def pick(found):
        # Name of the first named group that took part in this match, if any.
        hit = next(
            (group for group, text in found.groupdict().items() if text is not None),
            None,
        )
        if hit is None:
            return found.group(0)
        # Drop the one-character prefix to get the numeric index.
        return subs_hash[int(hit[1:])]

    return pattern.sub(pick, s)
110+
111+
def upper(match):
    """Return the whole text matched by ``match``, converted to upper case."""
    whole = match.group(0)
    return whole.upper()
113+
114+
def lower(match):
    """Return the whole text matched by ``match``, converted to lower case."""
    whole = match.group(0)
    return whole.lower()

0 commit comments

Comments
 (0)