Initial commit

webdev778 · webdev778 · commit fc92b3ed8fc3 · 2024-02-23T12:47:22.000+01:00
diff --git a/.gitignore b/.gitignore
@@ -0,0 +1,2 @@
+src/interscript/maps/*.py
+__pycache__
diff --git a/LICENSE.adoc b/LICENSE.adoc
@@ -0,0 +1,31 @@
+= Licenses & Copyright
+
+This license file adheres to the formatting guidelines of
+https://github.com/nevir/readable-licenses[readable-licenses].
+
+
+== Ribose BSD 2-Clause License
+
+Copyright (c) 2019-, https://www.ribose.com[Ribose Inc].
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without modification,
+are permitted provided that the following conditions are met:
+
+1.  Redistributions of source code must retain the above copyright notice,
+    this list of conditions and the following disclaimer.
+
+2.  Redistributions in binary form must reproduce the above copyright notice,
+    this list of conditions and the following disclaimer in the documentation
+    and/or other materials provided with the distribution.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
diff --git a/README.adoc b/README.adoc
@@ -0,0 +1,41 @@
+= Interscript: Interoperable Script Conversion Systems for Python
+
+== Purpose
+
+This repository contains code for the Interscript Python runtime ("Interscript-Python").
+
+This software allows performing script conversions by using the
+https://github.com/interscript/maps[default set of Interscript maps]
+hosted at GitHub.
+
+Interscript is a project for interoperable script conversion systems
+and provides executable runtimes for multiple platforms.
+Full documentation is available https://github.com/interscript/interscript/[here].
+
+== Integration
+
+This section provides instructions on how to utilize Interscript-Python
+with your application.
+
+Interscript-Python can be used as a Python library.
+
+=== Installation
+
+[source,shell]
+----
+$ pip install interscript
+----
+
+== Usage
+
+[source,python]
+-----
+import interscript
+interscript.load_map('bgnpcgn-ukr-Cyrl-Latn-2019')
+print(interscript.transliterate('bgnpcgn-ukr-Cyrl-Latn-2019', input()))
+-----
+
+
+== Copyright and license
+
+This is a Ribose project. Copyright Ribose.
diff --git a/example.py b/example.py
@@ -0,0 +1,5 @@
+# Example: load one transliteration map and convert a sample string.
+import os
+import sys
+
+# Locate the in-repo package relative to this file rather than the current
+# working directory, so the example also runs when started from elsewhere.
+sys.path.append(os.path.join(os.path.dirname(os.path.abspath(__file__)), 'src'))
+import interscript
+
+interscript.load_map("bgnpcgn-ukr-Cyrl-Latn-2019")
+print(interscript.transliterate("bgnpcgn-ukr-Cyrl-Latn-2019", "привет"))
diff --git a/pyproject.toml b/pyproject.toml
@@ -0,0 +1,27 @@
+[project]
+name = "interscript"
+version = "0.0.1"
+authors = [
+  { name="Ribose Inc.", email="open.source@ribose.com" },
+]
+description = "Interoperable script conversion systems"
+# AsciiDoc has no content-type that can be inferred from the file extension,
+# so the content-type is declared explicitly.
+readme = { file = "README.adoc", content-type = "text/plain" }
+requires-python = ">=3.8"
+classifiers = [
+    "Programming Language :: Python :: 3",
+    "License :: OSI Approved :: BSD License",
+    "Operating System :: OS Independent",
+    "Intended Audience :: Science/Research",
+    "Intended Audience :: Developers",
+    "Intended Audience :: Education",
+    "Topic :: Text Processing :: Linguistic",
+]
+dependencies = ["regex"]
+
+[project.urls]
+Homepage = "https://www.interscript.org"
+Issues = "https://github.com/interscript/interscript-python/issues"
+
+[build-system]
+requires = ["hatchling"]
+build-backend = "hatchling.build"
diff --git a/src/interscript/__init__.py b/src/interscript/__init__.py
@@ -0,0 +1 @@
+from .interscript import *
diff --git a/src/interscript/functions.py b/src/interscript/functions.py
@@ -0,0 +1,25 @@
+import regex as re
+import unicodedata
+
+def title_case(output, opts):
+    """Upper-case the first character of every line and of every word.
+
+    Args:
+        output: The text to transform.
+        opts: Options mapping. ``opts['word_separator']`` is the string that
+            delimits words; it defaults to a single space. An empty separator
+            disables per-word capitalisation, so only line starts are
+            upper-cased.
+
+    Returns:
+        The transformed text. ``opts`` is left unmodified.
+    """
+    # Read the default without writing it back into the caller's dict.
+    word_separator = opts.get('word_separator', " ")
+
+    # First character of the string and the character after each newline.
+    output = re.sub(r'(^|\n)(.)', lambda a: a.group(0).upper(), output)
+
+    if word_separator != "":
+        # The separator is escaped so it is matched literally.
+        sep = re.escape(word_separator)
+        output = re.sub(sep + r'(.)', lambda a: a.group(0).upper(), output)
+    return output
+
+def downcase(output, opts):
+    """Return `output` converted to lower case; `opts` is not used."""
+    lowered = output.lower()
+    return lowered
+
+def compose(output, opts):
+    """Return `output` normalised to Unicode form NFC; `opts` is not used."""
+    form = "NFC"
+    return unicodedata.normalize(form, output)
+
+def decompose(output, opts):
+    """Return `output` normalised to Unicode form NFD; `opts` is not used."""
+    form = "NFD"
+    return unicodedata.normalize(form, output)
+
+def separate(output, opts):
+    """Insert a separator between every pair of adjacent characters.
+
+    Args:
+        output: The text to transform.
+        opts: Options mapping. ``opts['separator']`` is the string to insert;
+            it defaults to a single space.
+
+    Returns:
+        The characters of `output` joined by the separator. ``opts`` is left
+        unmodified.
+    """
+    # Read the default without writing it back into the caller's dict.
+    separator = opts.get('separator', " ")
+    # A string is already an iterable of its characters.
+    return separator.join(output)
diff --git a/src/interscript/interscript.py b/src/interscript/interscript.py
@@ -0,0 +1,36 @@
+# Names exported by `from .interscript import *` (used by the package __init__).
+__all__ = ["map_exist", "map_list", "functions", "stdlib", "load_map", "transliterate"]
+
+import importlib.util
+import os
+
+from . import functions as functions
+from . import stdlib as stdlib
+
+# Same dict object as stdlib.maps, so entries added through stdlib are visible here.
+maps = stdlib.maps
+
+def map_exist(map):
+    """Return True when `map` is a key of the `maps` registry."""
+    registered = map in maps
+    return registered
+
+def map_list(map=None):
+    """Return a view of the names of all maps currently in the registry.
+
+    Args:
+        map: Ignored. Kept only so existing callers that pass an argument
+            keep working; it is now optional.
+
+    Returns:
+        The keys view of the `maps` registry.
+    """
+    return maps.keys()
+
+def load_map(map_name):
+    """Execute the module file for `map_name` unless the map is already known.
+
+    The file is looked up as ``maps/<map_name>.py`` next to this module.
+    Presumably executing it registers the map in `maps` (confirm against the
+    generated map files, which are not part of this module).
+
+    Raises:
+        FileNotFoundError: if no such file exists.
+    """
+    if not map_exist(map_name):
+        package_dir = os.path.dirname(__file__)
+        source_path = os.path.join(package_dir, 'maps', f"{map_name}.py")
+
+        if not os.path.exists(source_path):
+            raise FileNotFoundError(f"No map file found for {map_name}")
+
+        # Build a module from the file and run it.
+        module_spec = importlib.util.spec_from_file_location(map_name, source_path)
+        loaded = importlib.util.module_from_spec(module_spec)
+        module_spec.loader.exec_module(loaded)
+
+def transliterate(map, str, stage="main"):
+    """Apply stage `stage` of the map `map` to `str` and return the result.
+
+    Raises KeyError when the map or the stage is not present in `maps`.
+    """
+    stage_fn = maps[map]["stages"][stage]
+    return stage_fn(str)
+
diff --git a/src/interscript/stdlib.py b/src/interscript/stdlib.py
@@ -0,0 +1,115 @@
+import regex as re
+
+# Built-in alias names mapped to regular-expression source fragments.
+aliases = {
+    "any_character": '.',
+    "none": "",
+    "space": " ",
+    # Inside a character class "\b" denotes backspace and "\0" the NUL character.
+    "whitespace": "[\\b \\t\\0\\r\\n]",
+    "boundary": "\\b",
+    "non_word_boundary": "\\B",
+    "word": "\\w",
+    "not_word": "\\W",
+    "alpha": "[a-zA-Z]",
+    "not_alpha": "[^a-zA-Z]",
+    "digit": "\\d",
+    "not_digit": "\\D",
+    "line_start": "^",
+    "line_end": "$",
+    "string_start": "\\A",
+    # Python spells the absolute end-of-string anchor "\Z"; "\z" is the
+    # Ruby/PCRE spelling and is not accepted by every Python regex engine.
+    "string_end": "\\Z"
+}
+
+# Names of the text functions implemented in functions.py.
+available_functions = [
+    "title_case",
+    "downcase",
+    "compose",
+    "decompose",
+    "separate",
+]
+
+# Registry of defined maps, keyed by map name; entries are created by define_map().
+maps = {}
+
+def define_map(map):
+    """Create (or reset) the registry entry for `map` with empty tables."""
+    entry = dict(
+        name=map,
+        aliases={},
+        aliases_re={},
+        cache={},
+        stages={},
+    )
+    maps[map] = entry
+
+def get_alias(map, alias):
+    """Return the value stored under `alias` in the "aliases" table of `map`."""
+    alias_table = maps[map]["aliases"]
+    return alias_table[alias]
+
+def get_alias_re(map, alias):
+    """Return the value stored under `alias` in the "aliases_re" table of `map`."""
+    alias_table = maps[map]["aliases_re"]
+    return alias_table[alias]
+
+def add_map_stage(map, stage, fun):
+    """Store `fun` as the stage named `stage` of `map`, replacing any previous one."""
+    stage_table = maps[map]["stages"]
+    stage_table[stage] = fun
+
+def add_map_alias(map, alias, aliased):
+    """Store `aliased` under `alias` in the "aliases" table of `map`."""
+    alias_table = maps[map]["aliases"]
+    alias_table[alias] = aliased
+
+def add_map_alias_re(map, alias, aliased):
+    """Store `aliased` under `alias` in the "aliases_re" table of `map`."""
+    alias_table = maps[map]["aliases_re"]
+    alias_table[alias] = aliased
+
+
+def parallel_replace_tree(str, tree):
+    """Replace substrings of `str` using a lookup tree, longest match first.
+
+    `tree` is a nested mapping keyed by character code points (``ord``); a
+    node that holds the key ``None`` ends a source string, and the value under
+    ``None`` is its replacement. At each position the longest source string
+    found in the tree is replaced; when nothing matches, the single character
+    is copied through unchanged.
+    """
+    pieces = []
+    total = len(str)
+    pos = 0
+    while pos < total:
+        node = tree
+        matched_len = 0
+        replacement = None
+
+        # Walk down the tree for as long as the following characters have a branch.
+        for offset in range(pos, total):
+            code = ord(str[offset])
+            if code not in node:
+                break
+            node = node[code]
+            if None in node:
+                # Remember the longest terminal node seen so far.
+                matched_len = offset - pos + 1
+                replacement = node[None]
+
+        if matched_len:
+            pieces.append(replacement)
+            pos += matched_len
+        else:
+            pieces.append(str[pos])
+            pos += 1
+
+    return "".join(pieces)
+
+
+
+def parallel_regexp_gsub(s, subs_regexp, subs_hash):
+    """Replace every match of `subs_regexp` in `s` by a per-alternative value.
+
+    The pattern is compiled in MULTILINE mode. For each match, the first named
+    group that took part in the match selects the replacement: the group name
+    minus its first character is read as an integer index into `subs_hash`
+    (names are assumed to look like "_1", "_2", ...). A match in which no
+    named group took part is left unchanged.
+    """
+    def pick(match):
+        hit = next(
+            (name for name, text in match.groupdict().items() if text is not None),
+            None,
+        )
+        if hit is None:
+            # No named group took part in the match: keep the matched text.
+            return match.group(0)
+        return subs_hash[int(hit[1:])]
+
+    pattern = re.compile(subs_regexp, re.MULTILINE)
+    return pattern.sub(pick, s)
+
+def upper(match):
+    """Return the whole text of `match` in upper case (usable as a `sub` callback)."""
+    matched_text = match.group(0)
+    return matched_text.upper()
+
+def lower(match):
+    """Return the whole text of `match` in lower case (usable as a `sub` callback)."""
+    matched_text = match.group(0)
+    return matched_text.lower()

Original file line number	Diff line number	Diff line change
`@@ -0,0 +1,2 @@`
	`1`	`+src/interscript/maps/*.py`
	`2`	`+__pycache__`