diff options
author | toonn <toonn@toonn.io> | 2022-03-31 19:44:51 +0200 |
---|---|---|
committer | toonn <toonn@toonn.io> | 2022-03-31 21:03:25 +0200 |
commit | d1e7fcb6acf9ae31be8946ae8b0e91d6c51e4cd7 (patch) | |
tree | f80ca61c05f892c267fc837d5b8c1603a32981d4 | |
parent | 89d3683e42f9d220e7e888edc5d82bdb29f423c3 (diff) |
Promote testing BiDi text (branch: retsam)
Ranger's had BiDi text for a while now but not that many people
use a right-to-left language. So we've had a hard time finding
enough testers.
This is why we've decided to default to displaying things
right-to-left instead of left-to-right. We figure this'll get us
extra testers and the users who don't like it can always change
it back.
-rw-r--r-- | README.md | 269 | ||||
-rw-r--r-- | bidi/__init__.py | 86 | ||||
-rw-r--r-- | bidi/algorithm.py | 658 | ||||
-rw-r--r-- | bidi/mirror.py | 388 | ||||
-rw-r--r-- | ranger/gui/displayable.py | 11 | ||||
-rw-r--r-- | ranger/gui/widgets/browsercolumn.py | 6 | ||||
-rw-r--r-- | regnar.png | bin | 0 -> 88641 bytes | |||
-rw-r--r-- | regnar_logo.png | bin | 0 -> 84009 bytes |
8 files changed, 1273 insertions, 145 deletions
@@ -1,174 +1,175 @@ -ranger 1.9.3 +ranger 1.9.3 ============ -<img src="https://ranger.github.io/ranger_logo.png" width="150"> +<img src="https://raw.githubusercontent.com/ranger/ranger/retsam/regnar_logo.png" width="150"> -[![Build Status](https://travis-ci.org/ranger/ranger.svg?branch=master)](https://travis-ci.org/ranger/ranger) <a href="https://repology.org/metapackage/ranger/versions"> - <img src="https://repology.org/badge/latest-versions/ranger.svg" alt="latest packaged version(s)"> + <img src="https://repology.org/badge/latest-versions/ranger.svg" alt="latest packaged version(s)"> </a> -ranger is a console file manager with VI key bindings. It provides a -minimalistic and nice curses interface with a view on the directory hierarchy. -It ships with `rifle`, a file launcher that is good at automatically finding -out which program to use for what file type. +Promote testing BiDi text +========================= + +Ranger's had BiDi text for a while now but not that many people use a right-to-left language. So we've had a hard time finding enough testers. -![screenshot](https://raw.githubusercontent.com/ranger/ranger-assets/master/screenshots/screenshot.png) +This is why we've decide to default to displaying things right-to-left instead of left-to-right. We figure this'll get us extra testers and the users who don't like it can always change it back. -For `mc` aficionados there's also the multi-pane viewmode. +ranger is a console file manager with VI key bindings. It provides a +minimalistic and nice curses interface with a view on the directory hierarchy. +It ships with `rifle`, a file launcher that is good at automatically finding +out which program to use for what file type. 
-<p> -<img src="https://raw.githubusercontent.com/ranger/ranger-assets/master/screenshots/twopane.png" alt="two panes" width="49%" /> -<img src="https://raw.githubusercontent.com/ranger/ranger-assets/master/screenshots/multipane.png" alt="multiple panes" width="49%" /> -</p> +![screenshot](https://raw.githubusercontent.com/ranger/ranger/retsam/regnar.png) -This file describes ranger and how to get it to run. For instructions on the -usage, please read the man page (`man ranger` in a terminal). See `HACKING.md` -for development-specific information. +This file describes ranger and how to get it to run. For instructions on the +usage, please read the man page (`man ranger` in a terminal). See `HACKING.md` +for development-specific information. -For configuration, check the files in `ranger/config/` or copy the -default config to `~/.config/ranger` with `ranger --copy-config` -(see [instructions](#getting-started)). +For configuration, check the files in `ranger/config/` or copy the +default config to `~/.config/ranger` with `ranger --copy-config` +(see [instructions](#getting-started)). -The `examples/` directory contains several scripts and plugins that demonstrate how -ranger can be extended or combined with other programs. These files can be -found in the git repository or in `/usr/share/doc/ranger`. +The `examples/` directory contains several scripts and plugins that demonstrate how +ranger can be extended or combined with other programs. These files can be +found in the git repository or in `/usr/share/doc/ranger`. -A note to packagers: Versions meant for packaging are listed in the changelog -on the website. +A note to packagers: Versions meant for packaging are listed in the changelog +on the website. 
-About +About ----- -* Authors: see `AUTHORS` file -* License: GNU General Public License Version 3 -* Website: https://ranger.github.io/ -* Download: https://ranger.github.io/ranger-stable.tar.gz -* Bug reports: https://github.com/ranger/ranger/issues -* git clone https://github.com/ranger/ranger.git +* Authors: see `AUTHORS` file +* License: GNU General Public License Version 3 +* Website: https://ranger.github.io/ +* Download: https://ranger.github.io/ranger-stable.tar.gz +* Bug reports: https://github.com/ranger/ranger/issues +* git clone https://github.com/ranger/ranger.git -Design Goals +Design Goals ------------ -* An easily maintainable file manager in a high level language -* A quick way to switch directories and browse the file system -* Keep it small but useful, do one thing and do it well -* Console-based, with smooth integration into the unix shell +* An easily maintainable file manager in a high level language +* A quick way to switch directories and browse the file system +* Keep it small but useful, do one thing and do it well +* Console-based, with smooth integration into the unix shell -Features +Features -------- -* UTF-8 Support (if your Python copy supports it) -* Multi-column display -* Preview of the selected file/directory -* Common file operations (create/chmod/copy/delete/...) -* Renaming multiple files at once -* VIM-like console and hotkeys -* Automatically determine file types and run them with correct programs -* Change the directory of your shell after exiting ranger -* Tabs, bookmarks, mouse support... - - -Dependencies +* UTF-8 Support (if your Python copy supports it) +* Multi-column display +* Preview of the selected file/directory +* Common file operations (create/chmod/copy/delete/...) +* Renaming multiple files at once +* VIM-like console and hotkeys +* Automatically determine file types and run them with correct programs +* Change the directory of your shell after exiting ranger +* Tabs, bookmarks, mouse support... 
+ + +Dependencies ------------ -* Python (`>=2.6` or `>=3.1`) with the `curses` module - and (optionally) wide-unicode support -* A pager (`less` by default) - -### Optional dependencies - -For general usage: - -* `file` for determining file types -* `chardet` (Python package) for improved encoding detection of text files -* `sudo` to use the "run as root" feature -* `python-bidi` (Python package) to display right-to-left file names correctly - (Hebrew, Arabic) - -For enhanced file previews (with `scope.sh`): - -* `img2txt` (from `caca-utils`) for ASCII-art image previews -* `w3mimgdisplay`, `ueberzug`, `mpv`, `iTerm2`, `kitty`, `terminology` or `urxvt` for image previews -* `convert` (from `imagemagick`) to auto-rotate images -* `rsvg-convert` (from [`librsvg`](https://wiki.gnome.org/Projects/LibRsvg)) - for SVG previews -* `ffmpeg`, or `ffmpegthumbnailer` for video thumbnails -* `highlight`, `bat` or `pygmentize` for syntax highlighting of code -* `atool`, `bsdtar`, `unrar` and/or `7z` to preview archives -* `bsdtar`, `tar`, `unrar`, `unzip` and/or `zipinfo` (and `sed`) to preview - archives as their first image -* `lynx`, `w3m` or `elinks` to preview html pages -* `pdftotext` or `mutool` (and `fmt`) for textual `pdf` previews, `pdftoppm` to - preview as image -* `djvutxt` for textual DjVu previews, `ddjvu` to preview as image -* `calibre` or `epub-thumbnailer` for image previews of ebooks -* `transmission-show` for viewing BitTorrent information -* `mediainfo` or `exiftool` for viewing information about media files -* `odt2txt` for OpenDocument text files (`odt`, `ods`, `odp` and `sxw`) -* `python` or `jq` for JSON files -* `fontimage` for font previews -* `openscad` for 3D model previews (`stl`, `off`, `dxf`, `scad`, `csg`) -* `draw.io` for [draw.io](https://app.diagrams.net/) diagram previews - (`drawio` extension) - -Installing +* Python (`>=2.6` or `>=3.1`) with the `curses` module + and (optionally) wide-unicode support +* A pager (`less` by default) + +### 
Optional dependencies + +For general usage: + +* `file` for determining file types +* `chardet` (Python package) for improved encoding detection of text files +* `sudo` to use the "run as root" feature +* `python-bidi` (Python package) to display right-to-left file names correctly + (Hebrew, Arabic) + +For enhanced file previews (with `scope.sh`): + +* `img2txt` (from `caca-utils`) for ASCII-art image previews +* `w3mimgdisplay`, `ueberzug`, `mpv`, `iTerm2`, `kitty`, `terminology` or `urxvt` for image previews +* `convert` (from `imagemagick`) to auto-rotate images +* `rsvg-convert` (from [`librsvg`](https://wiki.gnome.org/Projects/LibRsvg)) + for SVG previews +* `ffmpeg`, or `ffmpegthumbnailer` for video thumbnails +* `highlight`, `bat` or `pygmentize` for syntax highlighting of code +* `atool`, `bsdtar`, `unrar` and/or `7z` to preview archives +* `bsdtar`, `tar`, `unrar`, `unzip` and/or `zipinfo` (and `sed`) to preview + archives as their first image +* `lynx`, `w3m` or `elinks` to preview html pages +* `pdftotext` or `mutool` (and `fmt`) for textual `pdf` previews, `pdftoppm` to + preview as image +* `djvutxt` for textual DjVu previews, `ddjvu` to preview as image +* `calibre` or `epub-thumbnailer` for image previews of ebooks +* `transmission-show` for viewing BitTorrent information +* `mediainfo` or `exiftool` for viewing information about media files +* `odt2txt` for OpenDocument text files (`odt`, `ods`, `odp` and `sxw`) +* `python` or `jq` for JSON files +* `fontimage` for font previews +* `openscad` for 3D model previews (`stl`, `off`, `dxf`, `scad`, `csg`) +* `draw.io` for [draw.io](https://app.diagrams.net/) diagram previews + (`drawio` extension) + +Installing ---------- -Use the package manager of your operating system to install ranger. -You can also install ranger through PyPI: ```pip install ranger-fm```. 
- -<details> - <summary> - Check current version: - <sub> - <a href="https://repology.org/metapackage/ranger/versions"> - <img src="https://repology.org/badge/tiny-repos/ranger.svg" alt="Packaging status"> - </a> - </sub> - </summary> - <a href="https://repology.org/metapackage/ranger/versions"> - <img src="https://repology.org/badge/vertical-allrepos/ranger.svg" alt="Packaging status"> - </a> -</details> - -### Installing from a clone -Note that you don't *have* to install ranger; you can simply run `ranger.py`. - -To install ranger manually: +Use the package manager of your operating system to install ranger. +You can also install ranger through PyPI: ```pip install ranger-fm```. + +<details> + <summary> + Check current version: + <sub> + <a href="https://repology.org/metapackage/ranger/versions"> + <img src="https://repology.org/badge/tiny-repos/ranger.svg" alt="Packaging status"> + </a> + </sub> + </summary> + <a href="https://repology.org/metapackage/ranger/versions"> + <img src="https://repology.org/badge/vertical-allrepos/ranger.svg" alt="Packaging status"> + </a> +</details> + +### Installing from a clone +Note that you don't *have* to install ranger; you can simply run `ranger.py`. + +To install ranger manually: ``` -sudo make install +sudo make install ``` -This translates roughly to: +This translates roughly to: ``` -sudo python setup.py install --optimize=1 --record=install_log.txt +sudo python setup.py install --optimize=1 --record=install_log.txt ``` -This also saves a list of all installed files to `install_log.txt`, which you can -use to uninstall ranger. +This also saves a list of all installed files to `install_log.txt`, which you can +use to uninstall ranger. -Getting Started +Getting Started --------------- -After starting ranger, you can use the Arrow Keys or `h` `j` `k` `l` to -navigate, `Enter` to open a file or `q` to quit. The third column shows a -preview of the current file. 
The second is the main column and the first shows -the parent directory. +After starting ranger, you can use the Arrow Keys or `h` `j` `k` `l` to +navigate, `Enter` to open a file or `q` to quit. The third column shows a +preview of the current file. The second is the main column and the first shows +the parent directory. -Ranger can automatically copy default configuration files to `~/.config/ranger` -if you run it with the switch `--copy-config=( rc | scope | ... | all )`. -See `ranger --help` for a description of that switch. Also check -`ranger/config/` for the default configuration. +Ranger can automatically copy default configuration files to `~/.config/ranger` +if you run it with the switch `--copy-config=( rc | scope | ... | all )`. +See `ranger --help` for a description of that switch. Also check +`ranger/config/` for the default configuration. -Going Further +Going Further --------------- -* To get the most out of ranger, read the [Official User Guide](https://github.com/ranger/ranger/wiki/Official-user-guide). -* For frequently asked questions, see the [FAQ](https://github.com/ranger/ranger/wiki/FAQ%3A-Frequently-Asked-Questions). -* For more information on customization, see the [wiki](https://github.com/ranger/ranger/wiki). +* To get the most out of ranger, read the [Official User Guide](https://github.com/ranger/ranger/wiki/Official-user-guide). +* For frequently asked questions, see the [FAQ](https://github.com/ranger/ranger/wiki/FAQ%3A-Frequently-Asked-Questions). +* For more information on customization, see the [wiki](https://github.com/ranger/ranger/wiki). -Community +Community --------------- -For help, support, or if you just want to hang out with us, you can find us here: -* **IRC**: channel **#ranger** on [Libera.Chat](https://libera.chat/guides/connect). Don't have an IRC client? Join us via the [webchat](https://web.libera.chat/#ranger)! 
-* **Reddit**: [r/ranger](https://www.reddit.com/r/ranger/) +For help, support, or if you just want to hang out with us, you can find us here: +* **IRC**: channel **#ranger** on [Libera.Chat](https://libera.chat/guides/connect). Don't have an IRC client? Join us via the [webchat](https://web.libera.chat/#ranger)! +* **Reddit**: [r/ranger](https://www.reddit.com/r/ranger/) + +🐟 diff --git a/bidi/__init__.py b/bidi/__init__.py new file mode 100644 index 00000000..70196b96 --- /dev/null +++ b/bidi/__init__.py @@ -0,0 +1,86 @@ +#!/usr/bin/env python +# This file is part of python-bidi +# +# python-bidi is free software: you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. + +# Copyright (C) 2008-2010 Yaacov Zamir <kzamir_a_walla.co.il>, +# Copyright (C) 2010-2015 Meir kriheli <mkriheli@gmail.com>. 
+ +""" +Implementation of Unicode Bidirectional Algorithm +http://www.unicode.org/unicode/reports/tr9/ +""" + +VERSION = '0.4.1' + + +def main(): + """Will be used to create the console script""" + + import optparse + import sys + import codecs + import locale + import six + from .algorithm import get_display + + parser = optparse.OptionParser() + + parser.add_option('-e', '--encoding', + dest='encoding', + default='utf-8', + type='string', + help='Text encoding (default: utf-8)') + + parser.add_option('-u', '--upper-is-rtl', + dest='upper_is_rtl', + default=False, + action='store_true', + help="Treat upper case chars as strong 'R' " + 'for debugging (default: False).') + + parser.add_option('-d', '--debug', + dest='debug', + default=False, + action='store_true', + help="Output to stderr steps taken with the algorithm") + + parser.add_option('-b', '--base-dir', + dest='base_dir', + default=None, + type='string', + help="Override base direction [L|R]") + + options, rest = parser.parse_args() + + if options.base_dir and options.base_dir not in 'LR': + parser.error('option -b can be L or R') + + # allow unicode in sys.stdout.write + if six.PY2: + sys.stdout = codecs.getwriter(locale.getpreferredencoding())(sys.stdout) + + if rest: + lines = rest + else: + lines = sys.stdin + + for line in lines: + display = get_display(line, options.encoding, options.upper_is_rtl, + options.base_dir, options.debug) + # adjust the encoding as unicode, to match the output encoding + if not isinstance(display, six.text_type): + display = display.decode(options.encoding) + + six.print_(display, end='') diff --git a/bidi/algorithm.py b/bidi/algorithm.py new file mode 100644 index 00000000..10a93791 --- /dev/null +++ b/bidi/algorithm.py @@ -0,0 +1,658 @@ +# This file is part of python-bidi +# +# python-bidi is free software: you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation, either version 3 
of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. + +# Copyright (C) 2008-2010 Yaacov Zamir <kzamir_a_walla.co.il>, +# Copyright (C) 2010-2015 Meir kriheli <mkriheli@gmail.com>. +"bidirectional algorithm implementation" +import sys + +import inspect +from collections import deque +from unicodedata import bidirectional, mirrored +import six + +from .mirror import MIRRORED + + +# Some definitions +PARAGRAPH_LEVELS = {'L': 0, 'AL': 1, 'R': 1} +EXPLICIT_LEVEL_LIMIT = 62 + + +def _LEAST_GREATER_ODD(x): + return (x + 1) | 1 + + +def _LEAST_GREATER_EVEN(x): + return (x + 2) & ~1 + + +X2_X5_MAPPINGS = { + 'RLE': (_LEAST_GREATER_ODD, 'N'), + 'LRE': (_LEAST_GREATER_EVEN, 'N'), + 'RLO': (_LEAST_GREATER_ODD, 'R'), + 'LRO': (_LEAST_GREATER_EVEN, 'L'), +} + +# Added 'B' so X6 won't execute in that case and X8 will run it's course +X6_IGNORED = list(X2_X5_MAPPINGS.keys()) + ['BN', 'PDF', 'B'] +X9_REMOVED = list(X2_X5_MAPPINGS.keys()) + ['BN', 'PDF'] + + +def _embedding_direction(x): + return ('L', 'R')[x % 2] + + +_IS_UCS2 = sys.maxunicode == 65535 +_SURROGATE_MIN, _SURROGATE_MAX = 55296, 56319 # D800, DBFF + + +def debug_storage(storage, base_info=False, chars=True, runs=False): + "Display debug information for the storage" + + import codecs + import locale + import sys + + if six.PY2: + stderr = codecs.getwriter(locale.getpreferredencoding())(sys.stderr) + else: + stderr = sys.stderr + + caller = inspect.stack()[1][3] + stderr.write('in %s\n' % caller) + + if base_info: + stderr.write(u' base level : %d\n' % storage['base_level']) + stderr.write(u' base dir : %s\n' % 
storage['base_dir']) + + if runs: + stderr.write(u' runs : %s\n' % list(storage['runs'])) + + if chars: + output = u' Chars : ' + for _ch in storage['chars']: + if _ch != '\n': + output += _ch['ch'] + else: + output += 'C' + stderr.write(output + u'\n') + + output = u' Res. levels : %s\n' % u''.join( + [six.text_type(_ch['level']) for _ch in storage['chars']]) + stderr.write(output) + + _types = [_ch['type'].ljust(3) for _ch in storage['chars']] + + for i in range(3): + if i: + output = u' %s\n' + else: + output = u' Res. types : %s\n' + stderr.write(output % u''.join([_t[i] for _t in _types])) + + +def get_base_level(text, upper_is_rtl=False): + """Get the paragraph base embedding level. Returns 0 for LTR, + 1 for RTL. + + `text` a unicode object. + + Set `upper_is_rtl` to True to treat upper case chars as strong 'R' + for debugging (default: False). + + """ + + base_level = None + + prev_surrogate = False + # P2 + for _ch in text: + # surrogate in case of ucs2 + if _IS_UCS2 and (_SURROGATE_MIN <= ord(_ch) <= _SURROGATE_MAX): + prev_surrogate = _ch + continue + elif prev_surrogate: + _ch = prev_surrogate + _ch + prev_surrogate = False + + # treat upper as RTL ? 
+ if upper_is_rtl and _ch.isupper(): + base_level = 1 + break + + bidi_type = bidirectional(_ch) + + if bidi_type in ('AL', 'R'): + base_level = 1 + break + + elif bidi_type == 'L': + base_level = 0 + break + + # P3 + if base_level is None: + base_level = 0 + + return base_level + + +def get_embedding_levels(text, storage, upper_is_rtl=False, debug=False): + """Get the paragraph base embedding level and direction, + set the storage to the array of chars""" + + prev_surrogate = False + base_level = storage['base_level'] + + # preset the storage's chars + for _ch in text: + if _IS_UCS2 and (_SURROGATE_MIN <= ord(_ch) <= _SURROGATE_MAX): + prev_surrogate = _ch + continue + elif prev_surrogate: + _ch = prev_surrogate + _ch + prev_surrogate = False + + if upper_is_rtl and _ch.isupper(): + bidi_type = 'R' + else: + bidi_type = bidirectional(_ch) + + storage['chars'].append({ + 'ch': _ch, + 'level': base_level, + 'type': bidi_type, + 'orig': bidi_type + }) + if debug: + debug_storage(storage, base_info=True) + + +def explicit_embed_and_overrides(storage, debug=False): + """Apply X1 to X9 rules of the unicode algorithm. 
+ + See http://unicode.org/reports/tr9/#Explicit_Levels_and_Directions + + """ + overflow_counter = almost_overflow_counter = 0 + directional_override = 'N' + levels = deque() + + # X1 + embedding_level = storage['base_level'] + + for _ch in storage['chars']: + bidi_type = _ch['type'] + + level_func, override = X2_X5_MAPPINGS.get(bidi_type, (None, None)) + + if level_func: + # So this is X2 to X5 + # if we've past EXPLICIT_LEVEL_LIMIT, note it and do nothing + + if overflow_counter != 0: + overflow_counter += 1 + continue + + new_level = level_func(embedding_level) + if new_level < EXPLICIT_LEVEL_LIMIT: + levels.append((embedding_level, directional_override)) + embedding_level, directional_override = new_level, override + + elif embedding_level == EXPLICIT_LEVEL_LIMIT - 2: + # The new level is invalid, but a valid level can still be + # achieved if this level is 60 and we encounter an RLE or + # RLO further on. So record that we 'almost' overflowed. + almost_overflow_counter += 1 + + else: + overflow_counter += 1 + else: + # X6 + if bidi_type not in X6_IGNORED: + _ch['level'] = embedding_level + if directional_override != 'N': + _ch['type'] = directional_override + + # X7 + elif bidi_type == 'PDF': + if overflow_counter: + overflow_counter -= 1 + elif almost_overflow_counter and \ + embedding_level != EXPLICIT_LEVEL_LIMIT - 1: + almost_overflow_counter -= 1 + elif levels: + embedding_level, directional_override = levels.pop() + + # X8 + elif bidi_type == 'B': + levels.clear() + overflow_counter = almost_overflow_counter = 0 + embedding_level = _ch['level'] = storage['base_level'] + directional_override = 'N' + + # Removes the explicit embeds and overrides of types + # RLE, LRE, RLO, LRO, PDF, and BN. Adjusts extended chars + # next and prev as well + + # Applies X9. 
See http://unicode.org/reports/tr9/#X9 + storage['chars'] = [_ch for _ch in storage['chars'] + if _ch['type'] not in X9_REMOVED] + + calc_level_runs(storage) + + if debug: + debug_storage(storage, runs=True) + + +def calc_level_runs(storage): + """Split the storage to run of char types at the same level. + + Applies X10. See http://unicode.org/reports/tr9/#X10 + """ + # run level depends on the higher of the two levels on either side of + # the boundary If the higher level is odd, the type is R; otherwise, + # it is L + + storage['runs'].clear() + chars = storage['chars'] + + # empty string ? + if not chars: + return + + def calc_level_run(b_l, b_r): + return ['L', 'R'][max(b_l, b_r) % 2] + + first_char = chars[0] + + sor = calc_level_run(storage['base_level'], first_char['level']) + eor = None + + run_start = run_length = 0 + + prev_level, prev_type = first_char['level'], first_char['type'] + + for _ch in chars: + curr_level, curr_type = _ch['level'], _ch['type'] + + if curr_level == prev_level: + run_length += 1 + else: + eor = calc_level_run(prev_level, curr_level) + storage['runs'].append({'sor': sor, 'eor': eor, 'start': run_start, + 'type': prev_type, 'length': run_length}) + sor = eor + run_start += run_length + run_length = 1 + + prev_level, prev_type = curr_level, curr_type + + # for the last char/runlevel + eor = calc_level_run(curr_level, storage['base_level']) + storage['runs'].append({'sor': sor, 'eor': eor, 'start': run_start, + 'type': curr_type, 'length': run_length}) + + +def resolve_weak_types(storage, debug=False): + """Resolve weak type rules W1 - W3. + + See: http://unicode.org/reports/tr9/#Resolving_Weak_Types + + """ + + for run in storage['runs']: + prev_strong = prev_type = run['sor'] + start, length = run['start'], run['length'] + chars = storage['chars'][start:start+length] + for _ch in chars: + # W1. Examine each nonspacing mark (NSM) in the level run, and + # change the type of the NSM to the type of the previous character. 
+ # If the NSM is at the start of the level run, it will get the type + # of sor. + bidi_type = _ch['type'] + + if bidi_type == 'NSM': + _ch['type'] = bidi_type = prev_type + + # W2. Search backward from each instance of a European number until + # the first strong type (R, L, AL, or sor) is found. If an AL is + # found, change the type of the European number to Arabic number. + if bidi_type == 'EN' and prev_strong == 'AL': + _ch['type'] = 'AN' + + # update prev_strong if needed + if bidi_type in ('R', 'L', 'AL'): + prev_strong = bidi_type + + prev_type = _ch['type'] + + # W3. Change all ALs to R + for _ch in chars: + if _ch['type'] == 'AL': + _ch['type'] = 'R' + + # W4. A single European separator between two European numbers changes + # to a European number. A single common separator between two numbers of + # the same type changes to that type. + for idx in range(1, len(chars) - 1): + bidi_type = chars[idx]['type'] + prev_type = chars[idx-1]['type'] + next_type = chars[idx+1]['type'] + + if bidi_type == 'ES' and (prev_type == next_type == 'EN'): + chars[idx]['type'] = 'EN' + + if bidi_type == 'CS' and prev_type == next_type and \ + prev_type in ('AN', 'EN'): + chars[idx]['type'] = prev_type + + # W5. A sequence of European terminators adjacent to European numbers + # changes to all European numbers. + for idx in range(len(chars)): + if chars[idx]['type'] == 'EN': + for et_idx in range(idx-1, -1, -1): + if chars[et_idx]['type'] == 'ET': + chars[et_idx]['type'] = 'EN' + else: + break + for et_idx in range(idx+1, len(chars)): + if chars[et_idx]['type'] == 'ET': + chars[et_idx]['type'] = 'EN' + else: + break + + # W6. Otherwise, separators and terminators change to Other Neutral. + for _ch in chars: + if _ch['type'] in ('ET', 'ES', 'CS'): + _ch['type'] = 'ON' + + # W7. Search backward from each instance of a European number until the + # first strong type (R, L, or sor) is found. If an L is found, then + # change the type of the European number to L. 
+ prev_strong = run['sor'] + for _ch in chars: + if _ch['type'] == 'EN' and prev_strong == 'L': + _ch['type'] = 'L' + + if _ch['type'] in ('L', 'R'): + prev_strong = _ch['type'] + + if debug: + debug_storage(storage, runs=True) + + +def resolve_neutral_types(storage, debug): + """Resolving neutral types. Implements N1 and N2 + + See: http://unicode.org/reports/tr9/#Resolving_Neutral_Types + + """ + + for run in storage['runs']: + start, length = run['start'], run['length'] + # use sor and eor + chars = [{'type': run['sor']}] + storage['chars'][start:start+length] +\ + [{'type': run['eor']}] + total_chars = len(chars) + + seq_start = None + for idx in range(total_chars): + _ch = chars[idx] + if _ch['type'] in ('B', 'S', 'WS', 'ON'): + # N1. A sequence of neutrals takes the direction of the + # surrounding strong text if the text on both sides has the same + # direction. European and Arabic numbers act as if they were R + # in terms of their influence on neutrals. Start-of-level-run + # (sor) and end-of-level-run (eor) are used at level run + # boundaries. + if seq_start is None: + seq_start = idx + prev_bidi_type = chars[idx-1]['type'] + else: + if seq_start is not None: + next_bidi_type = chars[idx]['type'] + + if prev_bidi_type in ('AN', 'EN'): + prev_bidi_type = 'R' + + if next_bidi_type in ('AN', 'EN'): + next_bidi_type = 'R' + + for seq_idx in range(seq_start, idx): + if prev_bidi_type == next_bidi_type: + chars[seq_idx]['type'] = prev_bidi_type + else: + # N2. Any remaining neutrals take the embedding + # direction. The embedding direction for the given + # neutral character is derived from its embedding + # level: L if the character is set to an even level, + # and R if the level is odd. 
+ chars[seq_idx]['type'] = \ + _embedding_direction(chars[seq_idx]['level']) + + seq_start = None + + if debug: + debug_storage(storage) + + +def resolve_implicit_levels(storage, debug): + """Resolving implicit levels (I1, I2) + + See: http://unicode.org/reports/tr9/#Resolving_Implicit_Levels + + """ + for run in storage['runs']: + start, length = run['start'], run['length'] + chars = storage['chars'][start:start+length] + + for _ch in chars: + # only those types are allowed at this stage + assert _ch['type'] in ('L', 'R', 'EN', 'AN'),\ + '%s not allowed here' % _ch['type'] + + if _embedding_direction(_ch['level']) == 'L': + # I1. For all characters with an even (left-to-right) embedding + # direction, those of type R go up one level and those of type + # AN or EN go up two levels. + if _ch['type'] == 'R': + _ch['level'] += 1 + elif _ch['type'] != 'L': + _ch['level'] += 2 + else: + # I2. For all characters with an odd (right-to-left) embedding + # direction, those of type L, EN or AN go up one level. + if _ch['type'] != 'R': + _ch['level'] += 1 + + if debug: + debug_storage(storage, runs=True) + + +def reverse_contiguous_sequence(chars, line_start, line_end, highest_level, + lowest_odd_level): + """L2. From the highest level found in the text to the lowest odd + level on each line, including intermediate levels not actually + present in the text, reverse any contiguous sequence of characters + that are at that level or higher. + + """ + for level in range(highest_level, lowest_odd_level-1, -1): + _start = _end = None + + for run_idx in range(line_start, line_end+1): + run_ch = chars[run_idx] + + if run_ch['level'] >= level: + if _start is None: + _start = _end = run_idx + else: + _end = run_idx + else: + if _end is not None: + chars[_start:+_end+1] = \ + reversed(chars[_start:+_end+1]) + _start = _end = None + + # anything remaining ? 
+ if _start is not None: + chars[_start:+_end+1] = \ + reversed(chars[_start:+_end+1]) + + +def reorder_resolved_levels(storage, debug): + """L1 and L2 rules""" + + # Applies L1. + + should_reset = True + chars = storage['chars'] + + for _ch in chars[::-1]: + # L1. On each line, reset the embedding level of the following + # characters to the paragraph embedding level: + if _ch['orig'] in ('B', 'S'): + # 1. Segment separators, + # |