my xfce4 dotfiles
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 
 
 

213 lines
7.8 KiB

"""
Update Emoji.py
Refreshes OMZ emoji database based on the latest Unicode spec
"""
import re
import json
# Input data file, iterated line by line and closed by the parsing loop below.
# It is opened explicitly as UTF-8: the lines carry the emoji characters
# themselves, so relying on the platform default encoding can fail or
# silently mis-decode them.
spec = open("emoji-data.txt", "r", encoding="utf-8")
# Regexes
# regex_emoji captures, in order:
# the code points, the type (status), the actual emoji, and its official name
regex_emoji = r"^([\w ].*?\S)\s*;\s*([\w-]+)\s*#\s*(.*?)\s(\S.*).*$"
# regex_group captures whether a comment line opens a "group" or a
# "subgroup", followed by the name of that group or subgroup
regex_group = r"^#\s*(group|subgroup):\s*(.*)$"
# Preamble of the generated zsh file: it is the first thing written to
# emoji-char-definitions.zsh, before any emoji definition. It documents the
# associative arrays and declares them with `typeset`.
headers = """
# emoji-char-definitions.zsh - Emoji definitions for oh-my-zsh emoji plugin
#
# This file is auto-generated by update_emoji.py. Do not edit it manually.
#
# This contains the definition for:
# $emoji - which maps character names to Unicode characters
# $emoji_flags - maps country names to Unicode flag characters using region
# indicators
# $emoji_mod - maps modifier components to Unicode characters
# $emoji_groups - a single associative array to avoid cluttering up the
# global namespace, and to allow adding additional group
# definitions at run time. The keys are the group names, and
# the values are whitespace-separated lists of emoji
# character names.
# Main emoji
typeset -gAH emoji
# National flags
typeset -gAH emoji_flags
# Combining modifiers
typeset -gAH emoji_mod
# Emoji groups
typeset -gAH emoji_groups
"""
#######
# Adding country codes
#######
# This is the only part of this script that relies on an external library
# (country_converter), and is hence commented out by default.
# You can uncomment it to have country codes added as aliases for flag
# emojis. (By default, when you install this extension, country codes are
# included as aliases, but not if you re-run this script without uncommenting.)
# Warning: country_converter is very verbose, and will print warnings all over
# your terminal.
# import country_converter as coco # pylint: disable=wrong-import-position
# cc = coco.CountryConverter()
# def country_iso(_all_names, _omz_name):
# """ Using the external library country_converter,
# this function can detect the ISO2 and ISO3 codes
# of the country. It takes as argument the array
# with all the names of the emoji, and returns that array."""
# omz_no_underscore = re.sub(r'_', r' ', _omz_name)
# iso2 = cc.convert(names=[omz_no_underscore], to='ISO2')
# if iso2 != 'not found':
# _all_names.append(iso2)
# iso3 = cc.convert(names=[omz_no_underscore], to='ISO3')
# _all_names.append(iso3)
# return _all_names
#######
# Helper functions
#######
def code_to_omz(_code_points):
    """Turn space-separated hex code points into a chain of ``\\U`` escapes.

    Every piece obtained by splitting on a single space is prefixed with a
    literal backslash-U and the pieces are concatenated, e.g.
    ``"1F468 200D"`` becomes ``\\U1F468\\U200D``.
    """
    escape = r'\U'
    return "".join(escape + piece for piece in _code_points.split(' '))
def name_to_omz(_name, _group, _subgroup, _status):
    """Derive the snake_case key used for an emoji in the generated file.

    _name     -- official emoji name
    _group    -- group the emoji belongs to; only "Flags" is treated specially
    _subgroup -- accepted for interface symmetry, not used here
    _status   -- qualification status; "unqualified" and
                 "minimally-qualified" add a suffix to the key

    Returns the key as a string.
    """
    def _to_snake(text):
        """ Apply the chain of regex substitutions, in order """
        # NOTE: this pattern matches only the literal sequence ".()";
        # isolated dots or parentheses fall through to the step that turns
        # non-word characters into underscores.
        text = re.sub(r'\.\(\)', r'', text)
        text = re.sub(r'\&', r'and', text)
        # Everything except '#', '*' and word characters becomes '_'
        text = re.sub(r'[^\#\*\w]', r'_', text)
        # Single pass: each pair of underscores shrinks to one, so a run of
        # three leaves two behind.
        return re.sub(r'__', r'_', text)

    # Flag names look like "<prefix>: <country>"; only the part after the
    # first ": " is kept, which strips the leading "flag".
    pieces = _name.split(": ")
    if _group == "Flags" and len(pieces) > 1:
        base_text = pieces[1]
    else:
        base_text = _name

    # The status suffix keeps every emoji combination addressable, including
    # those that are not officially sanctioned and may be implemented by a
    # single vendor only.
    status_suffix = {
        "unqualified": "_unqualified",
        "minimally-qualified": "_minimally",
    }.get(_status, "")
    return _to_snake(base_text) + status_suffix
def increment_name(_shortname):
    """Return _shortname with its trailing counter increased by one.

    A name without trailing digits gets "_1" appended; otherwise the whole
    run of trailing decimal digits is read as the counter and incremented:
    'woman_detective_unqualified' -> 'woman_detective_unqualified_1'
    'woman_detective_unqualified_1' -> 'woman_detective_unqualified_2'
    'woman_detective_unqualified_9' -> 'woman_detective_unqualified_10'
    'woman_detective_unqualified_19' -> 'woman_detective_unqualified_20'

    An empty string yields '_1'.
    """
    # Strip the complete digit run, not just the last character, so that
    # multi-digit counters carry correctly (19 -> 20 rather than 110).
    stem = _shortname.rstrip("0123456789")
    counter = _shortname[len(stem):]
    if counter:
        return stem + str(int(counter) + 1)
    return _shortname + "_1"
########
# Going through every line
########
# Parser state: the group/subgroup currently open, the name emitted for the
# previous emoji, and the un-incremented name of that previous emoji.
group, subgroup, short_name_buffer = "", "", ""
base_name_buffer = None
# One entry per emoji line:
# [omz_codes, status, emoji, omz_name, group, subgroup]
emoji_database = []
try:
    for line in spec:
        # First, test if this line opens a group or subgroup
        group_match = re.match(regex_group, line)
        if group_match:
            gr_or_sub, name = group_match.groups()
            if gr_or_sub == "group":
                group = name
            elif gr_or_sub == "subgroup":
                subgroup = name
            continue  # Moving on...
        # Second, test if this line references one emoji
        emoji_match = re.match(regex_emoji, line)
        if not emoji_match:
            continue
        code_points, status, emoji, name = emoji_match.groups()
        omz_codes = code_to_omz(code_points)
        base_name = name_to_omz(name, group, subgroup, status)
        # If this emoji has the same shortname as the preceding one, continue
        # the numbering from the name that was emitted last. The comparison
        # is an exact match on the un-incremented name: a substring test
        # would also rename e.g. "eye" when it follows "eyes".
        if base_name == base_name_buffer:
            omz_name = increment_name(short_name_buffer)
        else:
            omz_name = base_name
        base_name_buffer = base_name
        short_name_buffer = omz_name
        emoji_database.append(
            [omz_codes, status, emoji, omz_name, group, subgroup])
finally:
    # Release the input file even if a line fails to parse
    spec.close()
########
# Write to emoji-char-definitions.zsh
########
# Aliases for emojis are retrieved through the DB of Gemoji
# Retrieved on Aug 9 2019 from the following URL:
# https://raw.githubusercontent.com/github/gemoji/master/db/emoji.json
# The file is read as UTF-8 and closed as soon as it has been parsed.
with open("gemoji_db.json", encoding="utf-8") as gemoji_db:
    j = json.load(gemoji_db)
aliases_map = {entry['emoji']: entry['aliases'] for entry in j}
all_omz_names = [emoji_data[3] for emoji_data in emoji_database]
# Set view of the generated names, for constant-time alias collision checks
taken_names = set(all_omz_names)
# Each group value is the text placed between the double quotes of its
# emoji_groups[...] assignment: one " name" line per member.
emoji_groups = {"fruits": "\n", "vehicles": "\n", "hands": "\n",
                "people": "\n", "animals": "\n", "faces": "\n",
                "flags": "\n"}
# Let's begin writing to this file. UTF-8 is requested explicitly because
# generated names can contain non-ASCII word characters; the context manager
# closes the file even if writing fails part-way.
with open("emoji-char-definitions.zsh", "w", encoding="utf-8") as output:
    output.write(headers)
    # First, write every emoji down
    for (_omz_codes, _status, _emoji,
         _omz_name, _group, _subgroup) in emoji_database:
        # One emoji can be mapped to multiple names (aliases or country codes)
        names_for_this_emoji = [_omz_name]
        # Pick the map the emoji will be located in; flags take precedence
        # over the "component" status.
        if _group == "Flags":
            emoji_map = "emoji_flags"
            # Adding country codes (Optional, see above)
            # names_for_this_emoji = country_iso(names_for_this_emoji, _omz_name)
        elif _status == "component":
            emoji_map = "emoji_mod"
        else:
            emoji_map = "emoji"
        # Add the Gemoji aliases that do not clash with a generated name
        for alias in aliases_map.get(_emoji, ()):
            if alias not in taken_names:
                names_for_this_emoji.append(alias)
        # And now we write to the definitions file
        output.writelines(
            f"{emoji_map}[{one_name}]=$'{_omz_codes}'\n"
            for one_name in names_for_this_emoji)
        # Storing the emoji in defined subgroups for the next step;
        # only fully-qualified emoji are listed in groups.
        if _status != "fully-qualified":
            continue
        if _subgroup == "food-fruit":
            group_key = "fruits"
        elif "transport-" in _subgroup:
            group_key = "vehicles"
        elif "hand-" in _subgroup:
            group_key = "hands"
        elif "person-" in _subgroup or _subgroup == "family":
            group_key = "people"
        elif "animal-" in _subgroup:
            group_key = "animals"
        elif "face-" in _subgroup:
            group_key = "faces"
        elif _group == "Flags":
            group_key = "flags"
        else:
            group_key = None
        if group_key is not None:
            emoji_groups[group_key] += f" {_omz_name}\n"
    # Second, write the subgroups to the end of the file
    for name, string in emoji_groups.items():
        output.write(f'\nemoji_groups[{name}]="{string}"\n')