From b6c1221eb131f67b44b6f3b09660fa3be56447bd Mon Sep 17 00:00:00 2001 From: Viktor Ashirov Date: Jan 30 2018 13:08:37 +0000 Subject: Issue 49530 - Add pseudolocalization option for dbgen Bug Description: Pseudolocalization can be used to test matching rules and indexing of UTF-8 strings in various attributes. Fix Description: Add new function 'pseudolocalize' to utils module of lib389 that accepts a string and returns a pseudolocalized string. For example: "389 Directory Server" will become "③⑧⑨ Ðîŕéçţöŕý Šéŕṽéŕ" Add new optional argument 'pseudol10n' for dbgen module that is False by default. If True, attributes like cn, sn, title, description, etc. will be pseudolocalized. https://pagure.io/389-ds-base/issue/49530 Reviewed by: lkrispen, tbordaz, spichugi (Thank you all!) --- diff --git a/src/lib389/lib389/dbgen.py b/src/lib389/lib389/dbgen.py index 447c697..e0099a7 100644 --- a/src/lib389/lib389/dbgen.py +++ b/src/lib389/lib389/dbgen.py @@ -8,6 +8,7 @@ # Replacement of the dbgen.pl utility +from lib389.utils import pseudolocalize import random import os import pwd @@ -95,7 +96,7 @@ roomNumber: 5164 carLicense: 21SJJAG l: {LOCATION} ou: {OU} -mail: {FIRST}.{LAST}@example.com +mail: {UID}@example.com postalAddress: 518, Dept #851, Room#{OU} title: {TITLE} usercertificate;binary:: MIIBvjCCASegAwIBAgIBAjANBgkqhkiG9w0BAQQFADAnMQ8wDQYD @@ -114,48 +115,20 @@ DBGEN_HEADER = """dn: {SUFFIX} objectClass: top objectClass: domain dc: example -aci: (target=ldap:///{SUFFIX})(targetattr=*)(version 3.0; acl "acl1"; allow(write) userdn = "ldap:///self";) +aci: (target=ldap:///{SUFFIX})(targetattr=*)(version 3.0; acl "acl1"; allow(write) userdn = "ldap:///self";) aci: (target=ldap:///{SUFFIX})(targetattr=*)(version 3.0; acl "acl2"; allow(write) groupdn = "ldap:///cn=Directory Administrators, {SUFFIX}";) aci: (target=ldap:///{SUFFIX})(targetattr=*)(version 3.0; acl "acl3"; allow(read, search, compare) userdn = "ldap:///anyone";) -dn: ou=Accounting,{SUFFIX} -objectClass: top 
-objectClass: organizationalUnit -ou: Accounting - -dn: ou=Product Development,{SUFFIX} -objectClass: top -objectClass: organizationalUnit -ou: Product Development - -dn: ou=Product Testing,{SUFFIX} -objectClass: top -objectClass: organizationalUnit -ou: Product Testing - -dn: ou=Human Resources,{SUFFIX} -objectClass: top -objectClass: organizationalUnit -ou: Human Resources - -dn: ou=Payroll,{SUFFIX} -objectClass: top -objectClass: organizationalUnit -ou: Payroll - -dn: ou=People,{SUFFIX} -objectClass: top -objectClass: organizationalUnit -ou: People +""" -dn: ou=Groups,{SUFFIX} +DBGEN_OU_TEMPLATE = """dn: ou={OU},{SUFFIX} objectClass: top objectClass: organizationalUnit -ou: Groups +ou: {OU} """ -def dbgen(instance, number, ldif_file, suffix): +def dbgen(instance, number, ldif_file, suffix, pseudol10n=False): familyname_file = os.path.join(instance.ds_paths.data_dir, 'dirsrv/data/dbgen-FamilyNames') givename_file = os.path.join(instance.ds_paths.data_dir, 'dirsrv/data/dbgen-GivenNames') familynames = [] @@ -167,6 +140,9 @@ def dbgen(instance, number, ldif_file, suffix): with open(ldif_file, 'w') as output: output.write(DBGEN_HEADER.format(SUFFIX=suffix)) + for ou in DBGEN_OUS: + ou = pseudolocalize(ou) if pseudol10n else ou + output.write(DBGEN_OU_TEMPLATE.format(SUFFIX=suffix, OU=ou)) for i in range(0, number): # Pick a random ou ou = random.choice(DBGEN_OUS) @@ -175,9 +151,16 @@ def dbgen(instance, number, ldif_file, suffix): # How do we subscript from a generator? initials = "%s. 
%s" % (first[0], last[0]) uid = "%s%s%s" % (first[0], last, i) - dn = "uid=%s,ou=%s,%s" % (uid, ou, suffix) l = random.choice(DBGEN_LOCATIONS) title = "%s %s" % (random.choice(DBGEN_TITLE_LEVELS), random.choice(DBGEN_POSITIONS)) + if pseudol10n: + ou = pseudolocalize(ou) + first = pseudolocalize(first) + last = pseudolocalize(last) + initials = pseudolocalize(initials) + l = pseudolocalize(l) + title = pseudolocalize(title) + dn = "uid=%s,ou=%s,%s" % (uid, ou, suffix) output.write(DBGEN_TEMPLATE.format( DN=dn, UID=uid, @@ -197,5 +180,3 @@ def dbgen(instance, number, ldif_file, suffix): uid = pwd.getpwnam(instance.userid).pw_uid gid = grp.getgrnam(instance.userid).gr_gid os.chown(ldif_file, uid, gid) - - diff --git a/src/lib389/lib389/utils.py b/src/lib389/lib389/utils.py index 70748a6..74a7d21 100644 --- a/src/lib389/lib389/utils.py +++ b/src/lib389/lib389/utils.py @@ -32,6 +32,7 @@ import socket import time import sys import filecmp +import six from socket import getfqdn from ldapurl import LDAPUrl from contextlib import closing @@ -75,6 +76,105 @@ searches = { '(&(objectclass=glue)(objectclass=extensibleobject))', ['dn']) } +# Map table for pseudolocalized strings +_chars = { + " ": six.u("\u2003"), + "!": six.u("\u00a1"), + "\"": six.u("\u2033"), + "#": six.u("\u266f"), + "$": six.u("\u20ac"), + "%": six.u("\u2030"), + "&": six.u("\u214b"), + "'": six.u("\u00b4"), + ")": six.u("}"), + "(": six.u("{"), + "*": six.u("\u204e"), + "+": six.u("\u207a"), + ",": six.u("\u060c"), + "-": six.u("\u2010"), + ".": six.u("\u00b7"), + "/": six.u("\u2044"), + "0": six.u("\u24ea"), + "1": six.u("\u2460"), + "2": six.u("\u2461"), + "3": six.u("\u2462"), + "4": six.u("\u2463"), + "5": six.u("\u2464"), + "6": six.u("\u2465"), + "7": six.u("\u2466"), + "8": six.u("\u2467"), + "9": six.u("\u2468"), + ":": six.u("\u2236"), + ";": six.u("\u204f"), + "<": six.u("\u2264"), + "=": six.u("\u2242"), + ">": six.u("\u2265"), + "?": six.u("\u00bf"), + "@": six.u("\u055e"), + "A": 
six.u("\u00c5"), + "B": six.u("\u0181"), + "C": six.u("\u00c7"), + "D": six.u("\u00d0"), + "E": six.u("\u00c9"), + "F": six.u("\u0191"), + "G": six.u("\u011c"), + "H": six.u("\u0124"), + "I": six.u("\u00ce"), + "J": six.u("\u0134"), + "K": six.u("\u0136"), + "L": six.u("\u013b"), + "M": six.u("\u1e40"), + "N": six.u("\u00d1"), + "O": six.u("\u00d6"), + "P": six.u("\u00de"), + "Q": six.u("\u01ea"), + "R": six.u("\u0154"), + "S": six.u("\u0160"), + "T": six.u("\u0162"), + "U": six.u("\u00db"), + "V": six.u("\u1e7c"), + "W": six.u("\u0174"), + "X": six.u("\u1e8a"), + "Y": six.u("\u00dd"), + "Z": six.u("\u017d"), + "[": six.u("\u2045"), + "\\": six.u("\u2216"), + "]": six.u("\u2046"), + "^": six.u("\u02c4"), + "_": six.u("\u203f"), + "`": six.u("\u2035"), + "a": six.u("\u00e5"), + "b": six.u("\u0180"), + "c": six.u("\u00e7"), + "d": six.u("\u00f0"), + "e": six.u("\u00e9"), + "f": six.u("\u0192"), + "g": six.u("\u011d"), + "h": six.u("\u0125"), + "i": six.u("\u00ee"), + "j": six.u("\u0135"), + "k": six.u("\u0137"), + "l": six.u("\u013c"), + "m": six.u("\u0271"), + "n": six.u("\u00f1"), + "o": six.u("\u00f6"), + "p": six.u("\u00fe"), + "q": six.u("\u01eb"), + "r": six.u("\u0155"), + "s": six.u("\u0161"), + "t": six.u("\u0163"), + "u": six.u("\u00fb"), + "v": six.u("\u1e7d"), + "w": six.u("\u0175"), + "x": six.u("\u1e8b"), + "y": six.u("\u00fd"), + "z": six.u("\u017e"), + "{": six.u("("), + "}": six.u(")"), + "|": six.u("\u00a6"), + "~": six.u("\u02de"), +} + # # Utilities # @@ -871,3 +971,13 @@ def ensure_dict_str(val): else: retdict[k] = ensure_str(val[k]) return retdict + + +def pseudolocalize(string): + pseudo_string = six.u("") + for char in string: + try: + pseudo_string += _chars[char] + except KeyError: + pseudo_string += char + return pseudo_string