Skip to content

Commit ce61c0f

Browse files
committed
refactor capitalization and regex config (derek73#1)
Just noticed they are actually the same kind of thing, so create a dotdict to handle their config
1 parent b8db4dd commit ce61c0f

4 files changed

Lines changed: 134 additions & 75 deletions

File tree

README.rst

Lines changed: 105 additions & 56 deletions
Original file line numberDiff line numberDiff line change
@@ -111,12 +111,12 @@ Usage
111111
u'Jason Alexander'
112112
>>> name
113113
<HumanName : [
114-
Title: ''
115-
First: 'Juan'
116-
Middle: 'Jason Alexander'
117-
Last: 'Velasquez y Garcia'
118-
Suffix: 'Jr.'
119-
Nickname: ''
114+
title: ''
115+
first: 'Juan'
116+
middle: 'Jason Alexander'
117+
last: 'Velasquez y Garcia'
118+
suffix: 'Jr.'
119+
nickname: ''
120120
]>
121121
>>> name = HumanName("Dr. Juan Q. Xavier de la Vega III")
122122
>>> name2 = HumanName("de la vega, dr. juan Q. xavier III")
@@ -177,16 +177,16 @@ included in the equals test since they do not signify a different
177177
person.
178178

179179

180-
Customizing the Parser with Your Own Constants
181-
----------------------------------------------
180+
Customizing the Parser with Your Own Configuration
181+
--------------------------------------------------
182182

183183
Recognition of titles, prefixes, suffixes and conjunctions is provided
184184
by matching the lower case characters of a name piece with pre-defined
185185
sets located in nameparser.config_. Since everyone's data are a
186186
little bit different, you can easily adjust these predefined sets to
187187
help fine tune the parser for your dataset.
188188

189-
These constants are set at the module level using nameparser.config_.
189+
These constants are defined in the nameparser.config_ module.
190190

191191
.. _nameparser.config: https://github.com/derek73/python-nameparser/tree/master/nameparser/config
192192

@@ -237,24 +237,24 @@ that "Hon" can be parsed as a first name.
237237
>>> hn = HumanName("Hon Solo")
238238
>>> hn
239239
<HumanName : [
240-
Title: 'Hon'
241-
First: ''
242-
Middle: ''
243-
Last: 'Solo'
244-
Suffix: ''
245-
Nickname: ''
240+
title: 'Hon'
241+
first: ''
242+
middle: ''
243+
last: 'Solo'
244+
suffix: ''
245+
nickname: ''
246246
]>
247247
>>> from nameparser.config import constants
248248
>>> constants.titles.remove('hon')
249249
>>> hn = HumanName("Hon Solo")
250250
>>> hn
251251
<HumanName : [
252-
Title: ''
253-
First: 'Hon'
254-
Middle: ''
255-
Last: 'Solo'
256-
Suffix: ''
257-
Nickname: ''
252+
title: ''
253+
first: 'Hon'
254+
middle: ''
255+
last: 'Solo'
256+
suffix: ''
257+
nickname: ''
258258
]>
259259

260260

@@ -273,12 +273,12 @@ methods and each string will be added or removed.
273273
>>> hn = HumanName("Assoc Dean of Chemistry Robert Johns")
274274
>>> hn
275275
<HumanName : [
276-
Title: 'Assoc Dean of Chemistry'
277-
First: 'Robert'
278-
Middle: ''
279-
Last: 'Johns'
280-
Suffix: ''
281-
Nickname: ''
276+
title: 'Assoc Dean of Chemistry'
277+
first: 'Robert'
278+
middle: ''
279+
last: 'Johns'
280+
suffix: ''
281+
nickname: ''
282282
]>
283283

284284

@@ -292,26 +292,27 @@ the config on one instance could modify the behavior of another instance.
292292

293293
::
294294

295+
>>> from nameparser import HumanName
295296
>>> hn = HumanName("Dean Robert Johns")
296297
>>> hn.C.titles.add('dean')
297298
>>> hn
298299
<HumanName : [
299-
Title: 'Dean'
300-
First: 'Robert'
301-
Middle: ''
302-
Last: 'Johns'
303-
Suffix: ''
304-
Nickname: ''
300+
title: 'Dean'
301+
first: 'Robert'
302+
middle: ''
303+
last: 'Johns'
304+
suffix: ''
305+
nickname: ''
305306
]>
306307
>>> hn2 = HumanName("Dean Robert Johns")
307308
>>> hn2
308309
<HumanName : [
309-
Title: 'Dean'
310-
First: 'Robert'
311-
Middle: ''
312-
Last: 'Johns'
313-
Suffix: ''
314-
Nickname: ''
310+
title: 'Dean'
311+
first: 'Robert'
312+
middle: ''
313+
last: 'Johns'
314+
suffix: ''
315+
nickname: ''
315316
]>
316317

317318

@@ -323,26 +324,73 @@ reference to the module-level config values with the behavior described above.
323324

324325
::
325326

327+
>>> from nameparser import HumanName
326328
>>> hn = HumanName("Dean Robert Johns", None)
327329
>>> hn.C.titles.add('dean')
328330
>>> hn
329331
<HumanName : [
330-
Title: 'Dean'
331-
First: 'Robert'
332-
Middle: ''
333-
Last: 'Johns'
334-
Suffix: ''
335-
Nickname: ''
332+
title: 'Dean'
333+
first: 'Robert'
334+
middle: ''
335+
last: 'Johns'
336+
suffix: ''
337+
nickname: ''
336338
]>
339+
>>> hn.has_own_config
340+
True
337341
>>> hn2 = HumanName("Dean Robert Johns")
338342
>>> hn2
339343
<HumanName : [
340-
Title: ''
341-
First: 'Dean'
342-
Middle: 'Robert'
343-
Last: 'Johns'
344-
Suffix: ''
345-
Nickname: ''
344+
title: ''
345+
first: 'Dean'
346+
middle: 'Robert'
347+
last: 'Johns'
348+
suffix: ''
349+
nickname: ''
350+
]>
351+
>>> hn2.has_own_config
352+
False
353+
354+
355+
Refreshing the Parse
356+
++++++++++++++++++++
357+
358+
The full name is parsed upon assignment to the ``full_name`` attribute or
359+
instantiation. If you change the configuration or other internal
360+
data after assigning the full name, the name will need to be re-parsed with
361+
the ``parse_full_name()`` method before you see those changes with ``repr()``.
362+
363+
364+
>>> from nameparser import HumanName
365+
>>> hn = HumanName("Dean Robert Johns")
366+
>>> hn
367+
<HumanName : [
368+
title: 'Dean'
369+
first: 'Robert'
370+
middle: ''
371+
last: 'Johns'
372+
suffix: ''
373+
nickname: ''
374+
]>
375+
>>> hn.C.titles.remove('dean')
376+
>>> hn
377+
<HumanName : [
378+
title: 'Dean'
379+
first: 'Robert'
380+
middle: ''
381+
last: 'Johns'
382+
suffix: ''
383+
nickname: ''
384+
]>
385+
>>> hn.parse_full_name()
386+
>>> hn
387+
<HumanName : [
388+
title: ''
389+
first: 'Dean'
390+
middle: 'Robert'
391+
last: 'Johns'
392+
suffix: ''
393+
nickname: ''
346394
]>
347395

348396

@@ -370,11 +418,12 @@ name will be parsed.
370418

371419
$ ./tests.py "Secretary of State Hillary Rodham-Clinton"
372420
<HumanName : [
373-
Title: 'Secretary of State'
374-
First: 'Hillary'
375-
Middle: ''
376-
Last: 'Rodham-Clinton'
377-
Suffix: ''
421+
title: 'Secretary of State'
422+
first: 'Hillary'
423+
middle: ''
424+
last: 'Rodham-Clinton'
425+
suffix: ''
426+
nickname: ''
378427
]>
379428
380429

nameparser/config/__init__.py

Lines changed: 27 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,15 @@
1111
from nameparser.config.titles import FIRST_NAME_TITLES
1212
from nameparser.config.regexes import REGEXES
1313

14-
class Manager(collections.Set):
14+
class SetManager(collections.Set):
15+
'''
16+
Easily add and remove config variables per module or instance.
17+
18+
Only special functionality beyond that provided by set() is
19+
to normalize constants for comparison (lower case, no periods)
20+
when they are add()ed and remove()d and allow passing multiple
21+
string arguments to the add() and remove() methods.
22+
'''
1523
def __init__(self, elements):
1624
self.elements = set(elements)
1725

@@ -48,28 +56,33 @@ def remove(self, *strings):
4856
return self.elements
4957

5058

51-
class Regexes(object):
52-
def __init__(self):
53-
for name, re in REGEXES:
54-
setattr(self, name, re)
59+
class TupleManager(dict):
60+
'''
61+
A.k.a. dotdict: turns the tuple of pairs into a slightly friendlier dictionary with
62+
dot.notation access.
63+
'''
64+
def __getattr__(self, attr):
65+
return self.get(attr)
66+
__setattr__= dict.__setitem__
67+
__delattr__= dict.__delitem__
5568

5669

5770
class Constants(object):
5871

5972
def __init__(self):
60-
self.prefixes = Manager(PREFIXES)
61-
self.suffixes = Manager(SUFFIXES)
62-
self.titles = Manager(TITLES)
63-
self.first_name_titles = Manager(FIRST_NAME_TITLES)
64-
self.conjunctions = Manager(CONJUNCTIONS)
65-
self.RE = Regexes()
73+
self.prefixes = SetManager(PREFIXES)
74+
self.suffixes = SetManager(SUFFIXES)
75+
self.titles = SetManager(TITLES)
76+
self.first_name_titles = SetManager(FIRST_NAME_TITLES)
77+
self.conjunctions = SetManager(CONJUNCTIONS)
78+
self.capitalization_exceptions = TupleManager(CAPITALIZATION_EXCEPTIONS)
79+
self.RE = TupleManager(REGEXES)
6680

6781
@property
6882
def suffixes_prefixes_titles(self):
6983
return self.prefixes | self.suffixes | self.titles
7084

71-
# these arent strings so Manager isn't helpful
72-
capitalization_exceptions = CAPITALIZATION_EXCEPTIONS
73-
7485

86+
# provide a common instance for the module to share
87+
# so it's easy to adjust configuration for the entire module.
7588
constants = Constants()

nameparser/parser.py

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -7,8 +7,6 @@
77
from nameparser.util import lc
88
from nameparser.config import constants
99
from nameparser.config import Constants
10-
from nameparser.config import regexes
11-
from nameparser.config import Regexes
1210

1311
# http://code.google.com/p/python-nameparser/issues/detail?id=10
1412
log = logging.getLogger('HumanName')
@@ -103,7 +101,7 @@ def __str__(self):
103101
def __repr__(self):
104102
if self.unparsable:
105103
return "<%(class)s : [ Unparsable ] >" % {'class': self.__class__.__name__,}
106-
return "<%(class)s : [\n\tTitle: '%(title)s' \n\tFirst: '%(first)s' \n\tMiddle: '%(middle)s' \n\tLast: '%(last)s' \n\tSuffix: '%(suffix)s'\n\tNickname: '%(nickname)s'\n]>" % {
104+
return "<%(class)s : [\n\ttitle: '%(title)s' \n\tfirst: '%(first)s' \n\tmiddle: '%(middle)s' \n\tlast: '%(last)s' \n\tsuffix: '%(suffix)s'\n\tnickname: '%(nickname)s'\n]>" % {
107105
'class': self.__class__.__name__,
108106
'title': self.title,
109107
'first': self.first,
@@ -484,7 +482,7 @@ def find_p(p):
484482
def cap_word(self, word):
485483
if self.is_prefix(word) or self.is_conjunction(word):
486484
return lc(word)
487-
exceptions = dict(self.C.capitalization_exceptions)
485+
exceptions = self.C.capitalization_exceptions
488486
if word in exceptions:
489487
return exceptions[word]
490488
mac_match = self.C.RE.mac.match(word)

tests.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,6 @@
1414
from nameparser.util import u
1515
from nameparser.config import Constants
1616
from nameparser.config import constants
17-
from nameparser.config import Regexes
1817

1918

2019

0 commit comments

Comments
 (0)