# -*- coding: utf-8 -*-
import re
from datetime import timedelta, datetime
import calendar

# ---------------------------------------------------------------------------
# Unit-word variations that the parser can capture.
# Each list holds the spellings accepted for one time unit.
# ---------------------------------------------------------------------------
year_variations = ['year', 'years', 'yrs']
day_variations = ['days', 'day']
minute_variations = ['minute', 'minutes', 'mins']
hour_variations = ['hrs', 'hours', 'hour']
week_variations = ['weeks', 'week', 'wks']
month_variations = ['month', 'months']

# ---------------------------------------------------------------------------
# Regex fragments.  These are plain strings that get interpolated (via ``%s``)
# into larger patterns, so none of them is compiled here.  Every fragment
# containing a regex escape (``\s``, ``\d``) is a raw string: in a normal
# string literal those are invalid escape sequences and newer Python versions
# warn about them.
# ---------------------------------------------------------------------------
day_names = 'monday|tuesday|wednesday|thursday|friday|saturday|sunday'
month_names_long = (
    'january|february|march|april|may|june|july|august|'
    'september|october|november|december'
)
# Long names come first so that e.g. "january" is tried before "jan".
month_names = month_names_long + '|jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec'
day_nearest_names = 'today|yesterday|tomorrow|tonight|tonite'

# Spelled-out numbers; a leading "a" followed by whitespace ("a week ago")
# is accepted as well.
numbers = (
    r'(^a(?=\s)|one|two|three|four|five|six|seven|eight|nine|ten|'
    'eleven|twelve|thirteen|fourteen|fifteen|sixteen|seventeen|'
    'eighteen|nineteen|twenty|thirty|forty|fifty|sixty|seventy|'
    'eighty|ninety|hundred|thousand)'
)

# Alternation of the duration units usable in relative expressions.
# NOTE(review): hour_variations is defined above but is not part of this
# alternation -- confirm whether hours are intentionally unsupported here.
re_dmy = '(' + '|'.join(
    day_variations
    + minute_variations
    + year_variations
    + week_variations
    + month_variations
) + ')'

re_duration = r'(before|after|earlier|later|ago|from\snow)'
re_year = r'(19|20)\d{2}|^(19|20)\d{2}'
re_timeframe = r'this|coming|next|following|previous|last|end\sof\sthe'
# NOTE(review): 'fourth' appears twice; the second one may have been meant to
# be 'fifth' -- verify against the ordinal lookup table before changing it.
re_ordinal = 'st|nd|rd|th|first|second|third|fourth|fourth|' + re_timeframe

# Time of day: an hour followed by either ":<minute>" or an am/pm marker.
# The group names are the ones the parser callbacks read through
# m.group('hour'), m.group('minute') and m.group('convention').
re_time = r'(?P<hour>\d{1,2})(\:(?P<minute>\d{1,2})|(?P<convention>am|pm))'
re_separator = 'of|at|on'
) ''' % (day_names, re_ordinal, month_names, re_year, re_separator, re_time), (re.VERBOSE | re.IGNORECASE) ), lambda m, base_date: datetime( int(m.group('year') if m.group('year') else base_date.year), HASHMONTHS[m.group('month').strip().lower()], int(m.group('day') if m.group('day') else 1), ) + timedelta(**convert_time_to_hour_minute( m.group('hour'), m.group('minute'), m.group('convention') )) ), ( re.compile( r''' ( ((?P%s)[,\s][-\s]*)? #Matches Monday, Jan 12 2012, Jan 12 2012 etc (?P%s) # Matches any month name [-\s] # Space ((?P\d{1,2})) # Matches a digit (%s)? ([-\s](?P%s))? # Year ((\s|,\s|\s(%s))?\s*(%s))? ) ''' % (day_names, month_names, re_ordinal, re_year, re_separator, re_time), (re.VERBOSE | re.IGNORECASE) ), lambda m, base_date: datetime( int(m.group('year') if m.group('year') else base_date.year), HASHMONTHS[m.group('month').strip().lower()], int(m.group('day') if m.group('day') else 1) ) + timedelta(**convert_time_to_hour_minute( m.group('hour'), m.group('minute'), m.group('convention') )) ), ( re.compile( r''' ( (?P%s) # Matches any month name [-\s] # One or more space (?P\d{1,2}) # Matches a digit (%s)? [-\s]\s*? (?P%s) # Year ((\s|,\s|\s(%s))?\s*(%s))? ) ''' % (month_names, re_ordinal, re_year, re_separator, re_time), (re.VERBOSE | re.IGNORECASE) ), lambda m, base_date: datetime( int(m.group('year') if m.group('year') else base_date.year), HASHMONTHS[m.group('month').strip().lower()], int(m.group('day') if m.group('day') else 1), ) + timedelta(**convert_time_to_hour_minute( m.group('hour'), m.group('minute'), m.group('convention') )) ), ( re.compile( r''' ( ((?P\d+|(%s[-\s]?)+)\s)? # Matches any number or string 25 or twenty five (?P%s)s?\s # Matches days, months, years, weeks, minutes (?P%s) # before, after, earlier, later, ago, from now (\s*(?P(%s)))? ((\s|,\s|\s(%s))?\s*(%s))? 
) ''' % (numbers, re_dmy, re_duration, day_nearest_names, re_separator, re_time), (re.VERBOSE | re.IGNORECASE) ), lambda m, base_date: date_from_duration( base_date, m.group('number'), m.group('unit').lower(), m.group('duration').lower(), m.group('base_time') ) + timedelta(**convert_time_to_hour_minute( m.group('hour'), m.group('minute'), m.group('convention') )) ), ( re.compile( r''' ( (?P%s) # First quarter of 2014 \s+ quarter\sof \s+ (?P%s) ) ''' % (re_ordinal, re_year), (re.VERBOSE | re.IGNORECASE) ), lambda m, base_date: date_from_quarter( base_date, HASHORDINALS[m.group('ordinal').lower()], int(m.group('year') if m.group('year') else base_date.year) ) ), ( re.compile( r''' ( (?P\d+) (?P%s) # 1st January 2012 ((\s|,\s|\s(%s))?\s*)? (?P%s) ([,\s]\s*(?P%s))? ) ''' % (re_ordinal, re_separator, month_names, re_year), (re.VERBOSE | re.IGNORECASE) ), lambda m, base_date: datetime( int(m.group('year') if m.group('year') else base_date.year), int(HASHMONTHS[m.group('month').lower()] if m.group('month') else 1), int(m.group('ordinal_value') if m.group('ordinal_value') else 1), ) ), ( re.compile( r''' ( (?P%s) \s+ (?P\d+) (?P%s) # January 1st 2012 ([,\s]\s*(?P%s))? ) ''' % (month_names, re_ordinal, re_year), (re.VERBOSE | re.IGNORECASE) ), lambda m, base_date: datetime( int(m.group('year') if m.group('year') else base_date.year), int(HASHMONTHS[m.group('month').lower()] if m.group('month') else 1), int(m.group('ordinal_value') if m.group('ordinal_value') else 1), ) ), ( re.compile( r''' (?P