#115 Convert wiki/RST format to Markdown for Trac Ticket Migration
Opened 7 years ago by mreynolds. Modified 7 years ago
mreynolds/pagure-importer issue91  into  master

@@ -11,6 +11,19 @@ 

      get_close_status, is_image, issue_to_json, get_secure_filename)

  from pagure_importer.utils.models import User, Issue, IssueComment

  

+ wikilink_pattern = re.compile('\[http(.*)\]')

+ wikilink_extract = re.compile('\[(.*)\]')

+ wikiheading1_pattern = re.compile('^= (.*) =$')

+ wikiheading2_pattern = re.compile('^== (.*) ==$')

+ wikiheading3_pattern = re.compile('^=== (.*) ===$')

+ strikethrough_pattern = re.compile('~~(.*)~~')

+ # Trac priorities

+ priority_map = {'blocker': 1,

+                 'critical': 2,

+                 'major': 3,

+                 'minor': 4,

+                 'trivial': 5}

+ 

  

  def to_timestamp(tm):

      ''' Convert to timestamp which can be jsonified '''
@@ -21,11 +34,91 @@ 

      return ts

  

  

+ def strip_wikilink(content):

+     ''' Need to remove wiki link format from custom fields.  They come in a

+     variety of forms that can be comma or whitespace separated.  They can also

+     include link names which must also be removed.

+ 

+         [https://bugzilla.redhat.com/show_bug.cgi?id=772777]

+         [https://bugzilla.com/123456789], [http://bugzilla.com/7777777 7777777]

+         [https://bugzilla.com/6666666 6666666]

+     '''

+ 

+     links = []

+     if wikilink_pattern.search(content):

+         # Looks like we have a link in here

+         links = []

+         mylist = re.findall(r'\[([^]]*)\]', content)

+         for i in mylist:

+             links.append(i.split(' ', 1)[0])

+         return ', '.join(links)

+     else:

+         return content

+ 

+ 

+ def convert_headers(line):

+     ''' Convert wikiformat headers

+     '''

+     level_count = 1

+     for header in [wikiheading1_pattern,

+                    wikiheading2_pattern,

+                    wikiheading3_pattern]:

+         try:

+             level = header.search(line).group(1)

+             if level:

+                 line = "%s %s" % ('#' * level_count, level)

+                 break  # No need to check other heading levels

+         except:

+             # Try the next heading level

+             pass

+         level_count += 1

+ 

+     return line

+ 

+ 

+ def convert_wikilinks(line):

+     ''' Convert wikiformat links

+     '''

+     if wikilink_pattern.search(line):

+         try:

+             result = wikilink_extract.search(line).group(1)

+             if result:

+                 parts = result.split(' ', 1)

+                 if len(parts) == 1:

+                     mdlink = '[%s](%s)' % (parts[0], parts[0])

+                 elif len(parts) == 2:

+                     mdlink = '[%s](%s)' % (parts[1], parts[0])

+                 line = line.replace('[' + result + ']', mdlink)

+         except:

+             # Not a link, not a problem

+             pass

+ 

+     return line

+ 

+ 

+ def convert_strike(line):

+     ''' Convert wikiformat striked text

+     '''

+     striked_result = strikethrough_pattern.search(line)

+     if striked_result:

+         try:

+             striked_text = striked_result.group(1)

+             if striked_text:

+                 orig_text = '~~%s~~' % striked_text

+                 new_text = '<s>%s</s>' % striked_text

+                 line = line.replace(orig_text, new_text)

+         except:

+             # Not striked

+             pass

+     return line

+ 

+ 

  class TracImporter(object):

      ''' Pagure importer for trac instance '''

  

      def __init__(self, project_url, username, password, offset, repo_name,

-                  repo_folder, nopush, fasclient=None, tags=False, private=False):

+                  repo_folder, nopush, fasclient=None, tags=False,

+                  private=False):

          ''' Instantiate a TracImporter object '''

          self.username = username

          self.password = password
@@ -84,7 +177,12 @@ 

          all_ticket_fields = self.request('ticket.getTicketFields')

          custom_fields = []

          for field in all_ticket_fields:

-             if field.get('custom') is True:

+             if (field.get('custom') is True or

+                     field.get('label').lower() == "component" or

+                     field.get('label').lower() == "keywords" or

+                     field.get('label').lower() == "version" or

+                     field.get('label').lower() == "type" or

+                     field.get('label').lower() == "cc"):

                  current_field = {}

                  current_field['name'] = field['name']

                  key_type = 'text'
@@ -92,8 +190,85 @@ 

                      key_type = 'boolean'

                  current_field['key_type'] = key_type

                  custom_fields.append(current_field)

+ 

          return custom_fields

  

+     def WikiToMD(self, content):

+         ''' Convert wiki/RST format to Markdown.  Code blocks, bold/italics,

+         wiki links, lists, striked text, and headers. '''

+ 

+         code_block = False

+         in_list = False

+         nested_level = 0

+         prev_indent = 0

+         new_content = ""

+ 

+         for line in content.split('\n'):

+             line = line.replace("\r", "")

+             if "{{{" in line:

+                 code_block = True

+                 line = line.replace("{{{", "```")

+             if "}}}" in line:

+                 code_block = False

+                 line = line.replace("}}}", "```")

+             if not code_block:

+                 #

+                 # Convert bullet lists.  The start and end of a list needs

+                 # an empty line.  wikiformat uses both '*' and '-' for its

+                 # lists.  However, markdown only supports '-'.

+                 #

+                 if line.startswith('* '):

+                     if not in_list:

+                         new_content = "%s\n" % (new_content)

+                     in_list = True

+                     line = line[1:]

+                     line = '-%s' % (line)

+                 elif line.startswith('- '):

+                     # No need to modify the line, just add the new line

+                     if not in_list:

+                         new_content = "%s\n" % (new_content)

+                     in_list = True

+                 elif line.startswith(' '):

+                     # Check for nested lists

+                     nested_line = line.lstrip(' ')

+                     if nested_line.startswith('* ') or \

+                        nested_line.startswith('- '):

+                         # Adjust the nested list level as needed

+                         indent = len(line) - len(nested_line)

+                         if indent > prev_indent:

+                             nested_level += 1

+                         elif indent < prev_indent:

+                             nested_level -= 1

+                         prev_indent = indent

+ 

+                         # Set the proper indentation for markdown

+                         line = ('%s-%s' % ('    ' * nested_level,

+                                            nested_line[1:]))

+                 else:

+                     if in_list:

+                         # Add the closing empty line

+                         new_content = "%s\n" % (new_content)

+                     in_list = False

+                     nested_level = 0

+                     prev_indent = 0

+ 

+                 # Convert headers

+                 line = convert_headers(line)

+ 

+                 # Convert wiki links

+                 line = convert_wikilinks(line)

+ 

+                 # Convert striked through text

+                 line = convert_strike(line)

+ 

+                 # Convert bold and italic text (do this last)

+                 line = line.replace("'''", "**")  # Convert bold text

+                 line = line.replace("''", "*")  # Convert italic text

+ 

+             new_content = "%s%s\n" % (new_content, line)

+ 

+         return new_content

+ 

      def import_issues(self, repo_name, trac_query='max=0&order=id'):

          ''' Queries the trac instance via its jsonrpc API and convert the

          tickets into JSON blob to be imported into pagure's ticket git repo.
@@ -103,7 +278,6 @@ 

          :kwarg trac_query: the query to call trac with in order to retrieve

              all the tickets.

              Defaults to ``max=0&order=id``

- 

          '''

  

          tickets_id = self.request('ticket.query', trac_query)
@@ -130,9 +304,14 @@ 

                          else:

                              comments[key].comment += ('\n[%s](%s)' %

                                                        (attach_name, url))

+                 else:

+                     # Convert any RST formatting to Markdown

+                     comments[key].comment = \

+                         self.WikiToMD(comments[key].comment)

                  pagure_issue.comments.append(comments[key].to_json())

              click.echo('Updated ' + repo_name + ' with issue :' +

                         str(ticket_id) + '/' + str(tickets_id[-1]))

+ 

              issue_to_json(pagure_issue, self.clone_repo_location)

  

      def get_custom_fields_of_ticket(self, trac_ticket):
@@ -149,6 +328,8 @@ 

                  pagure_field['value'] = trac_ticket.get(

                                      pagure_field['name'], "").strip()

                  if pagure_field['value']:

+                     pagure_field['value'] = \

+                         strip_wikilink(pagure_field['value'])

                      pagure_fields.append(pagure_field)

          return pagure_fields

  
@@ -180,6 +361,8 @@ 

          pagure_issue_content = trac_ticket['description']

          if pagure_issue_content == '':

              pagure_issue_content = '#No Description Provided'

+         else:

+             pagure_issue_content = self.WikiToMD(pagure_issue_content)

  

          issue_status, close_status = self.get_ticket_status(trac_ticket)

  
@@ -207,6 +390,12 @@ 

          if 'milestone' in trac_ticket and trac_ticket['milestone'] != '':

              pagure_milestone = trac_ticket['milestone']

  

+         # The priority of the issue

+         priority = None

+         if 'priority' in trac_ticket and trac_ticket['priority'] != '' and \

+            trac_ticket['priority'] in priority_map:

+             priority = priority_map[trac_ticket['priority']]

+ 

          # Issue tags

          pagure_issue_tags = []

          if self.tags:
@@ -227,6 +416,7 @@ 

              title=pagure_issue_title,

              content=pagure_issue_content,

              status=issue_status,

+             priority=priority,

              close_status=close_status,

              date_created=pagure_issue_created_at,

              user=pagure_issue_user.to_json(),

@@ -9,12 +9,13 @@ 

              self, id, title, content,

              status, date_created, user, private, attachment, tags,

              depends, blocks, assignee, close_status, comments=None,

-             milestone=None, custom_fields=None):

+             milestone=None, custom_fields=None, priority=None):

  

          self.id = id

          self.title = title

          self.content = content

          self.status = status

+         self.priority = priority

          self.close_status = close_status

          self.date_created = date_created

          self.user = user
@@ -42,6 +43,7 @@ 

              'date_created': self.date_created,

              'user': self.user,

              'private': self.private,

+             'priority': self.priority,

              'tags': self.tags,

              'depends': self.depends,

              'blocks': self.blocks,

This patch converts the majority of wiki/RST formatting to Markdown. It also strips the wikilink format from custom fields and converts the Trac priority to the Pagure priority format.

1 new commit added

  • Fix pep8 errors
7 years ago

This method is around 120 lines of code, which makes it challenging to read. I recommend breaking it into smaller helper methods so that each logical component is easier to understand, and most importantly, easier to test.

Would you not want to replace \r with \n?

This change looks reasonable to me. In addition to my other recommendations, I suggest writing automated tests for this code. It would help prove that it works as expected, and it actually seems like it could be a fun set of tests too.

Actually, in Trac I always see this pattern: "\r\n". I never see "\r" by itself.

rebased

7 years ago

This is really helpful; thanks, @mreynolds.

It could also detect commit hashes and generate code links like Trac does. I used the following (which you are welcome to take in the event that there isn't a better way to do this):

def convert_hashes(line, project="gssproxy"):
    ''' Convert full commit hashes found in a line into Markdown links.

    Every standalone 40-character lowercase hexadecimal string is replaced
    by ``[<first 7 characters>](/<project>/c/<full hash>)``.

    :arg line: the text to scan for commit hashes.
    :kwarg project: the project name used to build the link path.
        Defaults to ``gssproxy``.
    :return: the line with every commit hash replaced by a Markdown link;
        the line is returned unchanged when it contains no hash.
    '''
    def _to_link(match):
        # Build the Markdown link for a single matched hash
        commit = match.group(0)
        return "[%s](/%s/c/%s)" % (commit[:7], project, commit)

    # A single re.sub() pass rewrites each occurrence exactly once.  Looping
    # over the findall() results and calling str.replace() would match the
    # hash embedded in an already generated link again whenever the same
    # hash appears more than once in the line, producing nested links.
    return re.sub(r"\b[0-9a-f]{40}\b", _to_link, line)

(Of course, "gssproxy" would need to be replaced with the correct project name.)