#7 Rewrite parse_cid to be a lot more sophisticated
Closed 6 years ago by adamwill. Opened 7 years ago by adamwill.

file modified
+104 -18
@@ -403,7 +403,7 @@ 

      else:

          return ''

  

- def parse_cid(origcid, dist=False):

+ def parse_cid(origcid, dist=False, dic=False):

      """If dist is false-y, get (release, date, type, respin) values

      from a Pungi 4 compose id. If dist is truth-y, get (dist, release,

      date, type, respin) values from a Pungi 4 compose id.
@@ -413,26 +413,112 @@ 

      if not cid:

          raise ValueError("{0} does not appear to be a valid Pungi 4 Fedora "

                           "compose ID!".format(origcid))

-     # The dist can have '-' in it, of course, because life is awful

-     # FIXME: This is very stupid and will break on layered products,

-     # which have a lot more - in them. It can theoretically break in

-     # several other cases, but I don't know if Fedora is currently

-     # hitting any of those. I'm currently thinking about how to deal

-     # with this. See:

+     # Writing a truly generic compose ID parser is pretty tricky. See

      # https://github.com/release-engineering/productmd/issues/77

-     # We may just have to rejig things to require the full compose

-     # metadata and give up on parsing compose IDs at all. That would

-     # be a lot of work for fedfind. A somewhat uglier alternative for

-     # fedfind's purposes would be to give it special knowledge of the

-     # Fedora product shortnames with - in them, or a generic handler

-     # for 'Fedora-(FOO)' or something.

-     (gotdist, release, _) = cid.rsplit('-', 2)

-     release = release.lower()

+     # for some background. This is my best effort. I'm trying to get

+     # it upstreamed currently.

+ 

+     # init values

+     mainshort = ''

+     mainver = ''

+     maintype = ''

+     baseshort = ''

+     basever = ''

+     basetype = ''

+     variant = ''

+ 

+     # find date, type, respin

      (date, typ, respin) = productmd.composeinfo.get_date_type_respin(cid)

-     if dist:

-         return (gotdist, release, date, typ, respin)

+     # now split on the date, we only care about what comes before

+     part = cid.split(date)[0][:-1]

+ 

+     # Handle "HACK: there are 2 RHEL 5 composes"

+     if part.endswith("-Client"):

+         variant = "Client"

+         part = part[:-len('-Client')]

+     elif part.endswith("-Server"):

+         variant = "Server"

+         part = part[:-len('-Server')]

+ 

+     # Next part back must be either a version type suffix or a version

+     # we don't know yet if this is the main version or the base

+     # version for a layered product

+     somever = part.split('-')[-1]

+     # See if it's a type_suffix

+     if somever.lower() in productmd.common.RELEASE_TYPES:

+         sometype = somever

+         part = '-'.join(part.split('-')[:-1])

+         somever = part.split('-')[-1]

      else:

-         return (release, date, typ, respin)

+         sometype = ''

+ 

+     # strip off the 'somever' we just processed

+     part = '-'.join(part.split('-')[:-1])

+ 

+     # what remains is either:

+     # mainshort

+     # or:

+     # mainshort-mainver-baseshort

+     # But this is where things get fun, because sadly, both mainshort

+     # and baseshort could have - in them and mainver could have a type

+     # suffix. So, life is fun. Working in our favour, shortnames are

+     # *not* allowed to contain digits. Let's see if we can spot what

+     # looks like a '-mainver'- component...

+     elems = part.split('-')

+     for (idx, cand) in enumerate(elems):

+         # can't be the first or the last

+         if idx == 0 or idx == len(elems) - 1:

+             continue

+         # now see if the cand looks like a version

+         # FIXME: we should use RELEASE_VERSION_RE here, but not until

+         # it enforces this stricter rule. We have decided that only

+         # 'Rawhide' or anything that looks like a version number -

+         # starts with a digit, can have any number of subsequent

+         # digits and/or digit groups separated with single periods -

+         # is a valid version.

+         verreg = re.compile(r'(Rawhide|[0-9]+(\.?[0-9]+)*)$')

+         match = verreg.match(cand)

+         if match:

+             mainver = match.group(1)

+             # check for a type suffix

+             nextel = elems[idx+1]

+             if nextel.lower() in productmd.common.RELEASE_TYPES:

+                 maintype = nextel

+                 elems.pop(idx+1)

+             basever = somever

+             basetype = sometype

+             mainshort = '-'.join(elems[:idx])

+             baseshort = '-'.join(elems[idx+1:])

+ 

+     # if we didn't establish a mainver above, we must not be layered,

+     # and what remains is just mainshort, and somever is mainver

+     if not mainshort:

+         mainshort = '-'.join(elems)

+         mainver = somever

+         maintype = sometype

+ 

+     if not maintype:

+         maintype = 'ga'

+     if basever and not basetype:

+         basetype = 'ga'

+ 

+     if dic:

+         return {

+             'short': mainshort,

+             'version': mainver,

+             'version_type': maintype,

+             'base_short': baseshort,

+             'base_version': basever,

+             'base_type': basetype,

+             'variant': variant,

+             'date': date,

+             'compose_type': typ,

+             'respin': respin

+         }

+     elif dist:

+         return (mainshort, mainver.lower(), date, typ, respin)

+     else:

+         return (mainver.lower(), date, typ, respin)

  

  def cid_from_label(release, label, short='fedora'):

      """Get the compose ID for a compose by label. Must also specify

file modified
+1 -1
@@ -1826,7 +1826,7 @@ 

          # if we got this far, we already know we exist

          self._exists = True

          # custom fake CID for these releases

-         self._fakecid = "FedoraRespin-{0}-{1}.0".format(self.release, self.compose)

+         self._fakecid = "FedoraRespin-{0}-updates-{1}.0".format(self.release, self.compose)

  

      def __repr__(self):

          return "{0}()".format(self.__class__)

file modified
+79 -16
@@ -300,6 +300,8 @@ 

          assert res == 'Fedora-Atomic-24-20161004.1'

          res = fedfind.helpers.find_cid('FedoraRespin-24-20161206.0')

          assert res == 'FedoraRespin-24-20161206.0'

+         res = fedfind.helpers.find_cid('FedoraRespin-24-updates-20161206.0')

+         assert res == 'FedoraRespin-24-updates-20161206.0'

          # a few 'close but no cigars' just for safety...

          res = fedfind.helpers.find_cid('https://foo.com/compose/Fedora-24-20160314.t.u/compose')

          assert res == ''
@@ -308,33 +310,94 @@ 

          res = fedfind.helpers.find_cid('https://foo.com/compose/Fedora/24-20160314.n.0/compose')

          assert res == ''

  

-     def test_parse_cid(self):

+     @pytest.mark.parametrize(

+         ("cid", "short", "version", "version_type", "base_short", "base_version", "base_type",

+          "variant", "date", "compose_type", "respin"),

+         [

+             ('Fedora-24-20160314.n.0', 'Fedora', '24', 'ga', '', '', '', '', '20160314', 'nightly',

+              0),

+             ('Fedora-24-20160314.0', 'Fedora', '24', 'ga', '', '', '', '', '20160314',

+              'production', 0),

+             ('Fedora-24-20160314.t.0', 'Fedora', '24', 'ga', '', '', '', '', '20160314', 'test', 0),

+             ('Fedora-Rawhide-20160314.n.1', 'Fedora', 'Rawhide', 'ga', '', '', '', '', '20160314',

+              'nightly', 1),

+             ('Fedora-Atomic-24-20160628.0', 'Fedora-Atomic', '24', 'ga', '', '', '', '',

+              '20160628', 'production', 0),

+             ('Fedora-20-19700101.0', 'Fedora', '20', 'ga', '', '', '', '', '19700101',

+              'production', 0),

+             ('FedoraRespin-24-updates-20161206.0', 'FedoraRespin', '24', 'updates', '', '', '', '',

+              '20161206', 'production', 0),

+             # older fake Respin CID

+             ('FedoraRespin-24-20161206.0', 'FedoraRespin', '24', 'ga', '', '', '', '', '20161206',

+              'production', 0),

+             # taken from productmd test_composeinfo

+             ('Fedora-22-20160622.0', 'Fedora', '22', 'ga', '', '', '', '', '20160622', 'production',

+              0),

+             # FIXME: 'ci' is a valid compose type now but get_date_type_respin doesn't handle it

+             # 'Fedora-22-20160622.ci.0'

+             ('Fedora-22-updates-20160622.0', 'Fedora', '22', 'updates', '', '', '', '', '20160622',

+              'production', 0),

+             ('Fedora-22-updates-20160622.n.0', 'Fedora', '22', 'updates', '', '', '', '',

+              '20160622', 'nightly', 0),

+             ('Fedora-22-BASE-3-20160622.0', 'Fedora', '22', 'ga', 'BASE', '3', 'ga', '', '20160622',

+              'production', 0),

+             ('Fedora-22-BASE-3-20160622.n.0', 'Fedora', '22', 'ga', 'BASE', '3', 'ga', '',

+              '20160622', 'nightly', 0),

+             ('Fedora-22-updates-BASE-3-20160622.0', 'Fedora', '22', 'updates', 'BASE', '3', 'ga',

+              '', '20160622', 'production', 0),

+             ('Fedora-22-updates-BASE-3-20160622.n.0', 'Fedora', '22', 'updates', 'BASE', '3', 'ga',

+              '', '20160622', 'nightly', 0),

+             ('Fedora-22-BASE-3-updates-20160622.0', 'Fedora', '22', 'ga', 'BASE', '3', 'updates',

+              '', '20160622', 'production', 0),

+             ('Fedora-22-BASE-3-updates-20160622.n.0', 'Fedora', '22', 'ga', 'BASE', '3', 'updates',

+              '', '20160622', 'nightly', 0),

+             ('Fedora-22-updates-BASE-3-updates-20160622.0', 'Fedora', '22', 'updates', 'BASE', '3',

+              'updates', '', '20160622', 'production', 0),

+             ('Fedora-22-updates-BASE-3-updates-20160622.n.0', 'Fedora', '22', 'updates', 'BASE',

+              '3', 'updates', '', '20160622', 'nightly', 0),

+             # Rawhide complex

+             ('Fedora-Rawhide-updates-RHEL-6.3.4-20160513.t.1', 'Fedora', 'Rawhide', 'updates',

+              'RHEL', '6.3.4', 'ga', '', '20160513', 'test', 1),

+             # I think this is how a RHEL 5 one looks?

+             ('Fedora-5-updates-Server-20160523.2', 'Fedora', '5', 'updates', '', '', '', 'Server',

+              '20160523', 'production', 2),

+             ('Fedora-5-updates-Client-20160523.2', 'Fedora', '5', 'updates', '', '', '', 'Client',

+              '20160523', 'production', 2),

+         ]

+     )

+     def test_parse_cid(self, cid, short, version, version_type, base_short, base_version,

+                        base_type, variant, date, compose_type, respin):

          """Check parse_cid works correctly, with various known compose

          ID forms.

          """

+         assert fedfind.helpers.parse_cid(cid, dic=True) == {

+             'short': short,

+             'version': version,

+             'version_type': version_type,

+             'base_short': base_short,

+             'base_version': base_version,

+             'base_type': base_type,

+             'variant': variant,

+             'date': date,

+             'compose_type': compose_type,

+             'respin': respin

+         }

+ 

+     def test_parse_cid_tuple(self):

+         """Check the two older tuple formats for parse_cid work as

+         expected.

+         """

          res = fedfind.helpers.parse_cid('Fedora-24-20160314.n.0')

          assert res == ('24', '20160314', 'nightly', 0)

          res = fedfind.helpers.parse_cid('Fedora-24-20160314.n.0', dist=True)

          assert res == ('Fedora', '24', '20160314', 'nightly', 0)

-         res = fedfind.helpers.parse_cid('Fedora-24-20160314.0')

-         assert res == ('24', '20160314', 'production', 0)

-         res = fedfind.helpers.parse_cid('Fedora-24-20160314.t.0')

-         assert res == ('24', '20160314', 'test', 0)

-         res = fedfind.helpers.parse_cid('Fedora-Rawhide-20160314.n.1')

-         assert res == ('rawhide', '20160314', 'nightly', 1)

-         # Two-week Atomic uses a different product name

-         res = fedfind.helpers.parse_cid('Fedora-Atomic-24-20160628.0')

-         assert res == ('24', '20160628', 'production', 0)

          res = fedfind.helpers.parse_cid('Fedora-Atomic-24-20160628.0', dist=True)

          assert res == ('Fedora-Atomic', '24', '20160628', 'production', 0)

-         # should raise ValueError for non-Pungi 4-ish CID

+ 

+     def test_parse_cid_error(self):

+         """Check parse_cid raises ValueError for non-Pungi 4-ish CID."""

          with pytest.raises(ValueError):

              res = fedfind.helpers.parse_cid('23-20160530')

-         # should handle our own fake compose IDs, though

-         res = fedfind.helpers.parse_cid('Fedora-20-19700101.0', dist=True)

-         assert res == ('Fedora', '20', '19700101', 'production', 0)

-         res = fedfind.helpers.parse_cid('FedoraRespin-24-20161206.0', dist=True)

-         assert res == ('FedoraRespin', '24', '20161206', 'production', 0)

  

      @mock.patch('fedfind.helpers.download_json', return_value=PDC_JSON_REAL, autospec=True)

      def test_cid_from_label(self, fakejson):

file modified
+6 -3
@@ -601,10 +601,13 @@ 

          # same release

          got2 = fedfind.release.get_release(cid=got.cid)

          assert got.version == got2.version

+         # getting a release with the old form of fake compose ID also

+         got3 = fedfind.release.get_release(cid='FedoraRespin-24-20161120.0')

+         assert got.version == got3.version

          # getting a release from the URL should get the same release

-         got3 = fedfind.release.get_release(

+         got4 = fedfind.release.get_release(

              url='https://dl.fedoraproject.org/pub/alt/live-respins/')

-         assert got3.version == got2.version

+         assert got4.version == got3.version

          # trying to get RespinRelease with a non-matching release

          # or compose should raise an error

          with pytest.raises(ValueError):
@@ -614,7 +617,7 @@ 

          with pytest.raises(ValueError):

              fedfind.release.get_release('25', 'Respin', '20170305')

          with pytest.raises(ValueError):

-             fedfind.release.get_release(cid='FedoraRespin-25-20170305.0')

+             fedfind.release.get_release(cid='FedoraRespin-25-updates-20170305.0')

  

      @pytest.mark.parametrize(

          ("release", "milestone", "compose", "respin", "cid"),

What we had was relying a lot on things that happen to be true
of Fedora compose IDs, but aren't part of the rules for compose
IDs in general at all. This should come a lot closer to being
a 'generic' compose ID parser that can handle almost anything
that conforms to the rules productmd enforces. It should handle
layered products, type suffixes, and RHEL-ish versions.

Signed-off-by: Adam Williamson awilliam@redhat.com

rebased

7 years ago

rebased

7 years ago

rebased

7 years ago

rebased

7 years ago

rebased

7 years ago

This got merged at some point, though not via this PR.

Pull-Request has been closed by adamwill

6 years ago