#1464 API for reserving NVRs for content generators
Merged 2 years ago by mikem. Opened 2 years ago by tkopecek.
tkopecek/koji issue1463  into  master

file modified
+4 -1
@@ -1290,6 +1290,7 @@ 

                        help=_("Do not display progress of the upload"))

      parser.add_option("--link", action="store_true", help=_("Attempt to hardlink instead of uploading"))

      parser.add_option("--test", action="store_true", help=_("Don't actually import"))

+     parser.add_option("--token", action="store", default=None, help=_("Build reservation token"))

      (options, args) = parser.parse_args(args)

      if len(args) < 2:

          parser.error(_("Please specify metadata files directory"))
@@ -1334,7 +1335,7 @@ 

              if callback:

                  print('')

  

-     session.CGImport(metadata, serverdir)

+     session.CGImport(metadata, serverdir, options.token)

  

  

  def handle_import_comps(goptions, session, args):
@@ -3152,6 +3153,8 @@ 

          info['state'] = koji.BUILD_STATES[info['state']]

          print("BUILD: %(name)s-%(version)s-%(release)s [%(id)d]" % info)

          print("State: %(state)s" % info)

+         if info['state'] == 'BUILDING':

+             print("Reserved by: %(reserved_by_name)s" % info)

          print("Built by: %(owner_name)s" % info)

          source = info.get('source')

          if source is not None:

@@ -15,4 +15,14 @@ 

  -- add better index for sessions

  CREATE INDEX sessions_expired ON sessions(expired);

  

+ -- table for content generator build reservations

+ CREATE TABLE build_reservations (

+ 	build_id INTEGER NOT NULL REFERENCES build(id),

+ 	cg_id INTEGER NOT NULL REFERENCES content_generator(id),

+ 	token VARCHAR(64),

+         created TIMESTAMP NOT NULL,

+ 	PRIMARY KEY (build_id)

+ ) WITHOUT OIDS;

+ CREATE INDEX build_reservations_created ON build_reservations(created);

+ 

  COMMIT;

file modified
+8
@@ -507,6 +507,14 @@ 

  	UNIQUE (cg_id, user_id, active)

  ) WITHOUT OIDS;

  

+ CREATE TABLE build_reservations (

+ 	build_id INTEGER NOT NULL REFERENCES build(id),

+ 	cg_id INTEGER NOT NULL REFERENCES content_generator(id),

+ 	token VARCHAR(64),

+         created TIMESTAMP NOT NULL,

+ 	PRIMARY KEY (build_id)

+ ) WITHOUT OIDS;

+ CREATE INDEX build_reservations_created ON build_reservations(created);

  

  -- here we track the buildroots on the machines

  CREATE TABLE buildroot (

@@ -44,6 +44,7 @@ 

     epoch.

  -  owner: The owner of the build task in username format. This field

     is optional.

+ -  build_id: Reserved build ID. This field is optional.

  -  extra: A map of extra metadata associated with the build, which

     must include one of:

  

@@ -114,3 +114,30 @@ 

  Metadata will be provided by the Content Generator as a JSON file. There

  is a proposal of the :doc:`Content Generator

  Metadata <content_generator_metadata>` format available for review.

+ 

+ API

+ ===

+ 

+ Relevant API calls for Content Generator are:

+ 

+ - ``CGImport(metadata, directory, token=None)``: This is basic integration point

+   of Content Generator with koji. It is supplied with metadata as json encoded

+   string or dict or filename of metadata described in previous chapter and

+   directory with all uploaded content referenced from metadata. These files

+   needs to be uploaded before ``CGImport`` is called.

+ 

+   Optionally, ``token`` can be specified in case, that build ID reservation was

+   done before.

+ 

+ - ``CGInitBuild(cg, data)``: It can be helpful in many cases to reserve NVR for

+   future build before Content Generator evend starts building.  Especially, if

+   there is some CI or other workflow competing for same NVRs.  This call creates

+   special ``token`` which can be used to claim specific build (ID + NVR). Such

+   claimed build will be displayed as BUILDING and can be used by ``CGImport``

+   call later.

+ 

+   As an input are here Content Generator name and `data` which is basically

+   dictionary with name/version/release/epoch keys. Call will return a dict

+   containing ``token`` and ``build_id``. ``token`` would be used in subsequent

+   call of ``CGImport`` while ``build_id`` needs to be part of metadata (as item

+   in ``build`` key).

file modified
+151 -31
@@ -44,6 +44,11 @@ 

  import traceback

  import six.moves.xmlrpc_client

  import zipfile

+ try:

+     # py 3.6+

+     import secrets

+ except ImportError:

+     import random

  

  import rpm

  import six
@@ -3703,6 +3708,8 @@ 

        completion_ts: time the build was completed (epoch, may be null)

        source: the SCM URL of the sources used in the build

        extra: dictionary with extra data about the build

+       reserved_id: ID of CG which reserved this build (only in BUILDING state)

+       reserved_name: name of CG which reserved this build (only in BUILDING state)

  

      If there is no build matching the buildInfo given, and strict is specified,

      raise an error.  Otherwise return None.
@@ -3746,6 +3753,13 @@ 

          else:

              return None

      else:

+         result['reserved_by'] = None

+         if result['state'] == koji.BUILD_STATES['BUILDING']:

+             token = get_reservation_token(result['id'])

+             if token:

+                 cg = lookup_name('content_generator', token['cg_id'], strict=True)

+                 result['reserved_by_id'] = cg['id']

+                 result['reserved_by_name'] = cg['name']

          return result

  

  
@@ -5182,8 +5196,11 @@ 

      _set_build_volume(build, volume, strict=True)

  

  

- def new_build(data):

-     """insert a new build entry"""

+ def new_build(data, strict=False):

+     """insert a new build entry

+ 

+     If strict is specified, raise an exception, if build already exists.

+     """

  

      data = data.copy()

  
@@ -5222,10 +5239,11 @@ 

      #check for existing build

      old_binfo = get_build(data)

      if old_binfo:

+         if strict:

+             raise koji.GenericError('Existing build found: %s' % data)

          recycle_build(old_binfo, data)

          # Raises exception if there is a problem

          return old_binfo['id']

-     #else

      koji.plugin.run_callbacks('preBuildStateChange', attribute='state', old=None, new=data['state'], info=data)

  

      #insert the new data
@@ -5534,7 +5552,51 @@ 

      return rpminfo

  

  

- def cg_import(metadata, directory):

+ def generate_token(nbytes=32):

+     """

+     Generate random hex-string token of length 2 * nbytes

+     """

+     if secrets:

+         return secrets.token_hex(nbytes=nbytes)

+     else:

+         values = ['%02x' % random.randint(0, 256) for x in range(nbytes)]

+         return ''.join(values)

+ 

+ 

+ def get_reservation_token(build_id):

+     query = QueryProcessor(

+         tables=['build_reservations'],

+         columns=['build_id', 'cg_id', 'token'],

+         clauses=['build_id = %(build_id)d'],

+         values=locals(),

+     )

+     return query.executeOne()

+ 

+ 

+ def cg_init_build(cg, data):

+     """Create (reserve) a build_id for given data.

+ 

+     If build already exists, init_build will raise GenericError

+     """

+     assert_cg(cg)

+     data['owner'] = context.session.user_id

+     data['state'] = koji.BUILD_STATES['BUILDING']

+     data['completion_time'] = None

+     build_id = new_build(data, strict=True)

+     # store token

+     token = generate_token()

+     cg_id = lookup_name('content_generator', cg, strict=True)['id']

+     insert = InsertProcessor(table='build_reservations')

+     insert.set(build_id=build_id,

+                cg_id=cg_id,

+                token=token)

+     insert.rawset(created='NOW()')

+     insert.execute()

+ 

+     return {'build_id': build_id, 'token': token}

+ 

+ 

+ def cg_import(metadata, directory, token=None):

      """Import build from a content generator

  

      metadata can be one of the following
@@ -5544,7 +5606,7 @@ 

      """

  

      importer = CG_Importer()

-     return importer.do_import(metadata, directory)

+     return importer.do_import(metadata, directory, token)

  

  

  class CG_Importer(object):
@@ -5553,8 +5615,7 @@ 

          self.buildinfo = None

          self.metadata_only = False

  

-     def do_import(self, metadata, directory):

- 

+     def do_import(self, metadata, directory, token=None):

          metadata = self.get_metadata(metadata, directory)

          self.directory = directory

  
@@ -5567,7 +5628,7 @@ 

          self.assert_cg_access()

  

          # prepare data for import

-         self.prep_build()

+         self.prep_build(token)

          self.prep_brs()

          self.prep_outputs()

  
@@ -5579,7 +5640,7 @@ 

                  directory=directory)

  

          # finalize import

-         self.get_build()

+         self.get_build(token)

          self.import_brs()

          try:

              self.import_outputs()
@@ -5677,26 +5738,47 @@ 

                  raise koji.GenericError("Destination directory already exists: %s" % path)

  

  

-     def prep_build(self):

+ 

+     def prep_build(self, token=None):

          metadata = self.metadata

-         buildinfo = get_build(metadata['build'], strict=False)

-         if buildinfo:

-             # TODO : allow in some cases

-             raise koji.GenericError("Build already exists: %r" % buildinfo)

+         if metadata['build'].get('build_id'):

+             if len(self.cgs) != 1:

+                 raise koji.GenericError("Reserved builds can handle only single content generator.")

+             cg_id = list(self.cgs)[0]

+             build_id = metadata['build']['build_id']

+             buildinfo = get_build(build_id, strict=True)

+             build_token = get_reservation_token(build_id)

+             if not build_token or build_token['token'] != token:

+                 raise koji.GenericError("Token doesn't match build ID %s" % build_id)

+             if build_token['cg_id'] != cg_id:

+                 raise koji.GenericError('Build ID %s is not reserved by this CG' % build_id)

+             if buildinfo['state'] != koji.BUILD_STATES['BUILDING']:

+                 raise koji.GenericError('Build ID %s is not in BUILDING state' % build_id)

+             if buildinfo['name'] != metadata['build']['name'] or \

+                buildinfo['version'] != metadata['build']['version'] or \

+                buildinfo['release'] != metadata['build']['release'] or \

+                buildinfo['epoch'] != metadata['build']['epoch']:

+                 raise koji.GenericError("Build (%i) NVR is different" % build_id)

          else:

-             # gather needed data

-             buildinfo = dslice(metadata['build'], ['name', 'version', 'release', 'extra', 'source'])

-             # epoch is not in the metadata spec, but we allow it to be specified

-             buildinfo['epoch'] = metadata['build'].get('epoch', None)

-             buildinfo['start_time'] = \

-                 datetime.datetime.fromtimestamp(float(metadata['build']['start_time'])).isoformat(' ')

-             buildinfo['completion_time'] = \

-                 datetime.datetime.fromtimestamp(float(metadata['build']['end_time'])).isoformat(' ')

-             owner = metadata['build'].get('owner', None)

-             if owner:

-                 if not isinstance(owner, six.string_types):

-                     raise koji.GenericError("Invalid owner format (expected username): %s" % owner)

-                 buildinfo['owner'] = get_user(owner, strict=True)['id']

+             buildinfo = get_build(metadata['build'], strict=False)

+             if buildinfo and not metadata['build'].get('build_id'):

+                 # TODO : allow in some cases

+                 raise koji.GenericError("Build already exists: %r" % buildinfo)

+         # gather needed data

+         buildinfo = dslice(metadata['build'], ['name', 'version', 'release', 'extra', 'source'])

+         if 'build_id' in metadata['build']:

+             buildinfo['build_id'] = metadata['build']['build_id']

+         # epoch is not in the metadata spec, but we allow it to be specified

+         buildinfo['epoch'] = metadata['build'].get('epoch', None)

+         buildinfo['start_time'] = \

+             datetime.datetime.fromtimestamp(float(metadata['build']['start_time'])).isoformat(' ')

+         buildinfo['completion_time'] = \

+             datetime.datetime.fromtimestamp(float(metadata['build']['end_time'])).isoformat(' ')

+         owner = metadata['build'].get('owner', None)

+         if owner:

+             if not isinstance(owner, six.string_types):

+                 raise koji.GenericError("Invalid owner format (expected username): %s" % owner)

+             buildinfo['owner'] = get_user(owner, strict=True)['id']

          self.buildinfo = buildinfo

  

          koji.check_NVR(buildinfo, strict=True)
@@ -5722,10 +5804,25 @@ 

          return buildinfo

  

  

-     def get_build(self):

-         build_id = new_build(self.buildinfo)

-         buildinfo = get_build(build_id, strict=True)

- 

+     def get_build(self, token=None):

+         try:

+             binfo = dslice(self.buildinfo, ('name', 'version', 'release'))

+             buildinfo = get_build(binfo, strict=True)

+             build_token = get_reservation_token(buildinfo['build_id'])

+             if len(self.cgs) != 1:

+                 raise koji.GenericError("Reserved builds can handle only single content generator.")

+             cg_id = list(self.cgs)[0]

+             if buildinfo.get('task_id') or \

+                buildinfo['state'] != koji.BUILD_STATES['BUILDING'] or \

+                not build_token or \

+                build_token['cg_id'] != cg_id or \

+                build_token['token'] != token:

+                 raise koji.GenericError("Build is not reserved")

+             buildinfo['extra'] = self.buildinfo['extra']

+             build_id = buildinfo['build_id']

+         except Exception:

+             build_id = new_build(self.buildinfo)

+             buildinfo = get_build(build_id, strict=True)

          # handle special build types

          for btype in self.typeinfo:

              tinfo = self.typeinfo[btype]
@@ -5745,6 +5842,23 @@ 

              if [o for o in self.prepped_outputs if o['type'] == 'rpm']:

                  new_typed_build(buildinfo, 'rpm')

  

+         # update build state

+         if buildinfo.get('extra'):

+             extra = json.dumps(buildinfo['extra'])

+         else:

+             extra = None

+         owner = get_user(self.buildinfo['owner'], strict=True)['id']

+         source = self.buildinfo.get('source')

+         st_complete = koji.BUILD_STATES['COMPLETE']

+         st_old = buildinfo['state']

+         koji.plugin.run_callbacks('preBuildStateChange', attribute='state', old=st_old, new=st_complete, info=buildinfo)

+         update = UpdateProcessor('build', clauses=['id=%(id)s'], values=buildinfo)

+         update.set(state=st_complete, extra=extra, owner=owner, source=source)

+         update.rawset(completion_time='NOW()')

+         update.execute()

+         buildinfo = get_build(build_id, strict=True)

+         koji.plugin.run_callbacks('postBuildStateChange', attribute='state', old=st_old, new=st_complete, info=buildinfo)

+ 

          self.buildinfo = buildinfo

          return buildinfo

  
@@ -7504,6 +7618,11 @@ 

          build_notification(task_id, build_id)

          if cancel_task:

              Task(task_id).cancelFull(strict=False)

+ 

+     # remove possible CG reservations

+     delete = "DELETE FROM build_reservations WHERE build_id = %(build_id)i"

+     _dml(delete, {'build_id': build_id})

+ 

      build = get_build(build_id, strict=True)

      koji.plugin.run_callbacks('postBuildStateChange', attribute='state', old=st_old, new=st_canceled, info=build)

      return True
@@ -9481,6 +9600,7 @@ 

          fullpath = '%s/%s' % (koji.pathinfo.work(), filepath)

          import_archive(fullpath, buildinfo, type, typeInfo)

  

+     CGInitBuild = staticmethod(cg_init_build)

      CGImport = staticmethod(cg_import)

  

      untaggedBuilds = staticmethod(untagged_builds)

@@ -95,7 +95,7 @@ 

          self.assert_console_message(stdout, expected)

          linked_upload_mock.assert_not_called()

          session.uploadWrapper.assert_has_calls(calls)

-         session.CGImport.assert_called_with(metadata, fake_srv_path)

+         session.CGImport.assert_called_with(metadata, fake_srv_path, None)

  

          # Case 2, running in fg, progress off

          with mock.patch(utils.get_builtin_open()):
@@ -105,7 +105,7 @@ 

          self.assert_console_message(stdout, expected)

          linked_upload_mock.assert_not_called()

          session.uploadWrapper.assert_has_calls(calls)

-         session.CGImport.assert_called_with(metadata, fake_srv_path)

+         session.CGImport.assert_called_with(metadata, fake_srv_path, None)

  

          # reset mocks

          linked_upload_mock.reset_mock()
@@ -129,7 +129,7 @@ 

  

          linked_upload_mock.assert_has_calls(calls)

          session.uploadWrapper.assert_not_called()

-         session.CGImport.assert_called_with(metadata, fake_srv_path)

+         session.CGImport.assert_called_with(metadata, fake_srv_path, None)

  

          # make sure there is no message on output

          self.assert_console_message(stdout, '')
@@ -213,10 +213,11 @@ 

  (Specify the --help global option for a list of other help options)

  

  Options:

-   -h, --help    show this help message and exit

-   --noprogress  Do not display progress of the upload

-   --link        Attempt to hardlink instead of uploading

-   --test        Don't actually import

+   -h, --help     show this help message and exit

+   --noprogress   Do not display progress of the upload

+   --link         Attempt to hardlink instead of uploading

+   --test         Don't actually import

+   --token=TOKEN  Build reservation token

  """ % self.progname)

  

  

@@ -82,6 +82,11 @@ 

        <th>Completed</th><td>$util.formatTimeLong($build.completion_time)</td>

      </tr>

      #end if

+     #if $build.reserved_by_name

+     <tr>

+       <th>Reserved by</th><td>$build.reserved_by_name</td>

+     </tr>

+     #end if

      #if $task

      <tr>

        <th>Task</th><td><a href="taskinfo?taskID=$task.id" class="task$util.taskState($task.state)">$koji.taskLabel($task)</a></td>

If a CG calls init_build(), then later calls init_build() with the same build data, will this cause the second call to fail? (That's my hoped-for behaviour...)

If a CG calls init_build(), then later calls init_build() with the same build data, will this cause the second call to fail? (That's my hoped-for behaviour...)

Not now - it will return same build id as previous call. Anyway, I can add it.

1 new commit added

  • raise an error on repeated call cgInitBuild for same nvr
2 years ago

I need to look at this more, but I have concerns.

The sanity check is weakened in recycle_build. While there are new sanity checks added elsewhere, but it still worries me.

The handling of strict in new_build seems backwards.

I'm not sure about using build.extra for the reservation data. Maybe it's ok, but it feels off.

Does that init_build() method belong in CG_Importer? It doesn't use self.

rebased onto 55a7fc70be96a96cb6461eda4ea82e110c81e872

2 years ago

2 new commits added

  • remove debug print
  • leave recycle_build untouched
2 years ago

I've rewritten a part of it to not use recycle_build at all, fixed strict issue and put init_build into cg_init_build completely.

I'm not sure about using build.extra. Would it be cleaner to introduce new build state aka RESERVED?

Would it be cleaner to introduce new build state aka RESERVED?

It's not so much an extra state. The BUILDING state already pretty much means this.

The main thing here is ensuring that the entity that does the final import matches the entity that performed the reservation. With the old way, we had:

  • a dedicated field in the build table (task_id) to indicate the entity
  • a pretty good way to validate the entity (check host, check that host has that task id open)
  • strong constraints ensuring that only one host can have a given task open
  • an additional state progression sanity check

So here, we need some way to record and validate the entity. This is tricky since all we really have is the content generator info, and we don't know how the CG may be structured (e.g. could the CG have parallel processes performing builds and talking to Koji about it?).

The build.extra field is there and can hold any kind of data, so it's certainly possible to put some reservation data there. It just feels a little off to me. This data is not about the build result, it's technical process stuff.

If we don't use build.extra, I guess we're talking a schema change. Either a new field for build or a new table.

Apart from that, we don't have as strong an entity identifier to work with. A CG id is much less specific than a task id. Should we allow the CG to specify some sort of reservation key value? Or just return one to them? Is that too paranoid?

Upside down: Reservation token seems right to me. I was also wondering, how I should enforce, that same entity which reserved value is using it later. I would create random value to store with build and return to CG. I wouldn't allow CG to set up its own - it could lead to crappy implementation with fixed tokens or predictable ones. It is better for us to be handling it.

In such case, it could be valuable to store this token in extra data forever for audit reasons. Maybe, there is no real use case to track it after build was imported. But what I can imagine is that CG is able to prove, that it was this source CG which imported final data (than just showing build ID).

In such light, separate db field makes sense {'reserved_by_cg: True, 'token': 'xyz'}. On the other hand, it is a field which will not be used in most of the cases. We still can have it as a subfield of build.extra. For separation there could be something like similar json-field build.reservation (or somewhat more generic name for technical/process data) or only INT/VARCHAR build.reservation_token (I'm leaning to this one, we can swap it later for something more generic if such situation appears).

I think a separate varchar field makes the most sense.

If this value will be used to prove the CG has the reservation, then the api should not report it through the normal calls (e.g. getBuild). This makes me wonder if it shouldn't be in a separate table.

We probably also want to record which cg did the reserving. That info would be helpful to display normally.

rebased onto 93eb668a7c06b9605d7200e615bff21bd5baa2c5

2 years ago

1 new commit added

  • migration for build_reservations
2 years ago

I've added tokens via separated table.

  • Where would you think, CG should be displayed?
  • I'm not happy with putting token to CG metadata['build'] from same reason as abusing build.extra. Do you think, that it is ok, or should I move it to separate key (metadata['reservation']) or move completely out of metadata and just change CGImport(metadata, directory, token=None) signature?
CREATE TABLE build_reservations (
       build_id INTEGER NOT NULL REFERENCES build(id),
       user_id INTEGER NOT NULL REFERENCES users(id),
       token VARCHAR(64),
       PRIMARY KEY (build_id)

I wonder if user_id is the right value to include here. Should it be cg_id instead?

It's possible (or at least allowed) for multiple users to act as the same CG. If we were in the future to have kojid use the CG interface to import builds as a "koji" CG, then the user making the reservation could be different from the user performing the final import.

I'm not happy with putting token to CG metadata['build'] from same reason as abusing build.extra.

I agree. Seems like the wrong place

Do you think, that it is ok, or should I move it to separate key (metadata['reservation']) or move completely out of metadata and just change CGImport(metadata, directory, token=None) signature?

To me the metadata seems like the wrong place entirely, given the transient nature of the token. Adding a new arg seems a little wonky too, but I think I might prefer this route. I'm open to argument though. Perhaps I am missing something.

Where would you think, CG should be displayed?

In the buildinfo display, but only when the build is in the BUILDING state. Granted, we should be removing the reservation entry when the build leaves this state.

This makes me ask the question though: should the cg_id be in the build table instead of the reservation table? This is maybe a can of worms since technically the build metadata can span multiple CGs (a fact I've never liked). OR, could we just leave cg_id out and rely purely on the token for the reservation?

'binascii' imported but unused

1 new commit added

  • move token from metadata to api option
2 years ago
  • I've moved token from metadata to an option.
  • display - When build is in BUILDING state, owner is cg, only after import it is rewritten to true owner. That was my idea, that we've stored CG in table to know, which cg imported it and which token it was using. So, that was the reason to not delete it. On the other hand, not sure, if it would be ever used.

If cg_id is in reservation table or build table doesn't make much difference in case of more CGs. Reservation table also allows only one CG.

If we don't want to store token forever, I would move cg_id to build table.

5 new commits added

  • fix tests for CLI
  • extended getBuild to return reservations
  • delete tokens on cancelBuild
  • restrict to cg_id
  • remove debug
2 years ago

1 new commit added

  • CLI shows reservation
2 years ago

Changes according to discussion yesterday. I did one more thing - extended getBuild to return cg info for builds in BUILDING state to be able to show it in web ui/CLI

Metadata Update from @tkopecek:
- Pull-request tagged with: testing-ready

2 years ago

1 new commit added

  • docs for CG reservation API
2 years ago

1 new commit added

  • unify reserved_id/name usage
2 years ago

rebased onto d833400

2 years ago

Possible additional commit with moving cg_id from build_reservations table to build table.
https://pagure.io/fork/tkopecek/koji/c/2b02cb229e09c3edc18e0ce2c68189a2fd03c005?branch=issue1463a

Possible additional commit

Looks ok overall.

There's a stray debug print statement in left in the hub code

With the cg_id out of the reservation table, all that data is transient. I think we can clear it when when the import finalizes.

https://github.com/mikem23/koji-playground/commits/issue1463a

Metadata Update from @mfilip:
- Pull-request tagged with: testing-done

2 years ago

Commit bf39468 fixes this pull-request

Pull-Request has been merged by mikem

2 years ago

Merged with a couple fixes related to epoch

@mikem - It seems, that it is not merged properly - I can't see change in buildinfo.chtml in current master.