#571 Support large ints over xmlrpc using i8 tag
Merged 6 years ago by mikem. Opened 6 years ago by mikem.
mikem/koji xmlrpc-large-int  into  master

file modified
+3 -3
@@ -7,7 +7,6 @@ 

  import os.path

  import pprint

  import sys

- import six.moves.xmlrpc_client

  from six.moves import cStringIO

  from six.moves.urllib.parse import quote

  
@@ -15,6 +14,7 @@ 

  sys.path.insert(1, os.path.join(os.getcwd(), 'hub'))

  import koji

  import kojixmlrpc

+ import koji.xmlrpcplus

  

  

  def get_url(environ):
@@ -58,7 +58,7 @@ 

          else:

              args.append(nice_literal(s))

      args = koji.encode_args(*args, **kwargs)

-     request = six.moves.xmlrpc_client.dumps(args, method, allow_none=1)

+     request = koji.xmlrpcplus.dumps(args, method, allow_none=1)

      return request

  

  
@@ -68,7 +68,7 @@ 

  

  

  def parse_response(data):

-     p, u = six.moves.xmlrpc_client.getparser()

+     p, u = koji.xmlrpcplus.getparser()

      for chunk in data:

          p.feed(chunk)

      p.close()

@@ -588,3 +588,15 @@ 

  API. You can get listing of all available calls, arguments and basic help via

  calling `koji list-api` command. This call will also provide you API

  extensions provided by plugins in that particular koji instance.

+ 

+ Because of the data Koji routinely deals with, we use the following extensions

+ to the xmlrpc standard:

+ 

+     * We use the ``nil`` extension to represent null values (e.g. None in

+       Python). Koji's library handles this automatically. If you are using a

+       different library, you may need to explicitly enable this (e.g. enabling

+       allow_none in Python's own xmlrpc library).

+     * We represent large integers with the ``i8`` tag. This standard is borrowed

+       from Apache's `ws-xmlrpc <https://ws.apache.org/xmlrpc/types.html>`

+       implementation. Python's own xmlrpc library understands this tag, even

+       thought it will not emit it.

file modified
+3 -2
@@ -55,6 +55,8 @@ 

  import types

  import xmlrpclib

  import zipfile

+ 

+ import koji.xmlrpcplus

  from koji.context import context

  

  try:
@@ -559,8 +561,7 @@ 

              raise koji.GenericError("invalid channel policy")

  

      # encode xmlrpc request

-     opts['request'] = xmlrpclib.dumps(tuple(arglist), methodname=method,

-                                       allow_none=1)

+     opts['request'] = koji.xmlrpcplus.dumps(tuple(arglist), methodname=method)

      opts['state'] = koji.TASK_STATES['FREE']

      opts['method'] = method

      koji.plugin.run_callbacks('preTaskStateChange', attribute='state', old=None, new='FREE', info=opts)

file modified
+7 -19
@@ -26,11 +26,8 @@ 

  import sys

  import time

  import traceback

- import types

  import pprint

  import resource

- import xmlrpclib

- from xmlrpclib import getparser, dumps, Fault

  

  import koji

  import koji.auth
@@ -38,21 +35,14 @@ 

  import koji.plugin

  import koji.policy

  import koji.util

+ # import xmlrpclib functions from koji to use tweaked Marshaller

+ from koji.xmlrpcplus import getparser, dumps, Fault, ExtendedMarshaller

  from koji.context import context

  

  

- # Workaround to allow xmlrpclib deal with iterators

- class Marshaller(xmlrpclib.Marshaller):

+ class Marshaller(ExtendedMarshaller):

  

-     dispatch = xmlrpclib.Marshaller.dispatch.copy()

- 

-     def dump_generator(self, value, write):

-         dump = self.__dump

-         write("<value><array><data>\n")

-         for v in value:

-             dump(v, write)

-         write("</data></array></value>\n")

-     dispatch[types.GeneratorType] = dump_generator

+     dispatch = ExtendedMarshaller.dispatch.copy()

  

      def dump_datetime(self, value, write):

          # For backwards compatibility, we return datetime objects as strings
@@ -60,8 +50,6 @@ 

          self.dump_string(value, write)

      dispatch[datetime.datetime] = dump_datetime

  

- xmlrpclib.Marshaller = Marshaller

- 

  

  class HandlerRegistry(object):

      """Track handlers for RPC calls"""
@@ -240,10 +228,10 @@ 

              response = handler(environ)

              # wrap response in a singleton tuple

              response = (response,)

-             response = dumps(response, methodresponse=1, allow_none=1)

+             response = dumps(response, methodresponse=1, marshaller=Marshaller)

          except Fault, fault:

              self.traceback = True

-             response = dumps(fault)

+             response = dumps(fault, marshaller=Marshaller)

          except:

              self.traceback = True

              # report exception back to server
@@ -267,7 +255,7 @@ 

                  else:

                      faultString = "%s: %s" % (e_class, e)

              self.logger.warning(tb_str)

-             response = dumps(Fault(faultCode, faultString))

+             response = dumps(Fault(faultCode, faultString), marshaller=Marshaller)

  

          return response

  

file modified
+1 -1
@@ -2,7 +2,7 @@ 

  PACKAGE = $(shell basename `pwd`)

  ifeq ($(PYTHON), python3)

      # for python3 we fully support only basic library + CLI

-     PYFILES = __init__.py util.py plugin.py

+     PYFILES = __init__.py util.py plugin.py xmlrpcplus.py

      PYSCRIPTS =

      SUBDIRS =

  else

file modified
+4 -4
@@ -75,11 +75,11 @@ 

  import traceback

  from . import util

  import warnings

- import six.moves.xmlrpc_client

  import xml.sax

  import xml.sax.handler

- from six.moves.xmlrpc_client import loads, dumps, Fault

  import six.moves.urllib

+ from koji.xmlrpcplus import getparser, loads, dumps, Fault

+ 

  

  PROFILE_MODULES = {}  # {module_name: module_instance}

  
@@ -2390,7 +2390,7 @@ 

          return ret

  

      def _read_xmlrpc_response(self, response):

-         p, u = six.moves.xmlrpc_client.getparser()

+         p, u = getparser()

          for chunk in response.iter_content(8192):

              if self.opts.get('debug_xmlrpc', False):

                  print("body: %r" % chunk)
@@ -2651,7 +2651,7 @@ 

              while True:

                  if debug:

                      self.logger.debug("uploadFile(%r,%r,%r,%r,%r,...)" %(path, name, sz, digest, offset))

-                 if self.callMethod('uploadFile', path, name, encode_int(sz), digest, encode_int(offset), data, **volopts):

+                 if self.callMethod('uploadFile', path, name, sz, digest, offset, data, **volopts):

                      break

                  if tries <= retries:

                      tries += 1

file modified
+6 -6
@@ -22,6 +22,7 @@ 

  

  import koji

  import koji.tasks

+ import koji.xmlrpcplus

  from koji.tasks import safe_rmtree

  from koji.util import md5_constructor, adler32_constructor, parseStatus, dslice

  import os
@@ -34,7 +35,6 @@ 

  import sys

  import traceback

  import errno

- import xmlrpclib

  

  

  def incremental_upload(session, fname, fd, path, retries=5, logger=None):
@@ -61,7 +61,7 @@ 

  

          tries = 0

          while True:

-             if session.uploadFile(path, fname, koji.encode_int(size), digest, koji.encode_int(offset), data):

+             if session.uploadFile(path, fname, size, digest, offset, data):

                  break

  

              if tries <= retries:
@@ -1209,12 +1209,12 @@ 

          try:

              response = (handler.run(),)

              # note that we wrap response in a singleton tuple

-             response = xmlrpclib.dumps(response, methodresponse=1, allow_none=1)

+             response = koji.xmlrpcplus.dumps(response, methodresponse=1, allow_none=1)

              self.logger.info("RESPONSE: %r" % response)

              self.session.host.closeTask(handler.id, response)

              return

-         except xmlrpclib.Fault, fault:

-             response = xmlrpclib.dumps(fault)

+         except koji.xmlrpcplus.Fault, fault:

+             response = koji.xmlrpcplus.dumps(fault)

              tb = ''.join(traceback.format_exception(*sys.exc_info())).replace(r"\n", "\n")

              self.logger.warn("FAULT:\n%s" % tb)

          except (SystemExit, koji.tasks.ServerExit, KeyboardInterrupt):
@@ -1233,7 +1233,7 @@ 

              if issubclass(e_class, koji.GenericError):

                  #just pass it through

                  tb = str(e)

-             response = xmlrpclib.dumps(xmlrpclib.Fault(faultCode, tb))

+             response = koji.xmlrpcplus.dumps(koji.xmlrpcplus.Fault(faultCode, tb))

  

          # if we get here, then we're handling an exception, so fail the task

          self.session.host.failTask(handler.id, response)

file added
+118
@@ -0,0 +1,118 @@ 

+ """

+ Custom xmlrpc handling for Koji

+ """

+ 

+ import six

+ import six.moves.xmlrpc_client as xmlrpc_client

+ import types

+ 

+ 

+ # duplicate a few values that we need

+ getparser = xmlrpc_client.getparser

+ loads = xmlrpc_client.loads

+ Fault = xmlrpc_client.Fault

+ 

+ 

+ class ExtendedMarshaller(xmlrpc_client.Marshaller):

+ 

+     dispatch = xmlrpc_client.Marshaller.dispatch.copy()

+ 

+     def _dump(self, value, write):

+         # Parent class is unfriendly to subclasses :-/

+         f = self.dispatch[type(value)]

+         f(self, value, write)

+ 

+     def dump_generator(self, value, write):

+         dump = self._dump

+         write("<value><array><data>\n")

+         for v in value:

+             dump(v, write)

+         write("</data></array></value>\n")

+     dispatch[types.GeneratorType] = dump_generator

+ 

+     MAXI8 = 2 ** 63 - 1

+     MINI8 = -2 ** 63

+ 

+     def dump_int(self, value, write):

+         # python2's xmlrpclib doesn't support i8 extension for marshalling,

+         # but can unmarshall it correctly.

+         if (value > self.MAXI8 or value < self.MINI8):

+             raise OverflowError("long int exceeds XML-RPC limits")

+         elif (value > xmlrpc_client.MAXINT or

+                 value < xmlrpc_client.MININT):

+             write("<value><i8>")

+             write(str(int(value)))

+             write("</i8></value>\n")

+         else:

+             write("<value><int>")

+             write(str(int(value)))

+             write("</int></value>\n")

+     dispatch[int] = dump_int

+ 

+     # we always want to allow None

+     def dump_nil(self, value, write):

+         write("<value><nil/></value>")

+     dispatch[type(None)] = dump_nil

+ 

+ 

+ if six.PY2:

+     ExtendedMarshaller.dispatch[long] = ExtendedMarshaller.dump_int

+ 

+ 

+ 

+ def dumps(params, methodname=None, methodresponse=None, encoding=None,

+           allow_none=1, marshaller=None):

+     """encode an xmlrpc request or response

+ 

+     Differences from the xmlrpclib version:

+         - allow_none is on by default

+         - uses our ExtendedMarshaller by default

+         - option to specify marshaller

+     """

+ 

+     if isinstance(params, Fault):

+         methodresponse = 1

+     elif not isinstance(params, tuple):

+         raise TypeError('params must be a tuple of Fault instance')

+     elif methodresponse and len(params) != 1:

+         raise ValueError('response tuple must be a singleton')

+ 

+     if not encoding:

+         encoding = "utf-8"

+ 

+     if marshaller is not None:

+         m = marshaller(encoding)

+     else:

+         m = ExtendedMarshaller(encoding)

+ 

+     data = m.dumps(params)

+ 

+     if encoding != "utf-8":

+         xmlheader = "<?xml version='1.0' encoding='%s'?>\n" % str(encoding)

+     else:

+         xmlheader = "<?xml version='1.0'?>\n"  # utf-8 is default

+ 

+     # standard XML-RPC wrappings

+     if methodname:

+         # a method call

+         if six.PY2 and isinstance(methodname, six.text_type):

+             # Do we need this?

+             methodname = methodname.encode(encoding, 'xmlcharrefreplace')

+         parts = (

+             xmlheader,

+             "<methodCall>\n"

+             "<methodName>", methodname, "</methodName>\n",

+             data,

+             "</methodCall>\n"

+             )

+     elif methodresponse:

+         # a method response, or a fault structure

+         parts = (

+             xmlheader,

+             "<methodResponse>\n",

+             data,

+             "</methodResponse>\n"

+             )

+     else:

+         return data  # return as is

+     return ''.join(parts)

@@ -0,0 +1,178 @@ 

+ # coding=utf-8

+ import unittest

+ 

+ from six.moves import xmlrpc_client

+ from koji import xmlrpcplus

+ 

+ 

+ class TestDump(unittest.TestCase):

+ 

+     standard_data = [

+             "Hello World",

+             5,

+             5.5,

+             None,

+             True,

+             False,

+             u'Hævē s°mə ŭnıčođė',

+             [1],

+             {"a": 1},

+             ["fnord"],

+             {"a": ["b", 1, 2, None], "b": {"c": 1}},

+         ]

+ 

+     def test_call(self):

+         method = 'my_rpc_method'

+         for value in self.standard_data:

+             value = (value, "other arg")

+             enc = xmlrpcplus.dumps(value, methodname=method)

+             _enc = xmlrpc_client.dumps(value, methodname=method, allow_none=1)

+             self.assertEqual(enc, _enc)

+             params, method = xmlrpc_client.loads(enc)

+             self.assertEqual(params, value)

+             self.assertEqual(method, method)

+ 

+     def test_response(self):

+         for value in self.standard_data:

+             value = (value,)

+             enc = xmlrpcplus.dumps(value, methodresponse=1)

+             _enc = xmlrpc_client.dumps(value, methodresponse=1, allow_none=1)

+             self.assertEqual(enc, _enc)

+             params, method = xmlrpc_client.loads(enc)

+             self.assertEqual(params, value)

+             self.assertEqual(method, None)

+ 

+     def test_just_data(self):

+         # xmlrpc_client supports this case, so I guess we should too

+         # neither method call nor response

+         for value in self.standard_data:

+             value = (value, "foo", "bar")

+             enc = xmlrpcplus.dumps(value)

+             _enc = xmlrpc_client.dumps(value, allow_none=1)

+             self.assertEqual(enc, _enc)

+             params, method = xmlrpc_client.loads(enc)

+             self.assertEqual(params, value)

+             self.assertEqual(method, None)

+ 

+     def gendata(self):

+         for value in self.standard_data:

+             yield value

+ 

+     def test_generator(self):

+         value = (self.gendata(),)

+         enc = xmlrpcplus.dumps(value, methodresponse=1)

+         params, method = xmlrpc_client.loads(enc)

+         expect = (list(self.gendata()),)

+         self.assertEqual(params, expect)

+         self.assertEqual(method, None)

+ 

+     long_data = [

+             2 ** 63 - 1,

+             -(2 ** 63),

+             [2**n - 1 for n  in range(64)],

+             {"a": [2 ** 63 - 23, 5], "b": 2**63 - 42},

+             ]

+ 

+     def test_i8(self):

+         for value in self.long_data:

+             value = (value,)

+             enc = xmlrpcplus.dumps(value, methodresponse=1)

+             params, method = xmlrpc_client.loads(enc)

+             self.assertEqual(params, value)

+             self.assertEqual(method, None)

+         # and as a call

+         method = "foomethod"

+         value = tuple(self.long_data)

+         enc = xmlrpcplus.dumps(value, methodname=method)

+         params, method = xmlrpc_client.loads(enc)

+         self.assertEqual(params, value)

+         self.assertEqual(method, method)

+ 

+     def test_overflow(self):

+         value = (2**64,)

+         with self.assertRaises(OverflowError):

+             xmlrpcplus.dumps(value)

+ 

+     def test_fault(self):

+         code = 1001

+         msg = "some useless error"

+         f1 = xmlrpcplus.Fault(code, msg)

+         f2 = xmlrpc_client.Fault(code, msg)

+         value = f1

+         enc = xmlrpcplus.dumps(value, methodresponse=1)

+         _enc = xmlrpc_client.dumps(value, methodresponse=1, allow_none=1)

+         self.assertEqual(enc, _enc)

+         try:

+             params, method = xmlrpc_client.loads(enc)

+         except xmlrpc_client.Fault as e:

+             self.assertEqual(e.faultCode, code)

+             self.assertEqual(e.faultString, msg)

+         else:

+             raise Exception('Fault not raised')

+ 

+     def test_badargs(self):

+         wrong_type = ["a", 0, 0.1, [], {}, True]

+         for value in wrong_type:

+             with self.assertRaises(TypeError):

+                 xmlrpcplus.dumps(value, methodname="foo")

+         # responses much be singletons

+         value = (1, 2, 3)

+         with self.assertRaises(ValueError):

+             xmlrpcplus.dumps(value, methodresponse=1)

+ 

+     def test_marshaller(self):

+         value = 3.14159

+         value = (value,)

+         enc = xmlrpcplus.dumps(value, methodresponse=1, marshaller=MyMarshaller)

+         params, method = xmlrpc_client.loads(enc)

+         # MyMarshaller rounds off floats

+         self.assertEqual(params, (3,))

+         self.assertEqual(method, None)

+ 

+     def test_encoding(self):

+         data = [

+                 45,

+                 ["hello", "world"],

+                 {"a": 5.5, "b": [None]},

+                 ]

+         for value in data:

+             value = (value,)

+             enc = xmlrpcplus.dumps(value, methodresponse=1, encoding='us-ascii')

+             _enc = xmlrpc_client.dumps(value, methodresponse=1, allow_none=1, encoding='us-ascii')

+             self.assertEqual(enc, _enc)

+             params, method = xmlrpc_client.loads(enc)

+             self.assertEqual(params, value)

+             self.assertEqual(method, None)

+ 

+     def test_no_i8(self):

+         # we shouldn't use i8 if we don't have to

+         data = [

+                 23,

+                 42,

+                 -1024,

+                 2 ** 31 - 1,

+                 -2 ** 31,

+                 [2**31 -1],

+                 {"a": -2 ** 31, "b": 3.14},

+                 ]

+         for value in data:

+             value = (value,)

+             enc = xmlrpcplus.dumps(value, methodresponse=1, encoding='us-ascii')

+             _enc = xmlrpc_client.dumps(value, methodresponse=1, allow_none=1, encoding='us-ascii')

+             if 'i8' in enc or 'I8' in enc:

+                 raise Exception('i8 used unnecessarily')

+             self.assertEqual(enc, _enc)

+             params, method = xmlrpc_client.loads(enc)

+             self.assertEqual(params, value)

+             self.assertEqual(method, None)

+ 

+ 

+ class MyMarshaller(xmlrpcplus.ExtendedMarshaller):

+ 

+     dispatch = xmlrpcplus.ExtendedMarshaller.dispatch.copy()

+ 

+     def dump_float_rounded(self, value, write):

+         value = int(value)

+         self.dump_int(value, write)

+ 

+     dispatch[float] = dump_float_rounded

file modified
+2 -2
@@ -731,8 +731,8 @@ 

          """

          remotepath = os.path.dirname(os.path.join(self.getUploadDir(), filepath))

          filename = os.path.basename(filepath)

-         self.session.uploadFile(remotepath, filename, koji.encode_int(size),

-                                 md5sum, koji.encode_int(offset), data)

+         self.session.uploadFile(remotepath, filename, size,

+                                 md5sum, offset, data)

  

      def verifyChecksum(self, path, checksum, algo='sha1'):

          local_path = os.path.abspath(os.path.join(self.output_dir, path))

file modified
+1 -2
@@ -774,14 +774,13 @@ 

  

  def _chunk_file(server, environ, taskID, name, offset, size, volume):

      remaining = size

-     encode_int = koji.encode_int

      while True:

          if remaining <= 0:

              break

          chunk_size = 1048576

          if remaining < chunk_size:

              chunk_size = remaining

-         content = server.downloadTaskOutput(taskID, name, offset=encode_int(offset), size=chunk_size, volume=volume)

+         content = server.downloadTaskOutput(taskID, name, offset=offset, size=chunk_size, volume=volume)

          if not content:

              break

          yield content

Python's xmlrpc decoder understands the i8 tag that comes from the ws-xmlrpc spec [1], but the encoder will not emit it. Unfortunately, 32 bits is just not enough anymore.

In these changes, Tomas and I have modified the encoder to emit i8 tags when necessary. We were already using a modified encoder in the hub code. Now, we have generalized this and moved it into the library (so that clients can send large ints to the hub too).

We have stopped using encode_int() in client code, allowing clients to send the large ints as i8. Even old hubs will be able to decode these. However, we have kept encode_int() is the few places it was used in the hub for backwards api compatibility.

Most of the new code is in the new koji.xmlrpcplus lib, which has full unit test coverage.

These changes should fix #464

[1] https://ws.apache.org/xmlrpc/types.html

This is a significant change. Reviews are welcome :sweat_smile:

A word about compatibility. As near as I can tell, the only times when this should change the result of a hub call are cases where the call would fail to encode due to an OverflowError.

All python clients should be able to decode these i8 tags. Other clients might not be able to. In such cases, they will encounter a decoding error where before the would have gotten a Fault from the hub due the overflow. So, errors either way.

We have not altered the places where the hub returned large integers as strings as a workaround, so code that expects/relies on this will not fail.

These changes are based on those from #470

@mikem, Is there any feasibility and reasonableness to keep the old behavior as a version in request/response header?

tested current code on my vm.
It works fine.
:thumbsup:

@mikem, Is there any feasibility and reasonableness to keep the old behavior as a version in request/response header?

It's not something we've done before, and it could actually limit the fix to require the client to alter their request in order to get the new behavior. Remember, that all existing python clients (and possibly others) can already understand the i8 tags.

Note that, in the test_i8 unit test, we are encoding large ints with our new encoder and decoding them with the plain python xmlrpclib decoder.

Also, in several unit tests we assert that the encoded output is identical between our encoder and python's, for data that lacks large ints.

rebased

6 years ago

Commit 4cec371 fixes this pull-request

Pull-Request has been merged by mikem@redhat.com

6 years ago