#55 releases.json: Fetch image checksums and sizes for the images
Closed 5 years ago by robyduck. Opened 7 years ago by mbriza.
mbriza/fedora-websites f25  into  f25

file modified
+1 -1
@@ -34,7 +34,7 @@ 

  #### 1. Setup your system

  

      sudo dnf install git gettext python-genshi python-lxml python-setuptools python-dateutil \

-     python-dogpile-cache babel python-feedparser fedfind

+     python-dogpile-cache babel python-feedparser fedfind python2-requests python-pyquery

  

      sudo dnf groups install 'Web Server'

  

@@ -3,19 +3,29 @@ 

  import fedfind.release

  import fedfind.helpers

  import json

+ import os.path

+ import lxml

+ import requests

+ from pyquery import PyQuery

+ import re

+ import sys

  

  output = []

+ folders = set()

+ checksum_files = set()

+ hashes = {}

  

- def hashify(version, milestone, arch, link, variant, subvariant):

+ def hashify(version, milestone, arch, link, subvariant):

      return { 'version': version

             , 'arch': arch

             , 'link': link

-            , 'variant': variant

             , 'subvariant': subvariant

             }

  

+ #FIXME futureproof

  releases_to_report = [

-       fedfind.release.get_release(25, 'Beta')

+       fedfind.release.get_release(25)

+     , fedfind.release.get_release(25, 'Beta')

      , fedfind.release.get_release(25, 'Alpha')

      , fedfind.release.get_release(24)

      , fedfind.release.get_release(24, 'Beta')
@@ -33,8 +43,42 @@ 

                  rel.milestone,

                  img['arch'],

                  location,

-                 img['variant'],

                  img['subvariant'])

          output.append(h)

+         folders.add(os.path.dirname(location))

+ 

+ for folder in folders:

+     try:

+         pq = PyQuery(folder)

+         for link in pq.items('a'):

+             if 'CHECKSUM' in link.attr('href'):

+                 checksum_files.add("/".join([folder, link.attr('href')]))

+                 break

+     except lxml.etree.XMLSyntaxError:

+         pass

+     except requests.exceptions.ConnectionError:

+         print >> sys.stderr, "Couldn't fetch from: %s" % (folder)

+ 

+ for file in checksum_files:

+     checksums = requests.get(file)

+     if checksums.status_code == 200:

+         for line in checksums.content.split('\n'):

+             m = re.search('^SHA256 \((.+)\) = (.+)$', line)

+             if m:

+                 hashes[m.group(1)] = m.group(2)

+ 

+ oldoutput = list(output)

+ output = list()

+ for item in oldoutput:

+     if os.path.basename(item['link']) in hashes:

+         item['sha256'] = hashes[os.path.basename(item['link'])]

+     else:

+         print >> sys.stderr, "No checksum for %s" % (item['link'])

+     head = requests.head(item['link'].replace("https://download.fedoraproject.org/pub/", "https://mirrors.nic.cz/"), allow_redirects=True)

+     if head.status_code == 200 and 'Content-Length' in head.headers:

+         item['size'] = head.headers['Content-Length']

+     else:

+         print >> sys.stderr, "No size for %s" % (item['link'])

+     output.append(item)

  

  print json.dumps(output)

Adds a dependency on python2-requests (which is okay, I guess) and python-pyquery (which may not be OK) because I'm too lazy to do proper DOM parsing and I don't know a better way to find a file in the same folder as some other file on a remote HTTP server.

I don't know Python very well so please feel free to fix my code.

I am in my parents' house in a village with a pretty bad internet connection, so it was slooooow and it seemed like the mirror I'm getting is broken. Hopefully this will be fine somewhere closer to the mirror.

Ricky mentioned he's been working on getting the release date so I left that alone so far.

Links to stuff it's been unable to fetch the data for are reported to stderr.

There is some junk in the diff I didn't cause. Sorry for that, but this is my first pull request on Pagure and I thought I was doing the same thing as on GitHub.

The junk's gone after creating the PR, never mind.

Yeah, not sure if we should add this dependency, but let's test it a bit with Ricky.

Also, we already parse the release date from the taskjuggler file on fpeople, you can see the script here: https://pagure.io/fedora-websites/raw/master/f/getfedora.org/build/release_schedule.py

@codeblock : Have you had a chance to look at this yet?

Closing this PR without merging it for now. Fedfind would probably be a better place to address this, and the PR seems to just duplicate some information we can already get with our scripts.
If needed, please open a new PR or issue.

Pull-Request has been closed by robyduck

5 years ago