Friday 31 July 2015

Parse XML in Scala/Python

1. Scala

import scala.xml._

/** Prints the monthly values of every `bccr_bcm2_0` record in the document.
  *
  * Looks at the `domain.web.MonthlyGcmDatum` elements that are direct children
  * of `xmlDoc`, keeps those whose `gcm` text equals "bccr_bcm2_0", and prints
  * the text of each `double` under their `monthVals`, one value per line.
  *
  * @param xmlDoc root element of the parsed XML document
  */
def parseXML(xmlDoc: Elem): Unit = {
  for {
    record    <- xmlDoc \ "domain.web.MonthlyGcmDatum"
    if (record \ "gcm").text == "bccr_bcm2_0"
    monthVals <- record \ "monthVals"
    value     <- monthVals \ "double"
  } println(value.text)
}

import java.io.File
import scala.util.{Failure, Success, Try}

// Run parseXML on every "*.xml" entry directly under `dir`; entries that fail
// to load are reported on stdout and skipped.
Option(new File(dir).listFiles)   // listFiles returns null if `dir` is not a readable directory
  .getOrElse(Array.empty[File])
  .filter(_.getName.endsWith(".xml"))
  .foreach { f =>
    // Load from the File itself: rebuilding the path as `dir + f.getName`
    // breaks whenever `dir` has no trailing path separator.
    Try(XML.loadFile(f)) match {
      case Success(doc) => parseXML(doc)
      case Failure(_)   => println(f.getAbsolutePath + " is not a valid XML file")
    }
  }


2. Python


  
    from xml.dom import minidom

    # Build a tuple (country, v1, v2, ...) holding the monthly <double> values
    # of every record in `fileName` whose <gcm> text is "bccr_bcm2_0".
    doc = minidom.parse(fileName)
    gcms = doc.getElementsByTagName("gcm")

    # Country label = second "/"-separated component of the path with ".xml"
    # removed, e.g. "data/BRA.xml" -> "BRA".
    # NOTE(review): assumes exactly one directory level in fileName - confirm.
    country = fileName.replace(".xml", '').split("/")[1]
    pr = (country, )

    for gcm in gcms:
        # An empty <gcm/> has no child node to compare, so skip it.
        if gcm.firstChild is None or gcm.firstChild.data != "bccr_bcm2_0":
            continue

        # Read <monthVals> from the record that owns this <gcm>; a document-wide
        # lookup would always return the first record's values.
        for monthVals in gcm.parentNode.getElementsByTagName("monthVals"):
            for month in monthVals.getElementsByTagName("double"):
                pr += (float(month.firstChild.data),)



3. A sample record from the XML (each file holds such records as children of its root element) looks like:

<domain.web.MonthlyGcmDatum>
    <gcm>bccr_bcm2_0</gcm>
    <variable>pr</variable>
    <monthVals>
      <double>48.13440323</double>
      <double>22.02027893</double>
      <double>12.22474861</double>
      <double>8.798520088</double>
      <double>52.74710083</double>
      <double>156.4017639</double>
      <double>152.3449402</double>
      <double>157.5389404</double>
      <double>118.4415054</double>
      <double>131.4727631</double>
      <double>131.8924103</double>
      <double>77.31517029</double>
    </monthVals>
    <fromYear>1980</fromYear>
    <toYear>1999</toYear>
  </domain.web.MonthlyGcmDatum>


No comments:

Post a Comment