第11章 PMML モデルの例
PMML は、XML スキーマ を定義しており、このスキーマを使用することで、PMML モデルが異なる PMML 準拠のプラットフォーム間で使用できるようになります。PMML 仕様では、プロデューサーとコンシューマー適合を満たしていることが前提で、複数のソフトウェアプラットフォームが同じファイルを使用してオーサリング、テスト、および実稼働環境での実行を可能にします。
以下は、PMML 回帰、スコアカード、ツリー、マイニング、およびクラスタリングモデルの例です。これらの例では、Red Hat Decision Manager のデシジョンサーバーと統合可能なサポート対象モデルを紹介します。
PMML の詳細例は、DMG の PMML Sample Files ページを参照してください。
PMML 回帰モデルの例
<PMML version="4.2" xsi:schemaLocation="http://www.dmg.org/PMML-4_2 http://www.dmg.org/v4-2-1/pmml-4-2.xsd" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns="http://www.dmg.org/PMML-4_2"> <Header copyright="JBoss"/> <DataDictionary numberOfFields="5"> <DataField dataType="double" name="fld1" optype="continuous"/> <DataField dataType="double" name="fld2" optype="continuous"/> <DataField dataType="string" name="fld3" optype="categorical"> <Value value="x"/> <Value value="y"/> </DataField> <DataField dataType="double" name="fld4" optype="continuous"/> <DataField dataType="double" name="fld5" optype="continuous"/> </DataDictionary> <RegressionModel algorithmName="linearRegression" functionName="regression" modelName="LinReg" normalizationMethod="logit" targetFieldName="fld4"> <MiningSchema> <MiningField name="fld1"/> <MiningField name="fld2"/> <MiningField name="fld3"/> <MiningField name="fld4" usageType="predicted"/> <MiningField name="fld5" usageType="target"/> </MiningSchema> <RegressionTable intercept="0.5"> <NumericPredictor coefficient="5" exponent="2" name="fld1"/> <NumericPredictor coefficient="2" exponent="1" name="fld2"/> <CategoricalPredictor coefficient="-3" name="fld3" value="x"/> <CategoricalPredictor coefficient="3" name="fld3" value="y"/> <PredictorTerm coefficient="0.4"> <FieldRef field="fld1"/> <FieldRef field="fld2"/> </PredictorTerm> </RegressionTable> </RegressionModel> </PMML>
PMML スコアカードモデルの例
<PMML version="4.2" xsi:schemaLocation="http://www.dmg.org/PMML-4_2 http://www.dmg.org/v4-2-1/pmml-4-2.xsd" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns="http://www.dmg.org/PMML-4_2"> <Header copyright="JBoss"/> <DataDictionary numberOfFields="4"> <DataField name="param1" optype="continuous" dataType="double"/> <DataField name="param2" optype="continuous" dataType="double"/> <DataField name="overallScore" optype="continuous" dataType="double" /> <DataField name="finalscore" optype="continuous" dataType="double" /> </DataDictionary> <Scorecard modelName="ScorecardCompoundPredicate" useReasonCodes="true" isScorable="true" functionName="regression" baselineScore="15" initialScore="0.8" reasonCodeAlgorithm="pointsAbove"> <MiningSchema> <MiningField name="param1" usageType="active" invalidValueTreatment="asMissing"> </MiningField> <MiningField name="param2" usageType="active" invalidValueTreatment="asMissing"> </MiningField> <MiningField name="overallScore" usageType="target"/> <MiningField name="finalscore" usageType="predicted"/> </MiningSchema> <Characteristics> <Characteristic name="ch1" baselineScore="50" reasonCode="reasonCh1"> <Attribute partialScore="20"> <SimplePredicate field="param1" operator="lessThan" value="20"/> </Attribute> <Attribute partialScore="100"> <CompoundPredicate booleanOperator="and"> <SimplePredicate field="param1" operator="greaterOrEqual" value="20"/> <SimplePredicate field="param2" operator="lessOrEqual" value="25"/> </CompoundPredicate> </Attribute> <Attribute partialScore="200"> <CompoundPredicate booleanOperator="and"> <SimplePredicate field="param1" operator="greaterOrEqual" value="20"/> <SimplePredicate field="param2" operator="greaterThan" value="25"/> </CompoundPredicate> </Attribute> </Characteristic> <Characteristic name="ch2" reasonCode="reasonCh2"> <Attribute partialScore="10"> <CompoundPredicate booleanOperator="or"> <SimplePredicate field="param2" operator="lessOrEqual" value="-5"/> <SimplePredicate field="param2" operator="greaterOrEqual" value="50"/> </CompoundPredicate> </Attribute> <Attribute partialScore="20"> <CompoundPredicate booleanOperator="and"> <SimplePredicate field="param2" operator="greaterThan" value="-5"/> <SimplePredicate field="param2" operator="lessThan" value="50"/> </CompoundPredicate> </Attribute> </Characteristic> </Characteristics> </Scorecard> </PMML>
PMML ツリーモデルの例
<PMML version="4.2" xsi:schemaLocation="http://www.dmg.org/PMML-4_2 http://www.dmg.org/v4-2-1/pmml-4-2.xsd" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns="http://www.dmg.org/PMML-4_2"> <Header copyright="JBOSS"/> <DataDictionary numberOfFields="5"> <DataField dataType="double" name="fld1" optype="continuous"/> <DataField dataType="double" name="fld2" optype="continuous"/> <DataField dataType="string" name="fld3" optype="categorical"> <Value value="true"/> <Value value="false"/> </DataField> <DataField dataType="string" name="fld4" optype="categorical"> <Value value="optA"/> <Value value="optB"/> <Value value="optC"/> </DataField> <DataField dataType="string" name="fld5" optype="categorical"> <Value value="tgtX"/> <Value value="tgtY"/> <Value value="tgtZ"/> </DataField> </DataDictionary> <TreeModel functionName="classification" modelName="TreeTest"> <MiningSchema> <MiningField name="fld1"/> <MiningField name="fld2"/> <MiningField name="fld3"/> <MiningField name="fld4"/> <MiningField name="fld5" usageType="predicted"/> </MiningSchema> <Node score="tgtX"> <True/> <Node score="tgtX"> <SimplePredicate field="fld4" operator="equal" value="optA"/> <Node score="tgtX"> <CompoundPredicate booleanOperator="surrogate"> <SimplePredicate field="fld1" operator="lessThan" value="30.0"/> <SimplePredicate field="fld2" operator="greaterThan" value="20.0"/> </CompoundPredicate> <Node score="tgtX"> <SimplePredicate field="fld2" operator="lessThan" value="40.0"/> </Node> <Node score="tgtZ"> <SimplePredicate field="fld2" operator="greaterOrEqual" value="10.0"/> </Node> </Node> <Node score="tgtZ"> <CompoundPredicate booleanOperator="or"> <SimplePredicate field="fld1" operator="greaterOrEqual" value="60.0"/> <SimplePredicate field="fld1" operator="lessOrEqual" value="70.0"/> </CompoundPredicate> <Node score="tgtZ"> <SimpleSetPredicate booleanOperator="isNotIn" field="fld4"> <Array type="string">optA optB</Array> </SimpleSetPredicate> </Node> </Node> </Node> <Node score="tgtY"> <CompoundPredicate booleanOperator="or"> <SimplePredicate field="fld4" operator="equal" value="optA"/> <SimplePredicate field="fld4" operator="equal" value="optC"/> </CompoundPredicate> <Node score="tgtY"> <CompoundPredicate booleanOperator="and"> <SimplePredicate field="fld1" operator="greaterThan" value="10.0"/> <SimplePredicate field="fld1" operator="lessThan" value="50.0"/> <SimplePredicate field="fld4" operator="equal" value="optA"/> <SimplePredicate field="fld2" operator="lessThan" value="100.0"/> <SimplePredicate field="fld3" operator="equal" value="false"/> </CompoundPredicate> </Node> <Node score="tgtZ"> <CompoundPredicate booleanOperator="and"> <SimplePredicate field="fld4" operator="equal" value="optC"/> <SimplePredicate field="fld2" operator="lessThan" value="30.0"/> </CompoundPredicate> </Node> </Node> </Node> </TreeModel> </PMML>
PMML マイニングモデルの例 (modelChain)
<PMML version="4.2" xsi:schemaLocation="http://www.dmg.org/PMML-4_2 http://www.dmg.org/v4-2-1/pmml-4-2.xsd" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns="http://www.dmg.org/PMML-4_2"> <Header> <Application name="Drools-PMML" version="7.0.0-SNAPSHOT" /> </Header> <DataDictionary numberOfFields="7"> <DataField name="age" optype="continuous" dataType="double" /> <DataField name="occupation" optype="categorical" dataType="string"> <Value value="SKYDIVER" /> <Value value="ASTRONAUT" /> <Value value="PROGRAMMER" /> <Value value="TEACHER" /> <Value value="INSTRUCTOR" /> </DataField> <DataField name="residenceState" optype="categorical" dataType="string"> <Value value="AP" /> <Value value="KN" /> <Value value="TN" /> </DataField> <DataField name="validLicense" optype="categorical" dataType="boolean" /> <DataField name="overallScore" optype="continuous" dataType="double" /> <DataField name="grade" optype="categorical" dataType="string"> <Value value="A" /> <Value value="B" /> <Value value="C" /> <Value value="D" /> <Value value="F" /> </DataField> <DataField name="qualificationLevel" optype="categorical" dataType="string"> <Value value="Unqualified" /> <Value value="Barely" /> <Value value="Well" /> <Value value="Over" /> </DataField> </DataDictionary> <MiningModel modelName="SampleModelChainMine" functionName="classification"> <MiningSchema> <MiningField name="age" /> <MiningField name="occupation" /> <MiningField name="residenceState" /> <MiningField name="validLicense" /> <MiningField name="overallScore" /> <MiningField name="qualificationLevel" usageType="target"/> </MiningSchema> <Segmentation multipleModelMethod="modelChain"> <Segment id="1"> <True /> <Scorecard modelName="Sample Score 1" useReasonCodes="true" isScorable="true" functionName="regression" baselineScore="0.0" initialScore="0.345"> <MiningSchema> <MiningField name="age" usageType="active" invalidValueTreatment="asMissing" /> <MiningField name="occupation" usageType="active" invalidValueTreatment="asMissing" /> <MiningField name="residenceState" usageType="active" invalidValueTreatment="asMissing" /> <MiningField name="validLicense" usageType="active" invalidValueTreatment="asMissing" /> <MiningField name="overallScore" usageType="predicted" /> </MiningSchema> <Output> <OutputField name="calculatedScore" displayName="Final Score" dataType="double" feature="predictedValue" targetField="overallScore" /> </Output> <Characteristics> <Characteristic name="AgeScore" baselineScore="0.0" reasonCode="ABZ"> <Extension name="cellRef" value="$B$8" /> <Attribute partialScore="10.0"> <Extension name="cellRef" value="$C$10" /> <SimplePredicate field="age" operator="lessOrEqual" value="5" /> </Attribute> <Attribute partialScore="30.0" reasonCode="CX1"> <Extension name="cellRef" value="$C$11" /> <CompoundPredicate booleanOperator="and"> <SimplePredicate field="age" operator="greaterOrEqual" value="5" /> <SimplePredicate field="age" operator="lessThan" value="12" /> </CompoundPredicate> </Attribute> <Attribute partialScore="40.0" reasonCode="CX2"> <Extension name="cellRef" value="$C$12" /> <CompoundPredicate booleanOperator="and"> <SimplePredicate field="age" operator="greaterOrEqual" value="13" /> <SimplePredicate field="age" operator="lessThan" value="44" /> </CompoundPredicate> </Attribute> <Attribute partialScore="25.0"> <Extension name="cellRef" value="$C$13" /> <SimplePredicate field="age" operator="greaterOrEqual" value="45" /> </Attribute> </Characteristic> <Characteristic name="OccupationScore" baselineScore="0.0"> <Extension name="cellRef" value="$B$16" /> <Attribute partialScore="-10.0" reasonCode="CX2"> <Extension name="description" value="skydiving is a risky occupation" /> <Extension name="cellRef" value="$C$18" /> <SimpleSetPredicate field="occupation" booleanOperator="isIn"> <Array n="2" type="string">SKYDIVER ASTRONAUT</Array> </SimpleSetPredicate> </Attribute> <Attribute partialScore="10.0"> <Extension name="cellRef" value="$C$19" /> <SimpleSetPredicate field="occupation" booleanOperator="isIn"> <Array n="2" type="string">TEACHER INSTRUCTOR</Array> </SimpleSetPredicate> </Attribute> <Attribute partialScore="5.0"> <Extension name="cellRef" value="$C$20" /> <SimplePredicate field="occupation" operator="equal" value="PROGRAMMER" /> </Attribute> </Characteristic> <Characteristic name="ResidenceStateScore" baselineScore="0.0" reasonCode="RES"> <Extension name="cellRef" value="$B$22" /> <Attribute partialScore="-10.0"> <Extension name="cellRef" value="$C$24" /> <SimplePredicate field="residenceState" operator="equal" value="AP" /> </Attribute> <Attribute partialScore="10.0"> <Extension name="cellRef" value="$C$25" /> <SimplePredicate field="residenceState" operator="equal" value="KN" /> </Attribute> <Attribute partialScore="5.0"> <Extension name="cellRef" value="$C$26" /> <SimplePredicate field="residenceState" operator="equal" value="TN" /> </Attribute> </Characteristic> <Characteristic name="ValidLicenseScore" baselineScore="0.0"> <Extension name="cellRef" value="$B$28" /> <Attribute partialScore="1.0" reasonCode="LX00"> <Extension name="cellRef" value="$C$30" /> <SimplePredicate field="validLicense" operator="equal" value="true" /> </Attribute> <Attribute partialScore="-1.0" reasonCode="LX00"> <Extension name="cellRef" value="$C$31" /> <SimplePredicate field="validLicense" operator="equal" value="false" /> </Attribute> </Characteristic> </Characteristics> </Scorecard> </Segment> <Segment id="2"> <True /> <TreeModel modelName="SampleTree" functionName="classification" missingValueStrategy="lastPrediction" noTrueChildStrategy="returnLastPrediction"> <MiningSchema> <MiningField name="age" usageType="active" /> <MiningField name="validLicense" usageType="active" /> <MiningField name="calculatedScore" usageType="active" /> <MiningField name="qualificationLevel" usageType="predicted" /> </MiningSchema> <Output> <OutputField name="qualification" displayName="Qualification Level" dataType="string" feature="predictedValue" targetField="qualificationLevel" /> </Output> <Node score="Well" id="1"> <True/> <Node score="Barely" id="2"> <CompoundPredicate booleanOperator="and"> <SimplePredicate field="age" operator="greaterOrEqual" value="16" /> <SimplePredicate field="validLicense" operator="equal" value="true" /> </CompoundPredicate> <Node score="Barely" id="3"> <SimplePredicate field="calculatedScore" operator="lessOrEqual" value="50.0" /> </Node> <Node score="Well" id="4"> <CompoundPredicate booleanOperator="and"> <SimplePredicate field="calculatedScore" operator="greaterThan" value="50.0" /> <SimplePredicate field="calculatedScore" operator="lessOrEqual" value="60.0" /> </CompoundPredicate> </Node> <Node score="Over" id="5"> <SimplePredicate field="calculatedScore" operator="greaterThan" value="60.0" /> </Node> </Node> <Node score="Unqualified" id="6"> <CompoundPredicate booleanOperator="surrogate"> <SimplePredicate field="age" operator="lessThan" value="16" /> <SimplePredicate field="calculatedScore" operator="lessOrEqual" value="40.0" /> <True /> </CompoundPredicate> </Node> </Node> </TreeModel> </Segment> </Segmentation> </MiningModel> </PMML>
PMML クラスタリングモデルの例
<?xml version="1.0" encoding="UTF-8"?> <PMML version="4.1" xmlns="http://www.dmg.org/PMML-4_1"> <Header> <Application name="KNIME" version="2.8.0"/> </Header> <DataDictionary numberOfFields="5"> <DataField name="sepal_length" optype="continuous" dataType="double"> <Interval closure="closedClosed" leftMargin="4.3" rightMargin="7.9"/> </DataField> <DataField name="sepal_width" optype="continuous" dataType="double"> <Interval closure="closedClosed" leftMargin="2.0" rightMargin="4.4"/> </DataField> <DataField name="petal_length" optype="continuous" dataType="double"> <Interval closure="closedClosed" leftMargin="1.0" rightMargin="6.9"/> </DataField> <DataField name="petal_width" optype="continuous" dataType="double"> <Interval closure="closedClosed" leftMargin="0.1" rightMargin="2.5"/> </DataField> <DataField name="class" optype="categorical" dataType="string"/> </DataDictionary> <ClusteringModel modelName="SingleIrisKMeansClustering" functionName="clustering" modelClass="centerBased" numberOfClusters="4"> <MiningSchema> <MiningField name="sepal_length" invalidValueTreatment="asIs"/> <MiningField name="sepal_width" invalidValueTreatment="asIs"/> <MiningField name="petal_length" invalidValueTreatment="asIs"/> <MiningField name="petal_width" invalidValueTreatment="asIs"/> <MiningField name="class" usageType="predicted"/> </MiningSchema> <ComparisonMeasure kind="distance"> <squaredEuclidean/> </ComparisonMeasure> <ClusteringField field="sepal_length" compareFunction="absDiff"/> <ClusteringField field="sepal_width" compareFunction="absDiff"/> <ClusteringField field="petal_length" compareFunction="absDiff"/> <ClusteringField field="petal_width" compareFunction="absDiff"/> <Cluster name="virginica" size="32"> <Array n="4" type="real">6.9125000000000005 3.099999999999999 5.846874999999999 2.1312499999999996</Array> </Cluster> <Cluster name="versicolor" size="41"> <Array n="4" type="real">6.23658536585366 2.8585365853658535 4.807317073170731 1.6219512195121943</Array> </Cluster> <Cluster name="setosa" size="50"> <Array n="4" type="real">5.005999999999999 3.4180000000000006 1.464 0.2439999999999999</Array> </Cluster> <Cluster name="unknown" size="27"> <Array n="4" type="real">5.529629629629629 2.6222222222222222 3.940740740740741 1.2185185185185188</Array> </Cluster> </ClusteringModel> </PMML>