Project: Suppliers

Data Validation

In most cases, additional data verification is not required: your digger runs and scrapes data, then you download the dataset and process it on your side. You can, for example, gather data from a supplier's website and import it into your store. If your store software validates imported data, you probably do not need the validation option on the Diggernaut side. If it does not, this option can be very handy, because you can filter out invalid data before it gets into the dataset. All you need to do is create a validation JSON Schema.

Switch the configuration editor panel to the Validator by clicking the corresponding tab:

For the digger to start checking the data scraped from the website, you need to place a validation schema in this editor window.

The validation schema is a JSON structure that describes the requirements for fields and data in them.

We are not going to place a comprehensive manual on how to build a validation JSON schema here, but we will share a few useful links with information that will help you master it.

  1. Official JSON Schema website - all essential information can be found here
  2. JSON Schema Validation - section describing JSON validation schema
  3. Simple examples
  4. Complex examples
  5. JSON schema validator - you can validate your JSON schema here before using it in your digger

Below you can see an example of a JSON schema used in one real case :)

          {
    "$schema": "http://json-schema.org/draft-04/schema#",
    "title": "Validator v1",
    "description": "Supplier#1 validation schema",
    "type": "object",
    "required": ["offer"],
    "properties": {
        "offer": {
            "type": "object",
            "oneOf": [
                { "$ref": "#/definitions/Mattresses" },
                { "$ref": "#/definitions/Other" }
            ]
        }
    },
    "definitions": {
        "Mattresses": {
            "type": "object",
            "required": ["categoryId", "brand", "model", "original_url", "pictures", "hardness", "basis"],
            "properties": {
                "categoryId": {
                    "description": "Category ID for matresses (=1)",
                    "type": "string",
                    "pattern": "^1$"
                },
                "brand": {
                    "description": "Brand",
                    "type": "string",
                    "minLength": 1
                },
                "model": {
                    "description": "Product name",
                    "type": "string",
                    "minLength": 1
                },
                "original_url": {
                    "description": "Original URL to product",
                    "type": "string"
                },
                "pictures": {
                    "type": "array",
                    "items": {
                        "type": "string"
                    },
                    "uniqueItems": true
                },
                "description": {
                    "description": "Product description",
                    "type": "string"
                },
                "sku": {
                    "type": "array",
                    "items": {
                        "type": "object",
                        "properties": {
                            "price": {
                                "type": "integer",
                                "minimum": 1
                            },
                            "old_price": {
                                "type": "integer",
                                "minimum": 0
                            }
                        },
                        "required": [ "price" ]
                    },
                    "minItems": 1,
                    "uniqueItems": true
                },
                "sizepicture": {
                    "description": "Picture with sizes",
                    "type": "array"
                },
                "warranty": {
                    "description": "Warranty",
                    "type": "string",
                    "pattern": "[\\d\\.]+"
                },
                "lifetime": {
                    "description": "Lifetime",
                    "type": "string",
                    "pattern": "\\d+"
                },
                "traits": {
                    "description": "Details",
                    "type": "array",
                    "items": {
                        "type": "object",
                        "properties": {
                            "trait": {
                                "type": "string"
                            }
                        }
                    }
                },
                "youtubes": {
                    "description": "Youtube link",
                    "type": "array",
                    "items": {
                        "type": "object",
                        "properties": {
                            "youtube": {
                                "type": "string"
                            }
                        }
                    }
                },
                "is_child": {
                    "description": "For kids",
                    "type": "string"
                },
                "hardness": {
                    "description": "Hardness",
                    "enum": ["Hard", "Average", "Soft", "Different hardness", ""]
                },
                "hardness_side_1": {
                    "description": "1-st side hardness",
                    "enum": ["Hard", "Average", "Soft", "Different hardness", ""]
                },
                "hardness_side_2": {
                    "description": "2-st side hardness",
                    "enum": ["Hard", "Average", "Soft", "Different hardness", ""]
                },
                "load": {
                    "description": "Allowed loading",
                    "type": "string"
                },
                "weight_diff": {
                    "description": "Allowed weight difference",
                    "type": "string"
                },
                "is_roll": {
                    "description": "In roll",
                    "type": "string"
                },
                "is_round": {
                    "description": "Round form",
                    "type": "string"
                },
                "is_sezon": {
                    "description": "Winter-summer",
                    "type": "string"
                },
                "is_sofa": {
                    "description": "For the sofa",
                    "type": "string"
                },
                "transform": {
                    "description": "Transform",
                    "type": "string"
                },
                "layers": {
                    "description": "Layers",
                    "type": "array",
                    "items": {
                        "type": "object",
                        "properties": {
                            "layer": {
                                "description": "Layer",
                                "type": "string"
                            }
                        },
                        "required": [ "layer" ]
                    }
                },
                "basis": {
                    "description": "Base",
                    "enum": ["Pocket spring", "Bonnel", "Springless", ""]
                }
            }
        },
        "Other": {
            "type": "object",
            "required": ["categoryId", "brand", "model", "original_url", "pictures"],
            "properties": {
                "categoryId": {
                    "description": "Category ID for Other (3|4|6|7|8|9|10|11|12)",
                    "type": "string",
                    "pattern": "^(3|4|6|7|8|9|10|11|12)$"
                },
                "brand": {
                    "description": "Brand",
                    "type": "string",
                    "minLength": 1
                },
                "model": {
                    "description": "Product name",
                    "type": "string",
                    "minLength": 1
                },
                "original_url": {
                    "description": "Original URL to product",
                    "type": "string"
                },
                "pictures": {
                    "type": "array",
                    "items": {
                        "type": "string"
                    },
                    "uniqueItems": true
                },
                "description": {
                    "description": "Product description",
                    "type": "string"
                },
                "sku": {
                    "type": "array",
                    "items": {
                        "type": "object",
                        "properties": {
                            "price": {
                                "type": "integer",
                                "minimum": 1
                            },
                            "old_price": {
                                "type": "integer",
                                "minimum": 0
                            }
                        },
                        "required": [ "price" ]
                    },
                    "minItems": 1,
                    "uniqueItems": true
                },
                "sizepicture": {
                    "description": "Picture with sizes",
                    "type": "array"
                },
                "warranty": {
                    "description": "Warranty",
                    "type": "string",
                    "pattern": "[\\d\\.]+"
                },
                "lifetime": {
                    "description": "Lifetime",
                    "type": "string",
                    "pattern": "\\d+"
                },
                "traits": {
                    "description": "Details",
                    "type": "array",
                    "items": {
                        "type": "object",
                        "properties": {
                            "trait": {
                                "type": "string"
                            }
                        }
                    }
                },
                "youtubes": {
                    "description": "Youtube link",
                    "type": "array",
                    "items": {
                        "type": "object",
                        "properties": {
                            "youtube": {
                                "type": "string"
                            }
                        }
                    }
                },
                "is_child": {
                    "description": "For kids",
                    "type": "string"
                }
            }
        }
    }
}