Workflow: Splitout Code Create

Workflow Details

Download Workflow
{
    "meta": {
        "instanceId": "4a11afdb3c52fd098e3eae9fad4b39fdf1bbcde142f596adda46c795e366b326"
    },
    "nodes": [
        {
            "id": "f1b36f4b-6558-4e83-a999-e6f2d24e196c",
            "name": "OpenRouter Chat Model",
            "type": "@n8n\/n8n-nodes-langchain.lmChatOpenRouter",
            "position": [
                620,
                240
            ],
            "parameters": {
                "model": "openai\/gpt-4.1",
                "options": {}
            },
            "typeVersion": 1
        },
        {
            "id": "89ca0a07-286f-4e68-9e85-0327a4859cc0",
            "name": "Structured Output Parser",
            "type": "@n8n\/n8n-nodes-langchain.outputParserStructured",
            "position": [
                900,
                240
            ],
            "parameters": {
                "schemaType": "manual",
                "inputSchema": "{\n  \"type\": \"array\",\n  \"items\": {\n    \"type\": \"object\",\n    \"properties\": {\n      \"name\": { \"type\": \"string\" },\n      \"description\": { \"type\": \"string\" },\n      \"rating\": { \"type\": \"number\" },\n      \"reviews\": { \"type\": \"integer\" },\n      \"price\": { \"type\": \"string\" }\n    },\n    \"required\": [\"name\", \"description\", \"rating\", \"reviews\", \"price\"]\n  }\n}"
            },
            "typeVersion": 1.2
        },
        {
            "id": "e4800c1d-c0d8-4093-81ec-fc19ad0034cd",
            "name": "scrap url",
            "type": "n8n-nodes-base.httpRequest",
            "position": [
                240,
                60
            ],
            "parameters": {
                "url": "https:\/\/api.brightdata.com\/request",
                "method": "POST",
                "options": {},
                "sendBody": true,
                "sendHeaders": true,
                "bodyParameters": {
                    "parameters": [
                        {
                            "name": "zone",
                            "value": "web_unlocker1"
                        },
                        {
                            "name": "url",
                            "value": "={{ $json.url }}"
                        },
                        {
                            "name": "format",
                            "value": "raw"
                        }
                    ]
                },
                "headerParameters": {
                    "parameters": [
                        {
                            "name": "Authorization",
                            "value": "{{BRIGHTDATA_TOKEN}}"
                        }
                    ]
                }
            },
            "typeVersion": 4.2
        },
        {
            "id": "1a1f768f-615d-4035-81b0-63b860f8e6ac",
            "name": "Sticky Note1",
            "type": "n8n-nodes-base.stickyNote",
            "position": [
                160,
                -140
            ],
            "parameters": {
                "content": "## Web Scraper API\n\n[Sign up - Free Trial](https:\/\/get.brightdata.com\/website-scraper)"
            },
            "typeVersion": 1
        },
        {
            "id": "2f260d96-4fff-4a4f-af29-1e43f465d54c",
            "name": "When clicking \u2018Test workflow\u2019",
            "type": "n8n-nodes-base.manualTrigger",
            "position": [
                -440,
                200
            ],
            "parameters": {},
            "typeVersion": 1
        },
        {
            "id": "4be9033f-0b9f-466d-916e-88fbb2a80417",
            "name": "url",
            "type": "n8n-nodes-base.splitInBatches",
            "position": [
                20,
                200
            ],
            "parameters": {
                "options": {}
            },
            "typeVersion": 3
        },
        {
            "id": "21b6d21c-b977-4175-9068-e0e2e19fa472",
            "name": "get urls to scrape",
            "type": "n8n-nodes-base.googleSheets",
            "position": [
                -200,
                200
            ],
            "parameters": {
                "options": {},
                "sheetName": "{{TRACK_SHEET_GID}}",
                "documentId": "{{WEB_SHEET_ID}}"
            },
            "credentials": {
                "googleSheetsOAuth2Api": {
                    "id": "KsXWRZTrfCUFrrHD",
                    "name": "Google Sheets"
                }
            },
            "typeVersion": 4.5
        },
        {
            "id": "25ef76ec-cf0d-422e-b060-68c49192a008",
            "name": "clean html",
            "type": "n8n-nodes-base.code",
            "position": [
                460,
                60
            ],
            "parameters": {
                "jsCode": "\/\/ CleanHtmlFunction.js\n\/\/ Purpose: n8n Function node to clean HTML: remove doctype, scripts, styles, head, comments, classes, extra blank lines, and non-whitelisted tags\n\nreturn items.map(item => {\n  const rawHtml = item.json.data;\n\n  \/\/ 1) remove doctype, scripts, styles, comments and head section, and strip class attributes\n  let cleaned = rawHtml\n    .replace(\/<!doctype html>\/gi, '')\n    .replace(\/<script[\\s\\S]*?<\\\/script>\/gi, '')\n    .replace(\/<style[\\s\\S]*?<\\\/style>\/gi, '')\n    .replace(\/<!--[\\s\\S]*?-->\/g, '')\n    .replace(\/<head[\\s\\S]*?<\\\/head>\/gi, '')\n    .replace(\/\\sclass=\"[^\"]*\"\/gi, '');\n\n  \/\/ 2) define whitelist of tags to keep\n  const allowedTags = [\n    'h1','h2','h3','h4','h5','h6',\n    'p','ul','ol','li',\n    'strong','em','a','blockquote',\n    'code','pre'\n  ];\n\n  \/\/ 3) strip out all tags not in the whitelist, reconstruct allowed tags cleanly\n  cleaned = cleaned.replace(\n    \/<\\\/?([a-z][a-z0-9]*)\\b[^>]*>\/gi,\n    (match, tagName) => {\n      const name = tagName.toLowerCase();\n      if (allowedTags.includes(name)) {\n        return match.startsWith('<\/') ? `<\/${name}>` : `<${name}>`;\n      }\n      return '';\n    }\n  );\n\n  \/\/ 4) collapse multiple blank or whitespace-only lines into a single newline\n  cleaned = cleaned.replace(\/(\\s*\\r?\\n\\s*){2,}\/g, '\\n');\n\n  \/\/ 5) trim leading\/trailing whitespace\n  cleaned = cleaned.trim();\n\n  return {\n    json: { cleanedHtml: cleaned }\n  };\n});"
            },
            "typeVersion": 2
        },
        {
            "id": "f72660d5-8427-4655-acbe-10365273c27b",
            "name": "extract data",
            "type": "@n8n\/n8n-nodes-langchain.chainLlm",
            "position": [
                680,
                60
            ],
            "parameters": {
                "text": "={{ $json.cleanedHtml }}",
                "messages": {
                    "messageValues": [
                        {
                            "message": "=You are an expert in web page scraping. Provide a structured response in JSON format. Only the response, without commentary.\n\nExtract the product information for {{ $('url').item.json.url.split('\/s?k=')[1].split('&')[0] }} present on the page.\n\nname\ndescription\nrating\nreviews\nprice"
                        }
                    ]
                },
                "promptType": "define",
                "hasOutputParser": true
            },
            "typeVersion": 1.6
        },
        {
            "id": "8b4af1bb-d7f8-456e-b630-ecd9b6e4bcdc",
            "name": "add results",
            "type": "n8n-nodes-base.googleSheets",
            "position": [
                1280,
                200
            ],
            "parameters": {
                "columns": {
                    "value": {
                        "name": "={{ $json.output.name }}",
                        "price": "={{ $json.output.price }}",
                        "rating": "={{ $json.output.rating }}",
                        "reviews": "={{ $json.output.reviews }}",
                        "description": "={{ $json.output.description }}"
                    },
                    "schema": [
                        {
                            "id": "name",
                            "type": "string"
                        },
                        {
                            "id": "description",
                            "type": "string"
                        },
                        {
                            "id": "rating",
                            "type": "string"
                        },
                        {
                            "id": "reviews",
                            "type": "string"
                        },
                        {
                            "id": "price",
                            "type": "string"
                        }
                    ],
                    "mappingMode": "defineBelow"
                },
                "options": {},
                "operation": "append",
                "sheetName": "{{RESULTS_SHEET_GID}}",
                "documentId": "{{WEB_SHEET_ID}}"
            },
            "credentials": {
                "googleSheetsOAuth2Api": {
                    "id": "KsXWRZTrfCUFrrHD",
                    "name": "Google Sheets"
                }
            },
            "typeVersion": 4.5
        },
        {
            "id": "7a5ba438-2ede-4d6c-b8fa-9a958ba1ef3e",
            "name": "Split items",
            "type": "n8n-nodes-base.splitOut",
            "position": [
                1060,
                60
            ],
            "parameters": {
                "include": "allOtherFields",
                "options": {},
                "fieldToSplitOut": "output"
            },
            "typeVersion": 1
        }
    ],
    "pinData": {},
    "connections": {
        "url": {
            "main": [
                [],
                [
                    {
                        "node": "scrap url",
                        "type": "main",
                        "index": 0
                    }
                ]
            ]
        },
        "scrap url": {
            "main": [
                [
                    {
                        "node": "clean html",
                        "type": "main",
                        "index": 0
                    }
                ]
            ]
        },
        "clean html": {
            "main": [
                [
                    {
                        "node": "extract data",
                        "type": "main",
                        "index": 0
                    }
                ]
            ]
        },
        "Split items": {
            "main": [
                [
                    {
                        "node": "add results",
                        "type": "main",
                        "index": 0
                    }
                ]
            ]
        },
        "add results": {
            "main": [
                [
                    {
                        "node": "url",
                        "type": "main",
                        "index": 0
                    }
                ]
            ]
        },
        "extract data": {
            "main": [
                [
                    {
                        "node": "Split items",
                        "type": "main",
                        "index": 0
                    }
                ]
            ]
        },
        "get urls to scrape": {
            "main": [
                [
                    {
                        "node": "url",
                        "type": "main",
                        "index": 0
                    }
                ]
            ]
        },
        "OpenRouter Chat Model": {
            "ai_languageModel": [
                [
                    {
                        "node": "extract data",
                        "type": "ai_languageModel",
                        "index": 0
                    }
                ]
            ]
        },
        "Structured Output Parser": {
            "ai_outputParser": [
                [
                    {
                        "node": "extract data",
                        "type": "ai_outputParser",
                        "index": 0
                    }
                ]
            ]
        },
        "When clicking \u2018Test workflow\u2019": {
            "main": [
                [
                    {
                        "node": "get urls to scrape",
                        "type": "main",
                        "index": 0
                    }
                ]
            ]
        }
    }
}
Back to Workflows

Related Workflows

Telegram Webhook Send Webhook
View
Gmail GoogleDrive Create Triggered
View
Manual Xero Automate Triggered
View
Gmail GoogleDrive Import
View
Manual Stickynote Process Triggered
View
HubSpot Clearbit Update Triggered
View
Functionitem Executecommand Update Webhook
View
[n8n] - Shopify Orders to D365 Business Central Sales Orders / Sales Invoices
View
Create, update, and get a post in Ghost
View
🗨️Ollama Chat
View