Menu
Portfolio
Blog
Snippets
Login
Confirm
exit_to_app
Close
Entries
library_add
Utility(Event,Mouse): Log Movement X,Y
delete
Utility(txt): Basic Sentence Generator
delete
Utility(Event,Keyboard): Logger
delete
Utility(txt): Expanded Phrase Generator
delete
Scrape(General|Multi): Paginated Crawler
delete
Scrape(Sunbiz|Multi): Get Biz's By Zip
delete
Utility(Time): Test Load Speed
delete
Scrape(General|Single|NLP): Get Keyword Stats
delete
Scrape(General|Single): Get Text Near Links
delete
Utility(Event|Mouse): Press Down,Up, & Dragend
delete
Utility(Number): Between Range
delete
Utility(Event|Window): On Resize
delete
Scrape(Youtube|Single): Crawl Channel Videos
delete
Scrape(General|Multi): Tiny Link Crawler
delete
Utility(Time): Day of Week Report
delete
Utility(Number): Aspect Ratio Size Pairs
delete
Utility(Data): Sort Object Properties
delete
Scrape(General|Single): RiTa Sentences & Words
delete
Scrape(General|Single): Investigate Element Layers
delete
Scrape(NLP|Single): Using Compromise Plugins
delete
Utility(General): Remote Script Loader
delete
Scrape(General|Single|NLP): Compromise RiTa D3
delete
Scrape(General|Multi|Node): Grouped Node Crawler
delete
Scrape(Amazon|Multi): Crawl Product SERPs
delete
Scrape(Amazon|Multi): Get Paginated Brands
delete
Utility (Data): Download JSON in Browser
delete
Utility (Data): JSON AutoTypes
delete
Scrape(YouTube|Single): Video Page
delete
Utility (Text): Make Page Editable
delete
Utility (Text): Article Editor
delete
Scrape(General|Single|Text): Get Text On Click
delete
Utility (File): DnD File Parser (CSV,JSON,TXT)
delete
Scrape(General|Single): Get Links (Examples)
delete
Scrape(General|Single|Text): Get Sentences by Tag
delete
Utility (File): JSON to CSV via D3.js
delete
Scrape(General|Single): Auto Parse by Content Type
delete
Scrape(General|Single): Get Paragraphs & Sentences
delete
Scrape(Amazon|Multi): Get Reviews by ASIN
delete
Scrape(General|Single): Download Images on Page
delete
Utility(Event,Form): Custom Submit Function
delete
Utility (Fetch): Basic API Post Request
delete
Utility (Event,Form,Fetch): Form Data to API
delete
Utility (Time): Async Delay
delete
Utility (Time): Async Repeat Every N Secs
delete
Scrape(coj): Crawl Property SERPs
delete
Utility (Data): Promise Chain
delete
Utility (Fetch): Examples - JSON,Text,HTML
delete
Scrape(Amazon|Single): Product Review NLP
delete
Utility (Nodejs): Streaming Collections
delete
Scrape(Rate My Professor|Multi): Crawl Prof SERPs
delete
Utility (Time): JS Timer
delete
Utility (Text): Proper Case
delete
Scrape(Thingiverse API): Get Things via Search API
delete
Scrape(General|Single|Node): Get Node Attributes
delete
Scrape(General|Single|Node): Node Attributes + Text
delete
Scrape(Thesaurus): Get Words from SERPs
delete
Scrape(Walmart): Crawl Product SERPs
delete
Scrape(free3d): Crawl 3D Model SERPs
delete
Scrape(Aliexpress|Single): Get Products From SERP
delete
Scrape(simplify3d): Crawl Post SERPs
delete
Scrape(Twitter): Crawl Post Feed (infinite scroll)
delete
Scrape(DDuckGo|Single): Get Links from SERP
delete
Scrape(General|Single): Get Tokens String Distance
delete
Scrape(General|Single): Content Report
delete
Scrape(General|Single|Node): Node Recon (CSV)
delete
Utility (File): D3 JSON to CSV
delete
Scrape(coj|Multi): Crawl Property SERPs
delete
Scrape(coj|Single): sidenote
delete
Scrape(General|Single): Recursive Node Crawler
delete
Utility (Event,Window): Scroll to Root ScrollHeight
delete
Scrape(Indeed|Multi): Crawl Job SERPs
delete
Scrape(Thingiverse API): Get Things By Id
delete
Scrape(Thingiverse): Crawl Things by Category
delete
Scrape(Thingiverse API): Get Thing Batches by Id via DnD
delete
Scrape(YouTube|Single): Get Video Playlist
delete
Utility (Data): Join Thing Metrics & Meta
delete
Utility(Data): Get Nested Array Lengths
delete
Utility (Twitter): Hide Followed Profiles
delete
Utility (Time): YYYY-MM-DD HH:MM:SS
delete
Scrape(Thangs|Multi): Crawl 3D Model SERPs
delete
Scrape(PrusaPrints,Multi): Get Prints
delete
Scrape(Reddit,Single): Get Posts
delete
Userscript(Youtube): Scrape Channel Videos
delete
Userscript(Youtube): Tab Manager
delete
Scrape(Sunbiz|Multi): Biz Details
delete
Utility(Data): DnD View Types
delete
Scrape(General|Single|Node): Select Nodes by Attr
delete
Scrape(Aliexpress|Multi): Get Products via API
delete
Utility(Text): Strip Web Page CSS, Script, Events, Media
delete
Scrape(Youtube|Single): Get Subs
delete
Scrape(General|Single): SelectAll ReduceByProp
delete
Scrape(General|Single): SelectAll ReduceMultiProps
delete
Scrape(General|Multi): Tiny Link Crawler + Delay & Node Reports
delete
Scrape(P5|Multi): Get Examples
delete
Scrape(LinkedIn|Single): Find New Connections
delete
Userscript(LinkedIn|Single): Get Jobs
delete
Utility (Time): Date From Days Ago
delete
Utility(General|Single): Keep Scrolling
delete
Scrape(YouTube): Videos From Search
delete
Utility(General|Single): getOffset
delete
Utility(Event,Form): Get Data On Form Input
delete
Utility(Event,Element): ResizeObserver
delete
COCO-SSD Object Categories
delete
Scrape(Wikipedia|Multi): What Links Here?
delete
Scrape(DDuckGo|Single): Download Images
delete
Scrape(General|Single|NLP): Compromise nGram
delete
Scrape(Amazon|Multi): Crawl Product SERPs
Edit Snippet
/**
 * Amazon product SERP crawler (browser-console snippet).
 *
 * Starting from the search-results page currently open in the tab, the script
 * reads every result tile, derives brand / material / colour / unit tags from
 * the product image's alt text, follows the "next page" link until there is
 * none, and finally downloads everything as a CSV via d3.csvFormat.
 *
 * Globals written: `collection` (the scraped rows), `searchKeywords` (value of
 * the page's search box) and `serpParsing` (the pure text helpers).
 * They are declared with `var` so they stay re-declarable if the snippet is
 * executed more than once on the same page.
 */
var collection = [];

/**
 * Pure, DOM-free text helpers used by the crawler.
 * Exposed as a single object so the parsing rules can be exercised on their own.
 */
var serpParsing = (() => {
  // NOTE: all patterns carry the `g` flag and are only used through
  // String.prototype.match, which resets `lastIndex`, so sharing them is safe.
  // NOTE(review): none of the alternatives are word-anchored, so short entries
  // such as "es" or "pla" also match inside longer words — confirm this is wanted.
  const BRAND_PATTERN = /suntop|HZST3D|3D Warhorse|DURAMIC|solutech|adroitone intelligently powerful|premium compatibles inc.|honeycomb filaments|silhouette america|filamentdirect.com|daewon industries|big kid creations|balance world inc|scribbler 3d pen|hqa 3d filaments|filament outlet|onlinefilament|nar cartridges|flashforge usa|black magic 3d|aurora-jessica|zen toolworks|sewell direct|matterhackers|fenner drives|creatron inc.|bruce & shark|stronghero3d|spider maker|push plastic|primacreator|paramount 3d|mg chemicals|fidgetfidget|eolas prints|blackmagic3d|amazonbasics|aio robotics|xyzprinting|reprap guru|proto-pasta|micro-swiss|ld products|kabeldirekt|hjc-jessica|go-3d print|gizmo dorks|fillamentum|dishykooker|cabina home|bumat elite|bigtreetech|areyourshop|win-tinten|trigorilla|techtongda|taulman 3d|supplies3d|speedyinks|shamrock58|sf sunfuny|radioshack|quantum 3d|orientools|newcomdigi|new matter|micro-mark|formfutura|flashforge|easy light|deltamaker|balitensen|artudatech|yoyi yoyi|unitystar|ultimaker|taulman3d|superfila|snapmaker|skywalker|shengtian|sainsmart|reprapper|rainbowme|prusament|polymaker|paramount|novamaker|monoprice|mallofusa|jg aurora|hobbyking|goobetter|gazechimp|festnight|festnight|ex-energy|colorfabb|bephamart|amplelife|zhengpin|xinkebot|wyzworks|walfront|verbatim|velleman|twotrees|tiertime|supply3d|superele|sunhokey|smartbuy|rosenice|repraper|pxmalion|purement|printoxe|polaroid|overture|overture|ninjatek|msunlord|mearteve|makerbot|lee fung|leapfrog|jgaurora|jg maker|jamiewin|isanmate|hatchbox|geeetech|fudeecon|foxsmart|flurries|filacube|enotepad|enomaker|elephant|docooler|craftbot|bewinner|bamtack!|aspectek|anycubic|zyltech|zortrax|z-synka|wiiboox|up fila|ultnice|toyvian|tecboss|taulman|tactink|segaden|robo 3d|repkord|raise3d|priline|phoneix|perseus|oo-kuma|makerpi|lulzbot|lulufun|juboury|imlucky|i-chony|hello3d|gudteks|generic|fugetek|flameer|filabot|electop|dfrobot|delaman|comgrow|zjchao|zjchao|zi-rui|zeepro|vortex|uxcell|ukcoco|ueetek|tuscom|ttyt3d|tronxy|toogoo|toogoo|tianse|taylor|stobok|sooway|sindoh|shzons|shaxon|semoic|redrex|recrec|plasil|nulaxy|noulei|mynt3d|myfeel|mosunx|mika3d|meterk|lrifue|longer|keweis|kesoto|inland|iglide|ieagle|gbsell|fysetc|fasmov|eryone|dremel|dikale|cctree|bsnovt|blusea|bczamd|ashata|amolen|akozon|aikeec|aibecy|afinia|yousu|yling|xvico|xmaha|wesen|weiye|tunez|sunlu|soyan|shina|modar|mkoem|melca|manli|leorx|kodak|homyl|gimax|daier|bumat|autek|atk3d|andux|amz3d|amx3d|akoak|ziro|yoyi|tzou|rich|ogry|ifun|ic3d|gp3d|fosa|esun|eray|ecyc|eagy|do3d|cc3d|bibo|anet|twb|m3d|k&c|e3d|cel|c&e|aim|mq|es/gim;
  const TYPE_PATTERN = /wood|pla|abs|tpu|petg|pett|pet|pva|hips|tpe|glow|amphora|uv|resin|dark|sandstone|conductive|silk|fiber|clean|carbon|nylon|magnetic|food[- ]*safe|3d *pen|re[- ]?fill|sample|(multi(ple))?[\d- ]?color[eds]*|flex(ible)?|[\d ]*pack( |s)/gim;
  const COLOR_PATTERN = /white|black|gray|blue|brown|clear|green|orange|pink|purple|red|silver|yellow|beige|transparent|translucent|glow|dark|light|marble|rainbow|silk|gold|silver|bronze|copper|metallic|shiny|colors|wood/gim;
  const UNIT_PATTERN = /(\d+[a-z]* [x×] )?(((?!3D)(\+\/- )?[\d\.]+[\s\.]*(Linear)?(kg|mm|lbs|lb|kilo|pound|millimeter|grams|g|feet|ft|inch|cm|centimeter)((×|x))?)+|(?!3D)(1.75|2.85|3(.00)?))/gim;
  // A single, already-lowercased type tag produced by the `3d *pen` alternative.
  const PEN_TAG = /^3d *pen$/;

  /**
   * Strips noisy punctuation and collapses whitespace.
   * @param {?string} s - Raw text; any falsy value yields "".
   * @returns {string} Cleaned, trimmed, single-spaced text.
   */
  const cleanText = (s) => {
    if (!s) {
      return "";
    }
    return s
      // clean up multiple punctuation marks
      .replace(/[\?\!\(\)'"\{\}\;:]+/gim, "")
      // remove extra white space, replacing tabs with space
      // (`\s` also covers \n and \r, so line breaks become spaces here)
      .replace(/[\s\t]+/gim, " ")
      // kept from the original; after the step above no line breaks remain
      .replace(/[\n\r]+/gim, "\n")
      .trim();
  };

  /**
   * Collects every match of `pattern`, normalises each one (trim + lowercase)
   * and only then de-duplicates, so "PLA" and "pla" collapse into one tag.
   * @param {string} text - Text to scan.
   * @param {RegExp} pattern - Global pattern.
   * @returns {string} Unique matches joined with "|", or "" when none.
   */
  const joinUniqueMatches = (text, pattern) => {
    const matches = text.match(pattern) || [];
    const unique = new Set(matches.map((m) => m.trim().toLowerCase()));
    return [...unique].join("|");
  };

  /**
   * Derives the four tag columns from a (cleaned) product title.
   * @param {string} title - Product title.
   * @returns {{brandTags: string, typeTags: string, colorTags: string, units: string}}
   *   Each value is a "|"-joined, lowercased list ("" when nothing matched).
   */
  const extractTitleTags = (title) => ({
    brandTags: joinUniqueMatches(title.toLowerCase(), BRAND_PATTERN),
    typeTags: joinUniqueMatches(title, TYPE_PATTERN),
    colorTags: joinUniqueMatches(title, COLOR_PATTERN),
    units: joinUniqueMatches(title, UNIT_PATTERN),
  });

  /**
   * Spool / 3D-pen heuristic applied to listings that have at least one tag.
   *  - units mention "ft"/"feet": the "3d pen" tag is renamed "3d pen refill"
   *    and the listing counts as a spool only if the title says "spool";
   *  - otherwise: the "3d pen" tag is dropped and the listing counts as a spool.
   * Works on whole tags, so the pen tag is handled wherever it sits in the list.
   * @param {string} title - Product title.
   * @param {string} typeTags - "|"-joined type tags from extractTitleTags.
   * @param {string} units - "|"-joined units from extractTitleTags.
   * @returns {{typeTags: string, hasSpool: boolean, isSpool: boolean}}
   */
  const classifySpool = (title, typeTags, units) => {
    const hasFeet = units.includes("ft") || units.includes("feet");
    const hasSpool = title.toLowerCase().includes("spool");
    const tags = typeTags === "" ? [] : typeTags.split("|");
    const refined = hasFeet
      ? tags.map((tag) => (PEN_TAG.test(tag) ? "3d pen refill" : tag))
      : tags.filter((tag) => !PEN_TAG.test(tag));
    return {
      typeTags: refined.join("|"),
      hasSpool,
      isSpool: hasFeet ? hasSpool : true,
    };
  };

  return { cleanText, extractTitleTags, classifySpool };
})();

// Current value of the page's search box ("" when the input is missing/empty).
var searchKeywords = (() => {
  if (typeof document === "undefined") {
    return "";
  }
  const input = document.querySelector(`input[name="field-keywords"]`);
  return input && input.value ? input.value : "";
})();

(async () => {
  // Everything below needs a live DOM; bail out quietly when there is none so
  // the pure helpers above can still be loaded on their own.
  if (typeof document === "undefined") {
    return;
  }

  const { cleanText, extractTitleTags, classifySpool } = serpParsing;

  // Local calendar date as YYYY-MM-DD (shift by the timezone offset before
  // taking the ISO string, which is always rendered in UTC).
  const now = new Date();
  const dt = new Date(now.getTime() - now.getTimezoneOffset() * 60 * 1000)
    .toISOString()
    .split("T")[0];

  /**
   * Reads the visible text of a node, falling back to `value` then `content`.
   * @param {Element} el - Node to read.
   * @param {boolean} [clean=true] - Run the text through cleanText.
   * @returns {string}
   */
  const getNodeText = (el, clean = true) => {
    const text = el.innerText ? el.innerText : el.value ? el.value : el.content ? el.content : "";
    return clean ? cleanText(text) : text;
  };

  const options = {
    maxDelayMS: 300,
    // Only `d3` is referenced by this snippet (for the CSV export); the other
    // two libraries are still loaded, exactly as before.
    scripts: [
      "https://unpkg.com/compromise",
      "https://unpkg.com/rita@2.0.14/dist/rita-web.js",
      "https://d3js.org/d3.v5.min.js",
    ],
  };

  const NEXT_LINK_SELECTOR = "li.a-last > a:nth-child(1)";

  /** Random whole number in [0, max). */
  const getRandomInt = (max) => Math.floor(Math.random() * max);

  /** Resolves after `ms` milliseconds. */
  const delay = (ms) =>
    new Promise((resolve) => {
      console.log(`Pausing for ${ms / 1000} seconds`);
      setTimeout(resolve, ms);
    });

  const ids = new Set(); // product ids already accepted, across all pages
  let pageCount = 0;

  /**
   * Serialises `data` with d3.csvFormat and triggers a download of
   * `<fileName>.csv` through a temporary anchor element.
   */
  const downloadCSV = (data, fileName) => {
    const csvData = new Blob([d3.csvFormat(data)], { type: "text/csv;charset=utf-8;" });
    const link = document.createElement("a");
    link.href = window.URL.createObjectURL(csvData);
    link.setAttribute("download", `${fileName}.csv`);
    document.body.appendChild(link);
    link.click();
    document.body.removeChild(link);
  };

  /**
   * Fetches `url` and parses the response body as an HTML document.
   * @param {string} url
   * @returns {Promise<Document>}
   * @throws {Error} On network failure or a non-2xx response.
   */
  async function getDoc(url) {
    const response = await fetch(url);
    if (!response.ok) {
      throw new Error(`Request for ${url} failed with status ${response.status}`);
    }
    const html = await response.text();
    return new DOMParser().parseFromString(html, "text/html");
  }

  /**
   * Loads the page behind the "next" pagination link of `doc`.
   * @param {Document} doc
   * @returns {Promise<?Document>} The next page, or null when there is no
   *   link or the page could not be fetched (the failure is logged).
   */
  async function nextPage(doc) {
    const next = doc.querySelector(NEXT_LINK_SELECTOR);
    if (!next) {
      return null;
    }
    try {
      return await getDoc(next.href);
    } catch (e) {
      console.log({ nextPage: e });
      return null;
    }
  }

  /**
   * Appends one row to `collection` for every usable result tile in `doc`.
   * A tile is skipped when it has no title link, was already seen, has no
   * positive price, or yields neither type tags nor units.
   * @param {Document} doc
   */
  function collectProducts(doc) {
    const els = doc.querySelectorAll(".s-result-item:not(.AdHolder)");
    for (const el of els) {
      const obj = {};
      try {
        const heading = el.querySelector("h2");
        const link = heading ? heading.querySelector("a") : null;
        if (!link) {
          continue; // layout/filler tile, not a product
        }
        obj.productLink = `${link.href.split("/ref=")[0]}/`;
        // The id is the last path segment of the trimmed product link.
        obj.id = obj.productLink.split("//")[1].split("/").slice(-2)[0];
        if (ids.has(obj.id)) {
          continue;
        }

        const price = el.querySelector(".a-offscreen");
        obj.price = price ? +price.innerText.replace(/[$,]/gim, "") : "";
        if (!(obj.price > 0)) {
          continue;
        }
        ids.add(obj.id);

        const img = el.querySelector(".s-image");
        obj.title = cleanText(img ? img.getAttribute("alt") : "");
        Object.assign(obj, extractTitleTags(obj.title));
        if (obj.typeTags.length === 0 && obj.units.length === 0) {
          console.log("Skipping this garbage", obj);
          continue;
        }
        Object.assign(obj, classifySpool(obj.title, obj.typeTags, obj.units));
        obj.thumbnail = img.src;

        const stars = el.querySelector('*[aria-label*="out of"]');
        if (stars) {
          try {
            const reviews = el.querySelector("[href*='#customerReviews']");
            obj.reviews = reviews ? +reviews.innerText.replace(/[,]/gim, "") : 0;
            obj.stars = +getNodeText(stars).split(" ")[0];
          } catch (e) {
            console.log({ e });
            obj.reviews = 0;
            obj.stars = 0;
          }
        }

        const tag = el.querySelector(".a-size-base.a-link-normal.a-text-bold");
        obj.tag = tag ? getNodeText(tag) : "";
        obj.searchKeywords = searchKeywords;
        collection.push(obj);
      } catch (err) {
        // One malformed tile must not abort the rest of the page.
        console.log({ err });
      }
    }
  }

  /**
   * Crawls `doc` and every following results page, then exports the CSV.
   * The export always runs, even when a page fails to parse or load.
   * @param {Document} doc - First results page.
   * @returns {Promise<string>} "doc processed"
   */
  async function processDoc(doc) {
    let current = doc;
    while (current) {
      pageCount++;
      await delay(getRandomInt(options.maxDelayMS));
      console.log(`processing doc #${pageCount} titled: ${current.title}`);
      try {
        collectProducts(current);
      } catch (e) {
        console.log({ processDoc: e });
      }
      current = await nextPage(current);
    }
    downloadCSV(collection, `amazon_productSERPs_${searchKeywords.split(" ").join("-")}_${dt}`);
    console.log({ collection, ids: [...ids] });
    return "doc processed";
  }

  /** Starts the crawl on the live document and reports any unexpected failure. */
  const run = () => {
    processDoc(document).catch((e) => console.log({ run: e }));
  };

  /**
   * Injects each script URL into <head> and calls run() once all have loaded.
   * With an empty list the crawl starts immediately.
   * @param {string[]} scripts
   */
  const loadScripts = (scripts) => {
    let scriptCountdown = scripts.length;
    if (scriptCountdown === 0) {
      run();
      return;
    }
    scripts.forEach((url) => {
      console.log("loadScript", { url });
      const imported = document.createElement("script");
      imported.src = url;
      imported.addEventListener("load", () => {
        scriptCountdown--;
        console.log("scriptsLoaded", { scriptCountdown });
        if (scriptCountdown === 0) {
          run();
        }
      });
      imported.addEventListener("error", () => {
        // The countdown can no longer reach zero, so say so instead of hanging silently.
        console.log("loadScript failed; the crawl will not start", { url });
      });
      document.head.appendChild(imported);
    });
  };

  loadScripts(options.scripts);
})();