🔥🔥🔥🔥🔥🔥 JD_SCRAP.py || py .\jd_scrap.py 🔥🔥🔥🔥🔥🔥🔥🔥

January 23, 2021

🔥🔥🔥🔥🔥🔥 JD_SCRAP.py || py .\jd_scrap.py 🔥🔥🔥🔥🔥🔥🔥🔥

To run: `py .\jd_scrap.py`

First

from bs4 import BeautifulSoup
import urllib
import urllib.request
import requests
import csv


def innerHTML(element):
    """Render the contents of *element* as a string using the "html" formatter.

    *element* is expected to be a BeautifulSoup tag, i.e. anything that
    exposes ``decode_contents``.
    """
    inner_markup = element.decode_contents(formatter="html")
    return inner_markup


def get_name(body):
    """Return the business name of one listing, or ``None`` if it is absent.

    The name is the ``.string`` of the ``<a>`` inside ``<span class="jcn">``.
    A listing that lacks the span or the link yields ``None`` rather than
    raising ``AttributeError``, so a single malformed entry cannot abort the
    whole scrape.

    :param body: BeautifulSoup tag for one listing.
    :return: the name string, or ``None``.
    """
    name_span = body.find('span', {'class': 'jcn'})
    if name_span is None:
        return None

    link = name_span.a
    if link is None:
        return None

    return link.string


def which_digit(html):
    """Translate one icon CSS class into the phone-number character it encodes.

    Digits are returned as ``int`` and punctuation as ``str``; a class that is
    not in the table yields the empty string.
    """
    punctuation = {
        'icon-dc': '+',
        'icon-fe': '(',
        'icon-hg': ')',
        'icon-ba': '-',
    }
    digits = {
        'icon-acb': 0,
        'icon-yz': 1,
        'icon-wx': 2,
        'icon-vu': 3,
        'icon-ts': 4,
        'icon-rq': 5,
        'icon-po': 6,
        'icon-nm': 7,
        'icon-lk': 8,
        'icon-ji': 9,
    }
    icon_to_char = {**punctuation, **digits}

    if html in icon_to_char:
        return icon_to_char[html]
    return ''


def get_phone_number(body):
    """Extract the phone number of one listing.

    Two sources are tried, the second overriding the first:

    1. The second child of ``<p class="contact-info">``. Every descendant
       with a ``class`` attribute is treated as one character of the number;
       its second CSS class is decoded with ``which_digit``.
    2. The listing's ``data-href`` attribute, parsed as HTML. If it contains
       an ``<a id="whatsapptriggeer">``, the last ten characters of that
       link's ``href`` become the number.

    :param body: BeautifulSoup tag for one listing.
    :return: the number as a string; ``"No Number!"`` when
        ``contact-info`` has no second child and no WhatsApp link is found.
    """
    phoneNo = "No Number!"

    contact = body.find('p', {'class': 'contact-info'})
    if contact is not None:
        for position, item in enumerate(contact, start=1):
            if position != 2:
                continue
            phoneNo = ''
            try:
                icons = item.find_all(class_=True)
            except (AttributeError, TypeError):
                # The second child is a bare text node, so it holds no icons.
                break
            # Elements with a single class carry no digit class; skip them
            # instead of abandoning the rest of the number.
            phoneNo = ''.join(
                str(which_digit(element["class"][1]))
                for element in icons
                if len(element["class"]) > 1
            )
            break

    # .get() avoids a KeyError on listings without a data-href attribute.
    data_href = body.get('data-href')
    if data_href:
        soup = BeautifulSoup(data_href, 'html.parser')
        for a in soup.find_all('a', {"id": "whatsapptriggeer"}):
            href = a.get('href')
            if href:
                phoneNo = str(href[-10:])

    return phoneNo


def get_rating(body):
    """Return the star rating of one listing as a float (``0.0`` if unrated).

    Each child tag of ``<span class="star_m">`` contributes the number that
    follows the first character of its first CSS class, divided by ten
    (presumably classes look like ``s10`` for a full star; confirm against
    the live markup). Children that cannot contribute are skipped instead of
    raising: text nodes, tags without a class, and classes whose tail is not
    numeric.

    :param body: BeautifulSoup tag for one listing.
    :return: the summed rating.
    """
    rating = 0.0
    stars = body.find('span', {'class': 'star_m'})
    if stars is None:
        return rating

    for item in stars:
        # Iterating a tag also yields its text nodes (e.g. whitespace between
        # the star tags); those are str subclasses and have no attributes.
        if isinstance(item, str):
            continue
        classes = item.get('class')
        if not classes:
            continue
        try:
            rating += float(classes[0][1:]) / 10
        except ValueError:
            continue

    return rating


def get_rating_count(body):
    """Return the number of ratings of one listing as a string of digits.

    The digits are taken from the ``.string`` of ``<span class="rt_count">``;
    every non-digit character is dropped. Returns ``None`` when the span is
    missing or has no single string, instead of raising.

    :param body: BeautifulSoup tag for one listing.
    :return: digit string (possibly empty), or ``None``.
    """
    count_span = body.find('span', {'class': 'rt_count'})
    if count_span is None:
        return None

    text = count_span.string
    if text is None:
        return None

    # Get only digits
    return ''.join(ch for ch in text if ch.isdigit())


def get_address(body):
    """Return the address text of one listing, stripped of outer whitespace.

    The text comes from ``<span class="mrehover">``. Returns ``None`` when
    that span is missing, instead of raising ``AttributeError``.

    :param body: BeautifulSoup tag for one listing.
    :return: the address string, or ``None``.
    """
    address_span = body.find('span', {'class': 'mrehover'})
    if address_span is None:
        return None
    return address_span.text.strip()


def get_location(body):
    """Return ``"<latitude>, <longitude>"`` for one listing, or ``None``.

    The coordinates are the 4th and 5th comma-separated pieces of the
    ``onclick`` attribute of ``<a class="rsmap">``, with surrounding
    whitespace and single quotes removed. Returns ``None`` when the link or
    its ``onclick`` is missing, or when ``onclick`` has fewer than five
    pieces.

    :param body: BeautifulSoup tag for one listing.
    :return: the coordinate string, or ``None``.
    """
    map_link = body.find('a', {'class': 'rsmap'})
    if map_link is None:
        return None

    onclick = map_link.get('onclick')
    if not onclick:
        return None

    parts = onclick.split(",")
    if len(parts) < 5:
        return None

    latitude = parts[3].strip().replace("'", "")
    longitude = parts[4].strip().replace("'", "")

    return latitude + ", " + longitude


# Scrape driver: walks result pages 1..50 of one Justdial category and writes
# one CSV row per listing.
page_number = 1
service_count = 1


fields = ['Name', 'Phone', 'Rating', 'Rating Count', 'Address', 'Location']
# -----> Type the name of your excel or csv file.
# newline='' lets the csv module control line endings (otherwise every row is
# followed by a blank line on Windows); an explicit UTF-8 encoding keeps
# non-ASCII names from failing on the locale's default codec. The `with`
# block closes the file even if a request or parse step raises.
with open('biometric_indore.csv', 'w', newline='', encoding='utf-8') as out_file:
    csvwriter = csv.DictWriter(out_file, delimiter=',', fieldnames=fields)

    # No header row is written; call csvwriter.writeheader() here if one is
    # wanted.

    # Stop once the last result page (50) has been processed.
    while page_number <= 50:
        url = "https://www.justdial.com/Indore/Biometric-Attendance-Recording-System-Dealers/nct-10932847/page-%s" % (
            page_number)
        print(url)
        req = urllib.request.Request(
            url, headers={'User-Agent': "Mozilla/5.0 (Windows NT 6.1; Win64; x64)"})
        # Close the HTTP response as soon as the page has been read.
        with urllib.request.urlopen(req) as page:
            soup = BeautifulSoup(page.read(), "html.parser")
        services = soup.find_all('li', {'class': 'cntanr'})

        # Iterate through the results in the page
        for service_html in services:

            # Parse HTML to fetch data
            name = get_name(service_html)
            print(name)
            phone = get_phone_number(service_html)
            rating = get_rating(service_html)
            count = get_rating_count(service_html)
            address = get_address(service_html)
            location = get_location(service_html)

            # Only fields that were found are set; DictWriter leaves the
            # remaining columns empty.
            dict_service = {}
            if name is not None:
                dict_service['Name'] = name
            if phone is not None:
                print('getting phone number')
                dict_service['Phone'] = phone
            if rating is not None:
                dict_service['Rating'] = rating
            if count is not None:
                dict_service['Rating Count'] = count
            if address is not None:
                dict_service['Address'] = address
            if location is not None:
                # Goes in its own column; previously this overwrote 'Address'.
                dict_service['Location'] = location

            # Write row to CSV
            csvwriter.writerow(dict_service)

            print("#" + str(service_count) + " ", dict_service)
            service_count += 1

        page_number += 1

Again (the same script, with an added note on how to set the URL):

from bs4 import BeautifulSoup
import urllib
import urllib.request
import requests
import csv


def innerHTML(element):
    """Render the contents of *element* as a string using the "html" formatter.

    *element* is expected to be a BeautifulSoup tag, i.e. anything that
    exposes ``decode_contents``.
    """
    inner_markup = element.decode_contents(formatter="html")
    return inner_markup


def get_name(body):
    """Return the business name of one listing, or ``None`` if it is absent.

    The name is the ``.string`` of the ``<a>`` inside ``<span class="jcn">``.
    A listing that lacks the span or the link yields ``None`` rather than
    raising ``AttributeError``, so a single malformed entry cannot abort the
    whole scrape.

    :param body: BeautifulSoup tag for one listing.
    :return: the name string, or ``None``.
    """
    name_span = body.find('span', {'class': 'jcn'})
    if name_span is None:
        return None

    link = name_span.a
    if link is None:
        return None

    return link.string


def which_digit(html):
    """Translate one icon CSS class into the phone-number character it encodes.

    Digits are returned as ``int`` and punctuation as ``str``; a class that is
    not in the table yields the empty string.
    """
    punctuation = {
        'icon-dc': '+',
        'icon-fe': '(',
        'icon-hg': ')',
        'icon-ba': '-',
    }
    digits = {
        'icon-acb': 0,
        'icon-yz': 1,
        'icon-wx': 2,
        'icon-vu': 3,
        'icon-ts': 4,
        'icon-rq': 5,
        'icon-po': 6,
        'icon-nm': 7,
        'icon-lk': 8,
        'icon-ji': 9,
    }
    icon_to_char = {**punctuation, **digits}

    if html in icon_to_char:
        return icon_to_char[html]
    return ''


def get_phone_number(body):
    """Extract the phone number of one listing.

    Two sources are tried, the second overriding the first:

    1. The second child of ``<p class="contact-info">``. Every descendant
       with a ``class`` attribute is treated as one character of the number;
       its second CSS class is decoded with ``which_digit``.
    2. The listing's ``data-href`` attribute, parsed as HTML. If it contains
       an ``<a id="whatsapptriggeer">``, the last ten characters of that
       link's ``href`` become the number.

    :param body: BeautifulSoup tag for one listing.
    :return: the number as a string; ``"No Number!"`` when
        ``contact-info`` has no second child and no WhatsApp link is found.
    """
    phoneNo = "No Number!"

    contact = body.find('p', {'class': 'contact-info'})
    if contact is not None:
        for position, item in enumerate(contact, start=1):
            if position != 2:
                continue
            phoneNo = ''
            try:
                icons = item.find_all(class_=True)
            except (AttributeError, TypeError):
                # The second child is a bare text node, so it holds no icons.
                break
            # Elements with a single class carry no digit class; skip them
            # instead of abandoning the rest of the number.
            phoneNo = ''.join(
                str(which_digit(element["class"][1]))
                for element in icons
                if len(element["class"]) > 1
            )
            break

    # .get() avoids a KeyError on listings without a data-href attribute.
    data_href = body.get('data-href')
    if data_href:
        soup = BeautifulSoup(data_href, 'html.parser')
        for a in soup.find_all('a', {"id": "whatsapptriggeer"}):
            href = a.get('href')
            if href:
                phoneNo = str(href[-10:])

    return phoneNo


def get_rating(body):
    """Return the star rating of one listing as a float (``0.0`` if unrated).

    Each child tag of ``<span class="star_m">`` contributes the number that
    follows the first character of its first CSS class, divided by ten
    (presumably classes look like ``s10`` for a full star; confirm against
    the live markup). Children that cannot contribute are skipped instead of
    raising: text nodes, tags without a class, and classes whose tail is not
    numeric.

    :param body: BeautifulSoup tag for one listing.
    :return: the summed rating.
    """
    rating = 0.0
    stars = body.find('span', {'class': 'star_m'})
    if stars is None:
        return rating

    for item in stars:
        # Iterating a tag also yields its text nodes (e.g. whitespace between
        # the star tags); those are str subclasses and have no attributes.
        if isinstance(item, str):
            continue
        classes = item.get('class')
        if not classes:
            continue
        try:
            rating += float(classes[0][1:]) / 10
        except ValueError:
            continue

    return rating


def get_rating_count(body):
    """Return the number of ratings of one listing as a string of digits.

    The digits are taken from the ``.string`` of ``<span class="rt_count">``;
    every non-digit character is dropped. Returns ``None`` when the span is
    missing or has no single string, instead of raising.

    :param body: BeautifulSoup tag for one listing.
    :return: digit string (possibly empty), or ``None``.
    """
    count_span = body.find('span', {'class': 'rt_count'})
    if count_span is None:
        return None

    text = count_span.string
    if text is None:
        return None

    # Get only digits
    return ''.join(ch for ch in text if ch.isdigit())


def get_address(body):
    """Return the address text of one listing, stripped of outer whitespace.

    The text comes from ``<span class="mrehover">``. Returns ``None`` when
    that span is missing, instead of raising ``AttributeError``.

    :param body: BeautifulSoup tag for one listing.
    :return: the address string, or ``None``.
    """
    address_span = body.find('span', {'class': 'mrehover'})
    if address_span is None:
        return None
    return address_span.text.strip()


def get_location(body):
    """Return ``"<latitude>, <longitude>"`` for one listing, or ``None``.

    The coordinates are the 4th and 5th comma-separated pieces of the
    ``onclick`` attribute of ``<a class="rsmap">``, with surrounding
    whitespace and single quotes removed. Returns ``None`` when the link or
    its ``onclick`` is missing, or when ``onclick`` has fewer than five
    pieces.

    :param body: BeautifulSoup tag for one listing.
    :return: the coordinate string, or ``None``.
    """
    map_link = body.find('a', {'class': 'rsmap'})
    if map_link is None:
        return None

    onclick = map_link.get('onclick')
    if not onclick:
        return None

    parts = onclick.split(",")
    if len(parts) < 5:
        return None

    latitude = parts[3].strip().replace("'", "")
    longitude = parts[4].strip().replace("'", "")

    return latitude + ", " + longitude


# Scrape driver: walks result pages 1..50 of one Justdial category and writes
# one CSV row per listing.
page_number = 1
service_count = 1


fields = ['Name', 'Phone', 'Rating', 'Rating Count', 'Address', 'Location']
# -----> Type the name of your excel or csv file.
# newline='' lets the csv module control line endings (otherwise every row is
# followed by a blank line on Windows); an explicit UTF-8 encoding keeps
# non-ASCII names from failing on the locale's default codec. The `with`
# block closes the file even if a request or parse step raises.
with open('biometric_indore.csv', 'w', newline='', encoding='utf-8') as out_file:
    csvwriter = csv.DictWriter(out_file, delimiter=',', fieldnames=fields)

    # No header row is written; call csvwriter.writeheader() here if one is
    # wanted.

    # Stop once the last result page (50) has been processed.
    while page_number <= 50:
        # To scrape another category, paste its Justdial URL with the trailing
        # page number removed (".../page-2" becomes ".../page-") so that %s
        # can supply the page number.
        url = "https://www.justdial.com/Indore/Biometric-Attendance-Recording-System-Dealers/nct-10932847/page-%s" % (
            page_number)
        print(url)
        req = urllib.request.Request(
            url, headers={'User-Agent': "Mozilla/5.0 (Windows NT 6.1; Win64; x64)"})
        # Close the HTTP response as soon as the page has been read.
        with urllib.request.urlopen(req) as page:
            soup = BeautifulSoup(page.read(), "html.parser")
        services = soup.find_all('li', {'class': 'cntanr'})

        # Iterate through the results in the page
        for service_html in services:

            # Parse HTML to fetch data
            name = get_name(service_html)
            print(name)
            phone = get_phone_number(service_html)
            rating = get_rating(service_html)
            count = get_rating_count(service_html)
            address = get_address(service_html)
            location = get_location(service_html)

            # Only fields that were found are set; DictWriter leaves the
            # remaining columns empty.
            dict_service = {}
            if name is not None:
                dict_service['Name'] = name
            if phone is not None:
                print('getting phone number')
                dict_service['Phone'] = phone
            if rating is not None:
                dict_service['Rating'] = rating
            if count is not None:
                dict_service['Rating Count'] = count
            if address is not None:
                dict_service['Address'] = address
            if location is not None:
                # Goes in its own column; previously this overwrote 'Address'.
                dict_service['Location'] = location

            # Write row to CSV
            csvwriter.writerow(dict_service)

            print("#" + str(service_count) + " ", dict_service)
            service_count += 1

        page_number += 1

Search This Blog

Personal Reference

🔥🔥🔥🔥🔥🔥 JD_SCRAP.py || py .\jd_scrap.py 🔥🔥🔥🔥🔥🔥🔥🔥

Comments

Post a Comment

Popular Posts