Url Yönlendirme Pyhton

Katılım
10 Ocak 2023
Mesajlar
11
Puanları
1
Yaş
25
I want to redirect the page to another url, but it does. I would be grateful for your help.
Python:
headers = {
   'Accept': 'application/json',
   'Accept-Encoding': 'gzip, deflate, br',
   'Accept-Language': 'en-TR,tr;q=0.9,en-US;q=0.8,en;q=0.7',
   'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/103.0.5060.114 Safari/53',
}
response = requests.get(ad_link, heads=headers, allow_redirects=True)
ic(response.status_code)
#ic(response.history)

if response.history:
    ic("Request was redirected" )
    for resp in response.history:
        ic(resp.status_code, resp.url)
        ic('For Control')
    ic("Final destination: ")
    ic(response.status_code, response.all_links)
    ic(ad_link)
else:
    ic("Request was not redirected" )
    ic(response)
time.sleep(sleep_time)


I wrote a loop like this content 200 but it doesn't open url to the screen. It doesn't work here at all.

I go to a url and log in, and then I want the 'ad_link' column in the database to take me to a link. Because I don't want to have to open that session all the time. I want to browse through a open session on the links in my database.

Python:
mydb = mysql.connector.connect(
    host="localhost",
    user="root",
    password="",
    database="m"
)
mycursor = mydb.cursor()

urllib3.disable_warnings()

sql = "SELECT ad_link FROM test"

mycursor.execute(sql)

myresult = mycursor.fetchall()

all_links = myresult[0:]

len_all_links = len(all_links)

dataframe = pd.DataFrame(all_links, columns=['links'])

x = 0
y = 5

#def fonksiyon(i):
     #global x
     #global y

number = np.arange(x,y)

for i in tqdm(number):
    ad_link = dataframe.links[i]  #ad_link = dataframe["links"][i]

    print(ad_link)

    Display = []
    prefs = {"profile.managed_default_content_settings.images": 2} # this is to not load images

    sx = random.randint(1000, 1500)
    sn = random.randint(3000, 4500)

    options = Options()
    options = webdriver.ChromeOptions()

    time.sleep(5)

    options.add_argument('--disable-blink-features=AutomationControlled')
    options.add_experimental_option("detach", True)
    capabilities = options.to_capabilities()
    os.environ['WDM_SSL_VERIFY'] = '0'
    options.add_experimental_option("prefs", prefs)
    wsize = "--window-size=" + str(sx - 10) + ',' + str(sn - 10)
    options.add_argument(str(wsize))
    options.add_argument("prefs", )
    prefs = {"profile.managed_default_content_settings.images": 2}
    options.add_experimental_option("prefs", prefs)
    options.add_argument(['--headless', '--disable-gpu', '--window-size=1920,1080', '--no-sandbox', '--disable-dev-shm-usage'])

    service = Service(executable_path = r'C:\Users\Wiveda\chromedriver.exe')

    test = webdriver.Chrome(service=Service(ChromeDriverManager().install()))

    sleep_time = 5

    test.get(ad_link)
    time.sleep(sleep_time)
    ad_source = test.page_source
    ad_soup = BeautifulSoup(ad_source, 'lxml')

    mainresults = ad_soup.find_all('div', {'class': 'cBox u-inherit '})

    try:
        WebDriverWait(test, timeout=10).until(
            lambda d: d.find_element(By.XPATH, "//button[@class='sc-bczRLJ-accept-btn']")).click()
        WebDriverWait(test, timeout=10).until(
            lambda d: d.find_element(By.XPATH, "//p[@class='phone-container']")).click()
        tel_number = test.find_element(By.XPATH, "//p[@class='phone-container']").text
        ic(tel_number)
    except:
        tel_number = 'Not Found Tel Number'
        ic(tel_number)
        time.sleep(1)

    search_words = ""
    try:
        web_text = test.find_element(By.XPATH, "/html/body/div[6]/div/div[2]/div[3]/div[1]")
        words = ["Import", "x", "y", "z"]
        search_words = [word for word in words if re.findall(word, web_text)]
        text_words = ''
        if search_words:
            for i, word in enumerate(search_words):
                if i < len(search_words) - 1:
                    text_words += f"{word}, "
                else:
                    text_words += f"{word}."
            ic(f"\nCannot send mail because it contains the word.Index : {text_words}")
            ic(re.findall)
            print("İf tamamlandı")
        print("Try tamamlandı")
    except Exception:
        text_words = "Not Found Words"
        ic(text_words)
        time.sleep(1)

    #mainresults = ad_soup.find_all('div', {'class': 'cBox cBox--content u-overflow-inherit '})

    try:
        brand_and_model = ad_soup.find("h1", {"class": ('h u-word')}).get_text()
    except:
        brand_and_model = ' '

    try:
        model_version = ad_soup.find("div", {"class": ('list-title')}).get_text()
    except:
        model_version = ' '

    try:
        location = ad_soup.find("p", {"class": ('seller-address')}).get_text()
    except:
        location = ' '

    try:
        url_id = ad_soup.find(" ", {"class": ('')}).get_text()
    except:
        url_id = ''

    cars_data = pd.DataFrame({
        'brand_and_model': brand_and_model,
        'model_version': model_version,
        'location': location,
        'tel_number': tel_number,
        'url_id': url_id,
    },
        index=[0])

    try:
        table_pre = ad_soup.find("div", {"class": "cBox cBox--content cBox-body"})  # 1 (6 in one)
        all_div = table_pre.findAll("div", {"class": ('key-feature__content')})  # 6 (2 in one)
        all_title = table_pre.findAll("div", {"class": ('key-feature__label')})  # 6
        all_results = table_pre.findAll("div", {"class": ('key-feature__value')})  # 6

    except:
        pass

    description_list = []
    value_list = []
    try:
        div_length = len(all_div)
    except:
        div_length = 6

    for i in range(div_length):
        try:
            description_list.append(all_title[i].text)
            description_list = list(map(lambda x: x.replace(" ", "_"), description_list))
            value_list.append(all_results[i].text)
        except:
            description_list.append('')
            value_list.append('')

    all_key = []
    all_value = []

    try:
        pdiv = ad_soup.find_all('div', {'class': 'bullet-list'})
    except:
        pass

    equipment_key = []

    try:
        equipment_key_length = len(pdiv)
    except:
        equipment_key_length = 1


    equipment_value = []

    try:
        dd_ul_li_length = len(pdiv)
    except:
        dd_ul_li_length = 1

    df3 = pd.DataFrame(list(zip(equipment_key, equipment_value)), columns=['all_key', 'all_value'])

    df2 = pd.DataFrame(list(zip(all_key, all_value)), columns=['all_key', 'all_value'])

    df1 = pd.DataFrame(list(zip(description_list, value_list)), columns=['description_list', 'value_list'])

    df1 = df1.set_index('description_list').T.reset_index(drop=True)
    df1 = df1.rename_axis(None, axis=1)
    df1['link'] = ad_link

    df1.insert(0, "brand_and_model", brand_and_model)
    df1.insert(1, "model_version", model_version)
    df1.insert(2, "location", location)
    df1.insert(5, "tel_number", tel_number)

    df2_3 = pd.concat([df2, df3])
    df2_3 = df2_3.set_index('all_key').T.reset_index(drop=True)
    df2_3 = df2_3.rename_axis(None, axis=1)

    df_last = pd.concat([df1, df2_3], axis=1)

    df_last = df_last.astype(str).groupby(df_last.columns, sort=False, axis=1).agg(
        lambda x: x.apply(','.join, 1))

    now = datetime.now()
    datetime_string = str(now.strftime("%Y%m%d_%H%M%S"))

    df_last['ad_link'] = ad_link
    df_last['download_date_time'] = datetime_string

    config = configparser.RawConfigParser()
    config.read(filenames='my.properties')

    scrap_db = pymysql.connect(host='localhost', user='root', password='', database='m',
                               charset='utf8mb4', cursorclass=pymysql.cursors.DictCursor)

    cursor = scrap_db.cursor()

    sql = """CREATE TABLE CARS(
        brand_and_model VARCHAR(32),
        model_version VARCHAR(64),
        location VARCHAR(64),
        tel_number VARCHAR(32),
        mileage VARCHAR(32),
        first_registration DATE(7),
        ad_link VARCHAR(256),
        download_date_time DATE(32),
        search words VARCHAR(64)
        url_id int(9)
        )"""

    #cursor.execute(sql)   #Save data to the table

    for row_count in range(0, df_last.shape[0]):
        chunk = df_last.iloc[row_count:row_count + 1, :].values.tolist()

        brand_and_model = ""
        model_version = ""
        location = ""
        tel_number = ""
        mileage = ""
        first_registration = ""
        ad_link = ""
        download_date_time = ""
        url_id = ""


        lenght_of_chunk = len(chunk[0])

        if "brand_and_model" in cars_data:
            try:
                brand_and_model = chunk[0][0]
            except:
                brand_and_model = ""

        if "model_version" in cars_data:
            try:
                model_version = chunk[0][1]
            except:
                model_version = ""

        if "location" in cars_data:
            try:
                location = chunk[0][2]
            except:
                location = ""

        if "tel_number" in cars_data:
            try:
                tel_number = chunk[0][5]
            except:
                tel_number = ""

        if "Kilometerstand" in description_list:
            index_no = description_list.index("Kilometerstand")
            try:
                mileage = value_list[index_no]
            except:
                mileage = ""

        if "Erstzulassung" in description_list:
            index_no = description_list.index("Erstzulassung")
            try:
                first_registration = value_list[index_no]
            except:
                first_registration = ""

        if chunk[0][lenght_of_chunk - 2] != "":
            ad_link = chunk[0][light_of_chunk - 2] # ad_link

        if chunk[0] [light_of_chunk - 1] != "":
            download_date_time = chunk[0][light_of_chunk - 1] # datetime_string

    if (brand_and_model == '):
        control = "false"
    else:
        control = "true"

    if control == "true":
        mySql_insert_query = "INSERT INTO CARS(brand_and_model,model_version,location,tel_number,mileage,first_registration,ad_link,download_date_time,url_ids) VALUES"
        val = (brand_and_model, model_version, location, tel_number, mileage, first_registration, ad_link, download_date_time, url_id)

    time.sleep(5)

    cursor = scrap_db.cursor()
    cursor.execute(mySql_insert_query, val)

    scrap_db.commit()
    ic(cursor.rowcount, "Record inserted successfully into *CARS* table" )

    if (tel_number == 'Not Found Tel Number' ) and (text_words == 'Not Found Words' ):
        control = "true"
    else:
        control = "pass"

    time.sleep(10)

    if control == "true":
            ic("Mail Sending ")
            test.find_element(By.XPATH, "/div[2]/div[2]/div/div[4]/div/span" ).click( )
            test.implicitly_wait(5)
            eMl = test.find_element(By.XPATH, "/html/div[1]/form/div[1]/div/input" )
            test.implicitly_wait(3)
            eml.click()
            time.sleep(10)
            eMl.send_keys("[email protected] ")
            time.sleep(8)
            passw = WebDriverWait(test, 20).until(EC.visibility_of_element_located((By.XPATH, "/html/body/div[2> TAG1>
            test.implicitly_wait(3)
            passw.click()
            time.sleep(1)

            passw.send_keys('W '+ Keys.ENTER)
            time.sleep(5)
            test.find_element(By.XPATH,'/html/div/div[2]/a').click()
            time.sleep(3)

            heads = {
               'Accept': 'application/json',
               'Accept-Encoding': 'gzip, deflate, br',
               'Accept-Language': 'en-TR,tr;q=0.9,en-US;q=0.8,en;q=0.7',
               'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/103.0.5060.114 Safari/53',
            }

            response = requests.get(ad_link, heads=headers, allow_redirects=True, timeout=2.50)
            ic(response.status_code)

            if response.history:
              for step in response.history:
                ic("Request was redirected" )
                for resp in response.history:
                    ic(resp.status_code, resp.url)
                    ic('For Control')
                ic("Final destination: ")
                ic(response.status_code, response.all_links)
                ic(ad_link)
            else:
                ic("Request was not redirected" )
                ic(response)
            time.sleep(sleep_time)
            ic('destination_status' )
 

Forum istatistikleri

Konular
127,950
Mesajlar
913,847
Kullanıcılar
449,596
Son üye
anilhikmet

Yeni konular

Geri
Üst