lalo2salamanca
Üye
- Katılım
- 10 Ocak 2023
- Mesajlar
- 11
- Puanları
- 1
- Yaş
- 25
I want to redirect the page to another URL, but it doesn't work. I would be grateful for your help.
I wrote a loop like the one below. The response status is 200, but the URL is not opened on the screen. It doesn't work here at all.
I go to a URL and log in, and then I want the 'ad_link' column in the database to take me to each link, because I don't want to have to open a new session every time. I want to browse the links in my database through a session that is already open.
Python:
# Fetch `ad_link` with `requests` and report every redirect hop.
#
# NOTE: `requests` downloads the page in the background. It never displays
# anything in a browser window and it does not share the cookies of a
# Selenium session; to *show* the page in the logged-in browser, call the
# driver's `get(ad_link)` instead.
headers = {
    'Accept': 'application/json',
    'Accept-Encoding': 'gzip, deflate, br',
    'Accept-Language': 'en-TR,tr;q=0.9,en-US;q=0.8,en;q=0.7',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/103.0.5060.114 Safari/53',
}
# The keyword argument is `headers`; `heads=` is not accepted by requests.
response = requests.get(ad_link, headers=headers, allow_redirects=True)
ic(response.status_code)
if response.history:
    # `history` holds the intermediate responses, oldest first.
    ic("Request was redirected")
    for resp in response.history:
        ic(resp.status_code, resp.url)
    ic('For Control')
    ic("Final destination: ")
    # A Response has no `all_links` attribute; `url` is the final URL.
    ic(response.status_code, response.url)
    ic(ad_link)
else:
    ic("Request was not redirected")
ic(response)
time.sleep(sleep_time)
I wrote a loop like the one below. The response status is 200, but the URL is not opened on the screen. It doesn't work here at all.
I go to a URL and log in, and then I want the 'ad_link' column in the database to take me to each link, because I don't want to have to open a new session every time. I want to browse the links in my database through a session that is already open.
Python:
# Load the ad links from MySQL, start ONE Chrome session, then visit each
# link inside that same session.
mydb = mysql.connector.connect(
    host="localhost",
    user="root",
    password="",
    database="m"
)
mycursor = mydb.cursor()
urllib3.disable_warnings()
sql = "SELECT ad_link FROM test"
mycursor.execute(sql)
myresult = mycursor.fetchall()
all_links = myresult[0:]
len_all_links = len(all_links)
dataframe = pd.DataFrame(all_links, columns=['links'])
x = 0
y = 5
# Never index past the rows that were actually fetched.
number = np.arange(x, min(y, len_all_links))

# --- Browser setup: done once, before the loop, so that a login performed
# --- in this window is still valid for every following link.
Display = []
prefs = {"profile.managed_default_content_settings.images": 2}  # this is to not load images
sx = random.randint(1000, 1500)
sn = random.randint(3000, 4500)
# NOTE(review): presumably read by webdriver-manager when it downloads the
# driver, so it is set before ChromeDriverManager().install() - confirm.
os.environ['WDM_SSL_VERIFY'] = '0'
options = webdriver.ChromeOptions()
options.add_argument('--disable-blink-features=AutomationControlled')
options.add_experimental_option("detach", True)
options.add_experimental_option("prefs", prefs)
wsize = "--window-size=" + str(sx - 10) + ',' + str(sn - 10)
options.add_argument(wsize)
# add_argument() takes ONE flag per call, so the flags are added one by one.
# '--headless' from the original list is intentionally not added: it hides
# the browser window, and the pages are meant to be visible on screen.
# NOTE(review): this fixed --window-size comes after the randomised one and
# most likely overrides it - keep only the one you want.
for chrome_flag in ('--disable-gpu', '--window-size=1920,1080', '--no-sandbox', '--disable-dev-shm-usage'):
    options.add_argument(chrome_flag)
capabilities = options.to_capabilities()
service = Service(executable_path = r'C:\Users\Wiveda\chromedriver.exe')
# The options only take effect when they are handed to the driver.
test = webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=options)
sleep_time = 5

for i in tqdm(number):
    ad_link = dataframe.links[i]  # ad_link = dataframe["links"][i]
    print(ad_link)
    # Navigate the already-open browser instead of starting a new one.
    test.get(ad_link)
    time.sleep(sleep_time)
    ad_source = test.page_source
    ad_soup = BeautifulSoup(ad_source, 'lxml')
    mainresults = ad_soup.find_all('div', {'class': 'cBox u-inherit '})
    # The remaining per-link steps of the script belong inside this loop body.
# Dismiss the cookie banner (best effort), then reveal and read the phone
# number of the ad currently shown in `test`.
try:
    WebDriverWait(test, timeout=10).until(
        lambda d: d.find_element(By.XPATH, "//button[@class='sc-bczRLJ-accept-btn']")).click()
except Exception:
    # The banner is absent once it has been accepted in this session; that
    # must not prevent reading the phone number below.
    pass
try:
    WebDriverWait(test, timeout=10).until(
        lambda d: d.find_element(By.XPATH, "//p[@class='phone-container']")).click()
    tel_number = test.find_element(By.XPATH, "//p[@class='phone-container']").text
    ic(tel_number)
except Exception:
    # `Exception` instead of a bare `except` so Ctrl+C still stops the script.
    tel_number = 'Not Found Tel Number'
    ic(tel_number)
time.sleep(1)
# Look for blocked words in the ad text. `text_words` ends up as either the
# matched words ("a, b.") or the sentinel 'Not Found Words' that the
# mail-sending step compares against.
search_words = []
try:
    # `.text` is required: re.findall() needs a string, not a WebElement.
    web_text = test.find_element(By.XPATH, "/html/body/div[6]/div/div[2]/div[3]/div[1]").text
    words = ["Import", "x", "y", "z"]
    search_words = [word for word in words if re.findall(word, web_text)]
    if search_words:
        text_words = ", ".join(search_words) + "."
        ic(f"\nCannot send mail because it contains the word.Index : {text_words}")
        print("İf tamamlandı")
    else:
        # No blocked word on the page: use the same sentinel as the
        # "element not found" case so the later comparison still works.
        text_words = "Not Found Words"
    print("Try tamamlandı")
except Exception:
    text_words = "Not Found Words"
ic(text_words)
time.sleep(1)
# Pull the headline fields out of the parsed page. `find()` returns None
# when nothing matches, so `.get_text()` raises AttributeError; only that
# case falls back to the placeholder value.
try:
    brand_and_model = ad_soup.find("h1", {"class": ('h u-word')}).get_text()
except AttributeError:
    brand_and_model = ' '
try:
    model_version = ad_soup.find("div", {"class": ('list-title')}).get_text()
except AttributeError:
    model_version = ' '
try:
    location = ad_soup.find("p", {"class": ('seller-address')}).get_text()
except AttributeError:
    location = ' '
try:
    # NOTE(review): the tag name " " and the empty class look like
    # placeholders - this lookup presumably never matches; confirm selector.
    url_id = ad_soup.find(" ", {"class": ('')}).get_text()
except AttributeError:
    url_id = ''
# One-row summary frame; later code checks column names against it.
cars_data = pd.DataFrame(
    {
        'brand_and_model': brand_and_model,
        'model_version': model_version,
        'location': location,
        'tel_number': tel_number,
        'url_id': url_id,
    },
    index=[0],
)
# Collect the key-feature label/value pairs of the ad into two parallel
# lists: `description_list` (labels, spaces replaced by "_") and `value_list`.
try:
    table_pre = ad_soup.find("div", {"class": "cBox cBox--content cBox-body"})  # 1 (6 in one)
    all_div = table_pre.find_all("div", {"class": ('key-feature__content')})  # 6 (2 in one)
    all_title = table_pre.find_all("div", {"class": ('key-feature__label')})  # 6
    all_results = table_pre.find_all("div", {"class": ('key-feature__value')})  # 6
except AttributeError:
    # Table not on the page: reset explicitly so values from a previously
    # processed ad are never reused.
    all_div = all_title = all_results = None
description_list = []
value_list = []
# Without a table, six blank pairs are produced (the original fallback);
# later code inserts a column at a fixed position and relies on them.
div_length = len(all_div) if all_div is not None else 6
for i in range(div_length):
    try:
        # Read both halves first so a failure cannot leave the two lists
        # with different lengths.
        label = all_title[i].text.replace(" ", "_")
        value = all_results[i].text
    except (IndexError, TypeError):
        label, value = '', ''
    description_list.append(label)
    value_list.append(value)
# Build `df_last`, a one-row frame that combines the headline fields, the
# key-feature pairs, the ad link and a download timestamp.
# `all_key` / `all_value` are never filled in the visible code, so `df2`
# below is created empty.
all_key = []
all_value = []
# Every <div class="bullet-list"> on the page.
try:
pdiv = ad_soup.find_all('div', {'class': 'bullet-list'})
except:
pass
# `equipment_key` / `equipment_value` also stay empty in the visible code;
# the two lengths below are computed from `pdiv` but not used afterwards.
equipment_key = []
try:
equipment_key_length = len(pdiv)
except:
equipment_key_length = 1
equipment_value = []
try:
dd_ul_li_length = len(pdiv)
except:
dd_ul_li_length = 1
df3 = pd.DataFrame(list(zip(equipment_key, equipment_value)), columns=['all_key', 'all_value'])
df2 = pd.DataFrame(list(zip(all_key, all_value)), columns=['all_key', 'all_value'])
df1 = pd.DataFrame(list(zip(description_list, value_list)), columns=['description_list', 'value_list'])
# Transpose: the labels become column names, the values the single row.
df1 = df1.set_index('description_list').T.reset_index(drop=True)
df1 = df1.rename_axis(None, axis=1)
df1['link'] = ad_link
# Fixed positions matter: the insert step later reads these cells by index
# (0, 1, 2 and 5).
df1.insert(0, "brand_and_model", brand_and_model)
df1.insert(1, "model_version", model_version)
df1.insert(2, "location", location)
df1.insert(5, "tel_number", tel_number)
# Same reshaping for the (empty) key/value frames, then place them side by
# side with df1.
df2_3 = pd.concat([df2, df3])
df2_3 = df2_3.set_index('all_key').T.reset_index(drop=True)
df2_3 = df2_3.rename_axis(None, axis=1)
df_last = pd.concat([df1, df2_3], axis=1)
# Columns sharing a name are merged into one by joining their string values
# with ','.
# NOTE(review): groupby(..., axis=1) is deprecated in recent pandas
# releases - confirm against the installed version.
df_last = df_last.astype(str).groupby(df_last.columns, sort=False, axis=1).agg(
lambda x: x.apply(','.join, 1))
now = datetime.now()
# Timestamp formatted as YYYYMMDD_HHMMSS.
datetime_string = str(now.strftime("%Y%m%d_%H%M%S"))
# Appended last, so these are the final two columns of df_last.
df_last['ad_link'] = ad_link
df_last['download_date_time'] = datetime_string
# Open the PyMySQL connection used for saving results and define the DDL of
# the CARS table.
# NOTE(review): `config` is read but none of its values are used in the
# visible code; the credentials below are hard-coded.
config = configparser.RawConfigParser()
config.read(filenames='my.properties')
scrap_db = pymysql.connect(host='localhost', user='root', password='', database='m',
                           charset='utf8mb4', cursorclass=pymysql.cursors.DictCursor)
cursor = scrap_db.cursor()
# Corrections to the statement so that MySQL accepts it:
#   * DATE takes no length, and the script stores free-form strings in these
#     columns (download_date_time is "YYYYMMDD_HHMMSS"), so both are VARCHAR;
#   * "search words" contained a space and lacked the trailing comma;
#   * IF NOT EXISTS makes the statement safe to run more than once.
sql = """CREATE TABLE IF NOT EXISTS CARS(
brand_and_model VARCHAR(32),
model_version VARCHAR(64),
location VARCHAR(64),
tel_number VARCHAR(32),
mileage VARCHAR(32),
first_registration VARCHAR(32),
ad_link VARCHAR(256),
download_date_time VARCHAR(32),
search_words VARCHAR(64),
url_id INT
)"""
#cursor.execute(sql) #Save data to the table
# Write every row of `df_last` into the CARS table. Rows without a brand /
# model are skipped.
for row_count in range(0, df_last.shape[0]):
    chunk = df_last.iloc[row_count:row_count + 1, :].values.tolist()
    row = chunk[0]
    row_length = len(row)

    # Defaults for anything that cannot be read from this row.
    brand_and_model = ""
    model_version = ""
    location = ""
    tel_number = ""
    mileage = ""
    first_registration = ""
    ad_link = ""
    download_date_time = ""
    url_id = ""

    # Positions 0, 1, 2 and 5 are where these columns were inserted into the
    # frame earlier in the script.
    if "brand_and_model" in cars_data and row_length > 0:
        brand_and_model = row[0]
    if "model_version" in cars_data and row_length > 1:
        model_version = row[1]
    if "location" in cars_data and row_length > 2:
        location = row[2]
    if "tel_number" in cars_data and row_length > 5:
        tel_number = row[5]

    # The site labels are German: "Kilometerstand" and "Erstzulassung".
    if "Kilometerstand" in description_list:
        index_no = description_list.index("Kilometerstand")
        if index_no < len(value_list):
            mileage = value_list[index_no]
    if "Erstzulassung" in description_list:
        index_no = description_list.index("Erstzulassung")
        if index_no < len(value_list):
            first_registration = value_list[index_no]

    # The last two columns are ad_link and download_date_time.
    # (The original referenced the undefined name `light_of_chunk` here.)
    if row[row_length - 2] != "":
        ad_link = row[row_length - 2]  # ad_link
    if row[row_length - 1] != "":
        download_date_time = row[row_length - 1]  # datetime_string

    # The placeholder for a missing headline is a single blank, so compare
    # the stripped value.
    if str(brand_and_model).strip() == '':
        continue

    # One %s placeholder per column; the driver escapes the values. The
    # column is `url_id` (the original statement said `url_ids` and had no
    # VALUES list at all).
    mySql_insert_query = (
        "INSERT INTO CARS(brand_and_model,model_version,location,tel_number,mileage,"
        "first_registration,ad_link,download_date_time,url_id) "
        "VALUES (%s,%s,%s,%s,%s,%s,%s,%s,%s)"
    )
    # An empty url_id is stored as NULL, which is valid for an integer column.
    val = (brand_and_model, model_version, location, tel_number, mileage,
           first_registration, ad_link, download_date_time, url_id or None)
    time.sleep(5)
    cursor = scrap_db.cursor()
    cursor.execute(mySql_insert_query, val)
    scrap_db.commit()
    ic(cursor.rowcount, "Record inserted successfully into *CARS* table")
# Start the mail form only when neither a phone number nor a blocked word
# was found for this ad.
if (tel_number == 'Not Found Tel Number') and (text_words == 'Not Found Words'):
    control = "true"
else:
    control = "pass"
time.sleep(10)
if control == "true":
    ic("Mail Sending ")
    test.find_element(By.XPATH, "/div[2]/div[2]/div/div[4]/div/span").click()
    test.implicitly_wait(5)
    eMl = test.find_element(By.XPATH, "/html/div[1]/form/div[1]/div/input")
    test.implicitly_wait(3)
    # Python names are case-sensitive: the element is `eMl`, not `eml`.
    eMl.click()
    time.sleep(10)
    # NOTE(review): the address below appears to have been masked when the
    # script was copied - restore the real one.
    eMl.send_keys("[email protected] ")
    time.sleep(8)
# Wait for the password field, type the password followed by ENTER, then
# click a link on the following page.
# NOTE(review): the next line is truncated in this copy of the script - the
# rest of the XPath string and the closing parentheses of
# visibility_of_element_located(...) and until(...) are missing and must be
# restored from the original before this can run.
passw = WebDriverWait(test, 20).until(EC.visibility_of_element_located((By.XPATH, "/html/body/div[2> TAG1>
test.implicitly_wait(3)
passw.click()
time.sleep(1)
# Keys.ENTER is appended to the typed text.
passw.send_keys('W '+ Keys.ENTER)
time.sleep(5)
test.find_element(By.XPATH,'/html/div/div[2]/a').click()
time.sleep(3)
# Fetch `ad_link` with `requests` and report every redirect hop.
#
# NOTE: `requests` works in the background. It never shows a page in the
# Selenium window and does not use that browser's login cookies; to open
# the link in the logged-in browser call `test.get(ad_link)` instead.
heads = {
    'Accept': 'application/json',
    'Accept-Encoding': 'gzip, deflate, br',
    'Accept-Language': 'en-TR,tr;q=0.9,en-US;q=0.8,en;q=0.7',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/103.0.5060.114 Safari/53',
}
try:
    # The dict is called `heads`; the keyword requests expects is `headers`.
    response = requests.get(ad_link, headers=heads, allow_redirects=True, timeout=2.50)
except requests.RequestException as request_error:
    # With a 2.5 s timeout a slow site is expected now and then; report it
    # instead of aborting the whole run.
    ic(request_error)
else:
    ic(response.status_code)
    if response.history:
        # One pass over the hops (the original nested the same loop twice).
        ic("Request was redirected")
        for resp in response.history:
            ic(resp.status_code, resp.url)
        ic('For Control')
        ic("Final destination: ")
        # A Response has no `all_links` attribute; `url` is the final URL.
        ic(response.status_code, response.url)
        ic(ad_link)
    else:
        ic("Request was not redirected")
    ic(response)
time.sleep(sleep_time)
ic('destination_status')