Python代码-UUpython

python调用ffmpeg进行视频转码

2024-03-29 · uupython · 阅读(2425)

# -*- coding:utf-8 -*-
import wx
import subprocess
import os
import sys
import re
#解决额外资源打包问题
def get_path(relative_path):
    """
    Resolve a resource path against the application's base directory.

    The base directory is ``sys._MEIPASS`` when that attribute exists
    (presumably set by the packaging tool the original comment refers to —
    confirm against the build setup); otherwise it is the absolute path of
    the current working directory.

    Args:
        relative_path (str): Resource path relative to the base directory.

    Returns:
        str: The joined and normalized path.
    """
    if hasattr(sys, "_MEIPASS"):
        base_path = sys._MEIPASS
    else:
        base_path = os.path.abspath(".")
    joined = os.path.join(base_path, relative_path)
    return os.path.normpath(joined)
 
# Path of the ffmpeg executable, resolved once when the module is loaded.
ffmpeg_path =get_path("assets/ffmpeg.exe")  # location of the resource
#弹窗消息
def show_message(message):
    """
    Show a modal information dialog titled "提示" with a single OK button.

    The running ``wx.App`` is reused when there is one. A new application
    object is created only when none exists yet, so that calling this
    function from inside the GUI's event handlers does not replace the
    active application or start a nested main loop.

    Args:
        message (str): The text to display.
    """
    app = wx.GetApp()
    if app is None:
        # Keep a reference so the application object outlives the dialog.
        app = wx.App(False)
    dlg = wx.MessageDialog(None, message, "提示", wx.OK | wx.ICON_INFORMATION)
    try:
        # ShowModal() runs its own event loop until the dialog is dismissed,
        # so no additional MainLoop() call is needed afterwards.
        dlg.ShowModal()
    finally:
        dlg.Destroy()
#转mp4
def convert_to_mp4(input_video_path, output_directory):
    """
    Transcode a video file to MP4 by running ffmpeg.

    The result is written into ``output_directory`` under the input's base
    name with an ``.mp4`` extension. If that file already exists, a message
    box is shown and nothing is converted. Otherwise ffmpeg is run with
    libx264 video and AAC audio, and a message box reports success or the
    non-zero exit code.

    Args:
        input_video_path (str): Path of the video file to convert.
        output_directory (str): Directory that receives the converted file.
    """
    stem, _extension = os.path.splitext(os.path.basename(input_video_path))
    output_mp4_path = os.path.join(output_directory, stem + ".mp4")
    if os.path.exists(output_mp4_path):
        show_message(f"目录中存在同名文件 '{output_mp4_path}'，请删除或重命名后重试。")
        return

    command = [
        ffmpeg_path,
        "-i", input_video_path,
        "-c:v", "libx264",
        "-preset", "slow",
        "-crf", "23",
        "-c:a", "aac",
        "-b:a", "192k",
        "-strict", "experimental",
        output_mp4_path,
    ]
    exit_code = subprocess.run(command).returncode
    if exit_code != 0:
        show_message(f"转码失败！错误代码：{exit_code}")
    else:
        show_message("转码成功！")
#转mp3
def convert_to_mp3(input_video_path, output_directory):
    """
    Extract the audio of a video file into an MP3 by running ffmpeg.

    The result is written into ``output_directory`` under the input's base
    name with an ``.mp3`` extension. If that file already exists, a message
    box is shown and nothing is converted. Otherwise ffmpeg is run with the
    video stream disabled (``-vn``) and the libmp3lame encoder, and a
    message box reports success or the non-zero exit code.

    Args:
        input_video_path (str): Path of the video file to convert.
        output_directory (str): Directory that receives the converted file.
    """
    stem, _extension = os.path.splitext(os.path.basename(input_video_path))
    output_mp3_path = os.path.join(output_directory, stem + ".mp3")
    if os.path.exists(output_mp3_path):
        show_message(f"目录中存在同名文件 '{output_mp3_path}'，请删除或重命名后重试。")
        return

    command = [
        ffmpeg_path,
        "-i", input_video_path,
        "-vn",
        "-acodec", "libmp3lame",
        output_mp3_path,
    ]
    exit_code = subprocess.run(command).returncode
    if exit_code != 0:
        show_message(f"转码失败！错误代码：{exit_code}")
    else:
        show_message("转码成功！")
#转avi
def convert_to_avi(input_video_path, output_directory):
    """
    Transcode a video file to AVI by running ffmpeg.

    The result is written into ``output_directory`` under the input's base
    name with an ``.avi`` extension. If that file already exists, a message
    box is shown and nothing is converted. Otherwise ffmpeg is run with
    libx264 video and ``pcm_s16le`` audio, and a message box reports success
    or the non-zero exit code.

    Args:
        input_video_path (str): Path of the video file to convert.
        output_directory (str): Directory that receives the converted file.
    """
    stem, _extension = os.path.splitext(os.path.basename(input_video_path))
    output_avi_path = os.path.join(output_directory, stem + ".avi")
    if os.path.exists(output_avi_path):
        show_message(f"目录中存在同名文件 '{output_avi_path}'，请删除或重命名后重试。")
        return

    command = [
        ffmpeg_path,
        "-i", input_video_path,
        "-c:v", "libx264",
        "-preset", "slow",
        "-crf", "23",
        "-c:a", "pcm_s16le",
        output_avi_path,
    ]
    exit_code = subprocess.run(command).returncode
    if exit_code != 0:
        show_message(f"转码失败！错误代码：{exit_code}")
    else:
        show_message("转码成功！")
#转wmv
def convert_to_wmv(input_video_path, output_directory):
    """
    Transcode a video file to WMV by running ffmpeg.

    The result is written into ``output_directory`` under the input's base
    name with a ``.wmv`` extension. If that file already exists, a message
    box is shown and nothing is converted. Otherwise ffmpeg is run with the
    ``wmv2`` video codec at 1024k and ``wmav2`` audio, and a message box
    reports success or the non-zero exit code.

    Args:
        input_video_path (str): Path of the video file to convert.
        output_directory (str): Directory that receives the converted file.
    """
    stem, _extension = os.path.splitext(os.path.basename(input_video_path))
    output_wmv_path = os.path.join(output_directory, stem + ".wmv")
    if os.path.exists(output_wmv_path):
        show_message(f"目录中存在同名文件 '{output_wmv_path}'，请删除或重命名后重试。")
        return

    command = [
        ffmpeg_path,
        "-i", input_video_path,
        "-c:v", "wmv2",
        "-b:v", "1024k",
        "-c:a", "wmav2",
        output_wmv_path,
    ]
    exit_code = subprocess.run(command).returncode
    if exit_code != 0:
        show_message(f"转码失败！错误代码：{exit_code}")
    else:
        show_message("转码成功！")
#转gif
def convert_to_gif(input_video_path, output_directory, start_time, end_time, fps=10):
    """
    Convert a segment of a video file to an animated GIF by running ffmpeg.

    The result is written into ``output_directory`` as
    ``<input base name>_segment.gif``. If that file already exists, a
    message box is shown and nothing is converted. A message box reports
    success or the non-zero ffmpeg exit code.

    Args:
        input_video_path (str): Path of the video file to convert.
        output_directory (str): Directory that receives the GIF.
        start_time (int | None): Segment start in seconds; ``None`` is
            treated as 0.
        end_time (int | None): Segment end in seconds; ``None`` means
            "until the end of the video" (no ``-t`` limit is passed).
        fps (int): Frame rate of the GIF, 10 by default.
    """
    video_name = os.path.splitext(os.path.basename(input_video_path))[0]
    output_gif_path = os.path.join(output_directory, f"{video_name}_segment.gif")
    if os.path.exists(output_gif_path):
        show_message(f"目录中存在同名文件 '{output_gif_path}'，请删除或重命名后重试。")
        return

    if start_time is None:
        start_time = 0
    command = [ffmpeg_path, "-ss", str(start_time), "-i", input_video_path]
    # The dispatcher passes end_time=None when no range was entered; the
    # subtraction below would raise TypeError in that case, so the duration
    # limit is simply left out.
    if end_time is not None:
        command += ["-t", str(end_time - start_time)]
    command += ["-vf", f"fps={fps}", output_gif_path]

    result = subprocess.run(command)
    if result.returncode == 0:
        show_message("转码成功！")
    else:
        show_message(f"转码失败！错误代码：{result.returncode}")
#转mov
def convert_to_mov(input_video_path, output_directory):
    """
    Transcode a video file to MOV by running ffmpeg.

    The result is written into ``output_directory`` under the input's base
    name with a ``.mov`` extension. If that file already exists, a message
    box is shown and nothing is converted. Otherwise ffmpeg is run with
    libx264 video, and a message box reports success or the non-zero exit
    code.

    Args:
        input_video_path (str): Path of the video file to convert.
        output_directory (str): Directory that receives the converted file.
    """
    stem, _extension = os.path.splitext(os.path.basename(input_video_path))
    output_mov_path = os.path.join(output_directory, stem + ".mov")
    if os.path.exists(output_mov_path):
        show_message(f"目录中存在同名文件 '{output_mov_path}'，请删除或重命名后重试。")
        return

    command = [
        ffmpeg_path,
        "-i", input_video_path,
        "-c:v", "libx264",
        "-preset", "slow",
        "-crf", "23",
        output_mov_path,
    ]
    exit_code = subprocess.run(command).returncode
    if exit_code != 0:
        show_message(f"转码失败！错误代码：{exit_code}")
    else:
        show_message("转码成功！")
 
# 函数集合
def function_dispatcher(number,input_video_path,output_directory,start_time,end_time):
    """
    Run the conversion function that corresponds to the selected index.

    Index mapping: 0 MP4, 1 AVI, 2 WMV, 3 MP3, 4 GIF, 5 MOV. An index that
    is not in the table is ignored. Only the GIF converter receives the
    time range; when either bound is ``None`` it is called with a start of
    0 and an end of ``None``.

    Args:
        number (int): Index of the chosen target format.
        input_video_path (str): Path of the input video file.
        output_directory (str): Directory that receives the output file.
        start_time (int | None): Segment start in seconds (GIF only).
        end_time (int | None): Segment end in seconds (GIF only).
    """
    functions = {
        0: convert_to_mp4,
        1: convert_to_avi,
        2: convert_to_wmv,
        3: convert_to_mp3,
        4: convert_to_gif,
        5: convert_to_mov,
    }
    converter = functions.get(number)
    if converter is None:
        return

    if number != 4:
        converter(input_video_path, output_directory)
        return

    # GIF conversion: forward the range, or fall back to (0, None) when the
    # range is incomplete.
    if start_time is None or end_time is None:
        converter(input_video_path, output_directory, 0, None)
    else:
        converter(input_video_path, output_directory, start_time, end_time)
#gif所需视频长度格式
def convert_time_to_seconds(time_str):
    """
    Convert a time value to a number of seconds.

    Args:
        time_str (str | int): Either an integer / all-digit string, taken
            as seconds, or a string in ``HH:MM:SS`` form.

    Returns:
        int: The total number of seconds.

    Raises:
        ValueError: If the value is neither all digits nor three
            colon-separated integers.
    """
    # Plain seconds: an int, or a string made of digits only.
    if isinstance(time_str, int) or time_str.isdigit():
        return int(time_str)

    fields = time_str.split(':')
    if len(fields) != 3:
        # Covers both "no colon at all" and "wrong number of fields".
        raise ValueError("Invalid time format")

    try:
        hours, minutes, seconds = (int(field) for field in fields)
    except ValueError:
        # One of the fields is not an integer.
        raise ValueError("Invalid time format")
    return hours * 3600 + minutes * 60 + seconds
#转换时间字符串
def format_duration(duration):
    """
    Format a duration in seconds as an ``HH:MM:SS`` string.

    Each component is truncated to an integer and zero-padded to at least
    two digits.

    Args:
        duration (float): Duration in seconds.

    Returns:
        str: The formatted duration.
    """
    components = (
        duration // 3600,        # whole hours
        duration % 3600 // 60,   # whole minutes left after the hours
        duration % 60,           # seconds left after the minutes
    )
    return ":".join(f"{int(component):02d}" for component in components)
#获取视频长度
def chixu_video(input_video_path):
    """
    Return the duration of a video file as an ``HH:MM:SS`` string.

    ffmpeg is run with only ``-i <file>``, and the ``Duration:`` line of
    its stderr output is parsed (fractions of a second are dropped).

    Args:
        input_video_path (str): Path of the video file; may be empty.

    Returns:
        str: The formatted duration, or ``"00:00:00"`` when the path is
        empty, ffmpeg cannot be started, or no duration is reported.
    """
    if not input_video_path:
        return "00:00:00"

    command = [ffmpeg_path, '-i', input_video_path]
    try:
        # Decode explicitly and tolerate undecodable bytes: with the locale
        # default encoding, non-ASCII text in ffmpeg's output could raise
        # UnicodeDecodeError. Only the ASCII "Duration:" line is needed.
        result = subprocess.run(
            command,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            encoding="utf-8",
            errors="replace",
        )
    except OSError:
        # ffmpeg is missing or not executable: report "unknown duration".
        return "00:00:00"

    duration_match = re.search(r'Duration:\s*(\d+):(\d+):(\d+)', result.stderr)
    if duration_match is None:
        return "00:00:00"
    hours, minutes, seconds = (int(group) for group in duration_match.groups())
    return f"{hours:02d}:{minutes:02d}:{seconds:02d}"
class Frame(wx.Frame):
    """
    Main window of the converter.

    It lets the user pick an input file, a target format (MP4, AVI, WMV,
    MP3, GIF or MOV), an optional output directory and, for GIF, a start
    and end time, then runs the matching conversion through
    ``function_dispatcher``.

    The numeric ``style=`` values are kept exactly as in the original code
    (presumably emitted by a GUI designer — not verified here).
    """

    def __init__(self):
        """Create all widgets, hide the GIF time controls and bind the event handlers."""
        wx.Frame.__init__(self, None, title='原创：吾爱qianaonan', size=(430, 239), name='frame', style=541072384)
        self.启动窗口 = wx.Panel(self)
        self.Centre()
        # Input file path display and its "choose file" button.
        self.编辑框1 = wx.TextCtrl(self.启动窗口, size=(293, 29), pos=(8, 5), value='', name='text', style=16)
        self.编辑框1.SetForegroundColour((128, 128, 128, 255))
        self.按钮1 = wx.Button(self.启动窗口, size=(80, 32), pos=(315, 3), label='选择文件', name='button')
        self.标签1 = wx.StaticText(self.启动窗口, size=(80, 24), pos=(10, 44), label='转换成', name='staticText',style=2321)
        # Target format radio buttons; their order defines the index used by
        # GetSelectedRadioButtonIndex() and function_dispatcher().
        self.单选框2 = wx.RadioButton(self.启动窗口, size=(58, 24), pos=(8, 72), name='radioButton', label='MP4')
        self.单选框3 = wx.RadioButton(self.启动窗口, size=(40, 24), pos=(73, 72), name='radioButton', label='AVI')
        self.单选框4 = wx.RadioButton(self.启动窗口, size=(58, 24), pos=(136, 72), name='radioButton', label='WMV')
        self.单选框5 = wx.RadioButton(self.启动窗口, size=(61, 24), pos=(209, 72), name='radioButton', label='MP3')
        self.单选框6 = wx.RadioButton(self.启动窗口, size=(42, 24), pos=(275, 72), name='radioButton', label='GIF')
        self.单选框7 = wx.RadioButton(self.启动窗口, size=(80, 24), pos=(327, 72), name='radioButton', label='MOV')
        # Output directory display (starts with a placeholder text) and its button.
        self.按钮2 = wx.Button(self.启动窗口, size=(80, 32), pos=(317, 107), label='保存目录', name='button')
        self.编辑框2 = wx.TextCtrl(self.启动窗口, size=(293, 29), pos=(8, 107), value='未选择目录则是源文件相同目录', name='text', style=16)
        self.编辑框2.SetForegroundColour((128, 128, 128, 255))
        self.按钮3 = wx.Button(self.启动窗口, size=(80, 32), pos=(254, 156), label='开始转换', name='button')
        self.Bind(wx.EVT_BUTTON, self.OnSelectFile, self.按钮1)
        self.Bind(wx.EVT_BUTTON, self.OnSelectFolder, self.按钮2)
        self.按钮3.Bind(wx.EVT_BUTTON, self.按钮3_按钮被单击)
        # Warning and hint labels.
        self.标签2 = wx.StaticText(self.启动窗口,size=(195, 36),pos=(15, 153),label='请谨慎使用gif转换，一般是视频过\n长会导致程序卡死',name='staticText',style=17)
        self.标签2.SetForegroundColour((255, 0, 0, 255))
        self.标签3 = wx.StaticText(self.启动窗口,size=(80, 17),pos=(254, 56),label='10帧/秒',name='staticText',style=2321)
        self.标签4 = wx.StaticText(self.启动窗口,size=(80, 46),pos=(335, 151),label='视频越长\n花费时间越长',name='staticText',style=2321)
        self.标签4.SetForegroundColour((255, 0, 0, 255))
        # GIF start (编辑框3) and end (编辑框4) time inputs with their labels.
        self.编辑框3 = wx.TextCtrl(self.启动窗口, size=(80, 18), pos=(303, 35), value='', name='text',style=0)
        self.编辑框3.SetForegroundColour((128, 128, 128, 255))
        self.编辑框4 = wx.TextCtrl(self.启动窗口, size=(80, 18), pos=(304, 54), value='', name='text',style=0)
        self.编辑框4.SetForegroundColour((128, 128, 128, 255))
        self.编辑框4.Bind(wx.EVT_KILL_FOCUS, self.编辑框4_失去焦点)
        self.编辑框4.Bind(wx.EVT_SET_FOCUS, self.onSetFocus)
        self.标签5 = wx.StaticText(self.启动窗口, size=(26, 17), pos=(269, 38), label='开始', name='staticText',style=2321)
        self.标签6 = wx.StaticText(self.启动窗口, size=(28, 14), pos=(268, 56), label='结束', name='staticText',style=2321)
        # The time range controls are only shown while GIF is selected.
        self.编辑框3.Hide()
        self.编辑框4.Hide()
        self.标签5.Hide()
        self.标签6.Hide()
        for radio_button in (self.单选框2, self.单选框3, self.单选框4,
                             self.单选框5, self.单选框6, self.单选框7):
            radio_button.Bind(wx.EVT_RADIOBUTTON, self.onRadioButton)

    def 编辑框4_失去焦点(self,event):
        """Restore the video duration as end time if the box was left empty."""
        # Only fill in the default when nothing was typed; overwriting
        # unconditionally would discard the end time the user just entered.
        if self.编辑框4.GetValue().strip() == '':
            self.编辑框4.SetValue(str(chixu_video(self.编辑框1.GetValue())))
        # Focus events must be skipped so the native control also handles them.
        event.Skip()

    def onSetFocus(self, event):
        """Clear the end-time box and switch its text colour to black on focus."""
        self.编辑框4.SetValue("")
        self.编辑框4.SetForegroundColour(wx.BLACK)
        # Focus events must be skipped so the native control also handles them.
        event.Skip()

    def onRadioButton(self, event):
        """Show the GIF time controls when GIF is selected, hide them otherwise."""
        selectedRadioButton = event.GetEventObject()
        if selectedRadioButton in [self.单选框6] and selectedRadioButton.GetValue():
            self.标签3.Hide()
            self.编辑框3.Show()
            self.编辑框4.Show()
            # Default range: from 0 to the full duration of the chosen file.
            self.编辑框3.SetValue('0')
            self.编辑框4.SetValue(str(chixu_video(self.编辑框1.GetValue())))
            self.标签5.Show()
            self.标签6.Show()
        else:
            self.标签3.Show()
            self.编辑框3.Hide()
            self.编辑框4.Hide()
            self.标签5.Hide()
            self.标签6.Hide()

    def GetSelectedRadioButtonIndex(self):
        """
        Return the index of the selected format radio button.

        Returns:
            int | None: 0 MP4, 1 AVI, 2 WMV, 3 MP3, 4 GIF, 5 MOV, or
            ``None`` when no button is selected.
        """
        radio_buttons = [self.单选框2, self.单选框3, self.单选框4, self.单选框5, self.单选框6, self.单选框7]
        for i, button in enumerate(radio_buttons):
            if button.GetValue():
                return i
        return None

    def OnSelectFile(self, event):
        """Let the user choose the input file(s) and show the selection in the path box."""
        wildcard = "Video Files (*.mp4;*.avi;*.wmv;*.mov)|*.mp4;*.avi;*.wmv;*.mov|All Files (*.*)|*.*"
        dialog = wx.FileDialog(self, "选择文件", wildcard=wildcard,
                               style=wx.FD_OPEN | wx.FD_FILE_MUST_EXIST | wx.FD_MULTIPLE)
        try:
            if dialog.ShowModal() == wx.ID_CANCEL:
                return
            paths = dialog.GetPaths()
        finally:
            # Destroy the dialog on every path, including cancel.
            dialog.Destroy()
        # NOTE(review): several selected files are joined with newlines, but
        # the conversion code treats the box content as one single path, so
        # only a single-file selection works — confirm the intended behaviour.
        self.编辑框1.SetValue("\n".join(paths))
        if self.单选框6.GetValue():
            self.编辑框4.SetValue(str(chixu_video(self.编辑框1.GetValue())))

    def OnSelectFolder(self, event):
        """Let the user choose the output directory and show it in the directory box."""
        dialog = wx.DirDialog(self, "选择文件夹", style=wx.DD_DEFAULT_STYLE | wx.DD_DIR_MUST_EXIST)
        try:
            if dialog.ShowModal() == wx.ID_CANCEL:
                return
            folder_path = dialog.GetPath()
        finally:
            # Destroy the dialog on every path, including cancel.
            dialog.Destroy()
        self.编辑框2.SetValue(folder_path)

    def 按钮3_按钮被单击(self, event):
        """Validate the form and start the conversion for the selected format."""
        input_path1 = self.编辑框1.GetValue()
        selected_index = self.GetSelectedRadioButtonIndex()
        if input_path1.strip() == '' or selected_index is None:
            wx.MessageBox('你有东西没选中，请重新设置', '警告', wx.OK | wx.ICON_WARNING)
            return

        # The placeholder text means "no directory chosen": write next to the input file.
        if self.编辑框2.GetValue().strip()=='未选择目录则是源文件相同目录':
            output_directory1=os.path.dirname(input_path1)
        else:
            output_directory1 = self.编辑框2.GetValue()

        # The time range only matters for GIF, so it is parsed only then;
        # stale text in the hidden boxes cannot break other conversions.
        start_time = None
        end_time = None
        if self.单选框6.GetValue():
            start_time_str = self.编辑框3.GetValue().strip()
            end_time_str = self.编辑框4.GetValue().strip()
            if start_time_str != '' and end_time_str != '':
                try:
                    start_time = convert_time_to_seconds(start_time_str)
                    end_time = convert_time_to_seconds(end_time_str)
                except ValueError:
                    wx.MessageBox('时间格式不正确，请输入秒数或 HH:MM:SS', '警告', wx.OK | wx.ICON_WARNING)
                    return
        function_dispatcher(selected_index, input_path1, output_directory1, start_time, end_time)
class myApp(wx.App):
    """Application object that creates and shows the main ``Frame``."""

    def OnInit(self):
        """Build the main window, keep it as ``self.frame`` and show it.

        Returns:
            bool: Always ``True``.
        """
        main_window = Frame()
        self.frame = main_window
        main_window.Show(True)
        return True
 
if __name__ == '__main__':
    # Script entry point: create the application and enter its event loop.
    app = myApp()
    app.MainLoop()

爬取读万卷小说网

2024-03-27 · uupython · 阅读(228639)

from selenium import webdriver
import requests, re, os, time, shutil, threading, queue
from lxml import etree
import pandas as pd
import random

def get_user_agent():
    """
    Return the pool of User-Agent strings used for HTTP requests.

    Despite the singular name, the whole list is returned; a new list
    object is built on every call. The list keeps a few repeated entries.

    Returns:
        list[str]: The User-Agent strings.
    """
    return [
        "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; AcooBrowser; .NET CLR 1.1.4322; .NET CLR 2.0.50727)",
        "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.0; Acoo Browser; SLCC1; .NET CLR 2.0.50727; Media Center PC 5.0; .NET CLR 3.0.04506)",
        "Mozilla/4.0 (compatible; MSIE 7.0; AOL 9.5; AOLBuild 4337.35; Windows NT 5.1; .NET CLR 1.1.4322; .NET CLR 2.0.50727)",
        "Mozilla/5.0 (Windows; U; MSIE 9.0; Windows NT 9.0; en-US)",
        "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Win64; x64; Trident/5.0; .NET CLR 3.5.30729; .NET CLR 3.0.30729; .NET CLR 2.0.50727; Media Center PC 6.0)",
        "Mozilla/5.0 (compatible; MSIE 8.0; Windows NT 6.0; Trident/4.0; WOW64; Trident/4.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; .NET CLR 1.0.3705; .NET CLR 1.1.4322)",
        "Mozilla/4.0 (compatible; MSIE 7.0b; Windows NT 5.2; .NET CLR 1.1.4322; .NET CLR 2.0.50727; InfoPath.2; .NET CLR 3.0.04506.30)",
        "Mozilla/5.0 (Windows; U; Windows NT 5.1; zh-CN) AppleWebKit/523.15 (KHTML, like Gecko, Safari/419.3) Arora/0.3 (Change: 287 c9dfb30)",
        "Mozilla/5.0 (X11; U; Linux; en-US) AppleWebKit/527+ (KHTML, like Gecko, Safari/419.3) Arora/0.6",
        "Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.2pre) Gecko/20070215 K-Ninja/2.1.1",
        "Mozilla/5.0 (Windows; U; Windows NT 5.1; zh-CN; rv:1.9) Gecko/20080705 Firefox/3.0 Kapiko/3.0",
        "Mozilla/5.0 (X11; Linux i686; U;) Gecko/20070322 Kazehakase/0.4.5",
        "Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.0.8) Gecko Fedora/1.9.0.8-1.fc10 Kazehakase/0.5.6",
        "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.56 Safari/535.11",
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_3) AppleWebKit/535.20 (KHTML, like Gecko) Chrome/19.0.1036.7 Safari/535.20",
        "Opera/9.80 (Macintosh; Intel Mac OS X 10.6.8; U; fr) Presto/2.9.168 Version/11.52",
        "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.11 (KHTML, like Gecko) Chrome/20.0.1132.11 TaoBrowser/2.0 Safari/536.11",
        "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/21.0.1180.71 Safari/537.1 LBBROWSER",
        "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; WOW64; Trident/5.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; .NET4.0C; .NET4.0E; LBBROWSER)",
        "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; QQDownload 732; .NET4.0C; .NET4.0E; LBBROWSER)",
        "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.84 Safari/535.11 LBBROWSER",
        "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.1; WOW64; Trident/5.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; .NET4.0C; .NET4.0E)",
        "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; WOW64; Trident/5.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; .NET4.0C; .NET4.0E; QQBrowser/7.0.3698.400)",
        "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; QQDownload 732; .NET4.0C; .NET4.0E)",
        "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; Trident/4.0; SV1; QQDownload 732; .NET4.0C; .NET4.0E; 360SE)",
        "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; QQDownload 732; .NET4.0C; .NET4.0E)",
        "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.1; WOW64; Trident/5.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; .NET4.0C; .NET4.0E)",
        "Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/21.0.1180.89 Safari/537.1",
        "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/21.0.1180.89 Safari/537.1",
        "Mozilla/5.0 (iPad; U; CPU OS 4_2_1 like Mac OS X; zh-cn) AppleWebKit/533.17.9 (KHTML, like Gecko) Version/5.0.2 Mobile/8C148 Safari/6533.18.5",
        "Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:2.0b13pre) Gecko/20110307 Firefox/4.0b13pre",
        "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:16.0) Gecko/20100101 Firefox/16.0",
        "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.64 Safari/537.11",
        "Mozilla/5.0 (X11; U; Linux x86_64; zh-CN; rv:1.9.2.10) Gecko/20100922 Ubuntu/10.10 (maverick) Firefox/3.6.10",
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36",
    ]
'''def get_proxy():
    proxy = [
        'http://182.140.244.163:8118',
        'http://113.124.86.180:9999',
       'http://117.64.237.42:9999',
        'http://182.34.102.48:9999',
        'http://183.236.123.242:8060',
        'http://27.192.203.80:9000',
        'http://114.231.8.242:8888',
        'http://36.134.91.82:8888',
        'http://222.132.57.105:9000',
        'http://61.216.156.222:60808',
        'http://182.34.20.110:9999',
        'http://60.205.132.71:80',
    ]
    return proxy
/'''
# Request headers passed to the requests.get() calls in this script. The
# User-Agent is chosen at random once, when the module is loaded.
headers = {
    'user-agent': random.choice(get_user_agent()),
}
'''proxy = {
   'http': random.choice(get_proxy()),}
/'''

def extract_link_suffix(url):
    """
    Return the part of ``url`` that follows its last slash.

    Args:
        url (str): The link to shorten.

    Returns:
        str: Everything after the last ``/``; the whole string when it
        contains no slash, and an empty string when it ends with one.
    """
    # rpartition yields ('', '', url) when there is no slash, so the last
    # element is the right answer in both cases.
    _head, _slash, suffix = url.rpartition('/')
    return suffix

# 搜索小说，并选择所需要下载的小说
def search_novel():
    """
    Search the site for a novel and let the user pick one of the results.

    A headless Chrome session loads the search page for the text the user
    types, up to ten results are printed as a table, and the user chooses
    one by its 1-based number. When a search yields nothing, the prompt is
    repeated until results are found.

    Returns:
        str: The link (``href``) of the chosen novel.
    """
    # A loop replaces the original recursive retry, whose result was never
    # returned (the caller received None after an empty first search).
    while True:
        chrome_options = webdriver.ChromeOptions()
        # Run the browser in the background, without a window.
        chrome_options.add_argument('--headless')
        print('浏览器已打开')
        browser = webdriver.Chrome(options=chrome_options)
        try:
            name_input = input('输入小说名或作者：')
            browser.get(f'http://www.duwanjuan.info/modules/article/search.php?q={name_input}')
            # Give the page time to finish loading before reading its source.
            time.sleep(6)
            page_source = browser.page_source
        finally:
            # quit() ends the whole driver session; close() only closes the
            # window and can leave the driver process running.
            browser.quit()

        html = etree.HTML(page_source)
        name = html.xpath("//div[@id='jieqi_page_contents']/div[@class='c_row']/div/div/span[@class='c_subject']/a/text()")[:10]
        chapter = html.xpath("//div[@class='c_tag']/span[@class='c_value']/a/text()")[:10]
        link = html.xpath("//div[@id='jieqi_page_contents']/div[@class='c_row']/div/div/a/@href")[:10]
        # Only the last path component of each link is shown in the table.
        link_suffixes = [extract_link_suffix(l) for l in link]
        author = html.xpath("//div[@class='c_tag']/span[contains(text(), '作者：')]/following-sibling::span[1]/text()")[:10]
        num = list(range(1, len(name) + 1))
        # NOTE(review): DataFrame construction assumes all five lists have
        # the same length — confirm against the page structure.
        data = {'序号': num, '小说': name, '作者': author,'最新章节':chapter,'链接':link_suffixes}
        df = pd.DataFrame(data)
        if df.empty:
            print('搜索数据为空，请重新搜索')
            continue

        print(df)
        sx_input = int(input('请输入序号选择下载的小说：'))
        return link[sx_input - 1]

# 定义一个函数来获取小说章节目录的URL和章节名
def get_chapter_urls(url, visited_urls, value):
    """
    Fetch a novel's index page and collect its chapter links.

    Side effects: sets the module globals ``tot_title`` (all chapter link
    texts) and ``book_name`` (the page's ``h1`` text), and adds every
    returned chapter URL to ``visited_urls``.

    Args:
        url (str): URL of the novel's chapter index page.
        visited_urls (set): URLs already collected; these are skipped.
        value (int): Counter start; the first new chapter gets ``value + 1``.

    Returns:
        list[tuple]: ``(chapter_name, chapter_url, sequence_number)`` for
        every chapter link not seen before, in page order.
    """
    global tot_title
    global book_name
    # A timeout keeps a stalled server from blocking the program forever.
    response = requests.get(url, headers=headers, timeout=15)
    response.encoding = response.apparent_encoding
    html = etree.HTML(response.text)
    chapter_elements = html.xpath("//div[@class='index']//li[@class='chapter']/a")
    # The original drops the 11th link unconditionally (reason not visible
    # here — TODO confirm); guard it so short indexes do not raise IndexError.
    if len(chapter_elements) > 10:
        chapter_elements.pop(10)
    tot_title = html.xpath("//div[@class='index']//li[@class='chapter']/a/text()")
    bk = html.xpath("//div[@class='main']/div[@class='headlink cf']/h1/text()[1]")
    # Use the first matched title text, or an empty string when none is found.
    text = bk[0] if bk else ""

    # If the title looks like "['name']", keep only the part inside.
    match = re.search(r"\['(.*?)'\]", text)
    book_name = match.group(1) if match else text

    chapter_urls = []
    for element in chapter_elements:
        chapter_url = element.get('href')
        if chapter_url in visited_urls:
            continue
        value += 1
        chapter_urls.append((element.text, chapter_url, value))
        visited_urls.add(chapter_url)
    return chapter_urls


# 定义一个函数来获取小说具体章节的内容
def get_chapter_content(url):
    """
    Download one chapter page and return its cleaned text fragments.

    Args:
        url (str): URL of the chapter page.

    Returns:
        list[str]: The text nodes of the ``acontent`` element with
        whitespace, parentheses and the site watermark removed; an empty
        list when the request fails.
    """
    # The body is now indented under the def; in the original it sat at
    # column 0, which made the whole script a syntax error.
    try:
        # NOTE(review): verify=False disables TLS certificate checks.
        response = requests.get(url, headers=headers,verify=False,timeout=15)
        response.encoding = response.apparent_encoding
        html = etree.HTML(response.text)
        content_element = html.xpath("//div[@id='acontent']/text()")
        pattern = r'\r\n     \xa0\xa0\xa0\xa0|\s|\(|\)|\读万卷 www.duwanjuan.info'
        content = [re.sub(pattern, '', sub_text) for sub_text in content_element]
        return content
    except requests.RequestException as e:
        print(f"Error occurred while fetching content from {url}: {e}")
        return []



# 定义一个函数来处理每个章节的爬取任务
def process_chapter(chapter_queue):
    """
    Worker loop: download queued chapters and write each to its own file.

    Every queue item is ``(chapter_name, chapter_url, value)``. The chapter
    text is written to ``<book_name>/<value>.txt`` (``book_name`` is a
    module global). The worker returns once the queue is empty. It also
    records its start time in the module global ``time_start``.

    Args:
        chapter_queue (queue.Queue): Queue of chapters still to download.
    """
    global time_start
    time_start = time.time()
    while True:
        # A non-blocking get avoids the race where another worker takes the
        # last item between an empty() check and a blocking get(), which
        # would leave this thread waiting forever.
        try:
            chapter_name, chapter_url, value = chapter_queue.get_nowait()
        except queue.Empty:
            break
        try:
            print("正在爬取章节：", chapter_name)
            try:
                content = get_chapter_content(chapter_url)
            except Exception as e:
                # Best effort: a failed download still produces a file that
                # holds just the chapter title.
                print(f"获取章节内容失败：{e}")
                content = []
            folder_path = f'{book_name}'
            # exist_ok avoids a FileExistsError when several workers create
            # the folder at the same time.
            os.makedirs(folder_path, exist_ok=True)
            with open(f'{book_name}/{value}.txt', 'w', encoding='utf-8') as f:
                # A link without direct text has chapter_name None.
                f.write('\n' + (chapter_name or '') + '\n')
                for data in content:
                    f.write(data + '\n')
                f.write('\n\n')
        finally:
            # Always mark the item done so Queue.join() cannot hang after
            # an error in this iteration.
            chapter_queue.task_done()
        # Pause between chapters before taking the next one.
        time.sleep(6)


# 合并下载的TXT文件
def merge_txt_files(folder_path, output_file):
    """
    Concatenate the numbered ``.txt`` files of a folder into one file.

    Files are merged in ascending order of the integer in their name
    (``1.txt``, ``2.txt``, ... ``10.txt``); files without the ``.txt``
    extension are ignored.

    Args:
        folder_path (str): Folder that contains the chapter files.
        output_file (str): Path of the merged file to write (UTF-8).

    Raises:
        ValueError: If a ``.txt`` file name is not an integer.
    """
    chapter_files = sorted(
        (entry for entry in os.listdir(folder_path) if entry.endswith('.txt')),
        key=lambda entry: int(entry[:-4]),  # strip ".txt", sort numerically
    )

    with open(output_file, 'w', encoding='utf-8') as merged:
        for entry in chapter_files:
            part_path = os.path.join(folder_path, entry)
            with open(part_path, 'r', encoding='utf-8') as part:
                merged.write(part.read())


def search_continue():
    """
    Ask whether to download another novel and start over if so.

    Only the exact answer ``y`` runs ``main()`` again; any other input
    simply returns.
    """
    answer = input('请输入y/n选择是否继续下载小说：')
    if answer != 'y':
        return
    main()

def main():
    """
    Run one complete download: search, crawl, merge and clean up.

    Steps: let the user pick a novel, collect its chapter links, download
    the chapters with a user-chosen number of worker threads, merge the
    per-chapter files into ``<book_name>.txt``, delete the temporary
    folder, then ask whether to download another novel.
    """
    directory_url = search_novel()
    # Collect the chapter names and URLs from the novel's index page.
    visited_urls = set()
    chapter_urls = get_chapter_urls(directory_url, visited_urls, 0)
    # Queue every (chapter_name, chapter_url, value) tuple for the workers.
    chapter_queue = queue.Queue()
    for chapter in chapter_urls:
        chapter_queue.put(chapter)

    print('=' * 64)
    print('线程数建议在10-30之间，避免对目标服务器造成过大压力')
    # At least one worker is required: with zero threads nothing would
    # drain the queue and chapter_queue.join() would block forever.
    num_threads = max(1, int(input('输入线程数：')))
    # Timed locally instead of reading the global written by the workers.
    crawl_start = time.time()
    threads = []
    for _ in range(num_threads):
        thread = threading.Thread(target=process_chapter, args=(chapter_queue,))
        thread.daemon = False
        thread.start()
        threads.append(thread)
    # Wait until every queued chapter has been processed ...
    chapter_queue.join()
    # ... and until every worker thread has finished.
    for thread in threads:
        thread.join()
    print("所有章节爬取完成！")
    print('章节爬取花费时间：', time.time() - crawl_start)
    print('=' * 64)
    print('开始合并所有TXT文件')
    folder_path_1 = f'{book_name}/'
    output_file = f'{book_name}.txt'
    merge_txt_files(folder_path_1, output_file)
    print('合并所有TXT文件成功')
    print(f'{book_name}下载成功')
    # Remove the folder that held the per-chapter files.
    shutil.rmtree(book_name)
    print('=' * 64)
    search_continue()

# 主程序入口
if __name__ == "__main__":

    # Script entry point: run one interactive download session.
    main()

用python演奏《起风了》

2024-03-27 · uupython · 阅读(2688)

import ctypes
import threading
import time
# Handle to the Windows multimedia library (winmm), whose midiOutShortMsg
# function is used below; ctypes.windll is only available on Windows.
winmm = ctypes.windll.winmm
  
class Scale:
    """
    Note-number constants for the pitches A0 (21) through C8 (108).

    Names are ``<letter><octave>`` with a trailing ``s`` for a sharp, e.g.
    ``C4`` is 60 and ``A4s`` is 70. The values are used as the note byte of
    the MIDI messages sent by this script. ``Rest`` is 0.
    """
    Rest = 0

    # Octave 0 (partial)
    A0 = 21
    A0s = 22
    B0 = 23

    # Octave 1
    C1 = 24
    C1s = 25
    D1 = 26
    D1s = 27
    E1 = 28
    F1 = 29
    F1s = 30
    G1 = 31
    G1s = 32
    A1 = 33
    A1s = 34
    B1 = 35

    # Octave 2
    C2 = 36
    C2s = 37
    D2 = 38
    D2s = 39
    E2 = 40
    F2 = 41
    F2s = 42
    G2 = 43
    G2s = 44
    A2 = 45
    A2s = 46
    B2 = 47

    # Octave 3
    C3 = 48
    C3s = 49
    D3 = 50
    D3s = 51
    E3 = 52
    F3 = 53
    F3s = 54
    G3 = 55
    G3s = 56
    A3 = 57
    A3s = 58
    B3 = 59

    # Octave 4
    C4 = 60
    C4s = 61
    D4 = 62
    D4s = 63
    E4 = 64
    F4 = 65
    F4s = 66
    G4 = 67
    G4s = 68
    A4 = 69
    A4s = 70
    B4 = 71

    # Octave 5
    C5 = 72
    C5s = 73
    D5 = 74
    D5s = 75
    E5 = 76
    F5 = 77
    F5s = 78
    G5 = 79
    G5s = 80
    A5 = 81
    A5s = 82
    B5 = 83

    # Octave 6
    C6 = 84
    C6s = 85
    D6 = 86
    D6s = 87
    E6 = 88
    F6 = 89
    F6s = 90
    G6 = 91
    G6s = 92
    A6 = 93
    A6s = 94
    B6 = 95

    # Octave 7
    C7 = 96
    C7s = 97
    D7 = 98
    D7s = 99
    E7 = 100
    F7 = 101
    F7s = 102
    G7 = 103
    G7s = 104
    A7 = 105
    A7s = 106
    B7 = 107

    # Octave 8 (partial)
    C8 = 108
  
class Voice:
    """
    Scale-degree aliases for ``Scale`` notes plus tempo constants.

    The digits 1-7 name the natural notes C, D, E, F, G, A, B of one
    octave. The prefix selects the octave: ``X`` is octave 2, ``L`` octave
    3, ``M`` octave 4 and ``H`` octave 5.
    """
    # Octave 2
    X1, X2, X3, X4, X5, X6, X7 = (
        Scale.C2, Scale.D2, Scale.E2, Scale.F2, Scale.G2, Scale.A2, Scale.B2,
    )
    # Octave 3
    L1, L2, L3, L4, L5, L6, L7 = (
        Scale.C3, Scale.D3, Scale.E3, Scale.F3, Scale.G3, Scale.A3, Scale.B3,
    )
    # Octave 4
    M1, M2, M3, M4, M5, M6, M7 = (
        Scale.C4, Scale.D4, Scale.E4, Scale.F4, Scale.G4, Scale.A4, Scale.B4,
    )
    # Octave 5
    H1, H2, H3, H4, H5, H6, H7 = (
        Scale.C5, Scale.D5, Scale.E5, Scale.F5, Scale.G5, Scale.A5, Scale.B5,
    )

    # Speed presets (presumably milliseconds per beat — TODO confirm).
    LOW_SPEED, MIDDLE_SPEED, HIGH_SPEED = 500, 400, 300

    # Marker value 0xFF (presumably "hold / no new note" in the score data —
    # TODO confirm against the player loop).
    _ = 255
  
def playNote(hmo, channel, instrument, note,velocity, duration=0):
    """
    Select an instrument and play one note through ``midiOutShortMsg``.

    Each message is packed into one integer: status byte OR channel in the
    low byte, first data byte shifted left by 8, second by 16.

    Args:
        hmo: MIDI output handle passed to ``winmm.midiOutShortMsg``.
        channel (int): Channel number, OR-ed into the status byte.
        instrument (int): Program number for the Program Change (0xC0).
        note (int): Note number for Note On (0x90) and Note Off (0x80).
        velocity (int): Velocity sent with Note On and Note Off.
        duration (float): Seconds to hold the note. When falsy (default
            0), no Note Off is sent and the function returns at once.

    Returns:
        int: ``channel | instrument << 8 | note << 8 | (velocity << 16) + 0x90``.
    """
    # Program Change: choose the instrument for this channel.
    winmm.midiOutShortMsg(hmo, 0xC0 | channel | (instrument << 8))

    # The channel/note/velocity bits are shared by Note On and Note Off.
    key_bits = channel | (note << 8) | (velocity << 16)
    winmm.midiOutShortMsg(hmo, 0x90 | key_bits)

    if duration:
        # Hold the note for the requested time, then release it.
        time.sleep(duration)
        winmm.midiOutShortMsg(hmo, 0x80 | key_bits)

    # NOTE(review): instrument and note are OR-ed into the same byte here and
    # 0x90 is added to the shifted velocity; kept exactly as in the original
    # because no caller is visible to show what the value is meant to be.
    return channel | (instrument << 8) | (note << 8) | ((velocity << 16) + 0x90)
  
def go(handle,instrument,velocity):
    """Play the hard-coded score `wind` through playNote on channel 0.

    Parameters:
        handle: MIDI output handle forwarded to playNote.
        instrument: program number forwarded to playNote.
        velocity: note velocity forwarded to playNote.

    The score is a flat list mixing note values with control tokens that the
    loop below interprets:
        -1          end of score
        0           following notes last 0.172 s
        300         following notes last 0.35 s
        700         pause for 0.172 s without playing
        Voice._     pause for 0.25 s without playing
        other       played as a note for the current duration
    """
    # NOTE(review): values such as the leading 400 and the literal 2 have no dedicated
    # branch in the loop below, so they reach playNote as note numbers — confirm intent.
    wind=[400,0,Voice.L7,Voice.M1,Voice.M2,Voice.M3,300,Voice.L3,0,Voice.M5,Voice.M3,300,Voice.L2,Voice.L5,2,Voice._,0,Voice.L7,Voice.M1,Voice.M2,Voice.M3,300,Voice.L2,0,Voice.M5,Voice.M3,Voice.M2,Voice.M3,Voice.M1,Voice.M2,Voice.L7,Voice.M1,300,Voice.L5,0,Voice.L7,Voice.M1,Voice.M2,Voice.M3,300,Voice.L3,0,Voice.M5,Voice.M3,300,Voice.L2,Voice.L5,2,Voice._,0,Voice.L7,Voice.M1,Voice.M2,Voice.M3,300,Voice.L2,0,Voice.M5,Voice.M3,Voice.M2,Voice.M3,Voice.M1,Voice.M2,Voice.L7,Voice.M1,300,Voice.L5,
     0,Voice.L7,Voice.M1,Voice.M2,Voice.M3,300,Voice.L3,0,Voice.M5,Voice.M3,300,Voice.L2,Voice.L5,2,Voice._,0,Voice.L7,Voice.M1,Voice.M2,Voice.M3,300,Voice.L2,0,Voice.M5,Voice.M3,Voice.M2,Voice.M3,Voice.M1,Voice.M2,Voice.L7,Voice.M1,300,Voice.L5,0,Voice.L7,Voice.M1,Voice.M2,Voice.M3,300,Voice.L3,0,Voice.M5,Voice.M3,300,Voice.L2,Voice.L5,2,Voice._,
     0,Voice.M6,Voice.M3,Voice.M2,Voice.L6,Voice.M3,Voice.L6,Voice.M2,Voice.M3,Voice.L6,Voice._,Voice._,Voice._,
     Voice.M2,700,0,Voice.M1,300,Voice.M2,700,0,Voice.M1,300,Voice.M2,Voice.M3,Voice.M5,0,Voice.M3,700,300,Voice.M2,700,0,Voice.M1,300,Voice.M2,700,0,Voice.M1,Voice.M2,Voice.M3,Voice.M2,Voice.M1,300,Voice.L5,Voice._,
     Voice.M2,700,0,Voice.M1,300,Voice.M2,700,0,Voice.M1,300,Voice.M2,Voice.M3,Voice.M5,0,Voice.M3,700,300,Voice.M2,700,0,Voice.M3,300,Voice.M2,0,Voice.M1,700,300,Voice.M2,Voice._,Voice._,Voice._,
     Voice.M2,700,0,Voice.M1,300,Voice.M2,700,0,Voice.M1,300,Voice.M2,Voice.M3,Voice.M5,0,Voice.M3,700,300,Voice.M2,700,0,Voice.M3,300,Voice.M2,0,Voice.M1,700,300,Voice.L6,Voice._,
     0,Voice.M3,Voice.M2,Voice.M1,Voice.M2,300,Voice.M1,Voice._,0,Voice.M3,Voice.M2,Voice.M1,Voice.M2,300,Voice.M1,700,0,Voice.L5,Voice.M3,Voice.M2,Voice.M1,Voice.M2,300,Voice.M1,Voice._,Voice._,Voice._,
     Voice.M1,Voice.M2,Voice.M3,Voice.M1,Voice.M6,0,Voice.M5,Voice.M6,300,Voice._,700,0,Voice.M1,300,Voice.M7,0,Voice.M6,Voice.M7,300,Voice._,Voice._,Voice.M7,0,Voice.M6,Voice.M7,300,Voice._,Voice.M3,0,Voice.H1,Voice.H2,Voice.H1,Voice.M7,300,Voice.M6,Voice.M5,Voice.M6,0,Voice.M5,Voice.M6,Voice._,Voice.M5,Voice.M6,Voice.M5,300,Voice.M6,0,Voice.M5,Voice.M2,300,Voice._,0,Voice.M5,700,300,Voice.M3,Voice._,Voice._,Voice._,
     Voice.M1,Voice.M2,Voice.M3,Voice.M1,Voice.M6,0,Voice.M5,Voice.M6,300,Voice._,700,0,Voice.M1,300,Voice.M7,0,Voice.M6,Voice.M7,300,Voice._,Voice._,Voice.M7,0,Voice.M6,Voice.M7,300,Voice._,Voice.M3,0,Voice.H1,Voice.H2,Voice.H1,Voice.M7,300,Voice.M6,Voice.M5,Voice.M6,0,Voice.H3,Voice.H3,300,Voice._,Voice.M5,Voice.M6,0,Voice.H3,Voice.H3,300,Voice._,0,Voice.M5,700,300,Voice.M6,Voice._,Voice._,Voice._,Voice._,Voice._,
     Voice.H1,Voice.H2,Voice.H3,0,Voice.H6,Voice.H5,300,Voice._,0,Voice.H6,Voice.H5,300,Voice._,0,Voice.H6,Voice.H5,300,Voice._,0,Voice.H2,Voice.H3,300,Voice.H3,0,Voice.H6,Voice.H5,300,Voice._,0,Voice.H6,Voice.H5,300,Voice._,0,Voice.H6,Voice.H5,300,Voice._,0,Voice.H2,Voice.H3,300,Voice.H2,0,Voice.H1,Voice.M6,300,Voice._,0,Voice.H1,Voice.H1,300,Voice.H2,0,Voice.H1,300,Voice.M6,700,0,Voice._,300,Voice.H1,700,Voice.H3,Voice._,0,Voice.H3,Voice.H4,Voice.H3,Voice.H2,Voice.H3,300,Voice.H2,700,
     Voice.H1,Voice.H2,Voice.H3,0,Voice.H6,Voice.H5,Voice._,Voice.H6,Voice.H5,Voice._,Voice.H6,Voice.H5,300,Voice._,Voice.H3,Voice.H3,0,Voice.H6,Voice.H5,Voice._,Voice.H6,Voice.H5,Voice._,Voice.H6,Voice.H5,700,300,Voice.H3,700,Voice.H2,0,Voice.H1,Voice.M6,700,300,
     Voice.H3,700,Voice.H2,0,Voice.H1,300,Voice.M6,700,Voice.H1,Voice.H1,Voice._,Voice._,Voice._,Voice._,Voice._,
     0,Voice.M6,300,Voice.H3,700,Voice.H2,0,Voice.H1,Voice.M6,700,300,Voice.H3,Voice.H2,700,300,0,Voice.H1,Voice.M6,300,700,Voice.H1,Voice.H1,Voice._,Voice._,
     0,Voice.L7,Voice.M1,Voice.M2,Voice.M3,300,Voice.L3,0,Voice.M5,Voice.M3,300,Voice.L2,Voice.L5,2,Voice._,0,Voice.L7,Voice.M1,Voice.M2,Voice.M3,300,Voice.L2,0,Voice.M5,Voice.M3,Voice.M2,Voice.M3,Voice.M1,Voice.M2,Voice.L7,Voice.M1,300,Voice.L5,0,Voice.L7,Voice.M1,Voice.M2,Voice.M3,300,Voice.L3,0,Voice.M5,Voice.M3,300,Voice.L2,Voice.L5,2,Voice._,
     0,Voice.M6,Voice.M3,Voice.M2,Voice.L6,Voice.M3,Voice.L6,Voice.M2,Voice.M3,Voice.L6,Voice._,Voice._,Voice._,
     Voice.M2,700,0,Voice.M1,300,Voice.M2,700,0,Voice.M1,300,Voice.M2,Voice.M3,Voice.M5,0,Voice.M3,700,300,Voice.M2,700,0,Voice.M1,300,Voice.M2,700,0,Voice.M1,Voice.M2,Voice.M3,Voice.M2,Voice.M1,300,Voice.L5,Voice._,
     Voice.M2,700,0,Voice.M1,300,Voice.M2,700,0,Voice.M1,300,Voice.M2,Voice.M3,Voice.M5,0,Voice.M3,700,300,Voice.M2,700,0,Voice.M3,300,Voice.M2,0,Voice.M1,700,300,Voice.M2,Voice._,Voice._,Voice._,
     Voice.M2,700,0,Voice.M1,300,Voice.M2,700,0,Voice.M1,300,Voice.M2,Voice.M3,Voice.M5,0,Voice.M3,700,300,Voice.M2,700,0,Voice.M3,300,Voice.M2,0,Voice.M1,700,300,Voice.L6,Voice._,
     0,Voice.M3,Voice.M2,Voice.M1,Voice.M2,300,Voice.M1,Voice._,0,Voice.M3,Voice.M2,Voice.M1,Voice.M2,300,Voice.M1,700,0,Voice.L5,Voice.M3,Voice.M2,Voice.M1,Voice.M2,300,Voice.M1,Voice._,Voice._,Voice._,
     Voice.M1,Voice.M2,Voice.M3,Voice.M1,Voice.M6,0,Voice.M5,Voice.M6,300,Voice._,700,0,Voice.M1,300,Voice.M7,0,Voice.M6,Voice.M7,300,Voice._,Voice._,Voice.M7,0,Voice.M6,Voice.M7,300,Voice._,Voice.M3,0,Voice.H1,Voice.H2,Voice.H1,Voice.M7,300,Voice.M6,Voice.M5,Voice.M6,0,Voice.M5,Voice.M6,Voice._,Voice.M5,Voice.M6,Voice.M5,300,Voice.M6,0,Voice.M5,Voice.M2,300,Voice._,0,Voice.M5,700,300,Voice.M3,Voice._,Voice._,Voice._,
     Voice.M1,Voice.M2,Voice.M3,Voice.M1,Voice.M6,0,Voice.M5,Voice.M6,300,Voice._,700,0,Voice.M1,300,Voice.M7,0,Voice.M6,Voice.M7,300,Voice._,Voice._,Voice.M7,0,Voice.M6,Voice.M7,300,Voice._,Voice.M3,0,Voice.H1,Voice.H2,Voice.H1,Voice.M7,300,Voice.M6,Voice.M5,Voice.M6,0,Voice.H3,Voice.H3,300,Voice._,Voice.M5,Voice.M6,0,Voice.H3,Voice.H3,300,Voice._,0,Voice.M5,700,300,Voice.M6,Voice._,Voice._,Voice._,Voice._,Voice._,
     Voice.H1,Voice.H2,Voice.H3,0,Voice.H6,Voice.H5,300,Voice._,0,Voice.H6,Voice.H5,300,Voice._,0,Voice.H6,Voice.H5,300,Voice._,0,Voice.H2,Voice.H3,300,Voice.H3,0,Voice.H6,Voice.H5,300,Voice._,0,Voice.H6,Voice.H5,300,Voice._,0,Voice.H6,Voice.H5,300,Voice._,0,Voice.H2,Voice.H3,300,Voice.H2,0,Voice.H1,Voice.M6,300,Voice._,0,Voice.H1,Voice.H1,300,Voice.H2,0,Voice.H1,300,Voice.M6,700,0,Voice._,300,Voice.H1,700,Voice.H3,Voice._,0,Voice.H3,Voice.H4,Voice.H3,Voice.H2,Voice.H3,300,Voice.H2,700,
     Voice.H1,Voice.H2,Voice.H3,0,Voice.H6,Voice.H5,Voice._,Voice.H6,Voice.H5,Voice._,Voice.H6,Voice.H5,300,Voice._,Voice.H3,Voice.H3,0,Voice.H6,Voice.H5,Voice._,Voice.H6,Voice.H5,Voice._,Voice.H6,Voice.H5,700,300,Voice.H3,700,Voice.H2,0,Voice.H1,Voice.M6,700,300,
     Voice.H3,700,Voice.H2,0,Voice.H1,300,Voice.M6,700,Voice.H1,Voice.H1,Voice._,Voice._,Voice._,Voice._,Voice._,
     Voice.H1,Voice.H2,Voice.H3,0,Voice.H6,Voice.H5,300,Voice._,0,Voice.H6,Voice.H5,300,Voice._,0,Voice.H6,Voice.H5,300,Voice._,0,Voice.H2,Voice.H3,300,Voice.H3,0,Voice.H6,Voice.H5,300,Voice._,0,Voice.H6,Voice.H5,300,Voice._,0,Voice.H6,Voice.H5,300,Voice._,0,Voice.H2,Voice.H3,300,Voice.H2,0,Voice.H1,Voice.M6,300,Voice._,0,Voice.H1,Voice.H1,300,Voice.H2,0,Voice.H1,300,Voice.M6,700,0,Voice._,300,Voice.H1,700,Voice.H3,Voice._,0,Voice.H3,Voice.H4,Voice.H3,Voice.H2,Voice.H3,300,Voice.H2,700,
     Voice.H2,Voice.H3,0,Voice.H6,Voice.H5,Voice._,Voice.H6,Voice.H5,Voice._,Voice.H6,Voice.H5,300,Voice._,Voice.H3,Voice.H3,0,Voice.H6,Voice.H5,Voice._,Voice.H6,Voice.H5,Voice._,Voice.H6,Voice.H5,700,300,Voice.H3,700,Voice.H2,0,Voice.H1,Voice.M6,700,300,
     Voice.H3,700,Voice.H2,0,Voice.H1,300,Voice.M6,700,Voice.H1,Voice.H1,Voice._,Voice._,Voice._,Voice._,Voice._,
     Voice.H1,Voice.H2,Voice.H3,0,Voice.H6,Voice.H5,300,Voice._,0,Voice.H6,Voice.H5,300,Voice._,0,Voice.H6,Voice.H5,300,Voice._,0,Voice.H2,Voice.H3,300,Voice.H3,0,Voice.H6,Voice.H5,300,Voice._,0,Voice.H6,Voice.H5,300,Voice._,0,Voice.H6,Voice.H5,300,Voice._,0,Voice.H2,Voice.H3,300,Voice.H2,0,Voice.H1,Voice.M6,300,Voice._,0,Voice.H1,Voice.H1,300,Voice.H2,0,Voice.H1,300,Voice.M6,700,0,Voice._,300,Voice.H1,700,Voice.H3,Voice._,0,Voice.H3,Voice.H4,Voice.H3,Voice.H2,Voice.H3,300,Voice.H2,700,
     Voice.H1,Voice.H2,Voice.H3,0,Voice.H6,Voice.H5,Voice._,Voice.H6,Voice.H5,Voice._,Voice.H6,Voice.H5,300,Voice._,Voice.H3,Voice.H3,0,Voice.H6,Voice.H5,Voice._,Voice.H6,Voice.H5,Voice._,Voice.H6,Voice.H5,700,300,Voice.H3,700,Voice.H2,0,Voice.H1,Voice.M6,700,300,
     Voice.H3,700,Voice.H2,0,Voice.H1,300,Voice.M6,700,Voice.H1,Voice.H1,Voice._,Voice._,Voice._,Voice._,Voice._,
     0,Voice.M6,300,Voice.H3,700,Voice.H2,0,Voice.H1,Voice.M6,700,300,Voice.H3,Voice.H2,700,300,0,Voice.H1,Voice.M6,300,700,Voice.H1,Voice.H1,Voice._,Voice._,Voice._,Voice._,Voice._,Voice._,Voice._,-1
    ]
  
  
    # Duration (seconds) given to playNote for each note until a 0 / 300 token changes it.
    sleep = 0.35
  
    for i in wind:
        # -1 terminates the score.
        if i==-1:
            break
        # 0 switches to the short note duration.
        if i == 0:
            sleep = 0.172
            continue
        # 700 is a short silent pause.
        if i == 700:
            time.sleep(0.172)
            continue
        # 300 switches back to the long note duration.
        if i == 300:
            sleep = 0.35
            continue
        # Voice._ is a rest.
        if i == Voice._:
            time.sleep(0.25)
            continue
        # Anything else is played as a note; playNote blocks for `sleep` seconds.
        back=playNote(handle, 0, instrument, i, velocity,sleep)
        print(back,end="\n")
        #time.sleep(sleep)
  
  
# Device identifier passed to midiOutOpen (the MIDI_MAPPER constant).
MIDI_MAPPER = 0xFFFFFFFF

# Receives the MIDI output handle filled in by midiOutOpen.
hMidiOut = ctypes.c_void_p()

# Open the MIDI output device; a return value of 0 means success.
result = winmm.midiOutOpen(ctypes.byref(hMidiOut), MIDI_MAPPER, 0, 0, 0)
if result == 0:
    print("midiOutOpen succeeded")
    #go(hMidiOut,107)
    # Play the same score on two threads with different instruments and velocities.
    thread1 = threading.Thread(target=go,args=(hMidiOut,78,90))
    thread2 = threading.Thread(target=go,args=(hMidiOut,0,127))
    thread1.start()
    thread2.start()
    try:
        # Joining and closing only make sense when the device was opened: the threads
        # do not exist otherwise and there is no handle to close.
        thread1.join()
        thread2.join()
    finally:
        # Close the MIDI output once both players are done.
        winmm.midiOutClose(hMidiOut)
else:
    print("midiOutOpen failed")

取蓝奏云直链

2024-03-27uupython阅读(2475)

import requests
import re
import json
def re_domain(url):
    """Return the host part that follows http:// or https:// in *url*, or None if absent."""
    found = re.search(r"https?://([^/]+)", url)
    return found.group(1) if found else None
# Share link whose direct download URL is resolved below.
url = "https://wwt.lanzouu.com/icuiF1o31f8d"
domain = re_domain(url)
headers={
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36 Edg/121.0.0.0"
    }
# Step 1: fetch the share page and collect the src of every matching <iframe class="ifr2">.
response = requests.get(url, headers=headers)
iframe_pattern = re.compile(r'<iframe\s+class="ifr2"\s+name="\d+"\s+src="([^"]+)"\s+frameborder="0"\s+scrolling="no"></iframe>')
matches = iframe_pattern.findall(response.text)
# Step 2: load the second matched iframe (index 1) from the same domain.
# NOTE(review): raises IndexError when fewer than two iframes match.
response2 = requests.get(f"https://{domain}{matches[1]}", headers=headers)
# Step 3: pull the 'sign' value and the request path out of the iframe page's script.
# NOTE(review): re.search returns None when a pattern is absent, so .group(1) would raise.
pattern = r"'sign'\s*:\s*'([^']+)'"
sign = re.search(pattern, response2.text).group(1)
pattern2 = r"url\s*:\s*'([^']+)'"
url2 = re.search(pattern2, response2.text).group(1)
# Form fields posted to the path extracted above.
data = {
    'action': 'downprocess',
    'signs': '?ctdf',
    'sign': sign,
    'websign': '',
    'websignkey': 'bL27',
    'ves': 1
}
# The iframe path is sent as the Referer of the POST.
headers = {
    "Referer": matches[1],
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36 Edg/121.0.0.0"
}
# Step 4: post the form; the JSON reply supplies 'dom' and 'url', joined into the file URL.
response3 = requests.post(f"https://{domain}{url2}", headers=headers, data=data)
data = json.loads(response3.text)
full_url = data['dom'] + "/file/" + data['url']
# Browser-like headers for the final request.
headers = {
"accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
"accept-language": "zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6",
"sec-ch-ua": "\"Chromium\";v=\"122\", \"Not(A:Brand\";v=\"24\", \"Microsoft Edge\";v=\"122\"",
"sec-ch-ua-mobile": "?0",
"sec-ch-ua-platform": "\"Windows\"",
"sec-fetch-dest": "document",
"sec-fetch-mode": "navigate",
"sec-fetch-site": "none",
"sec-fetch-user": "?1",
"upgrade-insecure-requests": "1",
"cookie": "down_ip=1"
}
# Step 5: request the file URL without following redirects and print the Location header,
# which is the direct link.
response4 = requests.get(full_url, headers=headers, allow_redirects=False)
print(response4.headers['Location'])

利用selenium爬取淘宝主页商品图、商品信息和商品价格

2023-11-16uupython阅读(2936)

import re
import os
import requests
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service

# Page to scrape.
url = 'https://www.taobao.com/'
# Request headers used by get_content when downloading product images.
# NOTE(review): the backslash continuations are inside the string literal, so the leading
# spaces of the continued lines become part of the User-Agent value.
header={'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) \
        AppleWebKit/537.36 (KHTML, like Gecko) \
        Chrome/35.0.1916.114 Safari/537.36'}
# NOTE(review): `options` is given a debugger address but is never passed to
# webdriver.Chrome below — confirm whether attaching to a running browser was intended.
options = Options()
options.add_experimental_option("debuggerAddress", "127.0.0.1:12306")
# Chromedriver service using a local executable path.
service_ = Service(executable_path=r'I:\chromedriver_win64\chromedriver.exe',port=0)
driver = webdriver.Chrome(service=service_)
# Open the page and maximize the browser window.
driver.get(url)
driver.maximize_window()

def get_content(i):
    """Save the image, title and price of the i-th product card on the open page.

    The image is downloaded to ``taobao_img/<i>.jpg``; the title (and the price,
    when the price element has the expected markup) is appended to ``taobao.csv``
    as one line.

    Parameters:
        i (int): 1-based index of the product card inside the XPath container.

    Raises:
        Whatever ``driver.find_element`` or ``requests.get`` raise when the card
        does not exist or the image cannot be fetched.
    """
    # All three lookups share the same card anchor.
    card_xpath = '/html/body/div[6]/div/div/div/div/div[%d]/a' % i

    img = driver.find_element(by='xpath', value=card_xpath + '/div[1]/img').get_attribute('src')
    img_response = requests.get(img, headers=header)
    img_name = "%d.jpg" % i
    # Create the image folder only when missing. The original tested
    # `~os.path.exists(...)`, which is always truthy, so mkdir failed on the second call.
    os.makedirs('taobao_img', exist_ok=True)
    with open('taobao_img/' + img_name, mode='wb') as f:
        f.write(img_response.content)

    # The context manager closes the CSV even when a lookup below raises or the
    # price markup does not match.
    with open('taobao.csv', mode='a', encoding='utf-8') as g:
        title = driver.find_element(by='xpath', value=card_xpath + '/div[2]/div').get_attribute('innerHTML')
        # Drop the badge image that is embedded in some titles.
        title = title.replace(
            '<img src="//img.alicdn.com/imgextra/i1/O1CN01rHZjwm1kc1MDCvBIO_!!6000000004703-2-tps-38-20.png">', '')
        g.write(title)

        price = driver.find_element(by='xpath', value=card_xpath + '/div[3]').get_attribute('innerHTML')
        price = price.strip()

        price_prefix = '<span class="price-value"><em>¥</em>'
        if price.startswith(price_prefix):
            # Remove the exact prefix; str.strip(chars) would strip a character set instead.
            price = price[len(price_prefix):].replace("</span>", "")
            g.write(',' + price + '\n')
        else:
            # Still terminate the row so the next title starts on its own line.
            g.write('\n')

print("----程序至少运行60秒，请耐心等待------")

# Scroll down step by step so the lazily loaded products are rendered.
for i in range(2000):
    driver.execute_script('scrollTo(0,%d)' % (i * 10))

# Truncate the CSV before appending rows; the context manager closes the handle
# that the original left open.
with open('taobao.csv', mode='w', encoding='utf-8'):
    pass
for i in range(1, 300):  # 300 is the upper bound; do not raise it
    get_content(i)

print('运行结束')

[GUI]汇总多个 Excel 文件中的数据到一个新的 Excel 文件中

2023-09-02uupython阅读(3045)

这段代码是一个使用 PyQt5（一个 Python 的 GUI 库）编写的桌面应用程序，用于汇总多个 Excel 文件中的数据到一个新的 Excel 文件中。让我来为你解析一下代码的主要部分：

首先，导入了必要的模块和库：

   import sys
   import os
   from PyQt5 import QtWidgets
   from PyQt5.QtGui import QIcon
   import xlrd
   import datetime
   import time
   from openpyxl import Workbook
   from openpyxl.styles import Border, Side, PatternFill, Font, GradientFill, Alignment

创建了一个继承自 QtWidgets.QMainWindow 的类 MainGUI，用于构建 GUI 界面。
在 MainGUI 类中，初始化了应用程序窗口、设置了窗口标题和初始大小，并创建了主要的窗口部件。
主要窗口部件包括两个文本输入框 self.input 和 self.output，两个按钮 self.input_btn 和 self.output_btn 用于选择输入和输出文件夹，一个列表部件 self.show_result 用于显示操作结果，以及一个按钮 self.run 用于执行数据汇总操作。
绑定了按钮的点击事件，分别调用了 Choice_dir_input、Choice_dir_output 和 Summary_data 函数。
Choice_dir_input 和 Choice_dir_output 函数通过调用 QtWidgets.QFileDialog.getExistingDirectory 弹出文件夹选择对话框，让用户选择输入和输出文件夹，并将选择的文件夹路径显示在文本输入框中。
Get_data 函数用于从单个 Excel 文件中提取数据，使用 xlrd 库来读取 Excel 文件。
Get_file_path 函数用于获取指定文件夹下所有的 .xls 文件。
Get_current_time 函数用于获取当前的日期和时间。
Summary_data 函数是核心功能，用于汇总多个 Excel 文件的数据到一个新的 Excel 文件中。它会调用前面的函数来获取数据，然后使用 openpyxl 库创建一个新的 Excel 工作簿，将数据写入工作表，并设置单元格样式。
最后，在 main 函数中创建了应用程序对象，设置应用程序图标，创建 MainGUI 窗口对象，并启动应用程序的事件循环。

这段代码是一个简单的桌面应用程序，用于处理 Excel 文件数据的汇总，通过 GUI 提供了文件选择和操作结果显示的功能。

import sys
import os
from PyQt5 import QtWidgets
from PyQt5.QtGui import QIcon
import xlrd
import datetime
import time
from openpyxl import Workbook
from openpyxl.styles import Border, Side, PatternFill, Font, GradientFill, Alignment
 
class MainGUI(QtWidgets.QMainWindow):
    """Window that merges the .xls reports of one folder into a single .xlsx summary.

    The user picks an input folder and an output folder and presses the run
    button; every ``.xls`` file in the input folder is read with xlrd and its rows
    are appended to one openpyxl worksheet that is saved in the output folder.
    """

    def __init__(self):
        """Build the widgets, lay them out and wire the button signals."""
        super().__init__()
        self.setWindowTitle("领料明细汇总")
        self.resize(800, 400)
        self.main_widget = QtWidgets.QWidget()
        self.main_widget_layout = QtWidgets.QGridLayout()
        self.main_widget.setLayout(self.main_widget_layout)

        self.input = QtWidgets.QLineEdit()
        self.input_btn = QtWidgets.QPushButton("选择输入文件夹")
        self.output = QtWidgets.QLineEdit()
        self.output_btn = QtWidgets.QPushButton("选择输出文件夹")
        self.show_result = QtWidgets.QListWidget()
        self.run = QtWidgets.QPushButton("执行汇总")

        self.main_widget_layout.addWidget(self.input, 0, 0, 1, 2)
        self.main_widget_layout.addWidget(self.input_btn, 0, 2, 1, 1)
        self.main_widget_layout.addWidget(self.output, 1, 0, 1, 2)
        self.main_widget_layout.addWidget(self.output_btn, 1, 2, 1, 1)
        self.main_widget_layout.addWidget(self.run, 2, 2, 1, 1)
        self.main_widget_layout.addWidget(self.show_result, 3, 0, 3, 3)

        self.setCentralWidget(self.main_widget)

        # Each button triggers the matching handler below.
        self.input_btn.clicked.connect(self.Choice_dir_input)
        self.output_btn.clicked.connect(self.Choice_dir_output)
        self.run.clicked.connect(self.Summary_data)

    def Choice_dir_input(self):
        """Ask for the input folder and show the chosen path in the input field."""
        dir_path = QtWidgets.QFileDialog.getExistingDirectory(self, "请选择文件夹路径", "D:\\")
        self.input.setText(dir_path)

    def Choice_dir_output(self):
        """Ask for the output folder and show the chosen path in the output field."""
        dir_path = QtWidgets.QFileDialog.getExistingDirectory(self, "请选择文件夹路径", "D:\\")
        self.output.setText(dir_path)

    def Get_data(self, file):
        """Read the detail rows of the first sheet of one Excel file.

        Rows 7 .. nrows-3 (0-based) are read. Each row becomes a list of 15
        values plus, appended last, the number of rows kept for the same date.

        Parameters:
            file: path of the workbook, passed to ``xlrd.open_workbook``.

        Returns:
            dict mapping each ``datetime.date`` to the list of row lists for it.
        """
        wb = xlrd.open_workbook(file)
        ws = wb.sheets()[0]
        data = {}
        for row in range(7, ws.nrows-2):
            # Header cells; read inside the loop so sheets without detail rows
            # are never asked for them.
            card_id = ws.cell(2, 16).value
            car = ws.cell(3, 16).value
            dt = ws.cell(row, 0).value
            # The first column holds either an Excel serial date or a formatted string.
            if isinstance(dt, float):
                date_time = xlrd.xldate.xldate_as_datetime(dt, 0)
            else:
                date_time = datetime.datetime.strptime(dt, '%Y-%m-%d %H:%M:%S')
            business = ws.cell(row, 2).value
            model = ws.cell(row, 3).value
            qty = ws.cell(row, 4).value
            unit_price = ws.cell(row, 6).value
            price = ws.cell(row, 8).value
            reward = ws.cell(row, 9).value
            discount = ws.cell(row, 11).value
            balance = ws.cell(row, 13).value
            location = str(ws.cell(row, 15).value).strip()
            operator = ws.cell(row, 17).value
            # Named so they do not shadow the imported `time` / `datetime` modules.
            day = date_time.date()
            clock = date_time.time()
            info_list = [card_id, car, date_time, business, model, qty, unit_price, price,
                         reward, discount, balance, location, operator, day, clock]
            data.setdefault(day, [])
            # Rows whose business type is "备注" are skipped.
            if business != "备注":
                data[day].append(info_list)
        # Append the per-day row count to every row of that day.
        for rows in data.values():
            count = len(rows)
            for info in rows:
                info.append(count)
        return data

    def Get_file_path(self, path):
        """Return the paths of all entries in *path* whose name ends with ``.xls``."""
        # endswith(".xls") filters out unrelated files in the folder.
        return [os.path.join(path, name) for name in os.listdir(path) if name.endswith(".xls")]

    def Get_current_time(self):
        """Return the current local time formatted as ``YYYY-MM-DD HH.MM.SS``."""
        return time.strftime('%Y-%m-%d %H.%M.%S', time.localtime(time.time()))

    def Summary_data(self, files):
        """Merge every .xls file of the input folder into one styled .xlsx file.

        Parameters:
            files: ignored; the file list is always rebuilt from the input field.
                (The method is connected to the run button's ``clicked`` signal.)
        """
        input_dir = self.input.text()
        output_dir = self.output.text()
        # Running before both folders are chosen used to fail inside os.listdir / wb.save.
        if not os.path.isdir(input_dir) or not os.path.isdir(output_dir):
            self.show_result.addItem(QtWidgets.QListWidgetItem("请先选择有效的输入和输出文件夹."))
            return

        thin = Side(border_style="thin", color="000000")  # border weight and colour
        title = ['部门', '部门编号', '时间', '业务类型', '品种', '数量', '单价', '金额', '额外值',
                 '调整', '剩余', '库位', '操作员', '领取日期', '领取时间', '领取次数']

        wb = Workbook()
        ws = wb.active
        # Row 1: merged, centred sheet title; row 2: column headers.
        ws.merge_cells("A1:P1")
        ws.cell(1, 1).value = "领料明细汇总表"
        ws.cell(1, 1).font = Font(name=u'黑体', bold=True, size=18)
        ws.row_dimensions[1].height = 22.2
        ws.cell(1, 1).alignment = Alignment(horizontal="center", vertical="center")
        ws.append(title)

        # Append the rows of every workbook found in the input folder.
        files = self.Get_file_path(input_dir)
        for file in files:
            data = self.Get_data(file)
            for rows in data.values():
                for info in rows:
                    ws.append(info)
            self.show_result.addItem(QtWidgets.QListWidgetItem(f"{file} 的内容已加入总表."))

        # Font size, alignment and borders for the header and data rows.
        # The style objects are identical for every cell, so build them once.
        cell_font = Font(size=9)
        cell_border = Border(top=thin, left=thin, right=thin, bottom=thin)
        cell_alignment = Alignment(horizontal="left", vertical="center")
        for row_number in range(2, ws.max_row+1):
            for col_number in range(1, 17):
                c = ws.cell(row=row_number, column=col_number)
                c.font = cell_font
                c.border = cell_border
                c.alignment = cell_alignment

        col_name = list("ABCDEFGHIJKLMNOP")
        col_width = [8, 8, 16, 8, 16, 8, 8, 9.8, 8, 8, 8, 11, 8.3, 9, 8, 8]
        for letter, width in zip(col_name, col_width):
            ws.column_dimensions[letter].width = width

        # Columns I-K and N-O are grouped and hidden.
        ws.column_dimensions.group('I', 'K', hidden=True)
        ws.column_dimensions.group('N', 'O', hidden=True)

        # Take the timestamp once so the saved file and the reported name always agree.
        report_name = f"领料明细汇总表{self.Get_current_time()}.xlsx"
        wb.save(os.path.join(output_dir, report_name))
        self.show_result.addItem(
            QtWidgets.QListWidgetItem(f"\n{report_name} 已生成，请去输出文件夹查看."))
 
             
def main():
    """Create the Qt application, show the main window and run the event loop."""
    application = QtWidgets.QApplication(sys.argv)
    # Icon shown in the top-left corner of the window.
    application.setWindowIcon(QIcon("PO.ico"))
    window = MainGUI()
    window.show()
    exit_code = application.exec_()
    sys.exit(exit_code)


if __name__ == '__main__':
    main()

生成指定格式的虚拟手机号码

2023-09-02uupython阅读(3640)

这段代码用于生成指定格式的虚拟手机号码，并将这些手机号码保存到一个文本文件中。下面是代码的主要功能和逻辑：

generate_phone_number(province, prefix, suffix) 函数用于生成一个虚拟手机号码，其中 province 表示省份代码，prefix 表示前三位号段，suffix 表示后两位号码。生成的号码格式为 province-prefix-6位随机数字-suffix。
generate_unique_phone_numbers(num_numbers, province, prefix, suffix) 函数用于生成指定数量的唯一虚拟手机号码。它通过调用 generate_phone_number 函数生成号码，并使用集合 phone_numbers 来确保生成的号码唯一。直到集合中的号码数量达到指定的 num_numbers 时，循环生成号码。
save_phone_numbers_to_file(phone_numbers, filename) 函数用于将生成的手机号码保存到指定的文本文件中。它遍历 phone_numbers 列表，并将每个号码写入文件中，每行一个号码。
在代码中设置了自定义的省份代码 (custom_province)、前三位号段 (custom_prefix) 和后两位号码 (custom_suffix)。
调用 generate_unique_phone_numbers 生成了 100 个唯一的虚拟手机号码，并将它们保存到名为 “phone_numbers.txt” 的文本文件中。

最终，代码会在控制台上输出 “Generated Unique Phone Numbers saved to phone_numbers.txt”，指示已生成并保存虚拟手机号码。如果输入的号码格式不正确，则会输出 “Error: Invalid input”。

完整的源码如下：

import random

def generate_phone_number(province, prefix, suffix):
    """Return one number formatted ``province-prefix-<6 random digits>-suffix``.

    Returns None when *province*, *prefix* or *suffix* does not have length
    2, 3 and 2 respectively.
    """
    if len(province) != 2 or len(prefix) != 3 or len(suffix) != 2:
        return None

    middle_digits = ''.join(str(random.randint(0, 9)) for _ in range(6))
    return f"{province}-{prefix}-{middle_digits}-{suffix}"


def generate_unique_phone_numbers(num_numbers, province, prefix, suffix):
    """Return a list of *num_numbers* distinct numbers from generate_phone_number.

    Returns an empty list when the format arguments are invalid, so the caller
    can report the error instead of this function looping forever.

    Raises:
        ValueError: if *num_numbers* exceeds the 10**6 distinct values that the
            six random digits can produce (the loop could never finish).
    """
    if num_numbers > 10 ** 6:
        raise ValueError("num_numbers cannot exceed 1000000 unique combinations")

    phone_numbers = set()
    while len(phone_numbers) < num_numbers:
        phone_number = generate_phone_number(province, prefix, suffix)
        # None means the format arguments are invalid; retrying cannot succeed.
        if phone_number is None:
            return []
        phone_numbers.add(phone_number)
    return list(phone_numbers)

def save_phone_numbers_to_file(phone_numbers, filename):
    """Write every number in *phone_numbers* to *filename*, one per line (file is overwritten)."""
    with open(filename, 'w') as out:
        out.writelines(entry + '\n' for entry in phone_numbers)

# Format settings for the generated numbers.
custom_province = "HE"  # province code
custom_prefix = "151"   # first three digits
custom_suffix = "27"    # last two digits
output_file = "phone_numbers.txt"

# Generate 100 distinct numbers and store them, or report invalid settings.
generated_numbers = generate_unique_phone_numbers(100, custom_province, custom_prefix, custom_suffix)
if not generated_numbers:
    print("Error: Invalid input")
else:
    save_phone_numbers_to_file(generated_numbers, output_file)
    print("Generated Unique Phone Numbers saved to", output_file)

你可以将这段代码保存为一个 `.py` 文件，然后运行它。运行后，它将生成并保存指定数量的虚拟手机号码到名为 “phone_numbers.txt” 的文本文件中。确保设置了 `custom_province`、`custom_prefix` 和 `custom_suffix` 来自定义号码的格式。

使用python和opencv实现的颜色位置定位与具体色值提取

2023-09-02uupython阅读(4169)

这段代码用于分析图像中的颜色信息并根据颜色值的平均值来判断是接近黄色还是接近紫色。具体步骤如下：

读取图像：

使用OpenCV的cv2.imread方法读取指定路径下的图像。

定义需要识别的颜色及其阈值范围：

颜色以HSV颜色空间的阈值范围表示，保存在颜色字典color_dist中，其中包括需要识别的颜色（'pink'和'yellow'）及其对应的阈值范围，以及在后续颜色距离比较中使用的'purple'阈值。

对图像进行高斯模糊和颜色空间转换：

使用高斯模糊来平滑图像。
将图像从BGR颜色空间转换为HSV颜色空间，以便更好地处理颜色信息。

使用腐蚀操作：

使用cv2.erode方法对HSV图像进行腐蚀操作，以消除噪点，使边缘更加清晰。

查找颜色区域块：

针对每个需要识别的颜色，使用cv2.inRange方法找到在阈值范围内的颜色区域。
使用cv2.findContours方法查找颜色区域的轮廓。

获取最大颜色区域：

从所有颜色区域中选择面积最大的区域，将其用矩形框出。

随机生成20个点位：

随机生成20个点位坐标，用于计算平均颜色值。

计算20个点位的平均颜色值：

遍历这些点位，获取每个点位的颜色值，然后计算这些颜色值的平均值。

判断平均颜色值接近哪种颜色：

通过计算平均颜色值与黄色和紫色的颜色距离，判断平均颜色值接近哪种颜色。

输出结果：

根据颜色判断结果，输出是阴性还是阳性。
绘制矩形框和随机点位到图像上，以可视化显示结果。

显示图像：
- 使用OpenCV的窗口来显示处理后的图像。

代码中的cv2.namedWindow和cv2.imshow方法用于创建窗口并显示图像。最终，根据平均颜色值的判断结果，会在控制台输出相应信息，并在图像上绘制矩形框和随机点位以可视化显示结果。

import cv2
import numpy as np
 
# Read the image
# image = cv2.imread("./test.png")
image = cv2.imread("./t2.jpg")
ball_color = ['yellow', 'pink'] # colours to detect; several may be listed
# Lower/upper threshold per colour, applied to the HSV image with cv2.inRange below.
color_dist = {
    'pink': {'Lower': np.array([150, 43, 46]), 'Upper': np.array([175, 255, 255])},
    'yellow': {'Lower': np.array([21, 43, 46]), 'Upper': np.array([34, 255, 255])},
    'purple': {'Lower': np.array([125, 43, 46]), 'Upper': np.array([155, 255, 255])}
}
 
gs_frame = cv2.GaussianBlur(image, (5, 5), 0)  # Gaussian blur
hsv = cv2.cvtColor(gs_frame, cv2.COLOR_BGR2HSV)  # convert BGR to HSV
erode_hsv = cv2.erode(hsv, None, iterations=2)  # erosion (thins thick regions)
colorArr = []
for colorValue in ball_color: # collect the contours of every matching region
    inRange_hsv = cv2.inRange(erode_hsv, color_dist[colorValue]['Lower'], color_dist[colorValue]['Upper'])
    # The contour list is taken as the second-to-last element of findContours' result.
    c = cv2.findContours(inRange_hsv.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[-2]
    colorArr += c
# NOTE(review): max() raises ValueError when no contour was found for any colour.
mask = max(colorArr, key=cv2.contourArea) # contour with the largest area
rect = cv2.minAreaRect(mask)
box = cv2.boxPoints(rect)
box = np.intp(box)
 
# Centre of the bounding box, taken as the midpoint of corners 0 and 2
center_x = int((box[0][0] + box[2][0]) / 2)
center_y = int((box[0][1] + box[2][1]) / 2)
 
# Pick 20 random points around the centre
# NOTE(review): the points are not clamped to the image bounds — confirm the region is
# always far enough from the border.
points = []
for _ in range(20):
    x = np.random.randint(center_x - 8, center_x + 8)
    y = np.random.randint(center_y - 8, center_y + 8)
    points.append((x, y))
 
# Average the HSV values sampled at the 20 points
avg_color = np.zeros(3)
for point in points:
    color = hsv[point[1], point[0]]
    avg_color += color
avg_color = tuple(map(int, avg_color / len(points)))
 
# Print the average colour
print("平均颜色值：", avg_color)
 
# Decide whether the average is closer to yellow or to purple; the distance is
# measured to each colour's 'Lower' threshold.
yellow_dist = np.linalg.norm(avg_color - color_dist['yellow']['Lower'])
purple_dist = np.linalg.norm(avg_color - color_dist['purple']['Lower'])
 
# An API wrapper should return the result here
if yellow_dist < purple_dist:
    print("平均颜色值接近黄色----阴性")
 
else:
    print("平均颜色值接近紫色----阳性")
 
# Draw the bounding box and the sampled points
cv2.drawContours(image, [np.intp(box)], -1, (0, 255, 255), 2)
for point in points:
    cv2.circle(image, point, 3, (0, 0, 255), -1)
 
# Show the image
cv2.namedWindow("img", cv2.WINDOW_NORMAL | cv2.WINDOW_KEEPRATIO)
cv2.imshow('img', image)
cv2.waitKey(0)

爬取生态环境中心每日核辐射检测

2023-09-02uupython阅读(3313)

这段代码用于从网页上抓取数据，并将数据保存到文本文件中。主要步骤如下：

发送HTTP请求获取网页内容：

使用requests.get方法发送GET请求，获取指定网页的HTML内容。

使用XPath解析HTML内容：

使用lxml库的etree.HTML方法将HTML文本解析成HTML树。
使用XPath表达式定位网页中的数据。

遍历HTML元素获取数据：

使用XPath表达式选中需要抓取的数据元素。
遍历每个元素，提取城市、辐射值和检测时间等数据。
将提取的数据构建成字典，并将字典添加到名为datas的列表中。

写入数据到文本文件：

打开一个文本文件以写入模式，文件名格式为data_日期.txt。
遍历datas列表，将每个数据字典的内容格式化为字符串，并写入文本文件。
使用datetime.datetime.now().strftime("%Y%m%d")获取当前日期作为文件名的一部分。

总体来说，这段代码通过HTTP请求和XPath解析，从指定网页上抓取了数据，然后将数据保存到文本文件中，文件名包括日期信息。

import datetime
import time
 
import requests
from lxml import etree
 
# Fetch the index page and select every element with class "datali".
response = requests.get('https://data.rmtc.org.cn/gis/listtype0M.html')
html_parse = etree.HTML(response.text)
lis = html_parse.xpath('//*[@class="datali"]')
datas = []
for li in lis:
    # Link and display name of the entry; the part before "(" is stored as the province.
    hr_url = li.xpath('.//div[@class="divname"]/a/@href')[0]
    hr_name = (li.xpath('.//div[@class="divname"]/a/text()')[0].replace('\r', '')
               .replace('\n', '')
               .replace('\t', '').strip())
    city = hr_name.split('(')[0]
    # Short pause between requests.
    time.sleep(0.2)
    response = requests.get(f'https://data.rmtc.org.cn/gis/{hr_url}')
    html_children_parse = etree.HTML(response.text)
    children_lis = html_children_parse.xpath('//*[@class="datali"]')
    for cl in children_lis:
        # Name, value and check time of each entry on the linked page.
        hr_children_name = (cl.xpath('.//div[@class="divname"]/text()')[0].replace('\r', '')
                            .replace('\n', '')
                            .replace('\t', '').strip())
        hr_children_val = cl.xpath('.//div[@class="divval"]/span[@class="label"]/text()')[0]
        hr_children_time = cl.xpath('.//div[@class="divval"]/span[@class="showtime"]/text()')[0]
        datas.append({'province': city.strip(), 'city': hr_children_name, 'val': hr_children_val, 'check_time': hr_children_time})
    time.sleep(0.2)

# newline='' disables newline translation, so the explicit '\r\n' is written as-is on
# every platform (text mode on Windows would otherwise turn it into '\r\r\n').
with open(f'data_{datetime.datetime.now().strftime("%Y%m%d")}.txt', 'wt', encoding='UTF-8', newline='') as f:
    for d in datas:
        f.write(f'城市：{d["province"]}-{d["city"]}，辐射值：{d["val"]}，检测时间：{d["check_time"]}\r\n')

青少年读本打包下载

2023-08-29uupython阅读(3291)

这段代码是一个用于批量下载青少年读本文件的脚本，它使用了多线程的方式进行文件下载。以下是代码的主要功能和流程：

导入所需模块：导入了requests、os和ThreadPoolExecutor模块。
定义青少年读本文件列表URL：url变量存储了获取青少年读本文件列表的URL。
定义下载目录：download_dir变量存储了下载文件的目录路径。
定义下载集合：download_lists变量用于存储下载任务，每个任务由文件的下载链接和保存路径组成。
定义download_list函数：该函数发送HTTP请求获取青少年读本文件列表的JSON数据，然后遍历每个文件项，提取文件的下载链接和保存路径，并将任务添加到download_lists集合中。
定义download函数：该函数接收文件的下载链接和保存路径作为参数，通过requests.get方法下载文件内容，然后保存到指定的输出路径。
主程序入口：在if __name__ == '__main__':中，首先调用download_list函数获取下载任务列表，然后使用ThreadPoolExecutor创建线程池，最大线程数为10。遍历download_lists中的每个任务，使用executor.submit方法提交下载任务给线程池。

这段代码实现了并发下载多个文件的功能，通过多线程的方式加速文件下载过程。需要注意的是，在使用多线程下载时，要注意设置适当的线程数，以避免对目标服务器造成过多的并发请求。同时，也要注意文件保存路径的创建，确保保存文件的目录存在。

import requests
import os
from concurrent.futures import ThreadPoolExecutor
 
# JSON listing of the e-books to download
url = 'https://s-file-1.ykt.cbern.com.cn/reading/api/zh-CN/14fba334-5e8b-4523-8b60-1f3abde6f60c/elearning_library/v1/libraries/42b4e538-7319-47cb-9d10-12fb58b78420/contents/actions/full/adapter/f848d8b521af3a6c13474ae97d117e7f428208b77884aa2dd46599d81a4ae1a5/files/0.json'
# Root folder for the downloaded files
download_dir = './ebooks'
# Download tasks as (download_url, output_path) tuples, filled by download_list()
download_lists = []
 
 
def download_list():
    """Fill ``download_lists`` with one (download_url, output_path) tuple per listed book.

    For every item of the JSON listing at ``url`` the per-book JSON is fetched to
    obtain its 'ebook_third_file' link; the output path is
    ``<download_dir>/<first tag title>/<title>.pdf`` with spaces and curly quotes
    removed and both colon variants replaced by '·'.
    """
    # One-pass equivalent of the chained character replacements.
    cleanup = str.maketrans({' ': None, '：': '·', ':': '·', '“': None, '”': None})
    for entry in requests.get(url).json():
        # Address of the per-book metadata file.
        meta_url = f"https://s-file-1.ykt.cbern.com.cn/reading/api_static/smart_ebooks/{entry['unit_id']}.json"
        # Real download link of the book.
        pdf_url = requests.get(meta_url).json()['ebook_third_file']
        # Full path the file will be saved to.
        raw_path = f"{download_dir}/{entry['tags'][0]['title']}/{entry['title']}.pdf"
        download_lists.append((pdf_url, raw_path.translate(cleanup)))
 
 
def download(download_url, output):
    """Download *download_url* and save the response body to *output*.

    Parameters:
        download_url: URL fetched with ``requests.get``.
        output: destination file path; its parent folder is created when missing.
    """
    file_download_dir = os.path.dirname(output)
    # exist_ok avoids FileExistsError when several worker threads create the same
    # folder at once; an empty dirname (file in the current folder) needs no folder.
    if file_download_dir:
        os.makedirs(file_download_dir, exist_ok=True)
    # Fetch the file.
    resp = requests.get(download_url)
    # Save it.
    with open(output, 'wb') as f:
        f.write(resp.content)
    print(f"下载完成：{output}")
 
 
if __name__ == '__main__':
    # Build the task list first, then download with up to 10 worker threads.
    download_list()
    with ThreadPoolExecutor(max_workers=10) as pool:
        for job in download_lists:
            pool.submit(download, job[0], job[1])

1
2
3
4
...
下一页
共 8 页

Python代码