ERROR:Service.service:Error parsing resume: "'ContactInformation'"
"detail": "Failed to parse the file: An error occurred in parsed_resume: \"'ContactInformation'\""
import json
import logging
import re  # `re` is not part of typing; it must be imported on its own
from typing import Any, Dict, List, Optional, Union

import pandas as pd
from pydantic import BaseModel
from transformers import AutoModel, AutoTokenizer
# Classic (pre-0.1) LangChain import paths, matching the LLMChain/HuggingFaceHub
# usage below; newer releases move some of these into langchain_community
from langchain.chains import LLMChain
from langchain.llms import HuggingFaceHub
from langchain.output_parsers import PydanticOutputParser
from langchain.prompts import FewShotPromptTemplate, PromptTemplate

logger = logging.getLogger(__name__)
class ContactInformation(BaseModel):
    Name: Optional[str] = None
    Email: Optional[str] = None
    Contact: Optional[str] = None
    Links: Optional[List[str]] = None

class Experience(BaseModel):
    title: Optional[str] = None
    company: Optional[str] = None
    duration: Optional[str] = None

class Education(BaseModel):
    course: Optional[str] = None
    branch: Optional[str] = None
    institute: Optional[str] = None

class Projects(BaseModel):
    name: Optional[str] = None
    description: Optional[str] = None
    link: Optional[str] = None

# Top-level schema; note the typed sub-models above are never referenced here,
# and Optional[Any] fields mean Pydantic performs no validation on the values
class OutputFormat(BaseModel):
    ContactInformation: Optional[Any] = None
    AboutMe: Optional[Any] = None
    Experiences: Optional[List[Any]] = None
    Educations: Optional[List[Any]] = None
    Skills: Optional[List[Any]] = None
    Certificates: Optional[List[Any]] = None
    Projects: Optional[List[Any]] = None
    Achievements: Optional[List[Any]] = None
    Volunteer: Optional[List[Any]] = None
# The methods below belong to the service class (the class statement is omitted
# in this excerpt); model_1, huggingface_api_key, Prompt_2 and
# remove_trailing_commas are module-level names defined elsewhere.
def __init__(self, model_name=model_1, fine_tune_model_path: str = None):
    # Initialize the LLM service with the specified model
    if fine_tune_model_path:
        # Load a fine-tuned model from a local directory
        # (note: this branch never sets self.llm_service, which parsed_resume relies on)
        self.tokenizer = AutoTokenizer.from_pretrained(fine_tune_model_path)
        self.model = AutoModel.from_pretrained(fine_tune_model_path)
    else:
        # Load the base model via the HuggingFace Hub inference API
        self.llm_service = HuggingFaceHub(
            repo_id="google/flan-t5-base",
            huggingfacehub_api_token=huggingface_api_key,
            model_kwargs={
                "temperature": 0.5,
                "max_new_tokens": 200,
            },  # model parameters for consistent output
        )
def parsed_resume(self, resume_txt: str):
    # Few-shot examples: each row pairs a resume text (prompt) with its JSON parse (completion)
    df = pd.read_csv(r"C:\Users\Sarthak\PycharmProjects\JobAxle\Service\data\for_model_resume_dataset.csv")
    print(df['prompt'][0])
    examples = [
        {'prompt': df['prompt'][0], 'completion': df['completion'][0]},
        {'prompt': df['prompt'][1], 'completion': df['completion'][1]},
    ]
    print('Examples:', examples[0])
    example_formatter_template = """
    {prompt}
    {completion}\n
    """
    example_prompt = PromptTemplate(
        input_variables=["prompt", "completion"],
        template=example_formatter_template,
    )
    parser = PydanticOutputParser(pydantic_object=OutputFormat)
    few_shot_prompt_template = FewShotPromptTemplate(
        examples=examples,
        example_prompt=example_prompt,
        suffix="""
    Parse the given resume text, ensuring the output is in JSON format:
    Resume:
    {resume}
    {format_instructions}
    Output as JSON below:
    completion:""",
        input_variables=["resume"],
        example_separator="\n",
        partial_variables={"format_instructions": parser.get_format_instructions()},
    )
    print("Few-Shot Prompt Template with Examples and JSON Instructions:\n", few_shot_prompt_template)
    # Alternative plain prompt (Prompt_2 is defined elsewhere; unused below)
    prompt_template = PromptTemplate(
        input_variables=['resume'],
        template=Prompt_2,
    )
    # Initialize the LLM chain around the few-shot template
    chain = LLMChain(
        llm=self.llm_service,
        prompt=few_shot_prompt_template,
        verbose=True,
    )
    print("Chain:", chain)
    print(resume_txt)
    try:
        # Invoke the chain and get a response (verbosity is already set on the chain)
        response = chain.invoke({'resume': resume_txt})
        print(response)
        logger.info('Model Response: %s', response)
        print("Type of Response:", type(response))
        response_json = self.process_response(response)
        parsed_json = self.structure_response(response_json)
        return OutputFormat(**parsed_json)  # return as an OutputFormat object
    except Exception as e:
        logger.error("Error parsing resume: %s", e)
        raise Exception(f"An error occurred in parsed_resume: {e}")
def process_response(self, response_text: Union[str, Dict]) -> Dict:
    """Process the LLM response into a JSON dict."""
    # LLMChain.invoke returns a dict; the generated text lives under its 'text' output key
    if isinstance(response_text, dict):
        response_text = response_text.get("text", "")
    # Strip whitespace only; json.dumps here would re-serialize the text instead of cleaning it
    response_json = response_text.strip()
    # Remove extraneous Markdown code-fence markers around the JSON
    if response_json.startswith("```json"):
        response_json = response_json[len("```json"):].strip()
    if response_json.endswith("```"):
        response_json = response_json[:-len("```")].strip()
    response_json = remove_trailing_commas(response_json)
    try:
        return json.loads(response_json)
    except json.JSONDecodeError as e:
        logger.error("JSON decoding error: %s. Response text: %s", e, response_json)
        raise ValueError("Failed to parse response as valid JSON")
def structure_response(self, parsed_json: Dict) -> Dict:
    """
    Structure the response JSON to match the OutputFormat schema,
    ensuring lists for 'Experiences' and 'Educations'.
    """
    # Ensure ContactInformation, Experiences, and Educations exist in the expected shape
    parsed_json["ContactInformation"] = parsed_json.get("ContactInformation", {})
    if isinstance(parsed_json.get("Experiences"), dict):
        parsed_json["Experiences"] = [parsed_json["Experiences"]]
    else:
        parsed_json["Experiences"] = parsed_json.get("Experiences", [])
    if isinstance(parsed_json.get("Educations"), dict):
        parsed_json["Educations"] = [parsed_json["Educations"]]
    else:
        parsed_json["Educations"] = parsed_json.get("Educations", [])
    parsed_json["Projects"] = parsed_json.get("Projects", [])
    return parsed_json
The df contains prompt/completion pairs: the resume text is the prompt and the JSON output is the completion (illustrated below).
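For reference, a made-up one-row illustration of that layout (the column names come from the code above; the row contents are invented):

import pandas as pd

# Invented example row: 'prompt' holds raw resume text,
# 'completion' holds the expected JSON parse of that resume
df = pd.DataFrame({
    'prompt': ['John Doe, john@example.com, Python developer with 3 years of experience'],
    'completion': ['{"ContactInformation": {"Name": "John Doe", "Email": "john@example.com"}}'],
})
print(df['prompt'][0])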
Or is there an alternative to running this task on a low-spec laptop? I need help here because I can't access large-parameter models. Anyone??
Possible cause 1:
LLMs don't always produce consistent output, and the key "ContactInformation" may simply not be present in what the model generates. So debug first and inspect the LLM's output directly.
Try this: parsed_json.get("ContactInformation", {})
If the key doesn't exist, it falls back to an empty dict instead of raising a KeyError.
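A minimal debugging sketch for both steps, assuming chain.invoke returns a dict whose generated text sits under the 'text' key (LLMChain's default output key); chain, resume_txt and parsed_json are the names from the question's code:

# Step 1: inspect the raw model output before any parsing
response = chain.invoke({'resume': resume_txt})
raw_text = response.get('text', '') if isinstance(response, dict) else str(response)
print('RAW LLM OUTPUT >>>', raw_text)  # check whether "ContactInformation" appears at all

# Step 2: access the key defensively; .get returns the fallback instead of raising KeyError
contact = parsed_json.get('ContactInformation', {})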
Possible cause 2:
The output the LLM generates may not even be JSON. In that case, calling the get method on it will fail.
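A small guard for that case, again only a sketch (raw_text as in the previous snippet, logger being a standard logging.getLogger instance):

import json

try:
    parsed_json = json.loads(raw_text)
except json.JSONDecodeError:
    logger.error('Model output is not valid JSON: %s', raw_text)
    parsed_json = {}

# Valid JSON can still be a bare string or list, which has no .get()
if not isinstance(parsed_json, dict):
    parsed_json = {}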