# License identifier for the artifact described by this metadata.
# NOTE(review): the key set in this file looks like Hugging Face Hub card
# metadata — confirm against the consuming tool's schema.
license: openrail
# Dataset identifiers associated with this card, one per entry.
# Entries are either `owner/name` or a bare name with no owner prefix.
# Long identifiers are deliberately kept on a single line as plain scalars:
# a folded block scalar (`>-`) requires its content to be indented deeper than
# the sequence entry, and an identifier cannot be wrapped without inserting
# a space into the value.
datasets:
- irds/codesearchnet
- giganticode/java-cmpx-v1
- nickrosh/Evol-Instruct-Code-80k-v1
- bigcode/starcoderdata
- bigcode/the-stack
- bigcode/the-stack-smol
- Cdaprod/AI-Developer-Prompts
- code_x_glue_ct_code_to_text
- codeparrot/github-code
- codeparrot/github-code-clean
- code_x_glue_cc_code_completion_line
- autoevaluate/autoeval-eval-jeffdshen__inverse_superglue_mixedp1-jeffdshen__inverse-63643c-1665558893
- bentrevett/multi30k
- edbeeching/decision_transformer_gym_replay
- psyche/common_crawl
- Birchlabs/openai-prm800k-solutions-only
- openchat/openchat_sharegpt4_dataset
- Open-Orca/OpenOrca
- cjvt/slownet
- para_crawl
- zeroshot/twitter-financial-news-sentiment
- laugustyniak/political-advertising-pl
- code_search_net
- sukaka/novelai-webui
- P1ayer-1/chatgpt-conversations-chatlogs.net
- daniel2588/sarcasm
- psmathur/orca_minis_uncensored_dataset
- player1537/Bloom-560m-trained-on-Wizard-Vicuna-Uncensored-trained-on-Based
- shahules786/prosocial-nsfw-reddit
- Thewillonline/reddit-sarcasm
- datasciencemmw/current-data
- Oniichat/bluemoon_roleplay_chat_data_300k_messages
- dell-research-harvard/AmericanStories
- b-mc2/sql-create-context
- rahulmallah/autotrain-data-emotion-detection
- theblackcat102/multiround-programming-convo
- Lsavints/software_knowledgebase
- RazinAleks/SO-Python_QA-Web_Development_class
- codeparrot/apps
- branles14/ultrachat-uncensored_full
- vlsp-2023-vllm/en-to-vi-formal-informal-tranlations
- fraug-library/english_contractions_extensions
- spencer/software_slacks
- Abirate/english_quotes
- Nexdata/American_English_Natural_Dialogue_Speech_Data
- Nexdata/Latin_American_Speaking_English_Speech_Data_by_Mobile_Phone
- Nexdata/American_English_Speech_Data_by_Mobile_Phone_Reading
- Nexdata/American_English_Speech_Synthesis_Corpus-Female
- rombodawg/LimitlessCodeTraining
- RikoteMaster/Emotion_Recognition_4_llama2
- Villian7/Emotions_Data
- alanland/llama2-self-cognition
- CognitiveScience/coscidata
- bibidentuhanoi/gideon_self_cognition
- gollark/consciousness
- juletxara/visual-spatial-reasoning
- lintang/numerical_reasoning_arithmetic
- reasoning-machines/gsm-hard
- open-source-metrics/reinforcement-learning-checkpoint-downloads
- igbo_english_machine_translation
- US-Artificial-Intelligence/algemap
- rombodawg/2XUNCENSORED_alpaca_840k_Evol_USER_ASSIS
- griffin/chain_of_density
- shirsh10mall/LLM_Instruct_Learning_Project_Preprocessed_Tokenized_Open_Orca_Dataset_Flan_T5
- Thaweewat/chain-of-thought-74k-th
- AlekseyKorshuk/chain-of-thoughts-chatml-deduplicated
- dair-ai/emotion
- hita/social-behavior-emotions
- Bingsu/Human_Action_Recognition
- anjandash/java-8m-methods-v1
- nadiamaqbool81/java_code_instructions_1.178k_alpaca
- DavidMOBrien/8000-java
- rombodawg/LimitlessCodeTraining_1k-Python-Javascript_GuanacoFormat
- angie-chen55/javascript-github-code
- kye/all-lucidrain-python-3
- Fraser/python-state-changes
- ammarnasr/the-stack-ruby-clean
- ammarnasr/the-stack-rust-clean
- seyyedaliayati/solidity-dataset
- jkhedri/psychology-dataset
- KonradSzafer/stackoverflow_linux
- vikp/textbook_quality_programming
- rombodawg/LosslessMegaCodeTrainingV3_MINI
- BelleGroup/multiturn_chat_0.8M
- smangrul/code-chat-assistant-v1
- goendalf666/sales-textbook_for_convincing_and_selling
- readerbench/ConversationalAgent-Ro
- beurkinger/autotrain-data-human-action-recognition
- jpwahle/autoencoder-paraphrase-dataset
- jpwahle/autoregressive-paraphrase-dataset
- teknium/GPT4-LLM-Cleaned
- Anthropic/model-written-evals
- openai_humaneval
- kye/all-google-ai-python-code
- kye/all-openai-github-code
- EleutherAI/lambada_openai
- CShorten/ML-ArXiv-Papers
- WaltonFuture/InstructionGPT-4
- open-llm-leaderboard/details_AIDC-ai-business__Marcoroni-70B
- seansullivan/INT-Business-Syllabus
- theoldmandthesea/17k_business_book
- SunRise228/business-doc
- gauravshrm211/VC-startup-evaluation-for-investment
- TuningAI/Startups_V1
- TuningAI/Startups_V2
- AdiOO7/llama-2-finance
- scillm/scientific_papers
- gokuls/wiki_book_corpus_complete_processed_bert_dataset
- the_pile_books3
- go_emotions
- yizhongw/self_instruct
- codeparrot/self-instruct-starcoder
- Amani27/massive_translation_dataset
- huggingface/transformers-metadata
- hf-internal-testing/transformers-metadata
# Language codes declared for this card.
# NOTE(review): values look like ISO 639-1 two-letter codes; the names in the
# inline comments follow that standard — confirm the consumer expects it.
# If `no` (Norwegian) is ever added, quote it: YAML 1.1 parsers read a bare
# `no` as boolean false.
language:
- en  # English
- it  # Italian
- fr  # French
- pt  # Portuguese
- la  # Latin
- ru  # Russian
- ro  # Romanian
- el  # Greek
- ja  # Japanese
- zh  # Chinese
- ga  # Irish
- cy  # Welsh
- gd  # Scottish Gaelic
- de  # German
# Evaluation metric identifiers declared for this card.
# NOTE(review): these names appear to correspond to metric IDs from the
# Hugging Face `evaluate` library — confirm each one resolves there.
metrics:
- accuracy
- bertscore
- bleu
- code_eval
- character
- brier_score
- cer
- chrf
- charcut_mt
- bleurt
# Free-form tags attached to this card.
# NOTE(review): presumably used for search/filtering by the consumer — confirm.
tags:
- code
- text-generation-inference
# Library the described artifact is meant to be loaded with.
library_name: transformers
# Task identifier for the described artifact.
# NOTE(review): presumably selects the default inference widget/pipeline on
# the hosting platform — confirm.
pipeline_tag: text-generation